code.delx.au - gnu-emacs/blob - src/syntax.c

   1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
   2    Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2011
   3                  Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20
  21 #include <config.h>
  22 #include <ctype.h>
  23 #include <setjmp.h>
  24 #include "lisp.h"
  25 #include "commands.h"
  26 #include "buffer.h"
  27 #include "character.h"
  28 #include "keymap.h"
  29 #include "regex.h"
  30
  31 /* Make syntax table lookup grant data in gl_state.  */
  32 #define SYNTAX_ENTRY_VIA_PROPERTY
  33
  34 #include "syntax.h"
  35 #include "intervals.h"
  36 #include "category.h"
  37
  38 /* Then there are seven single-bit flags that have the following meanings:
  39   1. This character is the first of a two-character comment-start sequence.
  40   2. This character is the second of a two-character comment-start sequence.
  41   3. This character is the first of a two-character comment-end sequence.
  42   4. This character is the second of a two-character comment-end sequence.
  43   5. This character is a prefix, for backward-prefix-chars.
  44   6. The char is part of a delimiter for comments of style "b".
  45   7. This character is part of a nestable comment sequence.
  46   8. The char is part of a delimiter for comments of style "c".
  47   Note that any two-character sequence whose first character has flag 1
  48   and whose second character has flag 2 will be interpreted as a comment start.
  49
  50   bit 6 and 8 are used to discriminate between different comment styles.
  51   Languages such as C++ allow two orthogonal syntax start/end pairs
  52   and bit 6 is used to determine whether a comment-end or Scommentend
  53   ends style a or b.  Comment markers can start style a, b, c, or bc.
  54   Style a is always the default.
  55   For 2-char comment markers, the style b flag is only looked up on the second
  56   char of the comment marker and on the first char of the comment ender.
  57   For style c (like to for the nested flag), the flag can be placed on any
  58   one of the chars.
  59   */
  60
  61 /* These macros extract specific flags from an integer
  62    that holds the syntax code and the flags.  */
  63
  64 #define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)
  65
  66 #define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)
  67
  68 #define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)
  69
  70 #define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)
  71
  72 #define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)
  73
  74 #define SYNTAX_FLAGS_COMMENT_STYLEB(flags) (((flags) >> 21) & 1)
  75 #define SYNTAX_FLAGS_COMMENT_STYLEC(flags) (((flags) >> 22) & 2)
  76 /* FLAGS should be the flags of the main char of the comment marker, e.g.
  77    the second for comstart and the first for comend.  */
  78 #define SYNTAX_FLAGS_COMMENT_STYLE(flags, other_flags) \
  79   (SYNTAX_FLAGS_COMMENT_STYLEB (flags) \
  80    | SYNTAX_FLAGS_COMMENT_STYLEC (flags) \
  81    | SYNTAX_FLAGS_COMMENT_STYLEC (other_flags))
  82
  83 #define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
  84
  85 /* These macros extract a particular flag for a given character.  */
  86
  87 #define SYNTAX_COMEND_FIRST(c) \
  88   (SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c)))
  89 #define SYNTAX_PREFIX(c) (SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c)))
  90
  91 /* We use these constants in place for comment-style and
  92    string-ender-char to distinguish  comments/strings started by
  93    comment_fence and string_fence codes.  */
  94
  95 #define ST_COMMENT_STYLE (256 + 1)
  96 #define ST_STRING_STYLE (256 + 2)
  97
  98 static Lisp_Object Qsyntax_table_p;
  99 static Lisp_Object Qsyntax_table, Qscan_error;
 100
 101 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
 102    if not compiled with GCC.  No need to mark it, since it is used
 103    only very temporarily.  */
 104 Lisp_Object syntax_temp;
 105
 106 /* This is the internal form of the parse state used in parse-partial-sexp.  */
 107
 108 struct lisp_parse_state
 109   {
 110     int depth;     /* Depth at end of parsing.  */
 111     int instring;  /* -1 if not within string, else desired terminator.  */
 112     int incomment; /* -1 if in unnestable comment else comment nesting */
 113     int comstyle;  /* comment style a=0, or b=1, or ST_COMMENT_STYLE.  */
 114     int quoted;    /* Nonzero if just after an escape char at end of parsing */
 115     int mindepth;  /* Minimum depth seen while scanning.  */
 116     /* Char number of most recent start-of-expression at current level */
 117     EMACS_INT thislevelstart;
 118     /* Char number of start of containing expression */
 119     EMACS_INT prevlevelstart;
 120     EMACS_INT location;      /* Char number at which parsing stopped.  */
 121     EMACS_INT comstr_start;  /* Position of last comment/string starter.  */
 122     Lisp_Object levelstarts; /* Char numbers of starts-of-expression
 123                                 of levels (starting from outermost).  */
 124   };
 125 \f
 126 /* These variables are a cache for finding the start of a defun.
 127    find_start_pos is the place for which the defun start was found.
 128    find_start_value is the defun start position found for it.
 129    find_start_value_byte is the corresponding byte position.
 130    find_start_buffer is the buffer it was found in.
 131    find_start_begv is the BEGV value when it was found.
 132    find_start_modiff is the value of MODIFF when it was found.  */
 133
 134 static EMACS_INT find_start_pos;
 135 static EMACS_INT find_start_value;
 136 static EMACS_INT find_start_value_byte;
 137 static struct buffer *find_start_buffer;
 138 static EMACS_INT find_start_begv;
 139 static int find_start_modiff;
 140
 141
 142 INFUN (Fsyntax_table_p, 1);
 143 static Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
 144 static Lisp_Object skip_syntaxes (int, Lisp_Object, Lisp_Object);
 145 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, int);
 146 static void scan_sexps_forward (struct lisp_parse_state *,
 147                                 EMACS_INT, EMACS_INT, EMACS_INT, int,
 148                                 int, Lisp_Object, int);
 149 static int in_classes (int, Lisp_Object);
 150 \f
 151 /* Whether the syntax of the character C has the prefix flag set.  */
 152 int syntax_prefix_flag_p (int c)
 153 {
 154   return SYNTAX_PREFIX (c);
 155 }
 156
 157 struct gl_state_s gl_state;             /* Global state of syntax parser.  */
 158
 159 #define INTERVALS_AT_ONCE 10            /* 1 + max-number of intervals
 160                                            to scan to property-change.  */
 161
 162 /* Update gl_state to an appropriate interval which contains CHARPOS.  The
 163    sign of COUNT give the relative position of CHARPOS wrt the previously
 164    valid interval.  If INIT, only [be]_property fields of gl_state are
 165    valid at start, the rest is filled basing on OBJECT.
 166
 167    `gl_state.*_i' are the intervals, and CHARPOS is further in the search
 168    direction than the intervals - or in an interval.  We update the
 169    current syntax-table basing on the property of this interval, and
 170    update the interval to start further than CHARPOS - or be
 171    NULL_INTERVAL.  We also update lim_property to be the next value of
 172    charpos to call this subroutine again - or be before/after the
 173    start/end of OBJECT.  */
 174
 175 void
 176 update_syntax_table (EMACS_INT charpos, int count, int init,
 177                      Lisp_Object object)
 178 {
 179   Lisp_Object tmp_table;
 180   unsigned cnt = 0;
 181   int invalidate = 1;
 182   INTERVAL i;
 183
 184   if (init)
 185     {
 186       gl_state.old_prop = Qnil;
 187       gl_state.start = gl_state.b_property;
 188       gl_state.stop = gl_state.e_property;
 189       i = interval_of (charpos, object);
 190       gl_state.backward_i = gl_state.forward_i = i;
 191       invalidate = 0;
 192       if (NULL_INTERVAL_P (i))
 193         return;
 194       /* interval_of updates only ->position of the return value, so
 195          update the parents manually to speed up update_interval.  */
 196       while (!NULL_PARENT (i))
 197         {
 198           if (AM_RIGHT_CHILD (i))
 199             INTERVAL_PARENT (i)->position = i->position
 200               - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
 201               - TOTAL_LENGTH (INTERVAL_PARENT (i))
 202               + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
 203           else
 204             INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
 205               + TOTAL_LENGTH (i);
 206           i = INTERVAL_PARENT (i);
 207         }
 208       i = gl_state.forward_i;
 209       gl_state.b_property = i->position - gl_state.offset;
 210       gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
 211       goto update;
 212     }
 213   i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
 214
 215   /* We are guaranteed to be called with CHARPOS either in i,
 216      or further off.  */
 217   if (NULL_INTERVAL_P (i))
 218     error ("Error in syntax_table logic for to-the-end intervals");
 219   else if (charpos < i->position)               /* Move left.  */
 220     {
 221       if (count > 0)
 222         error ("Error in syntax_table logic for intervals <-");
 223       /* Update the interval.  */
 224       i = update_interval (i, charpos);
 225       if (INTERVAL_LAST_POS (i) != gl_state.b_property)
 226         {
 227           invalidate = 0;
 228           gl_state.forward_i = i;
 229           gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
 230         }
 231     }
 232   else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right.  */
 233     {
 234       if (count < 0)
 235         error ("Error in syntax_table logic for intervals ->");
 236       /* Update the interval.  */
 237       i = update_interval (i, charpos);
 238       if (i->position != gl_state.e_property)
 239         {
 240           invalidate = 0;
 241           gl_state.backward_i = i;
 242           gl_state.b_property = i->position - gl_state.offset;
 243         }
 244     }
 245
 246   update:
 247   tmp_table = textget (i->plist, Qsyntax_table);
 248
 249   if (invalidate)
 250     invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
 251
 252   if (invalidate)               /* Did not get to adjacent interval.  */
 253     {                           /* with the same table => */
 254                                 /* invalidate the old range.  */
 255       if (count > 0)
 256         {
 257           gl_state.backward_i = i;
 258           gl_state.b_property = i->position - gl_state.offset;
 259         }
 260       else
 261         {
 262           gl_state.forward_i = i;
 263           gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
 264         }
 265     }
 266
 267   if (!EQ (tmp_table, gl_state.old_prop))
 268     {
 269       gl_state.current_syntax_table = tmp_table;
 270       gl_state.old_prop = tmp_table;
 271       if (EQ (Fsyntax_table_p (tmp_table), Qt))
 272         {
 273           gl_state.use_global = 0;
 274         }
 275       else if (CONSP (tmp_table))
 276         {
 277           gl_state.use_global = 1;
 278           gl_state.global_code = tmp_table;
 279         }
 280       else
 281         {
 282           gl_state.use_global = 0;
 283           gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
 284         }
 285     }
 286
 287   while (!NULL_INTERVAL_P (i))
 288     {
 289       if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
 290         {
 291           if (count > 0)
 292             {
 293               gl_state.e_property = i->position - gl_state.offset;
 294               gl_state.forward_i = i;
 295             }
 296           else
 297             {
 298               gl_state.b_property
 299                 = i->position + LENGTH (i) - gl_state.offset;
 300               gl_state.backward_i = i;
 301             }
 302           return;
 303         }
 304       else if (cnt == INTERVALS_AT_ONCE)
 305         {
 306           if (count > 0)
 307             {
 308               gl_state.e_property
 309                 = i->position + LENGTH (i) - gl_state.offset
 310                 /* e_property at EOB is not set to ZV but to ZV+1, so that
 311                    we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
 312                    having to check eob between the two.  */
 313                 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
 314               gl_state.forward_i = i;
 315             }
 316           else
 317             {
 318               gl_state.b_property = i->position - gl_state.offset;
 319               gl_state.backward_i = i;
 320             }
 321           return;
 322         }
 323       cnt++;
 324       i = count > 0 ? next_interval (i) : previous_interval (i);
 325     }
 326   eassert (NULL_INTERVAL_P (i)); /* This property goes to the end.  */
 327   if (count > 0)
 328     gl_state.e_property = gl_state.stop;
 329   else
 330     gl_state.b_property = gl_state.start;
 331 }
 332 \f
 333 /* Returns TRUE if char at CHARPOS is quoted.
 334    Global syntax-table data should be set up already to be good at CHARPOS
 335    or after.  On return global syntax data is good for lookup at CHARPOS. */
 336
 337 static int
 338 char_quoted (EMACS_INT charpos, EMACS_INT bytepos)
 339 {
 340   register enum syntaxcode code;
 341   register EMACS_INT beg = BEGV;
 342   register int quoted = 0;
 343   EMACS_INT orig = charpos;
 344
 345   while (charpos > beg)
 346     {
 347       int c;
 348       DEC_BOTH (charpos, bytepos);
 349
 350       UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
 351       c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
 352       code = SYNTAX (c);
 353       if (! (code == Scharquote || code == Sescape))
 354         break;
 355
 356       quoted = !quoted;
 357     }
 358
 359   UPDATE_SYNTAX_TABLE (orig);
 360   return quoted;
 361 }
 362
 363 /* Return the bytepos one character before BYTEPOS.
 364    We assume that BYTEPOS is not at the start of the buffer.  */
 365
 366 static INLINE EMACS_INT
 367 dec_bytepos (EMACS_INT bytepos)
 368 {
 369   if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
 370     return bytepos - 1;
 371
 372   DEC_POS (bytepos);
 373   return bytepos;
 374 }
 375 \f
 376 /* Return a defun-start position before POS and not too far before.
 377    It should be the last one before POS, or nearly the last.
 378
 379    When open_paren_in_column_0_is_defun_start is nonzero,
 380    only the beginning of the buffer is treated as a defun-start.
 381
 382    We record the information about where the scan started
 383    and what its result was, so that another call in the same area
 384    can return the same value very quickly.
 385
 386    There is no promise at which position the global syntax data is
 387    valid on return from the subroutine, so the caller should explicitly
 388    update the global data.  */
 389
 390 static EMACS_INT
 391 find_defun_start (EMACS_INT pos, EMACS_INT pos_byte)
 392 {
 393   EMACS_INT opoint = PT, opoint_byte = PT_BYTE;
 394
 395   if (!open_paren_in_column_0_is_defun_start)
 396     {
 397       find_start_value_byte = BEGV_BYTE;
 398       return BEGV;
 399     }
 400
 401   /* Use previous finding, if it's valid and applies to this inquiry.  */
 402   if (current_buffer == find_start_buffer
 403       /* Reuse the defun-start even if POS is a little farther on.
 404          POS might be in the next defun, but that's ok.
 405          Our value may not be the best possible, but will still be usable.  */
 406       && pos <= find_start_pos + 1000
 407       && pos >= find_start_value
 408       && BEGV == find_start_begv
 409       && MODIFF == find_start_modiff)
 410     return find_start_value;
 411
 412   /* Back up to start of line.  */
 413   scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
 414
 415   /* We optimize syntax-table lookup for rare updates.  Thus we accept
 416      only those `^\s(' which are good in global _and_ text-property
 417      syntax-tables.  */
 418   SETUP_BUFFER_SYNTAX_TABLE ();
 419   while (PT > BEGV)
 420     {
 421       int c;
 422
 423       /* Open-paren at start of line means we may have found our
 424          defun-start.  */
 425       c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
 426       if (SYNTAX (c) == Sopen)
 427         {
 428           SETUP_SYNTAX_TABLE (PT + 1, -1);      /* Try again... */
 429           c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
 430           if (SYNTAX (c) == Sopen)
 431             break;
 432           /* Now fallback to the default value.  */
 433           SETUP_BUFFER_SYNTAX_TABLE ();
 434         }
 435       /* Move to beg of previous line.  */
 436       scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
 437     }
 438
 439   /* Record what we found, for the next try.  */
 440   find_start_value = PT;
 441   find_start_value_byte = PT_BYTE;
 442   find_start_buffer = current_buffer;
 443   find_start_modiff = MODIFF;
 444   find_start_begv = BEGV;
 445   find_start_pos = pos;
 446
 447   TEMP_SET_PT_BOTH (opoint, opoint_byte);
 448
 449   return find_start_value;
 450 }
 451 \f
 452 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE.  */
 453
 454 static int
 455 prev_char_comend_first (EMACS_INT pos, EMACS_INT pos_byte)
 456 {
 457   int c, val;
 458
 459   DEC_BOTH (pos, pos_byte);
 460   UPDATE_SYNTAX_TABLE_BACKWARD (pos);
 461   c = FETCH_CHAR (pos_byte);
 462   val = SYNTAX_COMEND_FIRST (c);
 463   UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
 464   return val;
 465 }
 466
 467 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE.  */
 468
 469 /* static int
 470  * prev_char_comstart_first (pos, pos_byte)
 471  *      int pos, pos_byte;
 472  * {
 473  *   int c, val;
 474  *
 475  *   DEC_BOTH (pos, pos_byte);
 476  *   UPDATE_SYNTAX_TABLE_BACKWARD (pos);
 477  *   c = FETCH_CHAR (pos_byte);
 478  *   val = SYNTAX_COMSTART_FIRST (c);
 479  *   UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
 480  *   return val;
 481  * } */
 482
 483 /* Checks whether charpos FROM is at the end of a comment.
 484    FROM_BYTE is the bytepos corresponding to FROM.
 485    Do not move back before STOP.
 486
 487    Return a positive value if we find a comment ending at FROM/FROM_BYTE;
 488    return -1 otherwise.
 489
 490    If successful, store the charpos of the comment's beginning
 491    into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
 492
 493    Global syntax data remains valid for backward search starting at
 494    the returned value (or at FROM, if the search was not successful).  */
 495
 496 static int
 497 back_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop, int comnested, int comstyle, EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr)
 498 {
 499   /* Look back, counting the parity of string-quotes,
 500      and recording the comment-starters seen.
 501      When we reach a safe place, assume that's not in a string;
 502      then step the main scan to the earliest comment-starter seen
 503      an even number of string quotes away from the safe place.
 504
 505      OFROM[I] is position of the earliest comment-starter seen
 506      which is I+2X quotes from the comment-end.
 507      PARITY is current parity of quotes from the comment end.  */
 508   int string_style = -1;        /* Presumed outside of any string. */
 509   int string_lossage = 0;
 510   /* Not a real lossage: indicates that we have passed a matching comment
 511      starter plus a non-matching comment-ender, meaning that any matching
 512      comment-starter we might see later could be a false positive (hidden
 513      inside another comment).
 514      Test case:  { a (* b } c (* d *) */
 515   int comment_lossage = 0;
 516   EMACS_INT comment_end = from;
 517   EMACS_INT comment_end_byte = from_byte;
 518   EMACS_INT comstart_pos = 0;
 519   EMACS_INT comstart_byte IF_LINT (= 0);
 520   /* Place where the containing defun starts,
 521      or 0 if we didn't come across it yet.  */
 522   EMACS_INT defun_start = 0;
 523   EMACS_INT defun_start_byte = 0;
 524   register enum syntaxcode code;
 525   int nesting = 1;              /* current comment nesting */
 526   int c;
 527   int syntax = 0;
 528
 529   /* FIXME: A }} comment-ender style leads to incorrect behavior
 530      in the case of {{ c }}} because we ignore the last two chars which are
 531      assumed to be comment-enders although they aren't.  */
 532
 533   /* At beginning of range to scan, we're outside of strings;
 534      that determines quote parity to the comment-end.  */
 535   while (from != stop)
 536     {
 537       EMACS_INT temp_byte;
 538       int prev_syntax, com2start, com2end;
 539       int comstart;
 540
 541       /* Move back and examine a character.  */
 542       DEC_BOTH (from, from_byte);
 543       UPDATE_SYNTAX_TABLE_BACKWARD (from);
 544
 545       prev_syntax = syntax;
 546       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 547       syntax = SYNTAX_WITH_FLAGS (c);
 548       code = SYNTAX (c);
 549
 550       /* Check for 2-char comment markers.  */
 551       com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
 552                    && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
 553                    && (comstyle
 554                        == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
 555                    && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
 556                        || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
 557       com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
 558                  && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
 559       comstart = (com2start || code == Scomment);
 560
 561       /* Nasty cases with overlapping 2-char comment markers:
 562          - snmp-mode: -- c -- foo -- c --
 563                       --- c --
 564                       ------ c --
 565          - c-mode:    *||*
 566                       |* *|* *|
 567                       |*| |* |*|
 568                       ///   */
 569
 570       /* If a 2-char comment sequence partly overlaps with another,
 571          we don't try to be clever.  E.g. |*| in C, or }% in modes that
 572          have %..\n and %{..}%.  */
 573       if (from > stop && (com2end || comstart))
 574         {
 575           EMACS_INT next = from, next_byte = from_byte;
 576           int next_c, next_syntax;
 577           DEC_BOTH (next, next_byte);
 578           UPDATE_SYNTAX_TABLE_BACKWARD (next);
 579           next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
 580           next_syntax = SYNTAX_WITH_FLAGS (next_c);
 581           if (((comstart || comnested)
 582                && SYNTAX_FLAGS_COMEND_SECOND (syntax)
 583                && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
 584               || ((com2end || comnested)
 585                   && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
 586                   && (comstyle
 587                       == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
 588                   && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
 589             goto lossage;
 590           /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
 591         }
 592
 593       if (com2start && comstart_pos == 0)
 594         /* We're looking at a comment starter.  But it might be a comment
 595            ender as well (see snmp-mode).  The first time we see one, we
 596            need to consider it as a comment starter,
 597            and the subsequent times as a comment ender.  */
 598         com2end = 0;
 599
 600       /* Turn a 2-char comment sequences into the appropriate syntax.  */
 601       if (com2end)
 602         code = Sendcomment;
 603       else if (com2start)
 604         code = Scomment;
 605       /* Ignore comment starters of a different style.  */
 606       else if (code == Scomment
 607                && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
 608                    || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
 609         continue;
 610
 611       /* Ignore escaped characters, except comment-enders.  */
 612       if (code != Sendcomment && char_quoted (from, from_byte))
 613         continue;
 614
 615       switch (code)
 616         {
 617         case Sstring_fence:
 618         case Scomment_fence:
 619           c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
 620         case Sstring:
 621           /* Track parity of quotes.  */
 622           if (string_style == -1)
 623             /* Entering a string.  */
 624             string_style = c;
 625           else if (string_style == c)
 626             /* Leaving the string.  */
 627             string_style = -1;
 628           else
 629             /* If we have two kinds of string delimiters.
 630                There's no way to grok this scanning backwards.  */
 631             string_lossage = 1;
 632           break;
 633
 634         case Scomment:
 635           /* We've already checked that it is the relevant comstyle.  */
 636           if (string_style != -1 || comment_lossage || string_lossage)
 637             /* There are odd string quotes involved, so let's be careful.
 638                Test case in Pascal: " { " a { " } */
 639             goto lossage;
 640
 641           if (!comnested)
 642             {
 643               /* Record best comment-starter so far.  */
 644               comstart_pos = from;
 645               comstart_byte = from_byte;
 646             }
 647           else if (--nesting <= 0)
 648             /* nested comments have to be balanced, so we don't need to
 649                keep looking for earlier ones.  We use here the same (slightly
 650                incorrect) reasoning as below:  since it is followed by uniform
 651                paired string quotes, this comment-start has to be outside of
 652                strings, else the comment-end itself would be inside a string. */
 653             goto done;
 654           break;
 655
 656         case Sendcomment:
 657           if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
 658               && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
 659                   || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
 660             /* This is the same style of comment ender as ours. */
 661             {
 662               if (comnested)
 663                 nesting++;
 664               else
 665                 /* Anything before that can't count because it would match
 666                    this comment-ender rather than ours.  */
 667                 from = stop;    /* Break out of the loop.  */
 668             }
 669           else if (comstart_pos != 0 || c != '\n')
 670             /* We're mixing comment styles here, so we'd better be careful.
 671                The (comstart_pos != 0 || c != '\n') check is not quite correct
 672                (we should just always set comment_lossage), but removing it
 673                would imply that any multiline comment in C would go through
 674                lossage, which seems overkill.
 675                The failure should only happen in the rare cases such as
 676                  { (* } *)   */
 677             comment_lossage = 1;
 678           break;
 679
 680         case Sopen:
 681           /* Assume a defun-start point is outside of strings.  */
 682           if (open_paren_in_column_0_is_defun_start
 683               && (from == stop
 684                   || (temp_byte = dec_bytepos (from_byte),
 685                       FETCH_CHAR (temp_byte) == '\n')))
 686             {
 687               defun_start = from;
 688               defun_start_byte = from_byte;
 689               from = stop;      /* Break out of the loop.  */
 690             }
 691           break;
 692
 693         default:
 694           break;
 695         }
 696     }
 697
 698   if (comstart_pos == 0)
 699     {
 700       from = comment_end;
 701       from_byte = comment_end_byte;
 702       UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
 703     }
 704   /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
 705      or `done'), then we've found the beginning of the non-nested comment.  */
 706   else if (1)   /* !comnested */
 707     {
 708       from = comstart_pos;
 709       from_byte = comstart_byte;
 710       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
 711     }
 712   else
 713     {
 714       struct lisp_parse_state state;
 715     lossage:
 716       /* We had two kinds of string delimiters mixed up
 717          together.  Decode this going forwards.
 718          Scan fwd from a known safe place (beginning-of-defun)
 719          to the one in question; this records where we
 720          last passed a comment starter.  */
 721       /* If we did not already find the defun start, find it now.  */
 722       if (defun_start == 0)
 723         {
 724           defun_start = find_defun_start (comment_end, comment_end_byte);
 725           defun_start_byte = find_start_value_byte;
 726         }
 727       do
 728         {
 729           scan_sexps_forward (&state,
 730                               defun_start, defun_start_byte,
 731                               comment_end, -10000, 0, Qnil, 0);
 732           defun_start = comment_end;
 733           if (state.incomment == (comnested ? 1 : -1)
 734               && state.comstyle == comstyle)
 735             from = state.comstr_start;
 736           else
 737             {
 738               from = comment_end;
 739               if (state.incomment)
 740                 /* If comment_end is inside some other comment, maybe ours
 741                    is nested, so we need to try again from within the
 742                    surrounding comment.  Example: { a (* " *)  */
 743                 {
 744                   /* FIXME: We should advance by one or two chars. */
 745                   defun_start = state.comstr_start + 2;
 746                   defun_start_byte = CHAR_TO_BYTE (defun_start);
 747                 }
 748             }
 749         } while (defun_start < comment_end);
 750
 751       from_byte = CHAR_TO_BYTE (from);
 752       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
 753     }
 754
 755  done:
 756   *charpos_ptr = from;
 757   *bytepos_ptr = from_byte;
 758
 759   return (from == comment_end) ? -1 : from;
 760 }
 761 \f
 762 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
 763        doc: /* Return t if OBJECT is a syntax table.
 764 Currently, any char-table counts as a syntax table.  */)
 765   (Lisp_Object object)
 766 {
 767   if (CHAR_TABLE_P (object)
 768       && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
 769     return Qt;
 770   return Qnil;
 771 }
 772
 773 static void
 774 check_syntax_table (Lisp_Object obj)
 775 {
 776   CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
 777               Qsyntax_table_p, obj);
 778 }
 779
 780 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
 781        doc: /* Return the current syntax table.
 782 This is the one specified by the current buffer.  */)
 783   (void)
 784 {
 785   return BVAR (current_buffer, syntax_table);
 786 }
 787
 788 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
 789    Sstandard_syntax_table, 0, 0, 0,
 790        doc: /* Return the standard syntax table.
 791 This is the one used for new buffers.  */)
 792   (void)
 793 {
 794   return Vstandard_syntax_table;
 795 }
 796
 797 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
 798        doc: /* Construct a new syntax table and return it.
 799 It is a copy of the TABLE, which defaults to the standard syntax table.  */)
 800   (Lisp_Object table)
 801 {
 802   Lisp_Object copy;
 803
 804   if (!NILP (table))
 805     check_syntax_table (table);
 806   else
 807     table = Vstandard_syntax_table;
 808
 809   copy = Fcopy_sequence (table);
 810
 811   /* Only the standard syntax table should have a default element.
 812      Other syntax tables should inherit from parents instead.  */
 813   XCHAR_TABLE (copy)->defalt = Qnil;
 814
 815   /* Copied syntax tables should all have parents.
 816      If we copied one with no parent, such as the standard syntax table,
 817      use the standard syntax table as the copy's parent.  */
 818   if (NILP (XCHAR_TABLE (copy)->parent))
 819     Fset_char_table_parent (copy, Vstandard_syntax_table);
 820   return copy;
 821 }
 822
 823 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
 824        doc: /* Select a new syntax table for the current buffer.
 825 One argument, a syntax table.  */)
 826   (Lisp_Object table)
 827 {
 828   int idx;
 829   check_syntax_table (table);
 830   BVAR (current_buffer, syntax_table) = table;
 831   /* Indicate that this buffer now has a specified syntax table.  */
 832   idx = PER_BUFFER_VAR_IDX (syntax_table);
 833   SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
 834   return table;
 835 }
 836 \f
 837 /* Convert a letter which signifies a syntax code
 838  into the code it signifies.
 839  This is used by modify-syntax-entry, and other things.  */
 840
 841 unsigned char syntax_spec_code[0400] =
 842   { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 843     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 844     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 845     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 846     (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
 847         (char) Smath, 0377, 0377, (char) Squote,
 848     (char) Sopen, (char) Sclose, 0377, 0377,
 849         0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
 850     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 851     0377, 0377, 0377, 0377,
 852         (char) Scomment, 0377, (char) Sendcomment, 0377,
 853     (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* @, A ... */
 854     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 855     0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
 856     0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
 857     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* `, a, ... */
 858     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 859     0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
 860     0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
 861   };
 862
 863 /* Indexed by syntax code, give the letter that describes it.  */
 864
 865 char syntax_code_spec[16] =
 866   {
 867     ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
 868     '!', '|'
 869   };
 870
 871 /* Indexed by syntax code, give the object (cons of syntax code and
 872    nil) to be stored in syntax table.  Since these objects can be
 873    shared among syntax tables, we generate them in advance.  By
 874    sharing objects, the function `describe-syntax' can give a more
 875    compact listing.  */
 876 static Lisp_Object Vsyntax_code_object;
 877
 878 \f
 879 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
 880        doc: /* Return the syntax code of CHARACTER, described by a character.
 881 For example, if CHARACTER is a word constituent, the
 882 character `w' (119) is returned.
 883 The characters that correspond to various syntax codes
 884 are listed in the documentation of `modify-syntax-entry'.  */)
 885   (Lisp_Object character)
 886 {
 887   int char_int;
 888   CHECK_CHARACTER (character);
 889   char_int = XINT (character);
 890   SETUP_BUFFER_SYNTAX_TABLE ();
 891   return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
 892 }
 893
 894 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
 895        doc: /* Return the matching parenthesis of CHARACTER, or nil if none.  */)
 896   (Lisp_Object character)
 897 {
 898   int char_int, code;
 899   CHECK_NUMBER (character);
 900   char_int = XINT (character);
 901   SETUP_BUFFER_SYNTAX_TABLE ();
 902   code = SYNTAX (char_int);
 903   if (code == Sopen || code == Sclose)
 904     return SYNTAX_MATCH (char_int);
 905   return Qnil;
 906 }
 907
 908 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
 909        doc: /* Convert a syntax specification STRING into syntax cell form.
 910 STRING should be a string as it is allowed as argument of
 911 `modify-syntax-entry'.  Value is the equivalent cons cell
 912 \(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
 913 text property.  */)
 914   (Lisp_Object string)
 915 {
 916   register const unsigned char *p;
 917   register enum syntaxcode code;
 918   int val;
 919   Lisp_Object match;
 920
 921   CHECK_STRING (string);
 922
 923   p = SDATA (string);
 924   code = (enum syntaxcode) syntax_spec_code[*p++];
 925   if (((int) code & 0377) == 0377)
 926     error ("Invalid syntax description letter: %c", p[-1]);
 927
 928   if (code == Sinherit)
 929     return Qnil;
 930
 931   if (*p)
 932     {
 933       int len;
 934       int character = STRING_CHAR_AND_LENGTH (p, len);
 935       XSETINT (match, character);
 936       if (XFASTINT (match) == ' ')
 937         match = Qnil;
 938       p += len;
 939     }
 940   else
 941     match = Qnil;
 942
 943   val = (int) code;
 944   while (*p)
 945     switch (*p++)
 946       {
 947       case '1':
 948         val |= 1 << 16;
 949         break;
 950
 951       case '2':
 952         val |= 1 << 17;
 953         break;
 954
 955       case '3':
 956         val |= 1 << 18;
 957         break;
 958
 959       case '4':
 960         val |= 1 << 19;
 961         break;
 962
 963       case 'p':
 964         val |= 1 << 20;
 965         break;
 966
 967       case 'b':
 968         val |= 1 << 21;
 969         break;
 970
 971       case 'n':
 972         val |= 1 << 22;
 973         break;
 974
 975       case 'c':
 976         val |= 1 << 23;
 977         break;
 978       }
 979
 980   if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
 981     return XVECTOR (Vsyntax_code_object)->contents[val];
 982   else
 983     /* Since we can't use a shared object, let's make a new one.  */
 984     return Fcons (make_number (val), match);
 985 }
 986
 987 /* I really don't know why this is interactive
 988    help-form should at least be made useful whilst reading the second arg.  */
 989 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
 990   "cSet syntax for character: \nsSet syntax for %s to: ",
 991        doc: /* Set syntax for character CHAR according to string NEWENTRY.
 992 The syntax is changed only for table SYNTAX-TABLE, which defaults to
 993  the current buffer's syntax table.
 994 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
 995 in the range MIN to MAX are changed.
 996 The first character of NEWENTRY should be one of the following:
 997   Space or -  whitespace syntax.    w   word constituent.
 998   _           symbol constituent.   .   punctuation.
 999   (           open-parenthesis.     )   close-parenthesis.
1000   "           string quote.         \\   escape.
1001   $           paired delimiter.     '   expression quote or prefix operator.
1002   <           comment starter.      >   comment ender.
1003   /           character-quote.      @   inherit from `standard-syntax-table'.
1004   |           generic string fence. !   generic comment fence.
1005
1006 Only single-character comment start and end sequences are represented thus.
1007 Two-character sequences are represented as described below.
1008 The second character of NEWENTRY is the matching parenthesis,
1009  used only if the first character is `(' or `)'.
1010 Any additional characters are flags.
1011 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1012  1 means CHAR is the start of a two-char comment start sequence.
1013  2 means CHAR is the second character of such a sequence.
1014  3 means CHAR is the start of a two-char comment end sequence.
1015  4 means CHAR is the second character of such a sequence.
1016
1017 There can be several orthogonal comment sequences.  This is to support
1018 language modes such as C++.  By default, all comment sequences are of style
1019 a, but you can set the comment sequence style to b (on the second character
1020 of a comment-start, and the first character of a comment-end sequence) and/or
1021 c (on any of its chars) using this flag:
1022  b means CHAR is part of comment sequence b.
1023  c means CHAR is part of comment sequence c.
1024  n means CHAR is part of a nestable comment sequence.
1025
1026  p means CHAR is a prefix character for `backward-prefix-chars';
1027    such characters are treated as whitespace when they occur
1028    between expressions.
1029 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE)  */)
1030   (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1031 {
1032   if (CONSP (c))
1033     {
1034       CHECK_CHARACTER_CAR (c);
1035       CHECK_CHARACTER_CDR (c);
1036     }
1037   else
1038     CHECK_CHARACTER (c);
1039
1040   if (NILP (syntax_table))
1041     syntax_table = BVAR (current_buffer, syntax_table);
1042   else
1043     check_syntax_table (syntax_table);
1044
1045   newentry = Fstring_to_syntax (newentry);
1046   if (CONSP (c))
1047     SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1048   else
1049     SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1050
1051   /* We clear the regexp cache, since character classes can now have
1052      different values from those in the compiled regexps.*/
1053   clear_regexp_cache ();
1054
1055   return Qnil;
1056 }
1057 \f
1058 /* Dump syntax table to buffer in human-readable format */
1059
1060 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1061        Sinternal_describe_syntax_value, 1, 1, 0,
1062        doc: /* Insert a description of the internal syntax description SYNTAX at point.  */)
1063   (Lisp_Object syntax)
1064 {
1065   register enum syntaxcode code;
1066   int syntax_code;
1067   char desc, start1, start2, end1, end2, prefix,
1068     comstyleb, comstylec, comnested;
1069   char str[2];
1070   Lisp_Object first, match_lisp, value = syntax;
1071
1072   if (NILP (value))
1073     {
1074       insert_string ("default");
1075       return syntax;
1076     }
1077
1078   if (CHAR_TABLE_P (value))
1079     {
1080       insert_string ("deeper char-table ...");
1081       return syntax;
1082     }
1083
1084   if (!CONSP (value))
1085     {
1086       insert_string ("invalid");
1087       return syntax;
1088     }
1089
1090   first = XCAR (value);
1091   match_lisp = XCDR (value);
1092
1093   if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
1094     {
1095       insert_string ("invalid");
1096       return syntax;
1097     }
1098
1099   syntax_code = XINT (first);
1100   code = (enum syntaxcode) (syntax_code & 0377);
1101   start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1102   start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);;
1103   end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1104   end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1105   prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1106   comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1107   comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1108   comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1109
1110   if ((int) code < 0 || (int) code >= (int) Smax)
1111     {
1112       insert_string ("invalid");
1113       return syntax;
1114     }
1115   desc = syntax_code_spec[(int) code];
1116
1117   str[0] = desc, str[1] = 0;
1118   insert (str, 1);
1119
1120   if (NILP (match_lisp))
1121     insert (" ", 1);
1122   else
1123     insert_char (XINT (match_lisp));
1124
1125   if (start1)
1126     insert ("1", 1);
1127   if (start2)
1128     insert ("2", 1);
1129
1130   if (end1)
1131     insert ("3", 1);
1132   if (end2)
1133     insert ("4", 1);
1134
1135   if (prefix)
1136     insert ("p", 1);
1137   if (comstyleb)
1138     insert ("b", 1);
1139   if (comstylec)
1140     insert ("c", 1);
1141   if (comnested)
1142     insert ("n", 1);
1143
1144   insert_string ("\twhich means: ");
1145
1146   switch (SWITCH_ENUM_CAST (code))
1147     {
1148     case Swhitespace:
1149       insert_string ("whitespace"); break;
1150     case Spunct:
1151       insert_string ("punctuation"); break;
1152     case Sword:
1153       insert_string ("word"); break;
1154     case Ssymbol:
1155       insert_string ("symbol"); break;
1156     case Sopen:
1157       insert_string ("open"); break;
1158     case Sclose:
1159       insert_string ("close"); break;
1160     case Squote:
1161       insert_string ("prefix"); break;
1162     case Sstring:
1163       insert_string ("string"); break;
1164     case Smath:
1165       insert_string ("math"); break;
1166     case Sescape:
1167       insert_string ("escape"); break;
1168     case Scharquote:
1169       insert_string ("charquote"); break;
1170     case Scomment:
1171       insert_string ("comment"); break;
1172     case Sendcomment:
1173       insert_string ("endcomment"); break;
1174     case Sinherit:
1175       insert_string ("inherit"); break;
1176     case Scomment_fence:
1177       insert_string ("comment fence"); break;
1178     case Sstring_fence:
1179       insert_string ("string fence"); break;
1180     default:
1181       insert_string ("invalid");
1182       return syntax;
1183     }
1184
1185   if (!NILP (match_lisp))
1186     {
1187       insert_string (", matches ");
1188       insert_char (XINT (match_lisp));
1189     }
1190
1191   if (start1)
1192     insert_string (",\n\t  is the first character of a comment-start sequence");
1193   if (start2)
1194     insert_string (",\n\t  is the second character of a comment-start sequence");
1195
1196   if (end1)
1197     insert_string (",\n\t  is the first character of a comment-end sequence");
1198   if (end2)
1199     insert_string (",\n\t  is the second character of a comment-end sequence");
1200   if (comstyleb)
1201     insert_string (" (comment style b)");
1202   if (comstylec)
1203     insert_string (" (comment style c)");
1204   if (comnested)
1205     insert_string (" (nestable)");
1206
1207   if (prefix)
1208     insert_string (",\n\t  is a prefix character for `backward-prefix-chars'");
1209
1210   return syntax;
1211 }
1212 \f
1213 /* Return the position across COUNT words from FROM.
1214    If that many words cannot be found before the end of the buffer, return 0.
1215    COUNT negative means scan backward and stop at word beginning.  */
1216
1217 EMACS_INT
1218 scan_words (register EMACS_INT from, register EMACS_INT count)
1219 {
1220   register EMACS_INT beg = BEGV;
1221   register EMACS_INT end = ZV;
1222   register EMACS_INT from_byte = CHAR_TO_BYTE (from);
1223   register enum syntaxcode code;
1224   int ch0, ch1;
1225   Lisp_Object func, pos;
1226
1227   immediate_quit = 1;
1228   QUIT;
1229
1230   SETUP_SYNTAX_TABLE (from, count);
1231
1232   while (count > 0)
1233     {
1234       while (1)
1235         {
1236           if (from == end)
1237             {
1238               immediate_quit = 0;
1239               return 0;
1240             }
1241           UPDATE_SYNTAX_TABLE_FORWARD (from);
1242           ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1243           code = SYNTAX (ch0);
1244           INC_BOTH (from, from_byte);
1245           if (words_include_escapes
1246               && (code == Sescape || code == Scharquote))
1247             break;
1248           if (code == Sword)
1249             break;
1250         }
1251       /* Now CH0 is a character which begins a word and FROM is the
1252          position of the next character.  */
1253       func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1254       if (! NILP (Ffboundp (func)))
1255         {
1256           pos = call2 (func, make_number (from - 1), make_number (end));
1257           if (INTEGERP (pos) && XINT (pos) > from)
1258             {
1259               from = XINT (pos);
1260               from_byte = CHAR_TO_BYTE (from);
1261             }
1262         }
1263       else
1264         {
1265           while (1)
1266             {
1267               if (from == end) break;
1268               UPDATE_SYNTAX_TABLE_FORWARD (from);
1269               ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1270               code = SYNTAX (ch1);
1271               if ((code != Sword
1272                    && (! words_include_escapes
1273                        || (code != Sescape && code != Scharquote)))
1274                   || word_boundary_p (ch0, ch1))
1275                 break;
1276               INC_BOTH (from, from_byte);
1277               ch0 = ch1;
1278             }
1279         }
1280       count--;
1281     }
1282   while (count < 0)
1283     {
1284       while (1)
1285         {
1286           if (from == beg)
1287             {
1288               immediate_quit = 0;
1289               return 0;
1290             }
1291           DEC_BOTH (from, from_byte);
1292           UPDATE_SYNTAX_TABLE_BACKWARD (from);
1293           ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1294           code = SYNTAX (ch1);
1295           if (words_include_escapes
1296               && (code == Sescape || code == Scharquote))
1297             break;
1298           if (code == Sword)
1299             break;
1300         }
1301       /* Now CH1 is a character which ends a word and FROM is the
1302          position of it.  */
1303       func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1304       if (! NILP (Ffboundp (func)))
1305         {
1306           pos = call2 (func, make_number (from), make_number (beg));
1307           if (INTEGERP (pos) && XINT (pos) < from)
1308             {
1309               from = XINT (pos);
1310               from_byte = CHAR_TO_BYTE (from);
1311             }
1312         }
1313       else
1314         {
1315           while (1)
1316             {
1317               if (from == beg)
1318                 break;
1319               DEC_BOTH (from, from_byte);
1320               UPDATE_SYNTAX_TABLE_BACKWARD (from);
1321               ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1322               code = SYNTAX (ch0);
1323               if ((code != Sword
1324                    && (! words_include_escapes
1325                        || (code != Sescape && code != Scharquote)))
1326                   || word_boundary_p (ch0, ch1))
1327                 {
1328                   INC_BOTH (from, from_byte);
1329                   break;
1330                 }
1331               ch1 = ch0;
1332             }
1333         }
1334       count++;
1335     }
1336
1337   immediate_quit = 0;
1338
1339   return from;
1340 }
1341
1342 DEFUE ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1343        doc: /* Move point forward ARG words (backward if ARG is negative).
1344 Normally returns t.
1345 If an edge of the buffer or a field boundary is reached, point is left there
1346 and the function returns nil.  Field boundaries are not noticed if
1347 `inhibit-field-text-motion' is non-nil.  */)
1348   (Lisp_Object arg)
1349 {
1350   Lisp_Object tmp;
1351   int orig_val, val;
1352
1353   if (NILP (arg))
1354     XSETFASTINT (arg, 1);
1355   else
1356     CHECK_NUMBER (arg);
1357
1358   val = orig_val = scan_words (PT, XINT (arg));
1359   if (! orig_val)
1360     val = XINT (arg) > 0 ? ZV : BEGV;
1361
1362   /* Avoid jumping out of an input field.  */
1363   tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1364                              Qt, Qnil, Qnil);
1365   val = XFASTINT (tmp);
1366
1367   SET_PT (val);
1368   return val == orig_val ? Qt : Qnil;
1369 }
1370 \f
1371 DEFUE ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1372        doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1373 STRING is like the inside of a `[...]' in a regular expression
1374 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1375  (but not at the end of a range; quoting is never needed there).
1376 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1377 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1378 Char classes, e.g. `[:alpha:]', are supported.
1379
1380 Returns the distance traveled, either zero or positive.  */)
1381   (Lisp_Object string, Lisp_Object lim)
1382 {
1383   return skip_chars (1, string, lim, 1);
1384 }
1385
1386 DEFUE ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1387        doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1388 See `skip-chars-forward' for details.
1389 Returns the distance traveled, either zero or negative.  */)
1390   (Lisp_Object string, Lisp_Object lim)
1391 {
1392   return skip_chars (0, string, lim, 1);
1393 }
1394
1395 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1396        doc: /* Move point forward across chars in specified syntax classes.
1397 SYNTAX is a string of syntax code characters.
1398 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1399 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1400 This function returns the distance traveled, either zero or positive.  */)
1401   (Lisp_Object syntax, Lisp_Object lim)
1402 {
1403   return skip_syntaxes (1, syntax, lim);
1404 }
1405
1406 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1407        doc: /* Move point backward across chars in specified syntax classes.
1408 SYNTAX is a string of syntax code characters.
1409 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1410 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1411 This function returns the distance traveled, either zero or negative.  */)
1412   (Lisp_Object syntax, Lisp_Object lim)
1413 {
1414   return skip_syntaxes (0, syntax, lim);
1415 }
1416
1417 static Lisp_Object
1418 skip_chars (int forwardp, Lisp_Object string, Lisp_Object lim, int handle_iso_classes)
1419 {
1420   register unsigned int c;
1421   unsigned char fastmap[0400];
1422   /* Store the ranges of non-ASCII characters.  */
1423   int *char_ranges IF_LINT (= NULL);
1424   int n_char_ranges = 0;
1425   int negate = 0;
1426   register EMACS_INT i, i_byte;
1427   /* Set to 1 if the current buffer is multibyte and the region
1428      contains non-ASCII chars.  */
1429   int multibyte;
1430   /* Set to 1 if STRING is multibyte and it contains non-ASCII
1431      chars.  */
1432   int string_multibyte;
1433   EMACS_INT size_byte;
1434   const unsigned char *str;
1435   int len;
1436   Lisp_Object iso_classes;
1437
1438   CHECK_STRING (string);
1439   iso_classes = Qnil;
1440
1441   if (NILP (lim))
1442     XSETINT (lim, forwardp ? ZV : BEGV);
1443   else
1444     CHECK_NUMBER_COERCE_MARKER (lim);
1445
1446   /* In any case, don't allow scan outside bounds of buffer.  */
1447   if (XINT (lim) > ZV)
1448     XSETFASTINT (lim, ZV);
1449   if (XINT (lim) < BEGV)
1450     XSETFASTINT (lim, BEGV);
1451
1452   multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1453                && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1454   string_multibyte = SBYTES (string) > SCHARS (string);
1455
1456   memset (fastmap, 0, sizeof fastmap);
1457
1458   str = SDATA (string);
1459   size_byte = SBYTES (string);
1460
1461   i_byte = 0;
1462   if (i_byte < size_byte
1463       && SREF (string, 0) == '^')
1464     {
1465       negate = 1; i_byte++;
1466     }
1467
1468   /* Find the characters specified and set their elements of fastmap.
1469      Handle backslashes and ranges specially.
1470
1471      If STRING contains non-ASCII characters, setup char_ranges for
1472      them and use fastmap only for their leading codes.  */
1473
1474   if (! string_multibyte)
1475     {
1476       int string_has_eight_bit = 0;
1477
1478       /* At first setup fastmap.  */
1479       while (i_byte < size_byte)
1480         {
1481           c = str[i_byte++];
1482
1483           if (handle_iso_classes && c == '['
1484               && i_byte < size_byte
1485               && str[i_byte] == ':')
1486             {
1487               const unsigned char *class_beg = str + i_byte + 1;
1488               const unsigned char *class_end = class_beg;
1489               const unsigned char *class_limit = str + size_byte - 2;
1490               /* Leave room for the null.  */
1491               unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1492               re_wctype_t cc;
1493
1494               if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1495                 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1496
1497               while (class_end < class_limit
1498                      && *class_end >= 'a' && *class_end <= 'z')
1499                 class_end++;
1500
1501               if (class_end == class_beg
1502                   || *class_end != ':' || class_end[1] != ']')
1503                 goto not_a_class_name;
1504
1505               memcpy (class_name, class_beg, class_end - class_beg);
1506               class_name[class_end - class_beg] = 0;
1507
1508               cc = re_wctype (class_name);
1509               if (cc == 0)
1510                 error ("Invalid ISO C character class");
1511
1512               iso_classes = Fcons (make_number (cc), iso_classes);
1513
1514               i_byte = class_end + 2 - str;
1515               continue;
1516             }
1517
1518         not_a_class_name:
1519           if (c == '\\')
1520             {
1521               if (i_byte == size_byte)
1522                 break;
1523
1524               c = str[i_byte++];
1525             }
1526           /* Treat `-' as range character only if another character
1527              follows.  */
1528           if (i_byte + 1 < size_byte
1529               && str[i_byte] == '-')
1530             {
1531               unsigned int c2;
1532
1533               /* Skip over the dash.  */
1534               i_byte++;
1535
1536               /* Get the end of the range.  */
1537               c2 = str[i_byte++];
1538               if (c2 == '\\'
1539                   && i_byte < size_byte)
1540                 c2 = str[i_byte++];
1541
1542               if (c <= c2)
1543                 {
1544                   unsigned lim2 = c2 + 1;
1545                   while (c < lim2)
1546                     fastmap[c++] = 1;
1547                   if (! ASCII_CHAR_P (c2))
1548                     string_has_eight_bit = 1;
1549                 }
1550             }
1551           else
1552             {
1553               fastmap[c] = 1;
1554               if (! ASCII_CHAR_P (c))
1555                 string_has_eight_bit = 1;
1556             }
1557         }
1558
1559       /* If the current range is multibyte and STRING contains
1560          eight-bit chars, arrange fastmap and setup char_ranges for
1561          the corresponding multibyte chars.  */
1562       if (multibyte && string_has_eight_bit)
1563         {
1564           unsigned char fastmap2[0400];
1565           int range_start_byte, range_start_char;
1566
1567           memcpy (fastmap + 0200, fastmap2 + 0200, 0200);
1568           memset (fastmap + 0200, 0, 0200);
1569           /* We are sure that this loop stops.  */
1570           for (i = 0200; ! fastmap2[i]; i++);
1571           c = BYTE8_TO_CHAR (i);
1572           fastmap[CHAR_LEADING_CODE (c)] = 1;
1573           range_start_byte = i;
1574           range_start_char = c;
1575           char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
1576           for (i = 129; i < 0400; i++)
1577             {
1578               c = BYTE8_TO_CHAR (i);
1579               fastmap[CHAR_LEADING_CODE (c)] = 1;
1580               if (i - range_start_byte != c - range_start_char)
1581                 {
1582                   char_ranges[n_char_ranges++] = range_start_char;
1583                   char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1584                                                   + range_start_char);
1585                   range_start_byte = i;
1586                   range_start_char = c;
1587                 }
1588             }
1589           char_ranges[n_char_ranges++] = range_start_char;
1590           char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1591                                           + range_start_char);
1592         }
1593     }
1594   else                          /* STRING is multibyte */
1595     {
1596       char_ranges = (int *) alloca (sizeof (int) * SCHARS (string) * 2);
1597
1598       while (i_byte < size_byte)
1599         {
1600           unsigned char leading_code;
1601
1602           leading_code = str[i_byte];
1603           c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1604           i_byte += len;
1605
1606           if (handle_iso_classes && c == '['
1607               && i_byte < size_byte
1608               && STRING_CHAR (str + i_byte) == ':')
1609             {
1610               const unsigned char *class_beg = str + i_byte + 1;
1611               const unsigned char *class_end = class_beg;
1612               const unsigned char *class_limit = str + size_byte - 2;
1613               /* Leave room for the null.        */
1614               unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1615               re_wctype_t cc;
1616
1617               if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1618                 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1619
1620               while (class_end < class_limit
1621                      && *class_end >= 'a' && *class_end <= 'z')
1622                 class_end++;
1623
1624               if (class_end == class_beg
1625                   || *class_end != ':' || class_end[1] != ']')
1626                 goto not_a_class_name_multibyte;
1627
1628               memcpy (class_name, class_beg, class_end - class_beg);
1629               class_name[class_end - class_beg] = 0;
1630
1631               cc = re_wctype (class_name);
1632               if (cc == 0)
1633                 error ("Invalid ISO C character class");
1634
1635               iso_classes = Fcons (make_number (cc), iso_classes);
1636
1637               i_byte = class_end + 2 - str;
1638               continue;
1639             }
1640
1641         not_a_class_name_multibyte:
1642           if (c == '\\')
1643             {
1644               if (i_byte == size_byte)
1645                 break;
1646
1647               leading_code = str[i_byte];
1648               c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1649               i_byte += len;
1650             }
1651           /* Treat `-' as range character only if another character
1652              follows.  */
1653           if (i_byte + 1 < size_byte
1654               && str[i_byte] == '-')
1655             {
1656               unsigned int c2;
1657               unsigned char leading_code2;
1658
1659               /* Skip over the dash.  */
1660               i_byte++;
1661
1662               /* Get the end of the range.  */
1663               leading_code2 = str[i_byte];
1664               c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1665               i_byte += len;
1666
1667               if (c2 == '\\'
1668                   && i_byte < size_byte)
1669                 {
1670                   leading_code2 = str[i_byte];
1671                   c2 =STRING_CHAR_AND_LENGTH (str + i_byte, len);
1672                   i_byte += len;
1673                 }
1674
1675               if (c > c2)
1676                 continue;
1677               if (ASCII_CHAR_P (c))
1678                 {
1679                   while (c <= c2 && c < 0x80)
1680                     fastmap[c++] = 1;
1681                   leading_code = CHAR_LEADING_CODE (c);
1682                 }
1683               if (! ASCII_CHAR_P (c))
1684                 {
1685                   unsigned lim2 = leading_code2 + 1;
1686                   while (leading_code < lim2)
1687                     fastmap[leading_code++] = 1;
1688                   if (c <= c2)
1689                     {
1690                       char_ranges[n_char_ranges++] = c;
1691                       char_ranges[n_char_ranges++] = c2;
1692                     }
1693                 }
1694             }
1695           else
1696             {
1697               if (ASCII_CHAR_P (c))
1698                 fastmap[c] = 1;
1699               else
1700                 {
1701                   fastmap[leading_code] = 1;
1702                   char_ranges[n_char_ranges++] = c;
1703                   char_ranges[n_char_ranges++] = c;
1704                 }
1705             }
1706         }
1707
1708       /* If the current range is unibyte and STRING contains non-ASCII
1709          chars, arrange fastmap for the corresponding unibyte
1710          chars.  */
1711
1712       if (! multibyte && n_char_ranges > 0)
1713         {
1714           memset (fastmap + 0200, 0, 0200);
1715           for (i = 0; i < n_char_ranges; i += 2)
1716             {
1717               int c1 = char_ranges[i];
1718               unsigned lim2 = char_ranges[i + 1] + 1;
1719
1720               for (; c1 < lim2; c1++)
1721                 {
1722                   int b = CHAR_TO_BYTE_SAFE (c1);
1723                   if (b >= 0)
1724                     fastmap[b] = 1;
1725                 }
1726             }
1727         }
1728     }
1729
1730   /* If ^ was the first character, complement the fastmap.  */
1731   if (negate)
1732     {
1733       if (! multibyte)
1734         for (i = 0; i < sizeof fastmap; i++)
1735           fastmap[i] ^= 1;
1736       else
1737         {
1738           for (i = 0; i < 0200; i++)
1739             fastmap[i] ^= 1;
1740           /* All non-ASCII chars possibly match.  */
1741           for (; i < sizeof fastmap; i++)
1742             fastmap[i] = 1;
1743         }
1744     }
1745
1746   {
1747     EMACS_INT start_point = PT;
1748     EMACS_INT pos = PT;
1749     EMACS_INT pos_byte = PT_BYTE;
1750     unsigned char *p = PT_ADDR, *endp, *stop;
1751
1752     if (forwardp)
1753       {
1754         endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1755         stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1756       }
1757     else
1758       {
1759         endp = CHAR_POS_ADDR (XINT (lim));
1760         stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1761       }
1762
1763     immediate_quit = 1;
1764     /* This code may look up syntax tables using macros that rely on the
1765        gl_state object.  To make sure this object is not out of date,
1766        let's initialize it manually.
1767        We ignore syntax-table text-properties for now, since that's
1768        what we've done in the past.  */
1769     SETUP_BUFFER_SYNTAX_TABLE ();
1770     if (forwardp)
1771       {
1772         if (multibyte)
1773           while (1)
1774             {
1775               int nbytes;
1776
1777               if (p >= stop)
1778                 {
1779                   if (p >= endp)
1780                     break;
1781                   p = GAP_END_ADDR;
1782                   stop = endp;
1783                 }
1784               c = STRING_CHAR_AND_LENGTH (p, nbytes);
1785               if (! NILP (iso_classes) && in_classes (c, iso_classes))
1786                 {
1787                   if (negate)
1788                     break;
1789                   else
1790                     goto fwd_ok;
1791                 }
1792
1793               if (! fastmap[*p])
1794                 break;
1795               if (! ASCII_CHAR_P (c))
1796                 {
1797                   /* As we are looking at a multibyte character, we
1798                      must look up the character in the table
1799                      CHAR_RANGES.  If there's no data in the table,
1800                      that character is not what we want to skip.  */
1801
1802                   /* The following code do the right thing even if
1803                      n_char_ranges is zero (i.e. no data in
1804                      CHAR_RANGES).  */
1805                   for (i = 0; i < n_char_ranges; i += 2)
1806                     if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1807                       break;
1808                   if (!(negate ^ (i < n_char_ranges)))
1809                     break;
1810                 }
1811             fwd_ok:
1812               p += nbytes, pos++, pos_byte += nbytes;
1813             }
1814         else
1815           while (1)
1816             {
1817               if (p >= stop)
1818                 {
1819                   if (p >= endp)
1820                     break;
1821                   p = GAP_END_ADDR;
1822                   stop = endp;
1823                 }
1824
1825               if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1826                 {
1827                   if (negate)
1828                     break;
1829                   else
1830                     goto fwd_unibyte_ok;
1831                 }
1832
1833               if (!fastmap[*p])
1834                 break;
1835             fwd_unibyte_ok:
1836               p++, pos++, pos_byte++;
1837             }
1838       }
1839     else
1840       {
1841         if (multibyte)
1842           while (1)
1843             {
1844               unsigned char *prev_p;
1845
1846               if (p <= stop)
1847                 {
1848                   if (p <= endp)
1849                     break;
1850                   p = GPT_ADDR;
1851                   stop = endp;
1852                 }
1853               prev_p = p;
1854               while (--p >= stop && ! CHAR_HEAD_P (*p));
1855               c = STRING_CHAR (p);
1856
1857               if (! NILP (iso_classes) && in_classes (c, iso_classes))
1858                 {
1859                   if (negate)
1860                     break;
1861                   else
1862                     goto back_ok;
1863                 }
1864
1865               if (! fastmap[*p])
1866                 break;
1867               if (! ASCII_CHAR_P (c))
1868                 {
1869                   /* See the comment in the previous similar code.  */
1870                   for (i = 0; i < n_char_ranges; i += 2)
1871                     if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1872                       break;
1873                   if (!(negate ^ (i < n_char_ranges)))
1874                     break;
1875                 }
1876             back_ok:
1877               pos--, pos_byte -= prev_p - p;
1878             }
1879         else
1880           while (1)
1881             {
1882               if (p <= stop)
1883                 {
1884                   if (p <= endp)
1885                     break;
1886                   p = GPT_ADDR;
1887                   stop = endp;
1888                 }
1889
1890               if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1891                 {
1892                   if (negate)
1893                     break;
1894                   else
1895                     goto back_unibyte_ok;
1896                 }
1897
1898               if (!fastmap[p[-1]])
1899                 break;
1900             back_unibyte_ok:
1901               p--, pos--, pos_byte--;
1902             }
1903       }
1904
1905     SET_PT_BOTH (pos, pos_byte);
1906     immediate_quit = 0;
1907
1908     return make_number (PT - start_point);
1909   }
1910 }
1911
1912
1913 static Lisp_Object
1914 skip_syntaxes (int forwardp, Lisp_Object string, Lisp_Object lim)
1915 {
1916   register unsigned int c;
1917   unsigned char fastmap[0400];
1918   int negate = 0;
1919   register EMACS_INT i, i_byte;
1920   int multibyte;
1921   EMACS_INT size_byte;
1922   unsigned char *str;
1923
1924   CHECK_STRING (string);
1925
1926   if (NILP (lim))
1927     XSETINT (lim, forwardp ? ZV : BEGV);
1928   else
1929     CHECK_NUMBER_COERCE_MARKER (lim);
1930
1931   /* In any case, don't allow scan outside bounds of buffer.  */
1932   if (XINT (lim) > ZV)
1933     XSETFASTINT (lim, ZV);
1934   if (XINT (lim) < BEGV)
1935     XSETFASTINT (lim, BEGV);
1936
1937   if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
1938     return make_number (0);
1939
1940   multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1941                && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1942
1943   memset (fastmap, 0, sizeof fastmap);
1944
1945   if (SBYTES (string) > SCHARS (string))
1946     /* As this is very rare case (syntax spec is ASCII only), don't
1947        consider efficiency.  */
1948     string = string_make_unibyte (string);
1949
1950   str = SDATA (string);
1951   size_byte = SBYTES (string);
1952
1953   i_byte = 0;
1954   if (i_byte < size_byte
1955       && SREF (string, 0) == '^')
1956     {
1957       negate = 1; i_byte++;
1958     }
1959
1960   /* Find the syntaxes specified and set their elements of fastmap.  */
1961
1962   while (i_byte < size_byte)
1963     {
1964       c = str[i_byte++];
1965       fastmap[syntax_spec_code[c]] = 1;
1966     }
1967
1968   /* If ^ was the first character, complement the fastmap.  */
1969   if (negate)
1970     for (i = 0; i < sizeof fastmap; i++)
1971       fastmap[i] ^= 1;
1972
1973   {
1974     EMACS_INT start_point = PT;
1975     EMACS_INT pos = PT;
1976     EMACS_INT pos_byte = PT_BYTE;
1977     unsigned char *p = PT_ADDR, *endp, *stop;
1978
1979     if (forwardp)
1980       {
1981         endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1982         stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1983       }
1984     else
1985       {
1986         endp = CHAR_POS_ADDR (XINT (lim));
1987         stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1988       }
1989
1990     immediate_quit = 1;
1991     SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1992     if (forwardp)
1993       {
1994         if (multibyte)
1995           {
1996             while (1)
1997               {
1998                 int nbytes;
1999
2000                 if (p >= stop)
2001                   {
2002                     if (p >= endp)
2003                       break;
2004                     p = GAP_END_ADDR;
2005                     stop = endp;
2006                   }
2007                 c = STRING_CHAR_AND_LENGTH (p, nbytes);
2008                 if (! fastmap[(int) SYNTAX (c)])
2009                   break;
2010                 p += nbytes, pos++, pos_byte += nbytes;
2011                 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2012               }
2013           }
2014         else
2015           {
2016             while (1)
2017               {
2018                 if (p >= stop)
2019                   {
2020                     if (p >= endp)
2021                       break;
2022                     p = GAP_END_ADDR;
2023                     stop = endp;
2024                   }
2025                 if (! fastmap[(int) SYNTAX (*p)])
2026                   break;
2027                 p++, pos++, pos_byte++;
2028                 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2029               }
2030           }
2031       }
2032     else
2033       {
2034         if (multibyte)
2035           {
2036             while (1)
2037               {
2038                 unsigned char *prev_p;
2039
2040                 if (p <= stop)
2041                   {
2042                     if (p <= endp)
2043                       break;
2044                     p = GPT_ADDR;
2045                     stop = endp;
2046                   }
2047                 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2048                 prev_p = p;
2049                 while (--p >= stop && ! CHAR_HEAD_P (*p));
2050                 c = STRING_CHAR (p);
2051                 if (! fastmap[(int) SYNTAX (c)])
2052                   break;
2053                 pos--, pos_byte -= prev_p - p;
2054               }
2055           }
2056         else
2057           {
2058             while (1)
2059               {
2060                 if (p <= stop)
2061                   {
2062                     if (p <= endp)
2063                       break;
2064                     p = GPT_ADDR;
2065                     stop = endp;
2066                   }
2067                 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2068                 if (! fastmap[(int) SYNTAX (p[-1])])
2069                   break;
2070                 p--, pos--, pos_byte--;
2071               }
2072           }
2073       }
2074
2075     SET_PT_BOTH (pos, pos_byte);
2076     immediate_quit = 0;
2077
2078     return make_number (PT - start_point);
2079   }
2080 }
2081
2082 /* Return 1 if character C belongs to one of the ISO classes
2083    in the list ISO_CLASSES.  Each class is represented by an
2084    integer which is its type according to re_wctype.  */
2085
2086 static int
2087 in_classes (int c, Lisp_Object iso_classes)
2088 {
2089   int fits_class = 0;
2090
2091   while (CONSP (iso_classes))
2092     {
2093       Lisp_Object elt;
2094       elt = XCAR (iso_classes);
2095       iso_classes = XCDR (iso_classes);
2096
2097       if (re_iswctype (c, XFASTINT (elt)))
2098         fits_class = 1;
2099     }
2100
2101   return fits_class;
2102 }
2103 \f
2104 /* Jump over a comment, assuming we are at the beginning of one.
2105    FROM is the current position.
2106    FROM_BYTE is the bytepos corresponding to FROM.
2107    Do not move past STOP (a charpos).
2108    The comment over which we have to jump is of style STYLE
2109      (either SYNTAX_FLAGS_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
2110    NESTING should be positive to indicate the nesting at the beginning
2111      for nested comments and should be zero or negative else.
2112      ST_COMMENT_STYLE cannot be nested.
2113    PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2114      (or 0 If the search cannot start in the middle of a two-character).
2115
2116    If successful, return 1 and store the charpos of the comment's end
2117    into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2118    Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
2119    corresponding bytepos into *BYTEPOS_PTR and the current nesting
2120    (as defined for state.incomment) in *INCOMMENT_PTR.
2121
2122    The comment end is the last character of the comment rather than the
2123      character just after the comment.
2124
2125    Global syntax data is assumed to initially be valid for FROM and
2126    remains valid for forward search starting at the returned position. */
2127
2128 static int
2129 forw_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop,
2130               int nesting, int style, int prev_syntax,
2131               EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr,
2132               int *incomment_ptr)
2133 {
2134   register int c, c1;
2135   register enum syntaxcode code;
2136   register int syntax, other_syntax;
2137
2138   if (nesting <= 0) nesting = -1;
2139
2140   /* Enter the loop in the middle so that we find
2141      a 2-char comment ender if we start in the middle of it.  */
2142   syntax = prev_syntax;
2143   if (syntax != 0) goto forw_incomment;
2144
2145   while (1)
2146     {
2147       if (from == stop)
2148         {
2149           *incomment_ptr = nesting;
2150           *charpos_ptr = from;
2151           *bytepos_ptr = from_byte;
2152           return 0;
2153         }
2154       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2155       syntax = SYNTAX_WITH_FLAGS (c);
2156       code = syntax & 0xff;
2157       if (code == Sendcomment
2158           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2159           && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2160               (nesting > 0 && --nesting == 0) : nesting < 0))
2161         /* we have encountered a comment end of the same style
2162            as the comment sequence which began this comment
2163            section */
2164         break;
2165       if (code == Scomment_fence
2166           && style == ST_COMMENT_STYLE)
2167         /* we have encountered a comment end of the same style
2168            as the comment sequence which began this comment
2169            section.  */
2170         break;
2171       if (nesting > 0
2172           && code == Scomment
2173           && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2174           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2175         /* we have encountered a nested comment of the same style
2176            as the comment sequence which began this comment section */
2177         nesting++;
2178       INC_BOTH (from, from_byte);
2179       UPDATE_SYNTAX_TABLE_FORWARD (from);
2180
2181     forw_incomment:
2182       if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2183           && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2184               other_syntax = SYNTAX_WITH_FLAGS (c1),
2185               SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2186           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2187           && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2188                SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2189               ? nesting > 0 : nesting < 0))
2190         {
2191           if (--nesting <= 0)
2192             /* we have encountered a comment end of the same style
2193                as the comment sequence which began this comment
2194                section */
2195             break;
2196           else
2197             {
2198               INC_BOTH (from, from_byte);
2199               UPDATE_SYNTAX_TABLE_FORWARD (from);
2200             }
2201         }
2202       if (nesting > 0
2203           && from < stop
2204           && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2205           && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2206               other_syntax = SYNTAX_WITH_FLAGS (c1),
2207               SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2208               && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2209           && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2210               SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2211         /* we have encountered a nested comment of the same style
2212            as the comment sequence which began this comment
2213            section */
2214         {
2215           INC_BOTH (from, from_byte);
2216           UPDATE_SYNTAX_TABLE_FORWARD (from);
2217           nesting++;
2218         }
2219     }
2220   *charpos_ptr = from;
2221   *bytepos_ptr = from_byte;
2222   return 1;
2223 }
2224
2225 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2226        doc: /*
2227 Move forward across up to COUNT comments.  If COUNT is negative, move backward.
2228 Stop scanning if we find something other than a comment or whitespace.
2229 Set point to where scanning stops.
2230 If COUNT comments are found as expected, with nothing except whitespace
2231 between them, return t; otherwise return nil.  */)
2232   (Lisp_Object count)
2233 {
2234   register EMACS_INT from;
2235   EMACS_INT from_byte;
2236   register EMACS_INT stop;
2237   register int c, c1;
2238   register enum syntaxcode code;
2239   int comstyle = 0;         /* style of comment encountered */
2240   int comnested = 0;        /* whether the comment is nestable or not */
2241   int found;
2242   EMACS_INT count1;
2243   EMACS_INT out_charpos, out_bytepos;
2244   int dummy;
2245
2246   CHECK_NUMBER (count);
2247   count1 = XINT (count);
2248   stop = count1 > 0 ? ZV : BEGV;
2249
2250   immediate_quit = 1;
2251   QUIT;
2252
2253   from = PT;
2254   from_byte = PT_BYTE;
2255
2256   SETUP_SYNTAX_TABLE (from, count1);
2257   while (count1 > 0)
2258     {
2259       do
2260         {
2261           int comstart_first, syntax, other_syntax;
2262
2263           if (from == stop)
2264             {
2265               SET_PT_BOTH (from, from_byte);
2266               immediate_quit = 0;
2267               return Qnil;
2268             }
2269           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2270           syntax = SYNTAX_WITH_FLAGS (c);
2271           code = SYNTAX (c);
2272           comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2273           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2274           comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2275           INC_BOTH (from, from_byte);
2276           UPDATE_SYNTAX_TABLE_FORWARD (from);
2277           if (from < stop && comstart_first
2278               && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2279                   other_syntax = SYNTAX_WITH_FLAGS (c1),
2280                   SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2281             {
2282               /* We have encountered a comment start sequence and we
2283                  are ignoring all text inside comments.  We must record
2284                  the comment style this sequence begins so that later,
2285                  only a comment end of the same style actually ends
2286                  the comment section.  */
2287               code = Scomment;
2288               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2289               comnested
2290                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2291               INC_BOTH (from, from_byte);
2292               UPDATE_SYNTAX_TABLE_FORWARD (from);
2293             }
2294         }
2295       while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2296
2297       if (code == Scomment_fence)
2298         comstyle = ST_COMMENT_STYLE;
2299       else if (code != Scomment)
2300         {
2301           immediate_quit = 0;
2302           DEC_BOTH (from, from_byte);
2303           SET_PT_BOTH (from, from_byte);
2304           return Qnil;
2305         }
2306       /* We're at the start of a comment.  */
2307       found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2308                             &out_charpos, &out_bytepos, &dummy);
2309       from = out_charpos; from_byte = out_bytepos;
2310       if (!found)
2311         {
2312           immediate_quit = 0;
2313           SET_PT_BOTH (from, from_byte);
2314           return Qnil;
2315         }
2316       INC_BOTH (from, from_byte);
2317       UPDATE_SYNTAX_TABLE_FORWARD (from);
2318       /* We have skipped one comment.  */
2319       count1--;
2320     }
2321
2322   while (count1 < 0)
2323     {
2324       while (1)
2325         {
2326           int quoted, syntax;
2327
2328           if (from <= stop)
2329             {
2330               SET_PT_BOTH (BEGV, BEGV_BYTE);
2331               immediate_quit = 0;
2332               return Qnil;
2333             }
2334
2335           DEC_BOTH (from, from_byte);
2336           /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from).  */
2337           quoted = char_quoted (from, from_byte);
2338           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2339           syntax = SYNTAX_WITH_FLAGS (c);
2340           code = SYNTAX (c);
2341           comstyle = 0;
2342           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2343           if (code == Sendcomment)
2344             comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2345           if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2346               && prev_char_comend_first (from, from_byte)
2347               && !char_quoted (from - 1, dec_bytepos (from_byte)))
2348             {
2349               int other_syntax;
2350               /* We must record the comment style encountered so that
2351                  later, we can match only the proper comment begin
2352                  sequence of the same style.  */
2353               DEC_BOTH (from, from_byte);
2354               code = Sendcomment;
2355               /* Calling char_quoted, above, set up global syntax position
2356                  at the new value of FROM.  */
2357               c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2358               other_syntax = SYNTAX_WITH_FLAGS (c1);
2359               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2360               comnested
2361                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2362             }
2363
2364           if (code == Scomment_fence)
2365             {
2366               /* Skip until first preceding unquoted comment_fence.  */
2367               int fence_found = 0;
2368               EMACS_INT ini = from, ini_byte = from_byte;
2369
2370               while (1)
2371                 {
2372                   DEC_BOTH (from, from_byte);
2373                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2374                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2375                   if (SYNTAX (c) == Scomment_fence
2376                       && !char_quoted (from, from_byte))
2377                     {
2378                       fence_found = 1;
2379                       break;
2380                     }
2381                   else if (from == stop)
2382                     break;
2383                 }
2384               if (fence_found == 0)
2385                 {
2386                   from = ini;           /* Set point to ini + 1.  */
2387                   from_byte = ini_byte;
2388                   goto leave;
2389                 }
2390               else
2391                 /* We have skipped one comment.  */
2392                 break;
2393             }
2394           else if (code == Sendcomment)
2395             {
2396               found = back_comment (from, from_byte, stop, comnested, comstyle,
2397                                     &out_charpos, &out_bytepos);
2398               if (found == -1)
2399                 {
2400                   if (c == '\n')
2401                     /* This end-of-line is not an end-of-comment.
2402                        Treat it like a whitespace.
2403                        CC-mode (and maybe others) relies on this behavior.  */
2404                     ;
2405                   else
2406                     {
2407                       /* Failure: we should go back to the end of this
2408                          not-quite-endcomment.  */
2409                       if (SYNTAX (c) != code)
2410                         /* It was a two-char Sendcomment.  */
2411                         INC_BOTH (from, from_byte);
2412                       goto leave;
2413                     }
2414                 }
2415               else
2416                 {
2417                   /* We have skipped one comment.  */
2418                   from = out_charpos, from_byte = out_bytepos;
2419                   break;
2420                 }
2421             }
2422           else if (code != Swhitespace || quoted)
2423             {
2424             leave:
2425               immediate_quit = 0;
2426               INC_BOTH (from, from_byte);
2427               SET_PT_BOTH (from, from_byte);
2428               return Qnil;
2429             }
2430         }
2431
2432       count1++;
2433     }
2434
2435   SET_PT_BOTH (from, from_byte);
2436   immediate_quit = 0;
2437   return Qt;
2438 }
2439 \f
2440 /* Return syntax code of character C if C is an ASCII character
2441    or `multibyte_symbol_p' is zero.  Otherwise, return Ssymbol.  */
2442
2443 #define SYNTAX_WITH_MULTIBYTE_CHECK(c)          \
2444   ((ASCII_CHAR_P (c) || !multibyte_symbol_p)    \
2445    ? SYNTAX (c) : Ssymbol)
2446
2447 static Lisp_Object
2448 scan_lists (register EMACS_INT from, EMACS_INT count, EMACS_INT depth, int sexpflag)
2449 {
2450   Lisp_Object val;
2451   register EMACS_INT stop = count > 0 ? ZV : BEGV;
2452   register int c, c1;
2453   int stringterm;
2454   int quoted;
2455   int mathexit = 0;
2456   register enum syntaxcode code, temp_code;
2457   int min_depth = depth;    /* Err out if depth gets less than this.  */
2458   int comstyle = 0;         /* style of comment encountered */
2459   int comnested = 0;        /* whether the comment is nestable or not */
2460   EMACS_INT temp_pos;
2461   EMACS_INT last_good = from;
2462   int found;
2463   EMACS_INT from_byte;
2464   EMACS_INT out_bytepos, out_charpos;
2465   int temp, dummy;
2466   int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2467
2468   if (depth > 0) min_depth = 0;
2469
2470   if (from > ZV) from = ZV;
2471   if (from < BEGV) from = BEGV;
2472
2473   from_byte = CHAR_TO_BYTE (from);
2474
2475   immediate_quit = 1;
2476   QUIT;
2477
2478   SETUP_SYNTAX_TABLE (from, count);
2479   while (count > 0)
2480     {
2481       while (from < stop)
2482         {
2483           int comstart_first, prefix, syntax, other_syntax;
2484           UPDATE_SYNTAX_TABLE_FORWARD (from);
2485           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2486           syntax = SYNTAX_WITH_FLAGS (c);
2487           code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2488           comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2489           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2490           comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2491           prefix = SYNTAX_FLAGS_PREFIX (syntax);
2492           if (depth == min_depth)
2493             last_good = from;
2494           INC_BOTH (from, from_byte);
2495           UPDATE_SYNTAX_TABLE_FORWARD (from);
2496           if (from < stop && comstart_first
2497               && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2498                   other_syntax = SYNTAX_WITH_FLAGS (c),
2499                   SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2500               && parse_sexp_ignore_comments)
2501             {
2502               /* we have encountered a comment start sequence and we
2503                  are ignoring all text inside comments.  We must record
2504                  the comment style this sequence begins so that later,
2505                  only a comment end of the same style actually ends
2506                  the comment section */
2507               code = Scomment;
2508               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2509               comnested
2510                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2511               INC_BOTH (from, from_byte);
2512               UPDATE_SYNTAX_TABLE_FORWARD (from);
2513             }
2514
2515           if (prefix)
2516             continue;
2517
2518           switch (SWITCH_ENUM_CAST (code))
2519             {
2520             case Sescape:
2521             case Scharquote:
2522               if (from == stop)
2523                 goto lose;
2524               INC_BOTH (from, from_byte);
2525               /* treat following character as a word constituent */
2526             case Sword:
2527             case Ssymbol:
2528               if (depth || !sexpflag) break;
2529               /* This word counts as a sexp; return at end of it.  */
2530               while (from < stop)
2531                 {
2532                   UPDATE_SYNTAX_TABLE_FORWARD (from);
2533
2534                   /* Some compilers can't handle this inside the switch.  */
2535                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2536                   temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2537                   switch (temp)
2538                     {
2539                     case Scharquote:
2540                     case Sescape:
2541                       INC_BOTH (from, from_byte);
2542                       if (from == stop)
2543                         goto lose;
2544                       break;
2545                     case Sword:
2546                     case Ssymbol:
2547                     case Squote:
2548                       break;
2549                     default:
2550                       goto done;
2551                     }
2552                   INC_BOTH (from, from_byte);
2553                 }
2554               goto done;
2555
2556             case Scomment_fence:
2557               comstyle = ST_COMMENT_STYLE;
2558               /* FALLTHROUGH */
2559             case Scomment:
2560               if (!parse_sexp_ignore_comments) break;
2561               UPDATE_SYNTAX_TABLE_FORWARD (from);
2562               found = forw_comment (from, from_byte, stop,
2563                                     comnested, comstyle, 0,
2564                                     &out_charpos, &out_bytepos, &dummy);
2565               from = out_charpos, from_byte = out_bytepos;
2566               if (!found)
2567                 {
2568                   if (depth == 0)
2569                     goto done;
2570                   goto lose;
2571                 }
2572               INC_BOTH (from, from_byte);
2573               UPDATE_SYNTAX_TABLE_FORWARD (from);
2574               break;
2575
2576             case Smath:
2577               if (!sexpflag)
2578                 break;
2579               if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2580                 {
2581                   INC_BOTH (from, from_byte);
2582                 }
2583               if (mathexit)
2584                 {
2585                   mathexit = 0;
2586                   goto close1;
2587                 }
2588               mathexit = 1;
2589
2590             case Sopen:
2591               if (!++depth) goto done;
2592               break;
2593
2594             case Sclose:
2595             close1:
2596               if (!--depth) goto done;
2597               if (depth < min_depth)
2598                 xsignal3 (Qscan_error,
2599                           build_string ("Containing expression ends prematurely"),
2600                           make_number (last_good), make_number (from));
2601               break;
2602
2603             case Sstring:
2604             case Sstring_fence:
2605               temp_pos = dec_bytepos (from_byte);
2606               stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2607               while (1)
2608                 {
2609                   if (from >= stop)
2610                     goto lose;
2611                   UPDATE_SYNTAX_TABLE_FORWARD (from);
2612                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2613                   if (code == Sstring
2614                       ? (c == stringterm
2615                          && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2616                       : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
2617                     break;
2618
2619                   /* Some compilers can't handle this inside the switch.  */
2620                   temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2621                   switch (temp)
2622                     {
2623                     case Scharquote:
2624                     case Sescape:
2625                       INC_BOTH (from, from_byte);
2626                     }
2627                   INC_BOTH (from, from_byte);
2628                 }
2629               INC_BOTH (from, from_byte);
2630               if (!depth && sexpflag) goto done;
2631               break;
2632             default:
2633               /* Ignore whitespace, punctuation, quote, endcomment.  */
2634               break;
2635             }
2636         }
2637
2638       /* Reached end of buffer.  Error if within object, return nil if between */
2639       if (depth)
2640         goto lose;
2641
2642       immediate_quit = 0;
2643       return Qnil;
2644
2645       /* End of object reached */
2646     done:
2647       count--;
2648     }
2649
2650   while (count < 0)
2651     {
2652       while (from > stop)
2653         {
2654           int syntax;
2655           DEC_BOTH (from, from_byte);
2656           UPDATE_SYNTAX_TABLE_BACKWARD (from);
2657           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2658           syntax= SYNTAX_WITH_FLAGS (c);
2659           code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2660           if (depth == min_depth)
2661             last_good = from;
2662           comstyle = 0;
2663           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2664           if (code == Sendcomment)
2665             comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2666           if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2667               && prev_char_comend_first (from, from_byte)
2668               && parse_sexp_ignore_comments)
2669             {
2670               /* We must record the comment style encountered so that
2671                  later, we can match only the proper comment begin
2672                  sequence of the same style.  */
2673               int c2, other_syntax;
2674               DEC_BOTH (from, from_byte);
2675               UPDATE_SYNTAX_TABLE_BACKWARD (from);
2676               code = Sendcomment;
2677               c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2678               other_syntax = SYNTAX_WITH_FLAGS (c2);
2679               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2680               comnested
2681                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2682             }
2683
2684           /* Quoting turns anything except a comment-ender
2685              into a word character.  Note that this cannot be true
2686              if we decremented FROM in the if-statement above.  */
2687           if (code != Sendcomment && char_quoted (from, from_byte))
2688             {
2689               DEC_BOTH (from, from_byte);
2690               code = Sword;
2691             }
2692           else if (SYNTAX_FLAGS_PREFIX (syntax))
2693             continue;
2694
2695           switch (SWITCH_ENUM_CAST (code))
2696             {
2697             case Sword:
2698             case Ssymbol:
2699             case Sescape:
2700             case Scharquote:
2701               if (depth || !sexpflag) break;
2702               /* This word counts as a sexp; count object finished
2703                  after passing it.  */
2704               while (from > stop)
2705                 {
2706                   temp_pos = from_byte;
2707                   if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
2708                     DEC_POS (temp_pos);
2709                   else
2710                     temp_pos--;
2711                   UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2712                   c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2713                   temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2714                   /* Don't allow comment-end to be quoted.  */
2715                   if (temp_code == Sendcomment)
2716                     goto done2;
2717                   quoted = char_quoted (from - 1, temp_pos);
2718                   if (quoted)
2719                     {
2720                       DEC_BOTH (from, from_byte);
2721                       temp_pos = dec_bytepos (temp_pos);
2722                       UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2723                     }
2724                   c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2725                   temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2726                   if (! (quoted || temp_code == Sword
2727                          || temp_code == Ssymbol
2728                          || temp_code == Squote))
2729                     goto done2;
2730                   DEC_BOTH (from, from_byte);
2731                 }
2732               goto done2;
2733
2734             case Smath:
2735               if (!sexpflag)
2736                 break;
2737               temp_pos = dec_bytepos (from_byte);
2738               UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2739               if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2740                 DEC_BOTH (from, from_byte);
2741               if (mathexit)
2742                 {
2743                   mathexit = 0;
2744                   goto open2;
2745                 }
2746               mathexit = 1;
2747
2748             case Sclose:
2749               if (!++depth) goto done2;
2750               break;
2751
2752             case Sopen:
2753             open2:
2754               if (!--depth) goto done2;
2755               if (depth < min_depth)
2756                 xsignal3 (Qscan_error,
2757                           build_string ("Containing expression ends prematurely"),
2758                           make_number (last_good), make_number (from));
2759               break;
2760
2761             case Sendcomment:
2762               if (!parse_sexp_ignore_comments)
2763                 break;
2764               found = back_comment (from, from_byte, stop, comnested, comstyle,
2765                                     &out_charpos, &out_bytepos);
2766               /* FIXME:  if found == -1, then it really wasn't a comment-end.
2767                  For single-char Sendcomment, we can't do much about it apart
2768                  from skipping the char.
2769                  For 2-char endcomments, we could try again, taking both
2770                  chars as separate entities, but it's a lot of trouble
2771                  for very little gain, so we don't bother either.  -sm */
2772               if (found != -1)
2773                 from = out_charpos, from_byte = out_bytepos;
2774               break;
2775
2776             case Scomment_fence:
2777             case Sstring_fence:
2778               while (1)
2779                 {
2780                   if (from == stop)
2781                     goto lose;
2782                   DEC_BOTH (from, from_byte);
2783                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2784                   if (!char_quoted (from, from_byte)
2785                       && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2786                           SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2787                     break;
2788                 }
2789               if (code == Sstring_fence && !depth && sexpflag) goto done2;
2790               break;
2791
2792             case Sstring:
2793               stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2794               while (1)
2795                 {
2796                   if (from == stop)
2797                     goto lose;
2798                   DEC_BOTH (from, from_byte);
2799                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2800                   if (!char_quoted (from, from_byte)
2801                       && (stringterm
2802                           == (c = FETCH_CHAR_AS_MULTIBYTE (from_byte)))
2803                       && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2804                     break;
2805                 }
2806               if (!depth && sexpflag) goto done2;
2807               break;
2808             default:
2809               /* Ignore whitespace, punctuation, quote, endcomment.  */
2810               break;
2811             }
2812         }
2813
2814       /* Reached start of buffer.  Error if within object, return nil if between */
2815       if (depth)
2816         goto lose;
2817
2818       immediate_quit = 0;
2819       return Qnil;
2820
2821     done2:
2822       count++;
2823     }
2824
2825
2826   immediate_quit = 0;
2827   XSETFASTINT (val, from);
2828   return val;
2829
2830  lose:
2831   xsignal3 (Qscan_error,
2832             build_string ("Unbalanced parentheses"),
2833             make_number (last_good), make_number (from));
2834 }
2835
2836 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2837        doc: /* Scan from character number FROM by COUNT lists.
2838 Returns the character number of the position thus found.
2839
2840 If DEPTH is nonzero, paren depth begins counting from that value,
2841 only places where the depth in parentheses becomes zero
2842 are candidates for stopping; COUNT such places are counted.
2843 Thus, a positive value for DEPTH means go out levels.
2844
2845 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2846
2847 If the beginning or end of (the accessible part of) the buffer is reached
2848 and the depth is wrong, an error is signaled.
2849 If the depth is right but the count is not used up, nil is returned.  */)
2850   (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
2851 {
2852   CHECK_NUMBER (from);
2853   CHECK_NUMBER (count);
2854   CHECK_NUMBER (depth);
2855
2856   return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2857 }
2858
2859 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2860        doc: /* Scan from character number FROM by COUNT balanced expressions.
2861 If COUNT is negative, scan backwards.
2862 Returns the character number of the position thus found.
2863
2864 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2865
2866 If the beginning or end of (the accessible part of) the buffer is reached
2867 in the middle of a parenthetical grouping, an error is signaled.
2868 If the beginning or end is reached between groupings
2869 but before count is used up, nil is returned.  */)
2870   (Lisp_Object from, Lisp_Object count)
2871 {
2872   CHECK_NUMBER (from);
2873   CHECK_NUMBER (count);
2874
2875   return scan_lists (XINT (from), XINT (count), 0, 1);
2876 }
2877
2878 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2879        0, 0, 0,
2880        doc: /* Move point backward over any number of chars with prefix syntax.
2881 This includes chars with "quote" or "prefix" syntax (' or p).  */)
2882   (void)
2883 {
2884   EMACS_INT beg = BEGV;
2885   EMACS_INT opoint = PT;
2886   EMACS_INT opoint_byte = PT_BYTE;
2887   EMACS_INT pos = PT;
2888   EMACS_INT pos_byte = PT_BYTE;
2889   int c;
2890
2891   if (pos <= beg)
2892     {
2893       SET_PT_BOTH (opoint, opoint_byte);
2894
2895       return Qnil;
2896     }
2897
2898   SETUP_SYNTAX_TABLE (pos, -1);
2899
2900   DEC_BOTH (pos, pos_byte);
2901
2902   while (!char_quoted (pos, pos_byte)
2903          /* Previous statement updates syntax table.  */
2904          && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
2905              || SYNTAX_PREFIX (c)))
2906     {
2907       opoint = pos;
2908       opoint_byte = pos_byte;
2909
2910       if (pos + 1 > beg)
2911         DEC_BOTH (pos, pos_byte);
2912     }
2913
2914   SET_PT_BOTH (opoint, opoint_byte);
2915
2916   return Qnil;
2917 }
2918 \f
2919 /* Parse forward from FROM / FROM_BYTE to END,
2920    assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2921    and return a description of the state of the parse at END.
2922    If STOPBEFORE is nonzero, stop at the start of an atom.
2923    If COMMENTSTOP is 1, stop at the start of a comment.
2924    If COMMENTSTOP is -1, stop at the start or end of a comment,
2925    after the beginning of a string, or after the end of a string.  */
2926
2927 static void
2928 scan_sexps_forward (struct lisp_parse_state *stateptr,
2929                     EMACS_INT from, EMACS_INT from_byte, EMACS_INT end,
2930                     int targetdepth, int stopbefore,
2931                     Lisp_Object oldstate, int commentstop)
2932 {
2933   struct lisp_parse_state state;
2934
2935   register enum syntaxcode code;
2936   int c1;
2937   int comnested;
2938   struct level { int last, prev; };
2939   struct level levelstart[100];
2940   register struct level *curlevel = levelstart;
2941   struct level *endlevel = levelstart + 100;
2942   register int depth;   /* Paren depth of current scanning location.
2943                            level - levelstart equals this except
2944                            when the depth becomes negative.  */
2945   int mindepth;         /* Lowest DEPTH value seen.  */
2946   int start_quoted = 0;         /* Nonzero means starting after a char quote */
2947   Lisp_Object tem;
2948   EMACS_INT prev_from;          /* Keep one character before FROM.  */
2949   EMACS_INT prev_from_byte;
2950   int prev_from_syntax;
2951   int boundary_stop = commentstop == -1;
2952   int nofence;
2953   int found;
2954   EMACS_INT out_bytepos, out_charpos;
2955   int temp;
2956
2957   prev_from = from;
2958   prev_from_byte = from_byte;
2959   if (from != BEGV)
2960     DEC_BOTH (prev_from, prev_from_byte);
2961
2962   /* Use this macro instead of `from++'.  */
2963 #define INC_FROM                                \
2964 do { prev_from = from;                          \
2965      prev_from_byte = from_byte;                \
2966      temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte);   \
2967      prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
2968      INC_BOTH (from, from_byte);                \
2969      if (from < end)                            \
2970        UPDATE_SYNTAX_TABLE_FORWARD (from);      \
2971   } while (0)
2972
2973   immediate_quit = 1;
2974   QUIT;
2975
2976   if (NILP (oldstate))
2977     {
2978       depth = 0;
2979       state.instring = -1;
2980       state.incomment = 0;
2981       state.comstyle = 0;       /* comment style a by default.  */
2982       state.comstr_start = -1;  /* no comment/string seen.  */
2983     }
2984   else
2985     {
2986       tem = Fcar (oldstate);
2987       if (!NILP (tem))
2988         depth = XINT (tem);
2989       else
2990         depth = 0;
2991
2992       oldstate = Fcdr (oldstate);
2993       oldstate = Fcdr (oldstate);
2994       oldstate = Fcdr (oldstate);
2995       tem = Fcar (oldstate);
2996       /* Check whether we are inside string_fence-style string: */
2997       state.instring = (!NILP (tem)
2998                         ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2999                         : -1);
3000
3001       oldstate = Fcdr (oldstate);
3002       tem = Fcar (oldstate);
3003       state.incomment = (!NILP (tem)
3004                          ? (INTEGERP (tem) ? XINT (tem) : -1)
3005                          : 0);
3006
3007       oldstate = Fcdr (oldstate);
3008       tem = Fcar (oldstate);
3009       start_quoted = !NILP (tem);
3010
3011       /* if the eighth element of the list is nil, we are in comment
3012          style a.  If it is non-nil, we are in comment style b */
3013       oldstate = Fcdr (oldstate);
3014       oldstate = Fcdr (oldstate);
3015       tem = Fcar (oldstate);
3016       state.comstyle = (NILP (tem)
3017                         ? 0
3018                         : (EQ (tem, Qsyntax_table)
3019                            ? ST_COMMENT_STYLE
3020                            : INTEGERP (tem) ? XINT (tem) : 1));
3021
3022       oldstate = Fcdr (oldstate);
3023       tem = Fcar (oldstate);
3024       state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
3025       oldstate = Fcdr (oldstate);
3026       tem = Fcar (oldstate);
3027       while (!NILP (tem))               /* >= second enclosing sexps.  */
3028         {
3029           /* curlevel++->last ran into compiler bug on Apollo */
3030           curlevel->last = XINT (Fcar (tem));
3031           if (++curlevel == endlevel)
3032             curlevel--; /* error ("Nesting too deep for parser"); */
3033           curlevel->prev = -1;
3034           curlevel->last = -1;
3035           tem = Fcdr (tem);
3036         }
3037     }
3038   state.quoted = 0;
3039   mindepth = depth;
3040
3041   curlevel->prev = -1;
3042   curlevel->last = -1;
3043
3044   SETUP_SYNTAX_TABLE (prev_from, 1);
3045   temp = FETCH_CHAR (prev_from_byte);
3046   prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3047   UPDATE_SYNTAX_TABLE_FORWARD (from);
3048
3049   /* Enter the loop at a place appropriate for initial state.  */
3050
3051   if (state.incomment)
3052     goto startincomment;
3053   if (state.instring >= 0)
3054     {
3055       nofence = state.instring != ST_STRING_STYLE;
3056       if (start_quoted)
3057         goto startquotedinstring;
3058       goto startinstring;
3059     }
3060   else if (start_quoted)
3061     goto startquoted;
3062
3063   while (from < end)
3064     {
3065       int syntax;
3066       INC_FROM;
3067       code = prev_from_syntax & 0xff;
3068
3069       if (from < end
3070           && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3071           && (c1 = FETCH_CHAR (from_byte),
3072               syntax = SYNTAX_WITH_FLAGS (c1),
3073               SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3074         /* Duplicate code to avoid a complex if-expression
3075            which causes trouble for the SGI compiler.  */
3076         {
3077           /* Record the comment style we have entered so that only
3078              the comment-end sequence of the same style actually
3079              terminates the comment section.  */
3080           state.comstyle
3081             = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3082           comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
3083           comnested = comnested || SYNTAX_FLAGS_COMMENT_NESTED (syntax);
3084           state.incomment = comnested ? 1 : -1;
3085           state.comstr_start = prev_from;
3086           INC_FROM;
3087           code = Scomment;
3088         }
3089       else if (code == Scomment_fence)
3090         {
3091           /* Record the comment style we have entered so that only
3092              the comment-end sequence of the same style actually
3093              terminates the comment section.  */
3094           state.comstyle = ST_COMMENT_STYLE;
3095           state.incomment = -1;
3096           state.comstr_start = prev_from;
3097           code = Scomment;
3098         }
3099       else if (code == Scomment)
3100         {
3101           state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3102           state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3103                              1 : -1);
3104           state.comstr_start = prev_from;
3105         }
3106
3107       if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3108         continue;
3109       switch (SWITCH_ENUM_CAST (code))
3110         {
3111         case Sescape:
3112         case Scharquote:
3113           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3114           curlevel->last = prev_from;
3115         startquoted:
3116           if (from == end) goto endquoted;
3117           INC_FROM;
3118           goto symstarted;
3119           /* treat following character as a word constituent */
3120         case Sword:
3121         case Ssymbol:
3122           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3123           curlevel->last = prev_from;
3124         symstarted:
3125           while (from < end)
3126             {
3127               /* Some compilers can't handle this inside the switch.  */
3128               temp = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3129               temp = SYNTAX (temp);
3130               switch (temp)
3131                 {
3132                 case Scharquote:
3133                 case Sescape:
3134                   INC_FROM;
3135                   if (from == end) goto endquoted;
3136                   break;
3137                 case Sword:
3138                 case Ssymbol:
3139                 case Squote:
3140                   break;
3141                 default:
3142                   goto symdone;
3143                 }
3144               INC_FROM;
3145             }
3146         symdone:
3147           curlevel->prev = curlevel->last;
3148           break;
3149
3150         case Scomment_fence: /* Can't happen because it's handled above.  */
3151         case Scomment:
3152           if (commentstop || boundary_stop) goto done;
3153         startincomment:
3154           /* The (from == BEGV) test was to enter the loop in the middle so
3155              that we find a 2-char comment ender even if we start in the
3156              middle of it.  We don't want to do that if we're just at the
3157              beginning of the comment (think of (*) ... (*)).  */
3158           found = forw_comment (from, from_byte, end,
3159                                 state.incomment, state.comstyle,
3160                                 (from == BEGV || from < state.comstr_start + 3)
3161                                 ? 0 : prev_from_syntax,
3162                                 &out_charpos, &out_bytepos, &state.incomment);
3163           from = out_charpos; from_byte = out_bytepos;
3164           /* Beware!  prev_from and friends are invalid now.
3165              Luckily, the `done' doesn't use them and the INC_FROM
3166              sets them to a sane value without looking at them. */
3167           if (!found) goto done;
3168           INC_FROM;
3169           state.incomment = 0;
3170           state.comstyle = 0;   /* reset the comment style */
3171           if (boundary_stop) goto done;
3172           break;
3173
3174         case Sopen:
3175           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3176           depth++;
3177           /* curlevel++->last ran into compiler bug on Apollo */
3178           curlevel->last = prev_from;
3179           if (++curlevel == endlevel)
3180             curlevel--; /* error ("Nesting too deep for parser"); */
3181           curlevel->prev = -1;
3182           curlevel->last = -1;
3183           if (targetdepth == depth) goto done;
3184           break;
3185
3186         case Sclose:
3187           depth--;
3188           if (depth < mindepth)
3189             mindepth = depth;
3190           if (curlevel != levelstart)
3191             curlevel--;
3192           curlevel->prev = curlevel->last;
3193           if (targetdepth == depth) goto done;
3194           break;
3195
3196         case Sstring:
3197         case Sstring_fence:
3198           state.comstr_start = from - 1;
3199           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3200           curlevel->last = prev_from;
3201           state.instring = (code == Sstring
3202                             ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3203                             : ST_STRING_STYLE);
3204           if (boundary_stop) goto done;
3205         startinstring:
3206           {
3207             nofence = state.instring != ST_STRING_STYLE;
3208
3209             while (1)
3210               {
3211                 int c;
3212
3213                 if (from >= end) goto done;
3214                 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3215                 /* Some compilers can't handle this inside the switch.  */
3216                 temp = SYNTAX (c);
3217
3218                 /* Check TEMP here so that if the char has
3219                    a syntax-table property which says it is NOT
3220                    a string character, it does not end the string.  */
3221                 if (nofence && c == state.instring && temp == Sstring)
3222                   break;
3223
3224                 switch (temp)
3225                   {
3226                   case Sstring_fence:
3227                     if (!nofence) goto string_end;
3228                     break;
3229                   case Scharquote:
3230                   case Sescape:
3231                     INC_FROM;
3232                   startquotedinstring:
3233                     if (from >= end) goto endquoted;
3234                   }
3235                 INC_FROM;
3236               }
3237           }
3238         string_end:
3239           state.instring = -1;
3240           curlevel->prev = curlevel->last;
3241           INC_FROM;
3242           if (boundary_stop) goto done;
3243           break;
3244
3245         case Smath:
3246           /* FIXME: We should do something with it.  */
3247           break;
3248         default:
3249           /* Ignore whitespace, punctuation, quote, endcomment.  */
3250           break;
3251         }
3252     }
3253   goto done;
3254
3255  stop:   /* Here if stopping before start of sexp. */
3256   from = prev_from;    /* We have just fetched the char that starts it; */
3257   goto done; /* but return the position before it. */
3258
3259  endquoted:
3260   state.quoted = 1;
3261  done:
3262   state.depth = depth;
3263   state.mindepth = mindepth;
3264   state.thislevelstart = curlevel->prev;
3265   state.prevlevelstart
3266     = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3267   state.location = from;
3268   state.levelstarts = Qnil;
3269   while (curlevel > levelstart)
3270     state.levelstarts = Fcons (make_number ((--curlevel)->last),
3271                                state.levelstarts);
3272   immediate_quit = 0;
3273
3274   *stateptr = state;
3275 }
3276
3277 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3278        doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3279 Parsing stops at TO or when certain criteria are met;
3280  point is set to where parsing stops.
3281 If fifth arg OLDSTATE is omitted or nil,
3282  parsing assumes that FROM is the beginning of a function.
3283 Value is a list of elements describing final state of parsing:
3284  0. depth in parens.
3285  1. character address of start of innermost containing list; nil if none.
3286  2. character address of start of last complete sexp terminated.
3287  3. non-nil if inside a string.
3288     (it is the character that will terminate the string,
3289      or t if the string should be terminated by a generic string delimiter.)
3290  4. nil if outside a comment, t if inside a non-nestable comment,
3291     else an integer (the current comment nesting).
3292  5. t if following a quote character.
3293  6. the minimum paren-depth encountered during this scan.
3294  7. style of comment, if any.
3295  8. character address of start of comment or string; nil if not in one.
3296  9. Intermediate data for continuation of parsing (subject to change).
3297 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3298 in parentheses becomes equal to TARGETDEPTH.
3299 Fourth arg STOPBEFORE non-nil means stop when come to
3300  any character that starts a sexp.
3301 Fifth arg OLDSTATE is a list like what this function returns.
3302  It is used to initialize the state of the parse.  Elements number 1, 2, 6
3303  and 8 are ignored.
3304 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3305  If it is symbol `syntax-table', stop after the start of a comment or a
3306  string, or after end of a comment or a string.  */)
3307   (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth, Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3308 {
3309   struct lisp_parse_state state;
3310   int target;
3311
3312   if (!NILP (targetdepth))
3313     {
3314       CHECK_NUMBER (targetdepth);
3315       target = XINT (targetdepth);
3316     }
3317   else
3318     target = -100000;           /* We won't reach this depth */
3319
3320   validate_region (&from, &to);
3321   scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3322                       XINT (to),
3323                       target, !NILP (stopbefore), oldstate,
3324                       (NILP (commentstop)
3325                        ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3326
3327   SET_PT (state.location);
3328
3329   return Fcons (make_number (state.depth),
3330            Fcons (state.prevlevelstart < 0
3331                   ? Qnil : make_number (state.prevlevelstart),
3332              Fcons (state.thislevelstart < 0
3333                     ? Qnil : make_number (state.thislevelstart),
3334                Fcons (state.instring >= 0
3335                       ? (state.instring == ST_STRING_STYLE
3336                          ? Qt : make_number (state.instring)) : Qnil,
3337                  Fcons (state.incomment < 0 ? Qt :
3338                         (state.incomment == 0 ? Qnil :
3339                          make_number (state.incomment)),
3340                    Fcons (state.quoted ? Qt : Qnil,
3341                      Fcons (make_number (state.mindepth),
3342                        Fcons ((state.comstyle
3343                                ? (state.comstyle == ST_COMMENT_STYLE
3344                                   ? Qsyntax_table
3345                                   : make_number (state.comstyle))
3346                                : Qnil),
3347                               Fcons (((state.incomment
3348                                        || (state.instring >= 0))
3349                                       ? make_number (state.comstr_start)
3350                                       : Qnil),
3351                                      Fcons (state.levelstarts, Qnil))))))))));
3352 }
3353 \f
3354 void
3355 init_syntax_once (void)
3356 {
3357   register int i, c;
3358   Lisp_Object temp;
3359
3360   /* This has to be done here, before we call Fmake_char_table.  */
3361   Qsyntax_table = intern_c_string ("syntax-table");
3362   staticpro (&Qsyntax_table);
3363
3364   /* Intern_C_String this now in case it isn't already done.
3365      Setting this variable twice is harmless.
3366      But don't staticpro it here--that is done in alloc.c.  */
3367   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
3368
3369   /* Create objects which can be shared among syntax tables.  */
3370   Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
3371   for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3372     XVECTOR (Vsyntax_code_object)->contents[i]
3373       = Fcons (make_number (i), Qnil);
3374
3375   /* Now we are ready to set up this property, so we can
3376      create syntax tables.  */
3377   Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3378
3379   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3380
3381   Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3382
3383   /* Control characters should not be whitespace.  */
3384   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3385   for (i = 0; i <= ' ' - 1; i++)
3386     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3387   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3388
3389   /* Except that a few really are whitespace.  */
3390   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3391   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3392   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3393   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3394   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3395   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3396
3397   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3398   for (i = 'a'; i <= 'z'; i++)
3399     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3400   for (i = 'A'; i <= 'Z'; i++)
3401     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3402   for (i = '0'; i <= '9'; i++)
3403     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3404
3405   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3406   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3407
3408   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3409                         Fcons (make_number (Sopen), make_number (')')));
3410   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3411                         Fcons (make_number (Sclose), make_number ('(')));
3412   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3413                         Fcons (make_number (Sopen), make_number (']')));
3414   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3415                         Fcons (make_number (Sclose), make_number ('[')));
3416   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3417                         Fcons (make_number (Sopen), make_number ('}')));
3418   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3419                         Fcons (make_number (Sclose), make_number ('{')));
3420   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3421                         Fcons (make_number ((int) Sstring), Qnil));
3422   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3423                         Fcons (make_number ((int) Sescape), Qnil));
3424
3425   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
3426   for (i = 0; i < 10; i++)
3427     {
3428       c = "_-+*/&|<>="[i];
3429       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3430     }
3431
3432   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3433   for (i = 0; i < 12; i++)
3434     {
3435       c = ".,;:?!#@~^'`"[i];
3436       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3437     }
3438
3439   /* All multibyte characters have syntax `word' by default.  */
3440   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3441   char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3442 }
3443
3444 void
3445 syms_of_syntax (void)
3446 {
3447   Qsyntax_table_p = intern_c_string ("syntax-table-p");
3448   staticpro (&Qsyntax_table_p);
3449
3450   staticpro (&Vsyntax_code_object);
3451
3452   staticpro (&gl_state.object);
3453   staticpro (&gl_state.global_code);
3454   staticpro (&gl_state.current_syntax_table);
3455   staticpro (&gl_state.old_prop);
3456
3457   /* Defined in regex.c */
3458   staticpro (&re_match_object);
3459
3460   Qscan_error = intern_c_string ("scan-error");
3461   staticpro (&Qscan_error);
3462   Fput (Qscan_error, Qerror_conditions,
3463         pure_cons (Qscan_error, pure_cons (Qerror, Qnil)));
3464   Fput (Qscan_error, Qerror_message,
3465         make_pure_c_string ("Scan error"));
3466
3467   DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
3468                doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace.  */);
3469
3470   DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
3471                doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3472 Otherwise, that text property is simply ignored.
3473 See the info node `(elisp)Syntax Properties' for a description of the
3474 `syntax-table' property.  */);
3475
3476   words_include_escapes = 0;
3477   DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
3478                doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words.  */);
3479
3480   DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
3481                doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol.  */);
3482   multibyte_syntax_as_symbol = 0;
3483
3484   DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3485                open_paren_in_column_0_is_defun_start,
3486                doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun.  */);
3487   open_paren_in_column_0_is_defun_start = 1;
3488
3489
3490   DEFVAR_LISP ("find-word-boundary-function-table",
3491                Vfind_word_boundary_function_table,
3492                doc: /*
3493 Char table of functions to search for the word boundary.
3494 Each function is called with two arguments; POS and LIMIT.
3495 POS and LIMIT are character positions in the current buffer.
3496
3497 If POS is less than LIMIT, POS is at the first character of a word,
3498 and the return value of a function is a position after the last
3499 character of that word.
3500
3501 If POS is not less than LIMIT, POS is at the last character of a word,
3502 and the return value of a function is a position at the first
3503 character of that word.
3504
3505 In both cases, LIMIT bounds the search. */);
3506   Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3507
3508   defsubr (&Ssyntax_table_p);
3509   defsubr (&Ssyntax_table);
3510   defsubr (&Sstandard_syntax_table);
3511   defsubr (&Scopy_syntax_table);
3512   defsubr (&Sset_syntax_table);
3513   defsubr (&Schar_syntax);
3514   defsubr (&Smatching_paren);
3515   defsubr (&Sstring_to_syntax);
3516   defsubr (&Smodify_syntax_entry);
3517   defsubr (&Sinternal_describe_syntax_value);
3518
3519   defsubr (&Sforward_word);
3520
3521   defsubr (&Sskip_chars_forward);
3522   defsubr (&Sskip_chars_backward);
3523   defsubr (&Sskip_syntax_forward);
3524   defsubr (&Sskip_syntax_backward);
3525
3526   defsubr (&Sforward_comment);
3527   defsubr (&Sscan_lists);
3528   defsubr (&Sscan_sexps);
3529   defsubr (&Sbackward_prefix_chars);
3530   defsubr (&Sparse_partial_sexp);
3531 }