]> code.delx.au - gnu-emacs/blobdiff - src/syntax.h
2002-08-10 Andrew Choi <akochoi@shaw.ca>
[gnu-emacs] / src / syntax.h
index bcd90562d299f36892db2d1a63b2b923a2909595..4cbd2e2edf9070b6452daf85c90d03ea0de347ec 100644 (file)
@@ -1,5 +1,5 @@
 /* Declarations having to do with GNU Emacs syntax tables.
-   Copyright (C) 1985, 1993, 1994, 1997 Free Software Foundation, Inc.
+   Copyright (C) 1985, 93, 94, 97, 1998 Free Software Foundation, Inc.
 
 This file is part of GNU Emacs.
 
@@ -20,8 +20,7 @@ Boston, MA 02111-1307, USA.  */
 
 
 extern Lisp_Object Qsyntax_table_p;
-extern Lisp_Object Fsyntax_table_p (), Fsyntax_table (), Fset_syntax_table ();
-extern void update_syntax_table ();
+extern void update_syntax_table P_ ((int, int, int, Lisp_Object));
 
 /* The standard syntax table is stored where it will automatically
    be used in all new buffers.  */
@@ -50,9 +49,9 @@ enum syntaxcode
     Sendcomment, /* for a comment-ending character */
     Sinherit,    /* use the standard syntax table for this character */
     Scomment_fence, /* Starts/ends comment which is delimited on the
-                      other side by a char with the same syntaxcode.  */
+                      other side by any char with the same syntaxcode.  */
     Sstring_fence,  /* Starts/ends string which is delimited on the
-                      other side by a char with the same syntaxcode.  */
+                      other side by any char with the same syntaxcode.  */
     Smax        /* Upper bound on codes that are meaningful */
   };
 
@@ -81,7 +80,7 @@ enum syntaxcode
      temp; })
 #else
 extern Lisp_Object syntax_temp;
-extern Lisp_Object syntax_parent_lookup ();
+extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int));
 
 #define SYNTAX_ENTRY_FOLLOW_PARENT(table, c)           \
   (syntax_temp = XCHAR_TABLE (table)->contents[(c)],   \
@@ -106,11 +105,12 @@ extern Lisp_Object syntax_parent_lookup ();
 #  define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
 #endif
 
-#define SYNTAX_ENTRY_INT(c)                                            \
-  ((c) < CHAR_TABLE_SINGLE_BYTE_SLOTS                          \
+#define SYNTAX_ENTRY_INT(c)                            \
+  ((c) < CHAR_TABLE_SINGLE_BYTE_SLOTS                  \
    ? SYNTAX_ENTRY_FOLLOW_PARENT (CURRENT_SYNTAX_TABLE, \
-                                (unsigned char) (c))           \
-   : Faref (CURRENT_SYNTAX_TABLE, make_number ((c))))
+                                (unsigned char) (c))   \
+   : Faref (CURRENT_SYNTAX_TABLE,                      \
+           make_number (c)))
 
 /* Extract the information from the entry for character C
    in the current syntax table.  */
@@ -120,48 +120,50 @@ extern Lisp_Object syntax_parent_lookup ();
   ({ Lisp_Object temp;                                                 \
      temp = SYNTAX_ENTRY (c);                                          \
      (CONSP (temp)                                                     \
-      ? (enum syntaxcode) (XINT (XCONS (temp)->car) & 0xff)            \
+      ? (enum syntaxcode) (XINT (XCAR (temp)) & 0xff)          \
       : Swhitespace); })
 
 #define SYNTAX_WITH_FLAGS(c)                                           \
   ({ Lisp_Object temp;                                                 \
      temp = SYNTAX_ENTRY (c);                                          \
      (CONSP (temp)                                                     \
-      ? XINT (XCONS (temp)->car)                                       \
+      ? XINT (XCAR (temp))                                     \
       : (int) Swhitespace); })
 
 #define SYNTAX_MATCH(c)                                                        \
   ({ Lisp_Object temp;                                                 \
      temp = SYNTAX_ENTRY (c);                                          \
      (CONSP (temp)                                                     \
-      ? XCONS (temp)->cdr                                              \
+      ? XCDR (temp)                                            \
       : Qnil); })
 #else
 #define SYNTAX(c)                                                      \
   (syntax_temp = SYNTAX_ENTRY ((c)),                                   \
    (CONSP (syntax_temp)                                                        \
-    ? (enum syntaxcode) (XINT (XCONS (syntax_temp)->car) & 0xff)       \
+    ? (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff)     \
     : Swhitespace))
 
 #define SYNTAX_WITH_FLAGS(c)                                           \
   (syntax_temp = SYNTAX_ENTRY ((c)),                                   \
    (CONSP (syntax_temp)                                                        \
-    ? XINT (XCONS (syntax_temp)->car)                                  \
+    ? XINT (XCAR (syntax_temp))                                        \
     : (int) Swhitespace))
 
 #define SYNTAX_MATCH(c)                                                        \
   (syntax_temp = SYNTAX_ENTRY ((c)),                                   \
    (CONSP (syntax_temp)                                                        \
-    ? XCONS (syntax_temp)->cdr                                         \
+    ? XCDR (syntax_temp)                                               \
     : Qnil))
 #endif
 
-/* Then there are six single-bit flags that have the following meanings:
+/* Then there are seven single-bit flags that have the following meanings:
   1. This character is the first of a two-character comment-start sequence.
   2. This character is the second of a two-character comment-start sequence.
   3. This character is the first of a two-character comment-end sequence.
   4. This character is the second of a two-character comment-end sequence.
   5. This character is a prefix, for backward-prefix-chars.
+  6. see below
+  7. This character is part of a nestable comment sequence.
   Note that any two-character sequence whose first character has flag 1
   and whose second character has flag 2 will be interpreted as a comment start.
 
@@ -172,6 +174,8 @@ extern Lisp_Object syntax_parent_lookup ();
   Style a is always the default.
   */
 
+/* These macros extract a particular flag for a given character.  */
+
 #define SYNTAX_COMSTART_FIRST(c) ((SYNTAX_WITH_FLAGS (c) >> 16) & 1)
 
 #define SYNTAX_COMSTART_SECOND(c) ((SYNTAX_WITH_FLAGS (c) >> 17) & 1)
@@ -182,9 +186,27 @@ extern Lisp_Object syntax_parent_lookup ();
 
 #define SYNTAX_PREFIX(c) ((SYNTAX_WITH_FLAGS (c) >> 20) & 1)
 
-/* extract the comment style bit from the syntax table entry */
 #define SYNTAX_COMMENT_STYLE(c) ((SYNTAX_WITH_FLAGS (c) >> 21) & 1)
 
+#define SYNTAX_COMMENT_NESTED(c) ((SYNTAX_WITH_FLAGS (c) >> 22) & 1)
+
+/* These macros extract specific flags from FLAGS, an integer that holds
+   the syntax code (its low 8 bits) and the flag bits (bits 16 to 22).  */
+
+#define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)
+
+#define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)
+
+#define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)
+
+#define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)
+
+#define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)
+
+#define SYNTAX_FLAGS_COMMENT_STYLE(flags) (((flags) >> 21) & 1)
+
+#define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
+
 /* This array, indexed by a character, contains the syntax code which that
  character signifies (as a char).  For example,
  (enum syntaxcode) syntax_spec_code['w'] is Sword.  */
@@ -195,25 +217,61 @@ extern unsigned char syntax_spec_code[0400];
 
 extern char syntax_code_spec[16];
 
-/* Make syntax table state (gl_state) good for POS, assuming it is
-   currently good for a position before POS.  */
-#define UPDATE_SYNTAX_TABLE_FORWARD(pos)                               \
-               ((pos) >= gl_state.e_property ?                         \
-                ( update_syntax_table ((pos), 1, 0), 1 ) : 0)
-
-
-/* Make syntax table state (gl_state) good for POS, assuming it is
-   currently good for a position after POS.  */
-#define UPDATE_SYNTAX_TABLE_BACKWARD(pos)                              \
-               ((pos) <= gl_state.b_property ?                         \
-                ( update_syntax_table ((pos), -1, 0), 1 ) : 0)
-
-/* Make syntax table good for POS. */
-#define UPDATE_SYNTAX_TABLE(pos)                                       \
-               ((pos) <= gl_state.b_property ?                         \
-                ( update_syntax_table ((pos), -1, 0), 1 ) :            \
-                ( (pos) >= gl_state.e_property ?                       \
-                  ( update_syntax_table ((pos), 1, 0), 1 ) : 0))
+/* Translate BYTEPOS, a byte offset into the object that
+   SETUP_SYNTAX_TABLE_FOR_OBJECT recorded in gl_state, into the
+   matching character position.  The result is intended as the
+   argument of the UPDATE_SYNTAX_TABLE... macros.  Those macros do
+   nothing when parse_sexp_lookup_properties is 0, so in that case
+   the conversion is skipped and the value is simply 0.  */
+
+#define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos)                             \
+  (! parse_sexp_lookup_properties ? 0                                  \
+   : (STRINGP (gl_state.object)                                        \
+      ? string_byte_to_char (gl_state.object, (bytepos))               \
+      : (BUFFERP (gl_state.object)                                     \
+         ? (buf_bytepos_to_charpos (XBUFFER (gl_state.object),         \
+              (bytepos) + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1) \
+            - BUF_BEGV (XBUFFER (gl_state.object)) + 1)                \
+         : (NILP (gl_state.object)                                     \
+            ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1      \
+            : (bytepos)))))
+
+/* Bring gl_state up to date for CHARPOS, given that it is currently
+   good for a position before CHARPOS.  Value is 1 if updated, else 0.  */
+
+#define UPDATE_SYNTAX_TABLE_FORWARD(charpos)                   \
+  (! parse_sexp_lookup_properties                              \
+   || (charpos) < gl_state.e_property                          \
+   ? 0                                                         \
+   : (update_syntax_table ((charpos) + gl_state.offset, 1, 0,  \
+                           gl_state.object),                   \
+      1))
+
+/* Bring gl_state up to date for CHARPOS, given that it is currently
+   good for a position after CHARPOS.  Value is 1 if updated, else 0.  */
+
+#define UPDATE_SYNTAX_TABLE_BACKWARD(charpos)                  \
+  (! parse_sexp_lookup_properties                              \
+   || (charpos) >= gl_state.b_property                         \
+   ? 0                                                         \
+   : (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
+                           gl_state.object),                   \
+      1))
+
+/* Bring gl_state up to date for CHARPOS, whichever side of the
+   currently valid range it lies on.  Value is 1 if updated, else 0.  */
+
+#define UPDATE_SYNTAX_TABLE(charpos)                           \
+  (! parse_sexp_lookup_properties ? 0                          \
+   : (charpos) < gl_state.b_property                           \
+   ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
+                           gl_state.object),                   \
+      1)                                                       \
+   : (charpos) >= gl_state.e_property                          \
+   ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0,  \
+                           gl_state.object),                   \
+      1)                                                       \
+   : 0)
 
 /* This macro should be called with FROM at the start of forward
    search, or after the last position of the backward search.  It
@@ -223,40 +281,70 @@ extern char syntax_code_spec[16];
    Sign of COUNT gives the direction of the search.
  */
 
-#define SETUP_SYNTAX_TABLE(from,count)                                 \
-  gl_state.b_property = BEGV - 1;                                      \
-  gl_state.e_property = ZV + 1;                                                \
-  gl_state.use_global = 0;                                             \
-  gl_state.current_syntax_table = current_buffer->syntax_table;                \
-  if (parse_sexp_lookup_properties)                                    \
-      update_syntax_table ((count) > 0 ? (from) : (from) - 1, (count), 1, Qnil);
+#define SETUP_SYNTAX_TABLE(FROM, COUNT)                                 \
+if (1)                                                                  \
+  {                                                                     \
+    gl_state.object = Qnil;                                             \
+    gl_state.offset = 0;                                                \
+    gl_state.use_global = 0;                                            \
+    gl_state.current_syntax_table = current_buffer->syntax_table;       \
+    gl_state.b_property = BEGV;                                         \
+    gl_state.e_property = ZV + 1;                                       \
+    if (parse_sexp_lookup_properties                                    \
+        && ((COUNT) > 0 || (FROM) > BEGV)) /* else (FROM) - 1 < BEGV */ \
+      update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1, (COUNT),  \
+                           1, Qnil);                                    \
+  }                                                                     \
+else
 
 /* Same as above, but in OBJECT.  If OBJECT is nil, use current buffer.
-   If it is t, ignore properties altogether. */
-
-#define SETUP_SYNTAX_TABLE_FOR_OBJECT(object, from, count)             \
-  if (BUFFERP (object))                                                        \
-    {                                                                  \
-      gl_state.b_property = BEGV - 1;                                  \
-      gl_state.e_property = ZV;                                                \
-    }                                                                  \
-  else if (EQ (object, Qt))                                            \
-    {                                                                  \
-      gl_state.b_property = - 1;                                       \
-      gl_state.e_property = 1500000000;                                        \
-    }                                                                  \
-  else                                                                 \
-    {                                                                  \
-      gl_state.b_property = -1;                                                \
-      gl_state.e_property = 1 + XSTRING (object)->size;                        \
-    }                                                                  \
-  gl_state.use_global = 0;                                             \
-  gl_state.current_syntax_table = current_buffer->syntax_table;                \
-  if (parse_sexp_lookup_properties)                                    \
-      update_syntax_table (count > 0 ? (from) : (from) - 1, count, 1, object);
+   If it is t, ignore properties altogether.
+
+   This is meant for regex.c to use.  For buffers, regex.c passes arguments
+   to the UPDATE_SYNTAX_TABLE macros which are relative to BEGV.
+   So if it is a buffer, we set the offset field to BEGV - 1.  */
+
+#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT)             \
+if (1)                                                                 \
+  {                                                                    \
+    gl_state.object = (OBJECT);                                        \
+    if (BUFFERP (gl_state.object))                                     \
+      { /* A buffer: positions count from 1 at its BEGV.  */           \
+        struct buffer *buf = XBUFFER (gl_state.object);                \
+        gl_state.b_property = 1;                                       \
+        gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;       \
+        gl_state.offset = BUF_BEGV (buf) - 1;                          \
+      }                                                                \
+    else if (NILP (gl_state.object))                                   \
+      { /* nil: the current buffer, handled the same way.  */          \
+        gl_state.b_property = 1;                                       \
+        gl_state.e_property = ZV - BEGV + 1;                           \
+        gl_state.offset = BEGV - 1;                                    \
+      }                                                                \
+    else if (EQ (gl_state.object, Qt))                                 \
+      { /* t: ignore properties; use a huge end bound.  */             \
+        gl_state.b_property = 0;                                       \
+        gl_state.e_property = 1500000000;                              \
+        gl_state.offset = 0;                                           \
+      }                                                                \
+    else                                                               \
+      { /* Otherwise OBJECT is taken to be a string.  */               \
+        gl_state.b_property = 0;                                       \
+        gl_state.e_property = 1 + SCHARS (gl_state.object);            \
+        gl_state.offset = 0;                                           \
+      }                                                                \
+    gl_state.use_global = 0;                                           \
+    gl_state.current_syntax_table = current_buffer->syntax_table;      \
+    if (parse_sexp_lookup_properties)                                  \
+      update_syntax_table (((FROM) + gl_state.offset                   \
+                            + ((COUNT) > 0 ? 0 : -1)),                 \
+                           (COUNT), 1, gl_state.object);               \
+  }                                                                    \
+else
 
 struct gl_state_s
 {
+  Lisp_Object object;                  /* The object we are scanning. */
   int start;                           /* Where to stop. */
   int stop;                            /* Where to stop. */
   int use_global;                      /* Whether to use global_code
@@ -264,8 +352,7 @@ struct gl_state_s
   Lisp_Object global_code;             /* Syntax code of current char. */
   Lisp_Object current_syntax_table;    /* Syntax table for current pos. */
   Lisp_Object old_prop;                        /* Syntax-table prop at prev pos. */
-  int b_property;                      /* Last index where c_s_t is 
-                                          not valid. */
+  int b_property;                      /* First index where c_s_t is valid. */
   int e_property;                      /* First index where c_s_t is
                                           not valid. */
   INTERVAL forward_i;                  /* Where to start lookup on forward */
@@ -275,10 +362,12 @@ struct gl_state_s
                                           and possibly at the
                                           intervals too, depending
                                           on: */
-  char left_ok;
-  char right_ok;
+  /* Offset for positions specified to UPDATE_SYNTAX_TABLE.  */
+  int offset;
 };
 
 extern struct gl_state_s gl_state;
 extern int parse_sexp_lookup_properties;
-extern INTERVAL interval_of();
+extern INTERVAL interval_of P_ ((int, Lisp_Object));
+
+extern int scan_words P_ ((int, int));