]> code.delx.au - gnu-emacs/blob - src/syntax.h
Merge from emacs--devo--0
[gnu-emacs] / src / syntax.h
1 /* Declarations having to do with GNU Emacs syntax tables.
2 Copyright (C) 1985, 1993, 1994, 1997, 1998, 2001, 2002, 2003, 2004,
3 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22
23 extern Lisp_Object Qsyntax_table_p;
24 extern void update_syntax_table P_ ((int, int, int, Lisp_Object));
25
26 /* The standard syntax table is stored where it will automatically
27 be used in all new buffers. */
28 #define Vstandard_syntax_table buffer_defaults.syntax_table
29
/* Each element of a syntax table (a chartable) is a cons cell
   (CODE+FLAGS . MATCHING-CHAR).  MATCHING-CHAR may be nil when the
   character is not a kind of parenthesis.

   The code occupies the low 8 bits of CODE+FLAGS and is one of the
   following values:  */

enum syntaxcode
  {
    Swhitespace    = 0,   /* whitespace character */
    Spunct         = 1,   /* random punctuation character */
    Sword          = 2,   /* word constituent */
    Ssymbol        = 3,   /* symbol constituent but not word constituent */
    Sopen          = 4,   /* beginning delimiter */
    Sclose         = 5,   /* ending delimiter */
    Squote         = 6,   /* prefix character like Lisp ' */
    Sstring        = 7,   /* string-grouping character like Lisp " */
    Smath          = 8,   /* delimiter like $ in TeX */
    Sescape        = 9,   /* character that begins a C-style escape */
    Scharquote     = 10,  /* character that quotes the following character */
    Scomment       = 11,  /* comment-starting character */
    Sendcomment    = 12,  /* comment-ending character */
    Sinherit       = 13,  /* use the standard syntax table for this character */
    Scomment_fence = 14,  /* starts/ends a comment which is delimited on the
                             other side by any char with the same syntaxcode */
    Sstring_fence  = 15,  /* starts/ends a string which is delimited on the
                             other side by any char with the same syntaxcode */
    Smax           = 16   /* upper bound on codes that are meaningful */
  };
58
/* Store VAL as the syntax entry of character C in syntax table TABLE.
   This is a thin wrapper around CHAR_TABLE_SET.  */

#define SET_RAW_SYNTAX_ENTRY(table, c, val) \
  CHAR_TABLE_SET ((table), (c), (val))
63
/* Store VAL as the syntax entry of every character in RANGE in syntax
   table TABLE.  RANGE is a cons (FROM . TO) giving the first and last
   characters of the range.  Implemented with Fset_char_table_range.  */

#define SET_RAW_SYNTAX_ENTRY_RANGE(table, range, val) \
  Fset_char_table_range ((table), (range), (val))
69
/* SYNTAX_ENTRY_INT looks character C up directly in
   CURRENT_SYNTAX_TABLE.  */

#define SYNTAX_ENTRY_INT(c) CHAR_TABLE_REF (CURRENT_SYNTAX_TABLE, (c))

/* CURRENT_SYNTAX_TABLE is the syntax table valid for the current
   position: either the buffer's syntax table, or the syntax table
   found in text properties (kept in gl_state).

   SYNTAX_ENTRY fetches the entry for character C from that table, or,
   when SYNTAX_ENTRY_VIA_PROPERTY is defined and gl_state.use_global
   is set, from the globally kept gl_state.global_code.  */

#ifndef SYNTAX_ENTRY_VIA_PROPERTY
# define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
# define SYNTAX_ENTRY SYNTAX_ENTRY_INT
#else
# define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
# define SYNTAX_ENTRY(c) \
  (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
#endif
87
/* Accessors for the entry of character C in the current syntax table.

   SYNTAX (c)             the syntax code (low 8 bits of CODE+FLAGS),
                          as an enum syntaxcode;
   SYNTAX_WITH_FLAGS (c)  the whole CODE+FLAGS integer;
   SYNTAX_MATCH (c)       the MATCHING-CHAR part of the entry.

   When the entry is not a cons, the first two yield Swhitespace and
   the last yields Qnil.

   With GNU C the entry is held in a block-local temporary inside a
   statement expression; other compilers go through the shared
   variable `syntax_temp', which is only declared here.  */

#ifndef __GNUC__

extern Lisp_Object syntax_temp;

#define SYNTAX(c)                                                       \
  (syntax_temp = SYNTAX_ENTRY ((c)),                                    \
   (CONSP (syntax_temp)                                                 \
    ? (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff)              \
    : Swhitespace))

#define SYNTAX_WITH_FLAGS(c)                                            \
  (syntax_temp = SYNTAX_ENTRY ((c)),                                    \
   (CONSP (syntax_temp)                                                 \
    ? XINT (XCAR (syntax_temp))                                         \
    : (int) Swhitespace))

#define SYNTAX_MATCH(c)                                                 \
  (syntax_temp = SYNTAX_ENTRY ((c)),                                    \
   (CONSP (syntax_temp)                                                 \
    ? XCDR (syntax_temp)                                                \
    : Qnil))

#else /* __GNUC__ */

#define SYNTAX(c)                                                       \
  ({ Lisp_Object _syntax_temp = SYNTAX_ENTRY (c);                       \
     (CONSP (_syntax_temp)                                              \
      ? (enum syntaxcode) (XINT (XCAR (_syntax_temp)) & 0xff)           \
      : Swhitespace); })

#define SYNTAX_WITH_FLAGS(c)                                            \
  ({ Lisp_Object _syntax_temp = SYNTAX_ENTRY (c);                       \
     (CONSP (_syntax_temp)                                              \
      ? XINT (XCAR (_syntax_temp))                                      \
      : (int) Swhitespace); })

#define SYNTAX_MATCH(c)                                                 \
  ({ Lisp_Object _syntax_temp = SYNTAX_ENTRY (c);                       \
     (CONSP (_syntax_temp)                                              \
      ? XCDR (_syntax_temp)                                             \
      : Qnil); })

#endif /* __GNUC__ */
132
/* Then there are seven single-bit flags that have the following meanings:
  1. This character is the first of a two-character comment-start sequence.
  2. This character is the second of a two-character comment-start sequence.
  3. This character is the first of a two-character comment-end sequence.
  4. This character is the second of a two-character comment-end sequence.
  5. This character is a prefix, for backward-prefix-chars.
  6. see below
  7. This character is part of a nestable comment sequence.
  Note that any two-character sequence whose first character has flag 1
  and whose second character has flag 2 will be interpreted as a comment start.

  Bit 6 is used to discriminate between two different comment styles.
  Languages such as C++ allow two orthogonal comment start/end pairs,
  and bit 6 is used to determine whether a comment-end sequence or an
  Sendcomment character ends style a or b.  Comment start sequences can
  start style a or b.  Style a is always the default.

  Flag N is stored in bit 15 + N of CODE+FLAGS (flag 1 is bit 16,
  flag 7 is bit 22).
  */
150
/* Extract one particular flag for character C.  Each macro fetches
   CODE+FLAGS for C with SYNTAX_WITH_FLAGS and hands it to the
   SYNTAX_FLAGS_... macro of the same name, so the result is 0 or 1.  */

#define SYNTAX_COMSTART_FIRST(c) \
  SYNTAX_FLAGS_COMSTART_FIRST (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMSTART_SECOND(c) \
  SYNTAX_FLAGS_COMSTART_SECOND (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMEND_FIRST(c) \
  SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMEND_SECOND(c) \
  SYNTAX_FLAGS_COMEND_SECOND (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_PREFIX(c) \
  SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMMENT_STYLE(c) \
  SYNTAX_FLAGS_COMMENT_STYLE (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMMENT_NESTED(c) \
  SYNTAX_FLAGS_COMMENT_NESTED (SYNTAX_WITH_FLAGS (c))
166
/* Extract specific flags from FLAGS, an integer that holds both the
   syntax code and the flags.  Every macro yields 0 or 1.  */

/* Bit number BIT of FLAGS, as 0 or 1.  */
#define SYNTAX_FLAGS_BIT(flags, bit) (((flags) >> (bit)) & 1)

#define SYNTAX_FLAGS_COMSTART_FIRST(flags)  SYNTAX_FLAGS_BIT (flags, 16)

#define SYNTAX_FLAGS_COMSTART_SECOND(flags) SYNTAX_FLAGS_BIT (flags, 17)

#define SYNTAX_FLAGS_COMEND_FIRST(flags)    SYNTAX_FLAGS_BIT (flags, 18)

#define SYNTAX_FLAGS_COMEND_SECOND(flags)   SYNTAX_FLAGS_BIT (flags, 19)

#define SYNTAX_FLAGS_PREFIX(flags)          SYNTAX_FLAGS_BIT (flags, 20)

#define SYNTAX_FLAGS_COMMENT_STYLE(flags)   SYNTAX_FLAGS_BIT (flags, 21)

#define SYNTAX_FLAGS_COMMENT_NESTED(flags)  SYNTAX_FLAGS_BIT (flags, 22)
183
/* Map from a descriptor character to the syntax code that the
   character signifies, stored as a char.  For example,
   (enum syntaxcode) syntax_spec_code['w'] is Sword.  The table has
   256 entries (written 0400 in octal originally).  */

extern unsigned char syntax_spec_code[256];

/* The reverse map: indexed by syntax code, gives the letter that
   describes that code.  */

extern char syntax_code_spec[16];
193
/* Translate the byte offset BYTEPOS into a character position for the
   object that SETUP_SYNTAX_TABLE_FOR_OBJECT recorded in gl_state.

   The result is intended to be fed to the UPDATE_SYNTAX_TABLE...
   macros.  Those do nothing when parse_sexp_lookup_properties is 0,
   so in that case this simply yields 0, for speed.

   Otherwise the conversion depends on gl_state.object:
     string  -> string_byte_to_char;
     buffer  -> that buffer's byte-to-char mapping, relative to its BEGV;
     nil     -> the current buffer's mapping, relative to BEGV;
     other   -> BYTEPOS unchanged.  */

#define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos)                              \
  (parse_sexp_lookup_properties                                         \
   ? (STRINGP (gl_state.object)                                         \
      ? string_byte_to_char (gl_state.object, (bytepos))                \
      : (BUFFERP (gl_state.object)                                      \
         ? (buf_bytepos_to_charpos (XBUFFER (gl_state.object),          \
                                    ((bytepos)                          \
                                     + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) \
                                     - 1))                              \
            - BUF_BEGV (XBUFFER (gl_state.object)) + 1)                 \
         : (NILP (gl_state.object)                                      \
            ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1       \
            : (bytepos))))                                              \
   : 0)
212
/* Make the syntax table state (gl_state) good for CHARPOS, on the
   assumption that it is currently good for some position before
   CHARPOS.  Nothing happens unless parse_sexp_lookup_properties is
   set and CHARPOS has reached gl_state.e_property.  The value is 1 if
   update_syntax_table was called, 0 otherwise.  */

#define UPDATE_SYNTAX_TABLE_FORWARD(charpos)                            \
  (parse_sexp_lookup_properties                                         \
   && (charpos) >= gl_state.e_property                                  \
   && (update_syntax_table ((charpos) + gl_state.offset, 1, 0,          \
                            gl_state.object),                           \
       1))
223
/* Make the syntax table state (gl_state) good for CHARPOS, on the
   assumption that it is currently good for some position after
   CHARPOS.  Nothing happens unless parse_sexp_lookup_properties is
   set and CHARPOS lies before gl_state.b_property.  The value is 1 if
   update_syntax_table was called, 0 otherwise.  */

#define UPDATE_SYNTAX_TABLE_BACKWARD(charpos)                           \
  (parse_sexp_lookup_properties                                         \
   && (charpos) < gl_state.b_property                                   \
   && (update_syntax_table ((charpos) + gl_state.offset, -1, 0,         \
                            gl_state.object),                           \
       1))
234
/* Make the syntax table state good for CHARPOS, whichever side of the
   currently valid region it lies on.  The backward update is tried
   first; the forward update is tried only if the backward one did
   nothing.  The value is 1 if update_syntax_table was called, 0
   otherwise.  */

#define UPDATE_SYNTAX_TABLE(charpos)                                    \
  (UPDATE_SYNTAX_TABLE_BACKWARD (charpos)                               \
   || UPDATE_SYNTAX_TABLE_FORWARD (charpos))
249
/* Initialize gl_state for a scan of the current buffer.

   Call this with FROM at the start of a forward search, or just after
   the last position of a backward search.  It arranges for the first
   character to be looked up with the correct table, so no
   UPDATE_SYNTAX_TABLE call is needed right after it.

   The sign of COUNT gives the direction of the search.

   The expansion is an `if (1) { ... } else' statement; the semicolon
   written by the caller supplies the empty `else' branch.  */

#define SETUP_SYNTAX_TABLE(FROM, COUNT)                                 \
if (1)                                                                  \
  {                                                                     \
    gl_state.object = Qnil;                                             \
    gl_state.offset = 0;                                                \
    gl_state.use_global = 0;                                            \
    gl_state.current_syntax_table = current_buffer->syntax_table;       \
    gl_state.b_property = BEGV;                                         \
    gl_state.e_property = ZV + 1;                                       \
    if (parse_sexp_lookup_properties                                    \
        && ((COUNT) > 0 || (FROM) > BEGV))                              \
      update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1, (COUNT),  \
                           1, Qnil);                                    \
  }                                                                     \
else
273
/* Like SETUP_SYNTAX_TABLE, but for a scan in OBJECT.  If OBJECT is
   nil, use the current buffer.  If it is t, ignore properties
   altogether.  Any other non-buffer OBJECT is treated as a string
   (its length is taken with SCHARS).

   This is meant for regex.c to use.  For buffers, regex.c passes
   arguments to the UPDATE_SYNTAX_TABLE macros which are relative to
   BEGV.  So for a buffer (or nil) the offset field is set to
   BEGV - 1, and b_property/e_property are expressed in those
   relative positions.

   FROM is the starting position and the sign of COUNT gives the
   direction of the scan.  COUNT is parenthesized at every use so
   that an argument containing low-precedence operators expands
   correctly.

   The expansion is an `if (1) { ... } else' statement; the semicolon
   written by the caller supplies the empty `else' branch.  */

#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT)              \
if (1)                                                                  \
  {                                                                     \
    gl_state.object = (OBJECT);                                         \
    if (BUFFERP (gl_state.object))                                      \
      {                                                                 \
        struct buffer *buf = XBUFFER (gl_state.object);                 \
        gl_state.b_property = 1;                                        \
        gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;        \
        gl_state.offset = BUF_BEGV (buf) - 1;                           \
      }                                                                 \
    else if (NILP (gl_state.object))                                    \
      {                                                                 \
        gl_state.b_property = 1;                                        \
        gl_state.e_property = ZV - BEGV + 1;                            \
        gl_state.offset = BEGV - 1;                                     \
      }                                                                 \
    else if (EQ (gl_state.object, Qt))                                  \
      {                                                                 \
        /* Properties are ignored: use a very large end bound.  */      \
        gl_state.b_property = 0;                                        \
        gl_state.e_property = 1500000000;                               \
        gl_state.offset = 0;                                            \
      }                                                                 \
    else                                                                \
      {                                                                 \
        gl_state.b_property = 0;                                        \
        gl_state.e_property = 1 + SCHARS (gl_state.object);             \
        gl_state.offset = 0;                                            \
      }                                                                 \
    gl_state.use_global = 0;                                            \
    gl_state.current_syntax_table = current_buffer->syntax_table;       \
    if (parse_sexp_lookup_properties)                                   \
      update_syntax_table (((FROM) + gl_state.offset                    \
                            + ((COUNT) > 0 ? 0 : -1)),                  \
                           (COUNT), 1, gl_state.object);                \
  }                                                                     \
else
318
319 struct gl_state_s
320 {
321 Lisp_Object object; /* The object we are scanning. */
322 int start; /* Where to stop. */
323 int stop; /* Where to stop. */
324 int use_global; /* Whether to use global_code
325 or c_s_t. */
326 Lisp_Object global_code; /* Syntax code of current char. */
327 Lisp_Object current_syntax_table; /* Syntax table for current pos. */
328 Lisp_Object old_prop; /* Syntax-table prop at prev pos. */
329 int b_property; /* First index where c_s_t is valid. */
330 int e_property; /* First index where c_s_t is
331 not valid. */
332 INTERVAL forward_i; /* Where to start lookup on forward */
333 INTERVAL backward_i; /* or backward movement. The
334 data in c_s_t is valid
335 between these intervals,
336 and possibly at the
337 intervals too, depending
338 on: */
339 /* Offset for positions specified to UPDATE_SYNTAX_TABLE. */
340 int offset;
341 };
342
343 extern struct gl_state_s gl_state;
344 extern int parse_sexp_lookup_properties;
345 extern INTERVAL interval_of P_ ((int, Lisp_Object));
346
347 extern int scan_words P_ ((int, int));
348
349 /* arch-tag: 28833cca-cd73-4741-8c85-a3111166a0e0
350 (do not change this comment) */