]> code.delx.au - gnu-emacs/blob - src/category.c
* category.h (CHAR_HAS_CATEGORY): Define as inline.
[gnu-emacs] / src / category.c
1 /* GNU Emacs routines to deal with category tables.
2
3 Copyright (C) 1998, 2001-2012 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8 Copyright (C) 2003
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
11
12 This file is part of GNU Emacs.
13
14 GNU Emacs is free software: you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or
17 (at your option) any later version.
18
19 GNU Emacs is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
26
27
28 /* Here we handle three objects: category, category set, and category
29 table. Read comments in the file category.h to understand them. */
30
31 #include <config.h>
32 #include <ctype.h>
33 #include <setjmp.h>
34 #include "lisp.h"
35 #include "buffer.h"
36 #include "character.h"
37 #include "charset.h"
38 #include "category.h"
39 #include "keymap.h"
40
41 /* The version number of the latest category table. Each category
42 table has a unique version number. It is assigned a new number
43 also when it is modified. When a regular expression is compiled
44 into the struct re_pattern_buffer, the version number of the
45 category table (of the current buffer) at that moment is also
46 embedded in the structure.
47
48 For the moment, we are not using this feature. */
49 static int category_table_version;
50
51 static Lisp_Object Qcategory_table, Qcategoryp, Qcategorysetp, Qcategory_table_p;
52
53 /* Make CATEGORY_SET includes (if VAL is t) or excludes (if VAL is
54 nil) CATEGORY. */
55 #define SET_CATEGORY_SET(category_set, category, val) \
56 set_category_set (category_set, category, val)
57 static void set_category_set (Lisp_Object, Lisp_Object, Lisp_Object);
58 \f
59 /* Category set staff. */
60
61 static Lisp_Object hash_get_category_set (Lisp_Object, Lisp_Object);
62
63 static Lisp_Object
64 hash_get_category_set (Lisp_Object table, Lisp_Object category_set)
65 {
66 struct Lisp_Hash_Table *h;
67 ptrdiff_t i;
68 EMACS_UINT hash;
69
70 if (NILP (XCHAR_TABLE (table)->extras[1]))
71 XCHAR_TABLE (table)->extras[1]
72 = make_hash_table (Qequal, make_number (DEFAULT_HASH_SIZE),
73 make_float (DEFAULT_REHASH_SIZE),
74 make_float (DEFAULT_REHASH_THRESHOLD),
75 Qnil, Qnil, Qnil);
76 h = XHASH_TABLE (XCHAR_TABLE (table)->extras[1]);
77 i = hash_lookup (h, category_set, &hash);
78 if (i >= 0)
79 return HASH_KEY (h, i);
80 hash_put (h, category_set, Qnil, hash);
81 return category_set;
82 }
83
84
85 DEFUN ("make-category-set", Fmake_category_set, Smake_category_set, 1, 1, 0,
86 doc: /* Return a newly created category-set which contains CATEGORIES.
87 CATEGORIES is a string of category mnemonics.
88 The value is a bool-vector which has t at the indices corresponding to
89 those categories. */)
90 (Lisp_Object categories)
91 {
92 Lisp_Object val;
93 int len;
94
95 CHECK_STRING (categories);
96 val = MAKE_CATEGORY_SET;
97
98 if (STRING_MULTIBYTE (categories))
99 error ("Multibyte string in `make-category-set'");
100
101 len = SCHARS (categories);
102 while (--len >= 0)
103 {
104 Lisp_Object category;
105
106 XSETFASTINT (category, SREF (categories, len));
107 CHECK_CATEGORY (category);
108 SET_CATEGORY_SET (val, category, Qt);
109 }
110 return val;
111 }
112
113 \f
114 /* Category staff. */
115
116 static Lisp_Object check_category_table (Lisp_Object table);
117
118 DEFUN ("define-category", Fdefine_category, Sdefine_category, 2, 3, 0,
119 doc: /* Define CATEGORY as a category which is described by DOCSTRING.
120 CATEGORY should be an ASCII printing character in the range ` ' to `~'.
121 DOCSTRING is the documentation string of the category. The first line
122 should be a terse text (preferably less than 16 characters),
123 and the rest lines should be the full description.
124 The category is defined only in category table TABLE, which defaults to
125 the current buffer's category table. */)
126 (Lisp_Object category, Lisp_Object docstring, Lisp_Object table)
127 {
128 CHECK_CATEGORY (category);
129 CHECK_STRING (docstring);
130 table = check_category_table (table);
131
132 if (!NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
133 error ("Category `%c' is already defined", (int) XFASTINT (category));
134 if (!NILP (Vpurify_flag))
135 docstring = Fpurecopy (docstring);
136 CATEGORY_DOCSTRING (table, XFASTINT (category)) = docstring;
137
138 return Qnil;
139 }
140
141 DEFUN ("category-docstring", Fcategory_docstring, Scategory_docstring, 1, 2, 0,
142 doc: /* Return the documentation string of CATEGORY, as defined in TABLE.
143 TABLE should be a category table and defaults to the current buffer's
144 category table. */)
145 (Lisp_Object category, Lisp_Object table)
146 {
147 CHECK_CATEGORY (category);
148 table = check_category_table (table);
149
150 return CATEGORY_DOCSTRING (table, XFASTINT (category));
151 }
152
153 DEFUN ("get-unused-category", Fget_unused_category, Sget_unused_category,
154 0, 1, 0,
155 doc: /* Return a category which is not yet defined in TABLE.
156 If no category remains available, return nil.
157 The optional argument TABLE specifies which category table to modify;
158 it defaults to the current buffer's category table. */)
159 (Lisp_Object table)
160 {
161 int i;
162
163 table = check_category_table (table);
164
165 for (i = ' '; i <= '~'; i++)
166 if (NILP (CATEGORY_DOCSTRING (table, i)))
167 return make_number (i);
168
169 return Qnil;
170 }
171
172 \f
173 /* Category-table staff. */
174
175 DEFUN ("category-table-p", Fcategory_table_p, Scategory_table_p, 1, 1, 0,
176 doc: /* Return t if ARG is a category table. */)
177 (Lisp_Object arg)
178 {
179 if (CHAR_TABLE_P (arg)
180 && EQ (XCHAR_TABLE (arg)->purpose, Qcategory_table))
181 return Qt;
182 return Qnil;
183 }
184
185 /* If TABLE is nil, return the current category table. If TABLE is
186 not nil, check the validity of TABLE as a category table. If
187 valid, return TABLE itself, but if not valid, signal an error of
188 wrong-type-argument. */
189
190 static Lisp_Object
191 check_category_table (Lisp_Object table)
192 {
193 if (NILP (table))
194 return BVAR (current_buffer, category_table);
195 CHECK_TYPE (!NILP (Fcategory_table_p (table)), Qcategory_table_p, table);
196 return table;
197 }
198
199 DEFUN ("category-table", Fcategory_table, Scategory_table, 0, 0, 0,
200 doc: /* Return the current category table.
201 This is the one specified by the current buffer. */)
202 (void)
203 {
204 return BVAR (current_buffer, category_table);
205 }
206
207 DEFUN ("standard-category-table", Fstandard_category_table,
208 Sstandard_category_table, 0, 0, 0,
209 doc: /* Return the standard category table.
210 This is the one used for new buffers. */)
211 (void)
212 {
213 return Vstandard_category_table;
214 }
215
216
217 static void
218 copy_category_entry (Lisp_Object table, Lisp_Object c, Lisp_Object val)
219 {
220 val = Fcopy_sequence (val);
221 if (CONSP (c))
222 char_table_set_range (table, XINT (XCAR (c)), XINT (XCDR (c)), val);
223 else
224 char_table_set (table, XINT (c), val);
225 }
226
227 /* Return a copy of category table TABLE. We can't simply use the
228 function copy-sequence because no contents should be shared between
229 the original and the copy. This function is called recursively by
230 binding TABLE to a sub char table. */
231
232 static Lisp_Object
233 copy_category_table (Lisp_Object table)
234 {
235 table = copy_char_table (table);
236
237 if (! NILP (XCHAR_TABLE (table)->defalt))
238 XCHAR_TABLE (table)->defalt
239 = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
240 XCHAR_TABLE (table)->extras[0]
241 = Fcopy_sequence (XCHAR_TABLE (table)->extras[0]);
242 map_char_table (copy_category_entry, Qnil, table, table);
243
244 return table;
245 }
246
247 DEFUN ("copy-category-table", Fcopy_category_table, Scopy_category_table,
248 0, 1, 0,
249 doc: /* Construct a new category table and return it.
250 It is a copy of the TABLE, which defaults to the standard category table. */)
251 (Lisp_Object table)
252 {
253 if (!NILP (table))
254 check_category_table (table);
255 else
256 table = Vstandard_category_table;
257
258 return copy_category_table (table);
259 }
260
261 DEFUN ("make-category-table", Fmake_category_table, Smake_category_table,
262 0, 0, 0,
263 doc: /* Construct a new and empty category table and return it. */)
264 (void)
265 {
266 Lisp_Object val;
267 int i;
268
269 val = Fmake_char_table (Qcategory_table, Qnil);
270 XCHAR_TABLE (val)->defalt = MAKE_CATEGORY_SET;
271 for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++)
272 XCHAR_TABLE (val)->contents[i] = MAKE_CATEGORY_SET;
273 Fset_char_table_extra_slot (val, make_number (0),
274 Fmake_vector (make_number (95), Qnil));
275 return val;
276 }
277
278 DEFUN ("set-category-table", Fset_category_table, Sset_category_table, 1, 1, 0,
279 doc: /* Specify TABLE as the category table for the current buffer.
280 Return TABLE. */)
281 (Lisp_Object table)
282 {
283 int idx;
284 table = check_category_table (table);
285 BVAR (current_buffer, category_table) = table;
286 /* Indicate that this buffer now has a specified category table. */
287 idx = PER_BUFFER_VAR_IDX (category_table);
288 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
289 return table;
290 }
291
292 \f
293 Lisp_Object
294 char_category_set (int c)
295 {
296 return CHAR_TABLE_REF (BVAR (current_buffer, category_table), c);
297 }
298
299 DEFUN ("char-category-set", Fchar_category_set, Schar_category_set, 1, 1, 0,
300 doc: /* Return the category set of CHAR.
301 usage: (char-category-set CHAR) */)
302 (Lisp_Object ch)
303 {
304 CHECK_CHARACTER (ch);
305 return CATEGORY_SET (XFASTINT (ch));
306 }
307
308 DEFUN ("category-set-mnemonics", Fcategory_set_mnemonics,
309 Scategory_set_mnemonics, 1, 1, 0,
310 doc: /* Return a string containing mnemonics of the categories in CATEGORY-SET.
311 CATEGORY-SET is a bool-vector, and the categories \"in\" it are those
312 that are indexes where t occurs in the bool-vector.
313 The return value is a string containing those same categories. */)
314 (Lisp_Object category_set)
315 {
316 int i, j;
317 char str[96];
318
319 CHECK_CATEGORY_SET (category_set);
320
321 j = 0;
322 for (i = 32; i < 127; i++)
323 if (CATEGORY_MEMBER (i, category_set))
324 str[j++] = i;
325 str[j] = '\0';
326
327 return build_string (str);
328 }
329
330 static void
331 set_category_set (Lisp_Object category_set, Lisp_Object category, Lisp_Object val)
332 {
333 do {
334 int idx = XINT (category) / 8;
335 unsigned char bits = 1 << (XINT (category) % 8);
336
337 if (NILP (val))
338 XCATEGORY_SET (category_set)->data[idx] &= ~bits;
339 else
340 XCATEGORY_SET (category_set)->data[idx] |= bits;
341 } while (0);
342 }
343
344 DEFUN ("modify-category-entry", Fmodify_category_entry,
345 Smodify_category_entry, 2, 4, 0,
346 doc: /* Modify the category set of CHARACTER by adding CATEGORY to it.
347 The category is changed only for table TABLE, which defaults to
348 the current buffer's category table.
349 CHARACTER can be either a single character or a cons representing the
350 lower and upper ends of an inclusive character range to modify.
351 If optional fourth argument RESET is non-nil,
352 then delete CATEGORY from the category set instead of adding it. */)
353 (Lisp_Object character, Lisp_Object category, Lisp_Object table, Lisp_Object reset)
354 {
355 Lisp_Object set_value; /* Actual value to be set in category sets. */
356 Lisp_Object category_set;
357 int start, end;
358 int from, to;
359
360 if (INTEGERP (character))
361 {
362 CHECK_CHARACTER (character);
363 start = end = XFASTINT (character);
364 }
365 else
366 {
367 CHECK_CONS (character);
368 CHECK_CHARACTER_CAR (character);
369 CHECK_CHARACTER_CDR (character);
370 start = XFASTINT (XCAR (character));
371 end = XFASTINT (XCDR (character));
372 }
373
374 CHECK_CATEGORY (category);
375 table = check_category_table (table);
376
377 if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
378 error ("Undefined category: %c", (int) XFASTINT (category));
379
380 set_value = NILP (reset) ? Qt : Qnil;
381
382 while (start <= end)
383 {
384 from = start, to = end;
385 category_set = char_table_ref_and_range (table, start, &from, &to);
386 if (CATEGORY_MEMBER (XFASTINT (category), category_set) != NILP (reset))
387 {
388 category_set = Fcopy_sequence (category_set);
389 SET_CATEGORY_SET (category_set, category, set_value);
390 category_set = hash_get_category_set (table, category_set);
391 char_table_set_range (table, start, to, category_set);
392 }
393 start = to + 1;
394 }
395
396 return Qnil;
397 }
398 \f
399 /* Return 1 if there is a word boundary between two word-constituent
400 characters C1 and C2 if they appear in this order, else return 0.
401 Use the macro WORD_BOUNDARY_P instead of calling this function
402 directly. */
403
404 int
405 word_boundary_p (int c1, int c2)
406 {
407 Lisp_Object category_set1, category_set2;
408 Lisp_Object tail;
409 int default_result;
410
411 if (EQ (CHAR_TABLE_REF (Vchar_script_table, c1),
412 CHAR_TABLE_REF (Vchar_script_table, c2)))
413 {
414 tail = Vword_separating_categories;
415 default_result = 0;
416 }
417 else
418 {
419 tail = Vword_combining_categories;
420 default_result = 1;
421 }
422
423 category_set1 = CATEGORY_SET (c1);
424 if (NILP (category_set1))
425 return default_result;
426 category_set2 = CATEGORY_SET (c2);
427 if (NILP (category_set2))
428 return default_result;
429
430 for (; CONSP (tail); tail = XCDR (tail))
431 {
432 Lisp_Object elt = XCAR (tail);
433
434 if (CONSP (elt)
435 && (NILP (XCAR (elt))
436 || (CATEGORYP (XCAR (elt))
437 && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1)
438 && ! CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set2)))
439 && (NILP (XCDR (elt))
440 || (CATEGORYP (XCDR (elt))
441 && ! CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set1)
442 && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2))))
443 return !default_result;
444 }
445 return default_result;
446 }
447
448 \f
449 void
450 init_category_once (void)
451 {
452 /* This has to be done here, before we call Fmake_char_table. */
453 DEFSYM (Qcategory_table, "category-table");
454
455 /* Intern this now in case it isn't already done.
456 Setting this variable twice is harmless.
457 But don't staticpro it here--that is done in alloc.c. */
458 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
459
460 /* Now we are ready to set up this property, so we can
461 create category tables. */
462 Fput (Qcategory_table, Qchar_table_extra_slots, make_number (2));
463
464 Vstandard_category_table = Fmake_char_table (Qcategory_table, Qnil);
465 /* Set a category set which contains nothing to the default. */
466 XCHAR_TABLE (Vstandard_category_table)->defalt = MAKE_CATEGORY_SET;
467 Fset_char_table_extra_slot (Vstandard_category_table, make_number (0),
468 Fmake_vector (make_number (95), Qnil));
469 }
470
471 void
472 syms_of_category (void)
473 {
474 DEFSYM (Qcategoryp, "categoryp");
475 DEFSYM (Qcategorysetp, "categorysetp");
476 DEFSYM (Qcategory_table_p, "category-table-p");
477
478 DEFVAR_LISP ("word-combining-categories", Vword_combining_categories,
479 doc: /* List of pair (cons) of categories to determine word boundary.
480
481 Emacs treats a sequence of word constituent characters as a single
482 word (i.e. finds no word boundary between them) only if they belong to
483 the same script. But, exceptions are allowed in the following cases.
484
485 \(1) The case that characters are in different scripts is controlled
486 by the variable `word-combining-categories'.
487
488 Emacs finds no word boundary between characters of different scripts
489 if they have categories matching some element of this list.
490
491 More precisely, if an element of this list is a cons of category CAT1
492 and CAT2, and a multibyte character C1 which has CAT1 is followed by
493 C2 which has CAT2, there's no word boundary between C1 and C2.
494
495 For instance, to tell that Han characters followed by Hiragana
496 characters can form a single word, the element `(?C . ?H)' should be
497 in this list.
498
499 \(2) The case that character are in the same script is controlled by
500 the variable `word-separating-categories'.
501
502 Emacs finds a word boundary between characters of the same script
503 if they have categories matching some element of this list.
504
505 More precisely, if an element of this list is a cons of category CAT1
506 and CAT2, and a multibyte character C1 which has CAT1 but not CAT2 is
507 followed by C2 which has CAT2 but not CAT1, there's a word boundary
508 between C1 and C2.
509
510 For instance, to tell that there's a word boundary between Hiragana
511 and Katakana (both are in the same script `kana'),
512 the element `(?H . ?K) should be in this list. */);
513
514 Vword_combining_categories = Qnil;
515
516 DEFVAR_LISP ("word-separating-categories", Vword_separating_categories,
517 doc: /* List of pair (cons) of categories to determine word boundary.
518 See the documentation of the variable `word-combining-categories'. */);
519
520 Vword_separating_categories = Qnil;
521
522 defsubr (&Smake_category_set);
523 defsubr (&Sdefine_category);
524 defsubr (&Scategory_docstring);
525 defsubr (&Sget_unused_category);
526 defsubr (&Scategory_table_p);
527 defsubr (&Scategory_table);
528 defsubr (&Sstandard_category_table);
529 defsubr (&Scopy_category_table);
530 defsubr (&Smake_category_table);
531 defsubr (&Sset_category_table);
532 defsubr (&Schar_category_set);
533 defsubr (&Scategory_set_mnemonics);
534 defsubr (&Smodify_category_entry);
535
536 category_table_version = 0;
537 }