X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/3a4336e6def99c0e15e2f9ae36e6f31b3d6dad69..910565284a6fd361708d559abc3b06037f197e21:/src/category.c?ds=sidebyside diff --git a/src/category.c b/src/category.c index 218d5f7f78..84af413120 100644 --- a/src/category.c +++ b/src/category.c @@ -1,8 +1,8 @@ /* GNU Emacs routines to deal with category tables. - Copyright (C) 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007 + Copyright (C) 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, - 2005, 2006, 2007 + 2005, 2006, 2007, 2008, 2009 National Institute of Advanced Industrial Science and Technology (AIST) Registration Number H14PRO021 Copyright (C) 2003 @@ -11,10 +11,10 @@ This file is part of GNU Emacs. -GNU Emacs is free software; you can redistribute it and/or modify +GNU Emacs is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. GNU Emacs is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -22,9 +22,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with GNU Emacs; see the file COPYING. If not, write to -the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. */ +along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ /* Here we handle three objects: category, category set, and category @@ -60,6 +58,32 @@ Lisp_Object _temp_category_set; /* Category set staff. 
*/ +static Lisp_Object hash_get_category_set P_ ((Lisp_Object, Lisp_Object)); + +static Lisp_Object +hash_get_category_set (table, category_set) + Lisp_Object table, category_set; +{ + Lisp_Object val; + struct Lisp_Hash_Table *h; + int i; + unsigned hash; + + if (NILP (XCHAR_TABLE (table)->extras[1])) + XCHAR_TABLE (table)->extras[1] + = make_hash_table (Qequal, make_number (DEFAULT_HASH_SIZE), + make_float (DEFAULT_REHASH_SIZE), + make_float (DEFAULT_REHASH_THRESHOLD), + Qnil, Qnil, Qnil); + h = XHASH_TABLE (XCHAR_TABLE (table)->extras[1]); + i = hash_lookup (h, category_set, &hash); + if (i >= 0) + return HASH_KEY (h, i); + hash_put (h, category_set, Qnil, hash); + return category_set; +} + + DEFUN ("make-category-set", Fmake_category_set, Smake_category_set, 1, 1, 0, doc: /* Return a newly created category-set which contains CATEGORIES. CATEGORIES is a string of category mnemonics. @@ -97,7 +121,9 @@ Lisp_Object check_category_table (); DEFUN ("define-category", Fdefine_category, Sdefine_category, 2, 3, 0, doc: /* Define CATEGORY as a category which is described by DOCSTRING. CATEGORY should be an ASCII printing character in the range ` ' to `~'. -DOCSTRING is the documentation string of the category. +DOCSTRING is the documentation string of the category. The first line +should be a terse text (preferably less than 16 characters), +and the rest lines should be the full description. The category is defined only in category table TABLE, which defaults to the current buffer's category table. */) (category, docstring, table) @@ -370,15 +396,14 @@ then delete CATEGORY from the category set instead of adding it. 
*/) while (start <= end) { + from = start, to = end; category_set = char_table_ref_and_range (table, start, &from, &to); if (CATEGORY_MEMBER (XFASTINT (category), category_set) != NILP (reset)) { category_set = Fcopy_sequence (category_set); SET_CATEGORY_SET (category_set, category, set_value); - if (to > end) - char_table_set_range (table, start, end, category_set); - else - char_table_set_range (table, start, to, category_set); + category_set = hash_get_category_set (table, category_set); + char_table_set_range (table, start, to, category_set); } start = to + 1; } @@ -399,7 +424,8 @@ word_boundary_p (c1, c2) Lisp_Object tail; int default_result; - if (CHAR_CHARSET (c1) == CHAR_CHARSET (c2)) + if (EQ (CHAR_TABLE_REF (Vchar_script_table, c1), + CHAR_TABLE_REF (Vchar_script_table, c2))) { tail = Vword_separating_categories; default_result = 0; @@ -422,10 +448,12 @@ word_boundary_p (c1, c2) Lisp_Object elt = XCAR (tail); if (CONSP (elt) - && CATEGORYP (XCAR (elt)) - && CATEGORYP (XCDR (elt)) - && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1) - && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2)) + && (NILP (XCAR (elt)) + || (CATEGORYP (XCAR (elt)) + && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1))) + && (NILP (XCDR (elt)) + || (CATEGORYP (XCDR (elt)) + && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2)))) return !default_result; } return default_result; @@ -469,36 +497,36 @@ syms_of_category () doc: /* List of pair (cons) of categories to determine word boundary. Emacs treats a sequence of word constituent characters as a single -word (i.e. finds no word boundary between them) iff they belongs to -the same charset. But, exceptions are allowed in the following cases. +word (i.e. finds no word boundary between them) only if they belong to +the same script. But, exceptions are allowed in the following cases. 
-\(1) The case that characters are in different charsets is controlled +\(1) The case that characters are in different scripts is controlled by the variable `word-combining-categories'. -Emacs finds no word boundary between characters of different charsets +Emacs finds no word boundary between characters of different scripts if they have categories matching some element of this list. More precisely, if an element of this list is a cons of category CAT1 and CAT2, and a multibyte character C1 which has CAT1 is followed by C2 which has CAT2, there's no word boundary between C1 and C2. -For instance, to tell that ASCII characters and Latin-1 characters can -form a single word, the element `(?l . ?l)' should be in this list -because both characters have the category `l' (Latin characters). +For instance, to tell that Han characters followed by Hiragana +characters can form a single word, the element `(?C . ?H)' should be +in this list. -\(2) The case that character are in the same charset is controlled by +\(2) The case that characters are in the same script is controlled by the variable `word-separating-categories'. -Emacs find a word boundary between characters of the same charset +Emacs finds a word boundary between characters of the same script if they have categories matching some element of this list. More precisely, if an element of this list is a cons of category CAT1 and CAT2, and a multibyte character C1 which has CAT1 is followed by C2 which has CAT2, there's a word boundary between C1 and C2. -For instance, to tell that there's a word boundary between Japanese -Hiragana and Japanese Kanji (both are in the same charset), the -element `(?H . ?C) should be in this list. */); +For instance, to tell that there's a word boundary between Hiragana +and Katakana (both are in the same script `kana'), +the element `(?H . ?K)' should be in this list. */); Vword_combining_categories = Qnil;