X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/3a4336e6def99c0e15e2f9ae36e6f31b3d6dad69..910565284a6fd361708d559abc3b06037f197e21:/src/category.c?ds=sidebyside

diff --git a/src/category.c b/src/category.c
index 218d5f7f78..84af413120 100644
--- a/src/category.c
+++ b/src/category.c
@@ -1,8 +1,8 @@
 /* GNU Emacs routines to deal with category tables.
-   Copyright (C) 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007
+   Copyright (C) 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
      Free Software Foundation, Inc.
    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-     2005, 2006, 2007
+     2005, 2006, 2007, 2008, 2009
      National Institute of Advanced Industrial Science and Technology (AIST)
      Registration Number H14PRO021
    Copyright (C) 2003
@@ -11,10 +11,10 @@
 
 This file is part of GNU Emacs.
 
-GNU Emacs is free software; you can redistribute it and/or modify
+GNU Emacs is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
 
 GNU Emacs is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -22,9 +22,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
-along with GNU Emacs; see the file COPYING.  If not, write to
-the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA.  */
+along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 
 
 /* Here we handle three objects: category, category set, and category
@@ -60,6 +58,32 @@ Lisp_Object _temp_category_set;
 
 /* Category set staff.  */
 
+static Lisp_Object hash_get_category_set P_ ((Lisp_Object, Lisp_Object));
+
+/* Return the category set in the hash table at TABLE's extras[1] that
+   is `equal' to CATEGORY_SET, first adding CATEGORY_SET if not found.  */
+static Lisp_Object
+hash_get_category_set (table, category_set)
+     Lisp_Object table, category_set;
+{
+  struct Lisp_Hash_Table *h;
+  int i;
+  unsigned hash;
+
+  if (NILP (XCHAR_TABLE (table)->extras[1]))	/* No hash table yet.  */
+    XCHAR_TABLE (table)->extras[1]
+      = make_hash_table (Qequal, make_number (DEFAULT_HASH_SIZE),
+			 make_float (DEFAULT_REHASH_SIZE),
+			 make_float (DEFAULT_REHASH_THRESHOLD),
+			 Qnil, Qnil, Qnil);
+  h = XHASH_TABLE (XCHAR_TABLE (table)->extras[1]);
+  i = hash_lookup (h, category_set, &hash);
+  if (i >= 0)
+    return HASH_KEY (h, i);	/* Reuse the set already recorded.  */
+  hash_put (h, category_set, Qnil, hash);
+  return category_set;
+}
+
 DEFUN ("make-category-set", Fmake_category_set, Smake_category_set, 1, 1, 0,
        doc: /* Return a newly created category-set which contains CATEGORIES.
 CATEGORIES is a string of category mnemonics.
@@ -97,7 +121,9 @@ Lisp_Object check_category_table ();
 DEFUN ("define-category", Fdefine_category, Sdefine_category, 2, 3, 0,
        doc: /* Define CATEGORY as a category which is described by DOCSTRING.
 CATEGORY should be an ASCII printing character in the range ` ' to `~'.
-DOCSTRING is the documentation string of the category.
+DOCSTRING is the documentation string of the category.  The first line
+should be a terse description (preferably fewer than 16 characters),
+and the remaining lines should be the full description.
 The category is defined only in category table TABLE, which defaults to
 the current buffer's category table.  */)
      (category, docstring, table)
@@ -370,15 +396,14 @@ then delete CATEGORY from the category set instead of adding it.  */)
 
   while (start <= end)
     {
+      from = start, to = end;
       category_set = char_table_ref_and_range (table, start, &from, &to);
       if (CATEGORY_MEMBER (XFASTINT (category), category_set) != NILP (reset))
 	{
 	  category_set = Fcopy_sequence (category_set);
 	  SET_CATEGORY_SET (category_set, category, set_value);
-	  if (to > end)
-	    char_table_set_range (table, start, end, category_set);
-	  else
-	    char_table_set_range (table, start, to, category_set);
+	  category_set = hash_get_category_set (table, category_set);
+	  char_table_set_range (table, start, to, category_set);
 	}
       start = to + 1;
     }
@@ -399,7 +424,8 @@ word_boundary_p (c1, c2)
   Lisp_Object tail;
   int default_result;
 
-  if (CHAR_CHARSET (c1) == CHAR_CHARSET (c2))
+  if (EQ (CHAR_TABLE_REF (Vchar_script_table, c1),
+	  CHAR_TABLE_REF (Vchar_script_table, c2)))
     {
       tail = Vword_separating_categories;
       default_result = 0;
@@ -422,10 +448,12 @@ word_boundary_p (c1, c2)
       Lisp_Object elt = XCAR (tail);
 
       if (CONSP (elt)
-	  && CATEGORYP (XCAR (elt))
-	  && CATEGORYP (XCDR (elt))
-	  && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1)
-	  && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2))
+	  && (NILP (XCAR (elt))
+	      || (CATEGORYP (XCAR (elt))
+		  && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1)))
+	  && (NILP (XCDR (elt))
+	      || (CATEGORYP (XCDR (elt))
+		  && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2))))
 	return !default_result;
     }
   return default_result;
@@ -469,36 +497,36 @@ syms_of_category ()
 	       doc: /* List of pair (cons) of categories to determine word boundary.
 
 Emacs treats a sequence of word constituent characters as a single
-word (i.e. finds no word boundary between them) iff they belongs to
-the same charset.  But, exceptions are allowed in the following cases.
+word (i.e. finds no word boundary between them) only if they belong to
+the same script.  But, exceptions are allowed in the following cases.
 
-\(1) The case that characters are in different charsets is controlled
+\(1) The case that characters are in different scripts is controlled
 by the variable `word-combining-categories'.
 
-Emacs finds no word boundary between characters of different charsets
+Emacs finds no word boundary between characters of different scripts
 if they have categories matching some element of this list.
 
 More precisely, if an element of this list is a cons of category CAT1
 and CAT2, and a multibyte character C1 which has CAT1 is followed by
 C2 which has CAT2, there's no word boundary between C1 and C2.
 
-For instance, to tell that ASCII characters and Latin-1 characters can
-form a single word, the element `(?l . ?l)' should be in this list
-because both characters have the category `l' (Latin characters).
+For instance, to tell that Han characters followed by Hiragana
+characters can form a single word, the element `(?C . ?H)' should be
+in this list.
 
-\(2) The case that character are in the same charset is controlled by
+\(2) The case that characters are in the same script is controlled by
 the variable `word-separating-categories'.
 
-Emacs find a word boundary between characters of the same charset
+Emacs finds a word boundary between characters of the same script
 if they have categories matching some element of this list.
 
 More precisely, if an element of this list is a cons of category CAT1
 and CAT2, and a multibyte character C1 which has CAT1 is followed by
 C2 which has CAT2, there's a word boundary between C1 and C2.
 
-For instance, to tell that there's a word boundary between Japanese
-Hiragana and Japanese Kanji (both are in the same charset), the
-element `(?H . ?C) should be in this list.  */);
+For instance, to tell that there's a word boundary between Hiragana
+and Katakana (both are in the same script `kana'),
+the element `(?H . ?K)' should be in this list.  */);
 
   Vword_combining_categories = Qnil;