code.delx.au - gnu-emacs/blob - lisp/international/characters.el

   1 ;;; characters.el --- set syntax and category for multibyte characters
   2
   3 ;; Copyright (C) 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   4 ;;   Free Software Foundation, Inc.
   5 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   6 ;;   2005, 2006, 2007, 2008, 2009
   7 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
   8 ;;   Registration Number H14PRO021
   9 ;; Copyright (C) 2003
  10 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
  11 ;;   Registration Number H13PRO009
  12
  13 ;; Keywords: multibyte character, character set, syntax, category
  14
  15 ;; This file is part of GNU Emacs.
  16
  17 ;; GNU Emacs is free software: you can redistribute it and/or modify
  18 ;; it under the terms of the GNU General Public License as published by
  19 ;; the Free Software Foundation, either version 3 of the License, or
  20 ;; (at your option) any later version.
  21
  22 ;; GNU Emacs is distributed in the hope that it will be useful,
  23 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  24 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  25 ;; GNU General Public License for more details.
  26
  27 ;; You should have received a copy of the GNU General Public License
  28 ;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
  29
  30 ;;; Commentary:
  31
  32 ;;; Code:
  33
  34 ;;; Predefined categories.
  35
  36 ;; For each character set.
  37
  38 (define-category ?a "ASCII
  39 ASCII graphic characters 32-126 (ISO646 IRV:1983[4/0])")
  40 (define-category ?l "Latin")
  41 (define-category ?t "Thai")
  42 (define-category ?g "Greek")
  43 (define-category ?b "Arabic")
  44 (define-category ?w "Hebrew")
  45 (define-category ?y "Cyrillic")
  46 (define-category ?k "Katakana
  47 Japanese katakana")
  48 (define-category ?r "Roman
  49 Japanese roman")
  50 (define-category ?c "Chinese")
  51 (define-category ?j "Japanese")
  52 (define-category ?h "Korean")
  53 (define-category ?e "Ethiopic
  54 Ethiopic (Ge'ez)")
  55 (define-category ?v "Viet
  56 Vietnamese")
  57 (define-category ?i "Indian")
  58 (define-category ?o "Lao")
  59 (define-category ?q "Tibetan")
  60
  61 ;; For each group (row) of 2-byte character sets.
  62
  63 (define-category ?A "2-byte alnum
  64 Alpha-numeric characters of 2-byte character sets")
  65 (define-category ?C "2-byte han
  66 Chinese (Han) characters of 2-byte character sets")
  67 (define-category ?G "2-byte Greek
  68 Greek characters of 2-byte character sets")
  69 (define-category ?H "2-byte Hiragana
  70 Japanese Hiragana characters of 2-byte character sets")
  71 (define-category ?K "2-byte Katakana
  72 Japanese Katakana characters of 2-byte character sets")
  73 (define-category ?N "2-byte Korean
  74 Korean Hangul characters of 2-byte character sets")
  75 (define-category ?Y "2-byte Cyrillic
  76 Cyrillic characters of 2-byte character sets")
  77 (define-category ?I "Indian Glyphs")
  78
  79 ;; For phonetic classifications.
  80
  81 (define-category ?0 "consonant")
  82 (define-category ?1 "base vowel
  83 base (independent) vowel")
  84 (define-category ?2 "upper diacritic
  85 upper diacritical mark (including upper vowel)")
  86 (define-category ?3 "lower diacritic
  87 lower diacritical mark (including lower vowel)")
  88 (define-category ?4 "combining tone
  89 combining tone mark")
  90 (define-category ?5 "symbol")
  91 (define-category ?6 "digit")
  92 (define-category ?7 "vowel diacritic
  93 vowel-modifying diacritical mark")
  94 (define-category ?8 "vowel-signs")
  95 (define-category ?9 "semivowel lower")
  96
  97 ;; For filling.
  98 (define-category ?| "line breakable
  99 While filling, we can break a line at this character.")
 100
 101 ;; For indentation calculation.
 102 (define-category ?\s
 103   "space for indent
 104 This character counts as a space for indentation purposes.")
 105
 106 ;; Keep the following for `kinsoku' processing.  See comments in
 107 ;; kinsoku.el.
 108 (define-category ?> "Not at bol
 109 A character which can't be placed at beginning of line.")
 110 (define-category ?< "Not at eol
 111 A character which can't be placed at end of line.")
 112
 113 ;; Combining
 114 (define-category ?^ "Combining
 115 Combining diacritic or mark")
 116 \f
 117 ;;; Setting syntax and category.
 118
 119 ;; ASCII
 120
 121 ;; All ASCII characters have the category `a' (ASCII) and `l' (Latin).
 122 (modify-category-entry '(32 . 127) ?a)
 123 (modify-category-entry '(32 . 127) ?l)
 124
 125 ;; Deal with the CJK charsets first.  Since the syntax of blocks is
 126 ;; defined per charset, and the charsets may contain e.g. Latin
 127 ;; characters, we end up with the wrong syntax definitions if we're
 128 ;; not careful.
 129
 130 ;; Chinese characters (Unicode)
 131 (modify-category-entry '(#x2E80 . #x312F) ?|)
 132 (modify-category-entry '(#x3190 . #x33FF) ?|)
 133 (modify-category-entry '(#x3400 . #x9FAF) ?C)
 134 (modify-category-entry '(#x3400 . #x9FAF) ?c)
 135 (modify-category-entry '(#x3400 . #x9FAF) ?|)
 136 (modify-category-entry '(#xF900 . #xFAFF) ?C)
 137 (modify-category-entry '(#xF900 . #xFAFF) ?c)
 138 (modify-category-entry '(#xF900 . #xFAFF) ?|)
 139 (modify-category-entry '(#x20000 . #x2AFFF) ?|)
 140 (modify-category-entry '(#x2F800 . #x2FFFF) ?|)
 141 (modify-category-entry '(#x20000 . #x2AFFF) ?C)
 142 (modify-category-entry '(#x2F800 . #x2FFFF) ?C)
 143
 144
 145 ;; Chinese character set (GB2312)
 146
 147 (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2121 #x217E)
 148 (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2221 #x227E)
 149 (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2921 #x297E)
 150
 151 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c)
 152 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2330 #x2339)
 153 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2341 #x235A)
 154 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2361 #x237A)
 155 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?H #x2421 #x247E)
 156 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?K #x2521 #x257E)
 157 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?G #x2621 #x267E)
 158 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?Y #x2721 #x277E)
 159 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?C #x3021 #x7E7E)
 160
 161 ;; Chinese character set (BIG5)
 162
 163 (map-charset-chars #'modify-category-entry 'big5 ?c)
 164 (map-charset-chars #'modify-category-entry 'big5 ?C #xA259 #xA25F)
 165 (map-charset-chars #'modify-category-entry 'big5 ?C #xA440 #xC67E)
 166 (map-charset-chars #'modify-category-entry 'big5 ?C #xC940 #xF9DF)
 167
 168 ;; Chinese character set (CNS11643)
 169
 170 (dolist (c '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
 171              chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
 172              chinese-cns11643-7))
 173   (map-charset-chars #'modify-category-entry c ?c)
 174   (if (eq c 'chinese-cns11643-1)
 175       (map-charset-chars #'modify-category-entry c ?C #x4421 #x7E7E)
 176     (map-charset-chars #'modify-category-entry c ?C)))
 177
 178 ;; Japanese character set (JISX0201, JISX0208, JISX0212, JISX0213)
 179
 180 (map-charset-chars #'modify-category-entry 'katakana-jisx0201 ?k)
 181
 182 (map-charset-chars #'modify-category-entry 'latin-jisx0201 ?r)
 183
 184 (dolist (l '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212
 185                                japanese-jisx0213-1 japanese-jisx0213-2))
 186   (map-charset-chars #'modify-category-entry l ?j))
 187
 188 ;; Unicode equivalents of JISX0201-kana
 189 (let ((range '(#xff61 . #xff9f)))
 190   (modify-category-entry range  ?k)
 191   (modify-category-entry range ?j)
 192   (modify-category-entry range ?\|))
 193
 194 ;; Katakana block
 195 (let ((range '(#x30a0 . #x30ff)))
 196   ;; ?K is double width, ?k isn't specified
 197   (modify-category-entry range ?K)
 198   (modify-category-entry range ?\|))
 199
 200 ;; Hiragana block
 201 (let ((range '(#x3040 . #x309d)))
 202   ;; ?H is actually defined to be double width
 203   ;;(modify-category-entry range ?H)
 204   (modify-category-entry range ?\|)
 205   )
 206
 207 ;; JISX0208
 208 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2121 #x227E)
 209 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2821 #x287E)
 210 (let ((chars '(?ー ?゛ ?゜ ?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇)))
 211   (dolist (elt chars)
 212     (modify-syntax-entry (car chars) "w")))
 213
 214 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?A #x2321 #x237E)
 215 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?H #x2421 #x247E)
 216 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?K #x2521 #x257E)
 217 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?G #x2621 #x267E)
 218 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?Y #x2721 #x277E)
 219 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?C #x3021 #x7E7E)
 220 (modify-category-entry ?ー ?K)
 221 (let ((chars '(?゛ ?゜)))
 222   (while chars
 223     (modify-category-entry (car chars) ?K)
 224     (modify-category-entry (car chars) ?H)
 225     (setq chars (cdr chars))))
 226 (let ((chars '(?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇)))
 227   (while chars
 228     (modify-category-entry (car chars) ?C)
 229     (setq chars (cdr chars))))
 230
 231 ;; JISX0212
 232
 233 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0212 "_" #x2121 #x237E)
 234
 235 ;; JISX0201-Kana
 236
 237 (let ((chars '(?｡ ?､ ?･)))
 238   (while chars
 239     (modify-syntax-entry (car chars) ".")
 240     (setq chars (cdr chars))))
 241
 242 (modify-syntax-entry ?\｢ "(｣")
 243 (modify-syntax-entry ?\｣ "(｢")
 244
 245 ;; Korean character set (KSC5601)
 246
 247 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?h)
 248
 249 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2121 #x227E)
 250 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2621 #x277E)
 251 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2830 #x287E)
 252 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2930 #x297E)
 253 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2330 #x2339)
 254 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2341 #x235A)
 255 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2361 #x237A)
 256 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?G #x2521 #x257E)
 257 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?H #x2A21 #x2A7E)
 258 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?K #x2B21 #x2B7E)
 259 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?Y #x2C21 #x2C7E)
 260
 261 ;; These are in more than one charset.
 262 (let ((parens (concat "〈〉《》「」『』【】〔〕〖〗〘〙〚〛"
 263                       "︵︶︷︸︹︺︻︼︽︾︿﹀﹁﹂﹃﹄"
 264                       "（）［］｛｝"))
 265       open close)
 266   (dotimes (i (/ (length parens) 2))
 267     (setq open (aref parens (* i 2))
 268           close (aref parens (1+ (* i 2))))
 269     (modify-syntax-entry open (format "(%c" close))
 270     (modify-syntax-entry close (format ")%c" open))))
 271
 272 ;; Arabic character set
 273
 274 (let ((charsets '(arabic-iso8859-6
 275                   arabic-digit
 276                   arabic-1-column
 277                   arabic-2-column)))
 278   (while charsets
 279     (map-charset-chars #'modify-category-entry (car charsets) ?b)
 280     (setq charsets (cdr charsets))))
 281 (modify-category-entry '(#x600 . #x6ff) ?b)
 282 (modify-category-entry '(#xfb50 . #xfdff) ?b)
 283 (modify-category-entry '(#xfe70 . #xfefe) ?b)
 284
 285 ;; Cyrillic character set (ISO-8859-5)
 286
 287 (modify-syntax-entry ?№ ".")
 288
 289 ;; Ethiopic character set
 290
 291 (modify-category-entry '(#x1200 . #x1399) ?e)
 292 (modify-category-entry '(#x2d80 . #x2dde) ?e)
 293 (let ((chars '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨)))
 294   (while chars
 295     (modify-syntax-entry (car chars) ".")
 296     (setq chars (cdr chars))))
 297 (map-charset-chars #'modify-category-entry 'ethiopic ?e)
 298
 299 ;; Hebrew character set (ISO-8859-8)
 300
 301 (modify-syntax-entry #x5be ".") ; MAQAF
 302 (modify-syntax-entry #x5c0 ".") ; PASEQ
 303 (modify-syntax-entry #x5c3 ".") ; SOF PASUQ
 304 (modify-syntax-entry #x5f3 ".") ; GERESH
 305 (modify-syntax-entry #x5f4 ".") ; GERSHAYIM
 306
 307 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
 308
 309 (modify-category-entry '(#x901 . #x970) ?i)
 310 (map-charset-chars #'modify-category-entry 'indian-is13194 ?i)
 311 (map-charset-chars #'modify-category-entry 'indian-2-column ?i)
 312
 313 ;; Lao character set
 314
 315 (modify-category-entry '(#xe80 . #xeff) ?o)
 316 (map-charset-chars #'modify-category-entry 'lao ?o)
 317
 318 (let ((deflist  '(("ກ-ຮ"    "w"     ?0) ; consonant
 319                   ("ະາຳຽເ-ໄ"        "w"     ?1) ; vowel base
 320                   ("ັິ-ືົໍ"   "w"     ?2) ; vowel upper
 321                   ("ຸູ"     "w"     ?3) ; vowel lower
 322                   ("່-໋"    "w"     ?4) ; tone mark
 323                   ("ຼຽ"     "w"     ?9) ; semivowel lower
 324                   ("໐-໙"    "w"     ?6) ; digit
 325                   ("ຯໆ"     "_"     ?5) ; symbol
 326                   ))
 327       elm chars len syntax category to ch i)
 328   (while deflist
 329     (setq elm (car deflist))
 330     (setq chars (car elm)
 331           len (length chars)
 332           syntax (nth 1 elm)
 333           category (nth 2 elm)
 334           i 0)
 335     (while (< i len)
 336       (if (= (aref chars i) ?-)
 337           (setq i (1+ i)
 338                 to (aref chars i))
 339         (setq ch (aref chars i)
 340               to ch))
 341       (while (<= ch to)
 342         (unless (string-equal syntax "w")
 343           (modify-syntax-entry ch syntax))
 344         (modify-category-entry ch category)
 345         (setq ch (1+ ch)))
 346       (setq i (1+ i)))
 347     (setq deflist (cdr deflist))))
 348
 349 ;; Thai character set (TIS620)
 350
 351 (modify-category-entry '(#xe00 . #xe7f) ?t)
 352 (map-charset-chars #'modify-category-entry 'thai-tis620 ?t)
 353
 354 (let ((deflist  '(;; chars      syntax  category
 355                   ("ก-รลว-ฮ"  "w"     ?0) ; consonant
 356                   ("ฤฦะาำเ-ๅ"     "w"     ?1) ; vowel base
 357                   ("ัิ-ื็๎"   "w"     ?2) ; vowel upper
 358                   ("ุ-ฺ"    "w"     ?3) ; vowel lower
 359                   ("่-ํ"    "w"     ?4) ; tone mark
 360                   ("๐-๙"    "w"     ?6) ; digit
 361                   ("ฯๆ฿๏๚๛" "_"     ?5) ; symbol
 362                   ))
 363       elm chars len syntax category to ch i)
 364   (while deflist
 365     (setq elm (car deflist))
 366     (setq chars (car elm)
 367           len (length chars)
 368           syntax (nth 1 elm)
 369           category (nth 2 elm)
 370           i 0)
 371     (while (< i len)
 372       (if (= (aref chars i) ?-)
 373           (setq i (1+ i)
 374                 to (aref chars i))
 375         (setq ch (aref chars i)
 376               to ch))
 377       (while (<= ch to)
 378         (unless (string-equal syntax "w")
 379           (modify-syntax-entry ch syntax))
 380         (modify-category-entry ch category)
 381         (setq ch (1+ ch)))
 382       (setq i (1+ i)))
 383     (setq deflist (cdr deflist))))
 384
 385 ;; Tibetan character set
 386
 387 (modify-category-entry '(#xf00 . #xfff) ?q)
 388 (map-charset-chars #'modify-category-entry 'tibetan ?q)
 389 (map-charset-chars #'modify-category-entry 'tibetan-1-column ?q)
 390
 391 (let ((deflist  '(;; chars             syntax category
 392                   ("ཀ-ཀྵཪ"         "w"     ?0) ; consonant
 393                   ("ྐ-ྐྵྺྻྼ"       "w"     ?0) ;
 394                   ("ིེཻོཽྀ"       "w"       ?2) ; upper vowel
 395                   ("ཾྂྃ྆྇ྈྉྊྋ" "w"    ?2) ; upper modifier
 396                   ("྄ཱུ༙༵༷"       "w"       ?3) ; lowel vowel/modifier
 397                   ("཰"                "w" ?3)             ; invisible vowel a
 398                   ("༠-༩༪-༳"             "w"     ?6) ; digit
 399                   ("་།-༒༔ཿ"        "."     ?|) ; line-break char
 400                   ("་།༏༐༑༔ཿ"            "."     ?|) ;
 401                   ("༈་།-༒༔ཿ༽༴"  "."     ?>) ; prohibition
 402                   ("་།༏༐༑༔ཿ"            "."     ?>) ;
 403                   ("ༀ-༊༼࿁࿂྅"      "."     ?<) ; prohibition
 404                   ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q) ; others
 405                   ))
 406       elm chars len syntax category to ch i)
 407   (while deflist
 408     (setq elm (car deflist))
 409     (setq chars (car elm)
 410           len (length chars)
 411           syntax (nth 1 elm)
 412           category (nth 2 elm)
 413           i 0)
 414     (while (< i len)
 415       (if (= (aref chars i) ?-)
 416           (setq i (1+ i)
 417                 to (aref chars i))
 418         (setq ch (aref chars i)
 419               to ch))
 420       (while (<= ch to)
 421         (unless (string-equal syntax "w")
 422           (modify-syntax-entry ch syntax))
 423         (modify-category-entry ch category)
 424         (setq ch (1+ ch)))
 425       (setq i (1+ i)))
 426     (setq deflist (cdr deflist))))
 427
 428 ;; Vietnamese character set
 429
 430 ;; To make a word with Latin characters
 431 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?l)
 432 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?v)
 433
 434 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?l)
 435 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?v)
 436
 437 (let ((tbl (standard-case-table))
 438       (i 32))
 439   (while (< i 128)
 440     (let* ((char (decode-char 'vietnamese-viscii-upper i))
 441            (charl (decode-char 'vietnamese-viscii-lower i))
 442            (uc (encode-char char 'ucs))
 443            (lc (encode-char charl 'ucs)))
 444       (set-case-syntax-pair char (decode-char 'vietnamese-viscii-lower i)
 445                             tbl)
 446       (if uc (modify-category-entry uc ?v))
 447       (if lc (modify-category-entry lc ?v)))
 448     (setq i (1+ i))))
 449
 450 ;; Tai Viet
 451 (let ((deflist '(;; chars       syntax  category
 452                  ((?ꪀ.  ?ꪯ) "w"     ?0) ; cosonant
 453                  ("ꪱꪵꪶ"           "w"     ?1) ; vowel base
 454                  ((?ꪹ . ?ꪽ) "w"     ?1) ; vowel base
 455                  ("ꪰꪲꪳꪷꪸꪾ"  "w"     ?2) ; vowel upper
 456                  ("ꪴ"         "w"     ?3) ; vowel lower
 457                  ("ꫀꫂ"              "w"     ?1) ; non-combining tone-mark
 458                  ("꪿꫁"              "w"     ?4) ; combining tone-mark
 459                  ((?ꫛ . ?꫟) "_"     ?5) ; symbol
 460                  )))
 461   (dolist (elm deflist)
 462     (let ((chars (car elm))
 463           (syntax (nth 1 elm))
 464           (category (nth 2 elm)))
 465       (if (consp chars)
 466           (progn
 467             (modify-syntax-entry chars syntax)
 468             (modify-category-entry chars category))
 469         (mapc #'(lambda (x)
 470                   (modify-syntax-entry x syntax)
 471                   (modify-category-entry x category))
 472               chars)))))
 473
 474 ;; Latin
 475
 476 (modify-category-entry '(#x80 . #x024F) ?l)
 477
 478 (let ((tbl (standard-case-table)) c)
 479
 480   ;; Latin-1
 481
 482   ;; Fixme: Some of the non-word syntaxes here perhaps should be
 483   ;; reviewed.  (Note that the following all implicitly have word
 484   ;; syntax: ¢£¤¥¨ª¯²³´¶¸¹º.)  There should be a well-defined way of
 485   ;; relating Unicode categories to Emacs syntax codes.
 486
 487   ;; NBSP isn't semantically interchangeable with other whitespace chars,
 488   ;; so it's more like punctation.
 489   (set-case-syntax ?  "." tbl)
 490   (set-case-syntax ?¡ "." tbl)
 491   (set-case-syntax ?¦ "_" tbl)
 492   (set-case-syntax ?§ "." tbl)
 493   (set-case-syntax ?© "_" tbl)
 494   (set-case-syntax-delims 171 187 tbl)  ; « »
 495   (set-case-syntax ?¬ "_" tbl)
 496   (set-case-syntax ? "_" tbl)
 497   (set-case-syntax ?® "_" tbl)
 498   (set-case-syntax ?° "_" tbl)
 499   (set-case-syntax ?± "_" tbl)
 500   (set-case-syntax ?µ "_" tbl)
 501   (set-case-syntax ?· "_" tbl)
 502   (set-case-syntax ?¼ "_" tbl)
 503   (set-case-syntax ?½ "_" tbl)
 504   (set-case-syntax ?¾ "_" tbl)
 505   (set-case-syntax ?¿ "." tbl)
 506   (let ((c 192))
 507     (while (<= c 222)
 508       (set-case-syntax-pair c (+ c 32) tbl)
 509       (setq c (1+ c))))
 510   (set-case-syntax ?× "_" tbl)
 511   (set-case-syntax ?ß "w" tbl)
 512   (set-case-syntax ?÷ "_" tbl)
 513   ;; See below for ÿ.
 514
 515   ;; Latin Extended-A, Latin Extended-B
 516   (setq c #x0100)
 517   (while (<= c #x02B8)
 518     (modify-category-entry c ?l)
 519     (setq c (1+ c)))
 520
 521   (let ((pair-ranges '((#x0100 . #x012F)
 522                        (#x0132 . #x0137)
 523                        (#x0139 . #x0148)
 524                        (#x014a . #x0177)
 525                        (#x0179 . #x017E)
 526                        (#x0182 . #x0185)
 527                        (#x0187 . #x018C)
 528                        (#x0191 . #x0192)
 529                        (#x0198 . #x0199)
 530                        (#x01A0 . #x01A5)
 531                        (#x01A7 . #x01A8)
 532                        (#x01AC . #x01AD)
 533                        (#x01AF . #x01B0)
 534                        (#x01B3 . #x01B6)
 535                        (#x01BC . #x01BD)
 536                        (#x01CD . #x01DC)
 537                        (#x01DE . #x01EF)
 538                        (#x01F4 . #x01F5)
 539                        (#x01F8 . #x021F)
 540                        (#x0222 . #x0233)
 541                        (#x023B . #x023C)
 542                        (#x0241 . #x0242)
 543                        (#x0246 . #x024F))))
 544     (dolist (elt pair-ranges)
 545       (let ((from (car elt)) (to (cdr elt)))
 546         (while (< from to)
 547           (set-case-syntax-pair from (1+ from) tbl)
 548           (setq from (+ from 2))))))
 549
 550   ;; In some languages, such as Turkish, U+0049 LATIN CAPITAL LETTER I
 551   ;; and U+0131 LATIN SMALL LETTER DOTLESS I make a case pair, and so
 552   ;; do U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN
 553   ;; SMALL LETTER I.
 554
 555   ;; We used to set up half of those correspondence unconditionally,
 556   ;; but that makes searches slow.  So now we don't set up either half
 557   ;; of these correspondences by default.
 558
 559   ;; (set-downcase-syntax  ?İ ?i tbl)
 560   ;; (set-upcase-syntax    ?I ?ı tbl)
 561
 562   (set-case-syntax-pair ?Ǆ ?ǆ tbl)
 563   (set-case-syntax-pair ?ǅ ?ǆ tbl)
 564   (set-case-syntax-pair ?Ǉ ?ǉ tbl)
 565   (set-case-syntax-pair ?ǈ ?ǉ tbl)
 566   (set-case-syntax-pair ?Ǌ ?ǌ tbl)
 567   (set-case-syntax-pair ?ǋ ?ǌ tbl)
 568
 569   ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
 570   (set-case-syntax-pair ?Ǳ ?ǳ tbl)
 571   (set-case-syntax-pair ?ǲ ?ǳ tbl)
 572   (set-case-syntax-pair ?Ƕ ?ƕ tbl)
 573   (set-case-syntax-pair ?Ƿ ?ƿ tbl)
 574
 575   ;; Latin Extended Additional
 576   (modify-category-entry '(#x1e00 . #x1ef9) ?l)
 577   (setq c #x1e00)
 578   (while (<= c #x1ef9)
 579     (and (zerop (% c 2))
 580          (or (<= c #x1e94) (>= c #x1ea0))
 581          (set-case-syntax-pair c (1+ c) tbl))
 582     (setq c (1+ c)))
 583
 584   ;; Greek
 585   (modify-category-entry '(#x0370 . #x03ff) ?g)
 586   (setq c #x0370)
 587   (while (<= c #x03ff)
 588     (if (or (and (>= c #x0391) (<= c #x03a1))
 589             (and (>= c #x03a3) (<= c #x03ab)))
 590         (set-case-syntax-pair c (+ c 32) tbl))
 591     (and (>= c #x03da)
 592          (<= c #x03ee)
 593          (zerop (% c 2))
 594          (set-case-syntax-pair c (1+ c) tbl))
 595     (setq c (1+ c)))
 596   (set-case-syntax-pair ?Ά ?ά tbl)
 597   (set-case-syntax-pair ?Έ ?έ tbl)
 598   (set-case-syntax-pair ?Ή ?ή tbl)
 599   (set-case-syntax-pair ?Ί ?ί tbl)
 600   (set-case-syntax-pair ?Ό ?ό tbl)
 601   (set-case-syntax-pair ?Ύ ?ύ tbl)
 602   (set-case-syntax-pair ?Ώ ?ώ tbl)
 603
 604   ;; Armenian
 605   (setq c #x531)
 606   (while (<= c #x556)
 607     (set-case-syntax-pair c (+ c #x30) tbl)
 608     (setq c (1+ c)))
 609
 610   ;; Greek Extended
 611   (modify-category-entry '(#x1f00 . #x1fff) ?g)
 612   (setq c #x1f00)
 613   (while (<= c #x1fff)
 614     (and (<= (logand c #x000f) 7)
 615          (<= c #x1fa7)
 616          (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
 617          (/= (logand c #x00f0) 7)
 618          (set-case-syntax-pair (+ c 8) c tbl))
 619     (setq c (1+ c)))
 620   (set-case-syntax-pair ?Ᾰ ?ᾰ tbl)
 621   (set-case-syntax-pair ?Ᾱ ?ᾱ tbl)
 622   (set-case-syntax-pair ?Ὰ ?ὰ tbl)
 623   (set-case-syntax-pair ?Ά ?ά tbl)
 624   (set-case-syntax-pair ?ᾼ ?ᾳ tbl)
 625   (set-case-syntax-pair ?Ὲ ?ὲ tbl)
 626   (set-case-syntax-pair ?Έ ?έ tbl)
 627   (set-case-syntax-pair ?Ὴ ?ὴ tbl)
 628   (set-case-syntax-pair ?Ή ?ή tbl)
 629   (set-case-syntax-pair ?ῌ ?ῃ tbl)
 630   (set-case-syntax-pair ?Ῐ ?ῐ tbl)
 631   (set-case-syntax-pair ?Ῑ ?ῑ tbl)
 632   (set-case-syntax-pair ?Ὶ ?ὶ tbl)
 633   (set-case-syntax-pair ?Ί ?ί tbl)
 634   (set-case-syntax-pair ?Ῠ ?ῠ tbl)
 635   (set-case-syntax-pair ?Ῡ ?ῡ tbl)
 636   (set-case-syntax-pair ?Ὺ ?ὺ tbl)
 637   (set-case-syntax-pair ?Ύ ?ύ tbl)
 638   (set-case-syntax-pair ?Ῥ ?ῥ tbl)
 639   (set-case-syntax-pair ?Ὸ ?ὸ tbl)
 640   (set-case-syntax-pair ?Ό ?ό tbl)
 641   (set-case-syntax-pair ?Ὼ ?ὼ tbl)
 642   (set-case-syntax-pair ?Ώ ?ώ tbl)
 643   (set-case-syntax-pair ?ῼ ?ῳ tbl)
 644
 645   ;; cyrillic
 646   (modify-category-entry '(#x0400 . #x04FF) ?y)
 647   (setq c #x0400)
 648   (while (<= c #x04ff)
 649     (and (>= c #x0400)
 650          (<= c #x040f)
 651          (set-case-syntax-pair c (+ c 80) tbl))
 652     (and (>= c #x0410)
 653          (<= c #x042f)
 654          (set-case-syntax-pair c (+ c 32) tbl))
 655     (and (zerop (% c 2))
 656          (or (and (>= c #x0460) (<= c #x0480))
 657              (and (>= c #x048c) (<= c #x04be))
 658              (and (>= c #x04d0) (<= c #x04f4)))
 659          (set-case-syntax-pair c (1+ c) tbl))
 660     (setq c (1+ c)))
 661   (set-case-syntax-pair ?Ӂ ?ӂ tbl)
 662   (set-case-syntax-pair ?Ӄ ?ӄ tbl)
 663   (set-case-syntax-pair ?Ӈ ?ӈ tbl)
 664   (set-case-syntax-pair ?Ӌ ?ӌ tbl)
 665   (set-case-syntax-pair ?Ӹ ?ӹ tbl)
 666
 667   ;; general punctuation
 668   (setq c #x2000)
 669   (while (<= c #x200b)
 670     (set-case-syntax c " " tbl)
 671     (setq c (1+ c)))
 672   (while (<= c #x200F)
 673     (set-case-syntax c "." tbl)
 674     (setq c (1+ c)))
 675   ;; Fixme: These aren't all right:
 676   (setq c #x2010)
 677   (while (<= c #x2016)
 678     (set-case-syntax c "_" tbl)
 679     (setq c (1+ c)))
 680   ;; Punctuation syntax for quotation marks (like `)
 681   (while (<= c #x201f)
 682     (set-case-syntax  c "." tbl)
 683     (setq c (1+ c)))
 684   ;; Fixme: These aren't all right:
 685   (while (<= c #x2027)
 686     (set-case-syntax c "_" tbl)
 687     (setq c (1+ c)))
 688   (while (<= c #x206F)
 689     (set-case-syntax c "." tbl)
 690     (setq c (1+ c)))
 691
 692   ;; Roman numerals
 693   (setq c #x2160)
 694   (while (<= c #x216f)
 695     (set-case-syntax-pair c (+ c #x10) tbl)
 696     (setq c (1+ c)))
 697
 698   ;; Fixme: The following blocks might be better as symbol rather than
 699   ;; punctuation.
 700   ;; Arrows
 701   (setq c #x2190)
 702   (while (<= c #x21FF)
 703     (set-case-syntax c "." tbl)
 704     (setq c (1+ c)))
 705   ;; Mathematical Operators
 706   (while (<= c #x22FF)
 707     (set-case-syntax c "." tbl)
 708     (setq c (1+ c)))
 709   ;; Miscellaneous Technical
 710   (while (<= c #x23FF)
 711     (set-case-syntax c "." tbl)
 712     (setq c (1+ c)))
 713   ;; Control Pictures
 714   (while (<= c #x243F)
 715     (set-case-syntax c "_" tbl)
 716     (setq c (1+ c)))
 717
 718   ;; Circled Latin
 719   (setq c #x24b6)
 720   (while (<= c #x24cf)
 721     (set-case-syntax-pair c (+ c 26) tbl)
 722     (modify-category-entry c ?l)
 723     (modify-category-entry (+ c 26) ?l)
 724     (setq c (1+ c)))
 725
 726   ;; Fullwidth Latin
 727   (setq c #xff21)
 728   (while (<= c #xff3a)
 729     (set-case-syntax-pair c (+ c #x20) tbl)
 730     (modify-category-entry c ?l)
 731     (modify-category-entry (+ c #x20) ?l)
 732     (setq c (1+ c)))
 733
 734   ;; Combining diacritics
 735   (modify-category-entry '(#x300 . #x362) ?^)
 736   ;; Combining marks
 737   (modify-category-entry '(#x20d0 . #x20e3) ?^)
 738
 739   ;; Fixme: syntax for symbols &c
 740   )
 741
 742 (let ((pairs
 743        '("⁅⁆"                               ; U+2045 U+2046
 744          "⁽⁾"                               ; U+207D U+207E
 745          "₍₎"                               ; U+208D U+208E
 746          "〈〉"                               ; U+2329 U+232A
 747          "⎴⎵"                               ; U+23B4 U+23B5
 748          "❨❩"                               ; U+2768 U+2769
 749          "❪❫"                               ; U+276A U+276B
 750          "❬❭"                               ; U+276C U+276D
 751          "❰❱"                               ; U+2770 U+2771
 752          "❲❳"                               ; U+2772 U+2773
 753          "❴❵"                               ; U+2774 U+2775
 754          "⟦⟧"                               ; U+27E6 U+27E7
 755          "⟨⟩"                               ; U+27E8 U+27E9
 756          "⟪⟫"                               ; U+27EA U+27EB
 757          "⦃⦄"                               ; U+2983 U+2984
 758          "⦅⦆"                               ; U+2985 U+2986
 759          "⦇⦈"                               ; U+2987 U+2988
 760          "⦉⦊"                               ; U+2989 U+298A
 761          "⦋⦌"                               ; U+298B U+298C
 762          "⦍⦎"                               ; U+298D U+298E
 763          "⦏⦐"                               ; U+298F U+2990
 764          "⦑⦒"                               ; U+2991 U+2992
 765          "⦓⦔"                               ; U+2993 U+2994
 766          "⦕⦖"                               ; U+2995 U+2996
 767          "⦗⦘"                               ; U+2997 U+2998
 768          "⧼⧽"                               ; U+29FC U+29FD
 769          "〈〉"                               ; U+3008 U+3009
 770          "《》"                               ; U+300A U+300B
 771          "「」"                               ; U+300C U+300D
 772          "『』"                               ; U+300E U+300F
 773          "【】"                               ; U+3010 U+3011
 774          "〔〕"                               ; U+3014 U+3015
 775          "〖〗"                               ; U+3016 U+3017
 776          "〘〙"                               ; U+3018 U+3019
 777          "〚〛"                               ; U+301A U+301B
 778          "﴾﴿"                               ; U+FD3E U+FD3F
 779          "︵︶"                               ; U+FE35 U+FE36
 780          "︷︸"                               ; U+FE37 U+FE38
 781          "︹︺"                               ; U+FE39 U+FE3A
 782          "︻︼"                               ; U+FE3B U+FE3C
 783          "︽︾"                               ; U+FE3D U+FE3E
 784          "︿﹀"                               ; U+FE3F U+FE40
 785          "﹁﹂"                               ; U+FE41 U+FE42
 786          "﹃﹄"                               ; U+FE43 U+FE44
 787          "﹙﹚"                               ; U+FE59 U+FE5A
 788          "﹛﹜"                               ; U+FE5B U+FE5C
 789          "﹝﹞"                               ; U+FE5D U+FE5E
 790          "（）"                               ; U+FF08 U+FF09
 791          "［］"                               ; U+FF3B U+FF3D
 792          "｛｝"                               ; U+FF5B U+FF5D
 793          "｟｠"                               ; U+FF5F U+FF60
 794          "｢｣"                               ; U+FF62 U+FF63
 795          )))
 796   (dolist (elt pairs)
 797     (modify-syntax-entry (aref elt 0) (string ?\( (aref elt 1)))
 798     (modify-syntax-entry (aref elt 1) (string ?\) (aref elt 0)))))
 799
 800 \f
;; For each character set, record the coding system best suited to
;; encoding it in the charset's `preferred-coding-system' property.
 803
 804 ;; Fixme: should this be junked?
 805 (let ((l '((latin-iso8859-1     . iso-latin-1)
 806            (latin-iso8859-2     . iso-latin-2)
 807            (latin-iso8859-3     . iso-latin-3)
 808            (latin-iso8859-4     . iso-latin-4)
 809            (thai-tis620         . thai-tis620)
 810            (greek-iso8859-7     . greek-iso-8bit)
 811            (arabic-iso8859-6    . iso-2022-7bit)
 812            (hebrew-iso8859-8    . hebrew-iso-8bit)
 813            (katakana-jisx0201   . japanese-shift-jis)
 814            (latin-jisx0201      . japanese-shift-jis)
 815            (cyrillic-iso8859-5  . cyrillic-iso-8bit)
 816            (latin-iso8859-9     . iso-latin-5)
 817            (japanese-jisx0208-1978 . iso-2022-jp)
 818            (chinese-gb2312      . chinese-iso-8bit)
 819            (chinese-gbk         . chinese-gbk)
 820            (gb18030-2-byte      . chinese-gb18030)
 821            (gb18030-4-byte-bmp  . chinese-gb18030)
 822            (gb18030-4-byte-smp  . chinese-gb18030)
 823            (gb18030-4-byte-ext-1 . chinese-gb18030)
 824            (gb18030-4-byte-ext-2 . chinese-gb18030)
 825            (japanese-jisx0208   . iso-2022-jp)
 826            (korean-ksc5601      . iso-2022-kr)
 827            (japanese-jisx0212   . iso-2022-jp)
 828            (chinese-big5-1      . chinese-big5)
 829            (chinese-big5-2      . chinese-big5)
 830            (chinese-sisheng     . iso-2022-7bit)
 831            (ipa                 . iso-2022-7bit)
 832            (vietnamese-viscii-lower . vietnamese-viscii)
 833            (vietnamese-viscii-upper . vietnamese-viscii)
 834            (arabic-digit        . iso-2022-7bit)
 835            (arabic-1-column     . iso-2022-7bit)
 836            (lao                 . lao)
 837            (arabic-2-column     . iso-2022-7bit)
 838            (indian-is13194      . devanagari)
 839            (indian-glyph        . devanagari)
 840            (tibetan-1-column    . tibetan)
 841            (ethiopic            . iso-2022-7bit)
 842            (chinese-cns11643-1  . iso-2022-cn)
 843            (chinese-cns11643-2  . iso-2022-cn)
 844            (chinese-cns11643-3  . iso-2022-cn)
 845            (chinese-cns11643-4  . iso-2022-cn)
 846            (chinese-cns11643-5  . iso-2022-cn)
 847            (chinese-cns11643-6  . iso-2022-cn)
 848            (chinese-cns11643-7  . iso-2022-cn)
 849            (indian-2-column     . devanagari)
 850            (tibetan             . tibetan)
 851            (latin-iso8859-14    . iso-latin-8)
 852            (latin-iso8859-15    . iso-latin-9))))
 853   (while l
 854     (put-charset-property (car (car l)) 'preferred-coding-system (cdr (car l)))
 855     (setq l (cdr l))))
 856
 857 \f
 858 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
 859 ;; SPACE and NEWLINE are already set.
 860
 861 (set-char-table-range auto-fill-chars '(#x3041 . #x30FF) t)
 862 (set-char-table-range auto-fill-chars '(#x3400 . #x4DB5) t)
 863 (set-char-table-range auto-fill-chars '(#x4e00 . #x9fbb) t)
 864 (set-char-table-range auto-fill-chars '(#xF900 . #xFAFF) t)
 865 (set-char-table-range auto-fill-chars '(#xFF00 . #xFF9F) t)
 866 (set-char-table-range auto-fill-chars '(#x20000 . #x2FFFF) t)
 867
 868 \f
 869 ;;; Setting char-width-table.  The default is 1.
 870
 871 ;; 0: non-spacing, enclosing combining, formatting, Hangul Jamo medial
 872 ;;    and final characters.
 873 (let ((l '((#x0300 . #x036F)
 874            (#x0483 . #x0489)
 875            (#x0591 . #x05BD)
 876            (#x05BF . #x05BF)
 877            (#x05C1 . #x05C2)
 878            (#x05C4 . #x05C5)
 879            (#x05C7 . #x05C7)
 880            (#x0600 . #x0603)
 881            (#x0610 . #x0615)
 882            (#x064B . #x065E)
 883            (#x0670 . #x0670)
 884            (#x06D6 . #x06E4)
 885            (#x06E7 . #x06E8)
 886            (#x06EA . #x06ED)
 887            (#x070F . #x070F)
 888            (#x0711 . #x0711)
 889            (#x0730 . #x074A)
 890            (#x07A6 . #x07B0)
 891            (#x07EB . #x07F3)
 892            (#x0901 . #x0902)
 893            (#x093C . #x093C)
 894            (#x0941 . #x0948)
 895            (#x094D . #x094D)
 896            (#x0951 . #x0954)
 897            (#x0962 . #x0963)
 898            (#x0981 . #x0981)
 899            (#x09BC . #x09BC)
 900            (#x09C1 . #x09C4)
 901            (#x09CD . #x09CD)
 902            (#x09E2 . #x09E3)
 903            (#x0A01 . #x0A02)
 904            (#x0A3C . #x0A3C)
 905            (#x0A41 . #x0A4D)
 906            (#x0A70 . #x0A71)
 907            (#x0A81 . #x0A82)
 908            (#x0ABC . #x0ABC)
 909            (#x0AC1 . #x0AC8)
 910            (#x0ACD . #x0ACD)
 911            (#x0AE2 . #x0AE3)
 912            (#x0B01 . #x0B01)
 913            (#x0B3C . #x0B3C)
 914            (#x0B3F . #x0B3F)
 915            (#x0B41 . #x0B43)
 916            (#x0B4D . #x0B56)
 917            (#x0B82 . #x0B82)
 918            (#x0BC0 . #x0BC0)
 919            (#x0BCD . #x0BCD)
 920            (#x0C3E . #x0C40)
 921            (#x0C46 . #x0C56)
 922            (#x0CBC . #x0CBC)
 923            (#x0CBF . #x0CBF)
 924            (#x0CC6 . #x0CC6)
 925            (#x0CCC . #x0CCD)
 926            (#x0CE2 . #x0CE3)
 927            (#x0D41 . #x0D43)
 928            (#x0D4D . #x0D4D)
 929            (#x0DCA . #x0DCA)
 930            (#x0DD2 . #x0DD6)
 931            (#x0E31 . #x0E31)
 932            (#x0E34 . #x0E3A)
 933            (#x0E47 . #x0E4E)
 934            (#x0EB1 . #x0EB1)
 935            (#x0EB4 . #x0EBC)
 936            (#x0EC8 . #x0ECD)
 937            (#x0F18 . #x0F19)
 938            (#x0F35 . #x0F35)
 939            (#x0F37 . #x0F37)
 940            (#x0F39 . #x0F39)
 941            (#x0F71 . #x0F7E)
 942            (#x0F80 . #x0F84)
 943            (#x0F86 . #x0F87)
 944            (#x0F90 . #x0FBC)
 945            (#x0FC6 . #x0FC6)
 946            (#x102D . #x1030)
 947            (#x1032 . #x1037)
 948            (#x1039 . #x1039)
 949            (#x1058 . #x1059)
 950            (#x1160 . #x11FF)
 951            (#x135F . #x135F)
 952            (#x1712 . #x1714)
 953            (#x1732 . #x1734)
 954            (#x1752 . #x1753)
 955            (#x1772 . #x1773)
 956            (#x17B4 . #x17B5)
 957            (#x17B7 . #x17BD)
 958            (#x17C6 . #x17C6)
 959            (#x17C9 . #x17D3)
 960            (#x17DD . #x17DD)
 961            (#x180B . #x180D)
 962            (#x18A9 . #x18A9)
 963            (#x1920 . #x1922)
 964            (#x1927 . #x1928)
 965            (#x1932 . #x1932)
 966            (#x1939 . #x193B)
 967            (#x1A17 . #x1A18)
 968            (#x1B00 . #x1B03)
 969            (#x1B34 . #x1B34)
 970            (#x1B36 . #x1B3A)
 971            (#x1B3C . #x1B3C)
 972            (#x1B42 . #x1B42)
 973            (#x1B6B . #x1B73)
 974            (#x1DC0 . #x1DFF)
 975            (#x200B . #x200F)
 976            (#x202A . #x202E)
 977            (#x2060 . #x206F)
 978            (#x20D0 . #x20EF)
 979            (#x302A . #x302F)
 980            (#x3099 . #x309A)
 981            (#xA806 . #xA806)
 982            (#xA80B . #xA80B)
 983            (#xA825 . #xA826)
 984            (#xFB1E . #xFB1E)
 985            (#xFE00 . #xFE0F)
 986            (#xFE20 . #xFE23)
 987            (#xFEFF . #xFEFF)
 988            (#xFFF9 . #xFFFB)
 989            (#x10A01 . #x10A0F)
 990            (#x10A38 . #x10A3F)
 991            (#x1D167 . #x1D169)
 992            (#x1D173 . #x1D182)
 993            (#x1D185 . #x1D18B)
 994            (#x1D1AA . #x1D1AD)
 995            (#x1D242 . #x1D244)
 996            (#xE0001 . #xE01EF))))
 997   (dolist (elt l)
 998     (set-char-table-range char-width-table elt 0)))
 999
1000 ;; 2: East Asian Wide and Full-width characters.
;; Give width 2 to every (FROM . TO) range in the list, in list order.
(mapc
 (lambda (wide-range)
   (set-char-table-range char-width-table wide-range 2))
 '((#x1100 . #x115F)
   (#x2329 . #x232A)
   (#x2E80 . #x303E)
   (#x3040 . #xA4CF)
   (#xAC00 . #xD7A3)
   (#xF900 . #xFAFF)
   (#xFE30 . #xFE6F)
   (#xFF01 . #xFF60)
   (#xFFE0 . #xFFE6)
   (#x20000 . #x2FFFF)
   (#x30000 . #x3FFFF)))
1014
1015 ;; Other double width
1016 ;;(map-charset-chars
1017 ;; (lambda (range ignore) (set-char-table-range char-width-table range 2))
1018 ;; 'ethiopic)
1019 ;; (map-charset-chars
1020 ;;  (lambda (range ignore) (set-char-table-range char-width-table range 2))
1021 ;; 'tibetan)
;; Characters of these charsets are given width 2, one charset after
;; the other.
(dolist (two-column-charset '(indian-2-column arabic-2-column))
  (map-charset-chars
   (lambda (char-range _unused)
     (set-char-table-range char-width-table char-range 2))
   two-column-charset))
1028
(defvar cjk-char-width-table
  (let* ((cjk-widths (make-char-table nil))
         ;; Called once per range of a charset; the second argument
         ;; supplied by `map-charset-chars' is not used.
         (mark-wide (lambda (char-range _unused)
                      (set-char-table-range cjk-widths char-range 2))))
    (mapc (lambda (cjk-charset)
            (map-charset-chars mark-wide cjk-charset))
          '(big5 chinese-gb2312 chinese-cns11643-1
                 japanese-jisx0208 korean-ksc5601))
    (optimize-char-table cjk-widths)
    ;; Anything not set to 2 above falls back to `char-width-table'.
    (set-char-table-parent cjk-widths char-width-table)
    cjk-widths)
  "Character width table used in CJK language environment.")
1040
(defun use-cjk-char-width-table ()
  "Internal use only.
Make `cjk-char-width-table' the current `char-width-table', as
appropriate for a CJK language environment."
  (let ((cjk-widths cjk-char-width-table))
    (setq char-width-table cjk-widths)))
1045
(defun use-default-char-width-table ()
  "Internal use only.
Make the parent of `cjk-char-width-table' the current
`char-width-table', as appropriate for a non-CJK language environment."
  (let ((default-widths (char-table-parent cjk-char-width-table)))
    (setq char-width-table default-widths)))
1050
;; Optimize the standard case, category and syntax tables, in that
;; order.
(dolist (standard-table (list (standard-case-table)
                              (standard-category-table)
                              (standard-syntax-table)))
  (optimize-char-table standard-table))
1054
1055 \f
1056 ;; Setting char-script-table.
1057
1058 ;; The Unicode blocks actually extend past some of these ranges with
1059 ;; undefined codepoints.
;; Each entry is (FROM TO SCRIPT).  Code points FROM..TO are mapped to
;; SCRIPT in `char-script-table'.  The distinct script symbols, in
;; order of first appearance in the list, are stored in extra slot 0
;; of the table.
(let ((script-list nil))
  (dolist
      (elt
       '((#x0000 #x007F latin)
         (#x00A0 #x024F latin)
         (#x0250 #x02AF phonetic)
         (#x02B0 #x036F latin)
         (#x0370 #x03E1 greek)
         (#x03E2 #x03EF coptic)
         (#x03F0 #x03F3 greek)
         (#x0400 #x04FF cyrillic)
         (#x0530 #x058F armenian)
         (#x0590 #x05FF hebrew)
         (#x0600 #x06FF arabic)
         (#x0700 #x074F syriac)
         (#x07C0 #x07FA nko)
         (#x0780 #x07BF thaana)
         (#x0900 #x097F devanagari)
         (#x0980 #x09FF bengali)
         (#x0A00 #x0A7F gurmukhi)
         (#x0A80 #x0AFF gujarati)
         (#x0B00 #x0B7F oriya)
         (#x0B80 #x0BFF tamil)
         (#x0C00 #x0C7F telugu)
         (#x0C80 #x0CFF kannada)
         (#x0D00 #x0D7F malayalam)
         (#x0D80 #x0DFF sinhala)
         (#x0E00 #x0E5F thai)
         (#x0E80 #x0EDF lao)
         (#x0F00 #x0FFF tibetan)
         (#x1000 #x105F myanmar)
         (#x10A0 #x10FF georgian)
         (#x1100 #x11FF hangul)
         (#x1200 #x139F ethiopic)
         (#x13A0 #x13FF cherokee)
         (#x1400 #x167F canadian-aboriginal)
         (#x1680 #x169F ogham)
         (#x16A0 #x16FF runic)
         (#x1780 #x17FF khmer)
         (#x1800 #x18AF mongolian)
         (#x1D00 #x1DFF phonetic)
         (#x1E00 #x1EFF latin)
         (#x1F00 #x1FFF greek)
         (#x2000 #x27FF symbol)
         (#x2800 #x28FF braille)
         (#x2D80 #x2DDF ethiopic)
         (#x2E80 #x2FDF han)
         (#x2FF0 #x2FFF ideographic-description)
         (#x3000 #x303F cjk-misc)
         (#x3040 #x30FF kana)
         (#x3100 #x312F bopomofo)
         (#x3130 #x318F hangul)
         (#x3190 #x319F kanbun)
         (#x31A0 #x31BF bopomofo)
         (#x3400 #x9FAF han)
         (#xA000 #xA4CF yi)
         (#xAA00 #xAA5F cham)
         (#xAA80 #xAADF tai-viet)
         (#xAC00 #xD7AF hangul)
         (#xF900 #xFAFF han)
         (#xFB1D #xFB4F hebrew)
         (#xFB50 #xFDFF arabic)
         (#xFE70 #xFEFC arabic)
         (#xFF00 #xFF5F cjk-misc)
         (#xFF61 #xFF9F kana)
         (#xFFE0 #xFFE6 cjk-misc)
         (#x10000 #x100FF linear-b)
         (#x10100 #x1013F aegean-number)
         (#x10140 #x1018A ancient-greek-number)
         (#x10190 #x1019B ancient-symbol)
         (#x101D0 #x101FF phaistos-disc)
         (#x10280 #x1029F lycian)
         (#x102A0 #x102DF carian)
         ;; Was misspelled `olt-italic'.
         (#x10300 #x1032F old-italic)
         (#x10380 #x1039F ugaritic)
         (#x103A0 #x103DF old-persian)
         (#x10400 #x1044F deseret)
         (#x10450 #x1047F shavian)
         (#x10480 #x104AF osmanya)
         (#x10800 #x1083F cypriot-syllabary)
         (#x10900 #x1091F phoenician)
         (#x10920 #x1093F lydian)
         (#x10A00 #x10A5F kharoshthi)
         (#x12000 #x123FF cuneiform)
         (#x12400 #x1247F cuneiform-numbers-and-punctuation)
         (#x1D000 #x1D0FF byzantine-musical-symbol)
         (#x1D100 #x1D1FF musical-symbol)
         (#x1D200 #x1D24F ancient-greek-musical-notation)
         (#x1D300 #x1D35F tai-xuan-jing-symbol)
         (#x1D360 #x1D37F counting-rod-numeral)
         (#x1D400 #x1D7FF mathematical)
         (#x1F000 #x1F02F mahjong-tile)
         (#x1F030 #x1F09F domino-tile)
         (#x20000 #x2AFFF han)
         (#x2F800 #x2FFFF han)))
    (let ((from (car elt))
          (to (nth 1 elt))
          (script (nth 2 elt)))
      (set-char-table-range char-script-table (cons from to) script)
      ;; Collect each script symbol once; the list is built in reverse
      ;; and put back into first-appearance order below.
      (unless (memq script script-list)
        (setq script-list (cons script script-list)))))
  (set-char-table-extra-slot char-script-table 0 (nreverse script-list)))
1160
;; Map every character of the `tibetan' charset to the `tibetan'
;; script.
(map-charset-chars
 (lambda (tibetan-range _unused)
   (set-char-table-range char-script-table tibetan-range 'tibetan))
 'tibetan)
1165
1166 \f
1167 ;;; Setting unicode-category-table.
1168
1169 ;; This macro is to build unicode-category-table at compile time so
1170 ;; that C code can access the table efficiently.
(defmacro build-unicode-category-table ()
  "Expand to a char-table mapping characters to their general category.
The table is computed while the macro is expanded.  Code points below
#xD800, from #xF900 up to but excluding #x30000, and from #xE0000 up
to but excluding #xE0200 take the value of their `general-category'
char-code property.  The ranges #xE000-#xF8FF, #xF0000-#xFFFFD and
#x100000-#x10FFFD are set to `Co'.  All other code points are left
nil."
  (let ((table (make-char-table 'unicode-category-table nil)))
    (dotimes (i #x110000)
      ;; The lower bounds are inclusive.  A strict `>' here would skip
      ;; U+F900, which lies just past the #xE000-#xF8FF range set
      ;; below, and U+E0000, leaving both without a category.
      (if (or (< i #xD800)
              (and (>= i #xF900) (< i #x30000))
              (and (>= i #xE0000) (< i #xE0200)))
          (aset table i (get-char-code-property i 'general-category))))
    (set-char-table-range table '(#xE000 . #xF8FF) 'Co)
    (set-char-table-range table '(#xF0000 . #xFFFFD) 'Co)
    (set-char-table-range table '(#x100000 . #x10FFFD) 'Co)
    (optimize-char-table table 'eq)
    table))

(setq unicode-category-table (build-unicode-category-table))
1185
1186 \f
1187 ;;; Setting word boundary.
1188
;; Each element of both lists is a pair (CATEGORY-1 . CATEGORY-2) of
;; category mnemonics; nil appears in place of a category in the first
;; two combining pairs.
(setq word-combining-categories
      (list (cons nil ?^)
            (cons ?^ nil)
            (cons ?C ?H)
            (cons ?C ?K))
      word-separating-categories        ;  (2-byte character sets)
      (list (cons ?H ?K)))              ; Hiragana - Katakana
1198
1199 ;; Local Variables:
1200 ;; coding: utf-8
1201 ;; End:
1202
1203 ;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
1204 ;;; characters.el ends here