]> code.delx.au - gnu-emacs/blob - lisp/international/characters.el
Merge from emacs--rel--22
[gnu-emacs] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 ;; 2005, 2006, 2007, 2008
7 ;; National Institute of Advanced Industrial Science and Technology (AIST)
8 ;; Registration Number H14PRO021
9
10 ;; Keywords: multibyte character, character set, syntax, category
11
12 ;; This file is part of GNU Emacs.
13
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 3, or (at your option)
17 ;; any later version.
18
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
23
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27 ;; Boston, MA 02110-1301, USA.
28
29 ;;; Commentary:
30
31 ;; This file contains multibyte characters. Save this file always in
32 ;; the coding system `iso-2022-7bit'.
33
34 ;; This file does not define the syntax for Latin-N character sets;
35 ;; those are defined by the files latin-N.el.
36
37 ;;; Code:
38
39 ;; We must set utf-translate-cjk-mode to nil while loading this file
40 ;; to avoid translating CJK characters in decode-char.
;; Remember the current value of `utf-translate-cjk-mode', then switch
;; the mode off.  `defvar' leaves an already-bound variable untouched,
;; so if `saved-utf-translate-cjk-mode' is still bound from an earlier
;; load, that earlier value is kept.
;; NOTE(review): the saved value is presumably restored later in the
;; file; that code is not visible in this section -- confirm.
41 (defvar saved-utf-translate-cjk-mode utf-translate-cjk-mode)
42 (setq utf-translate-cjk-mode nil)
43
44 ;;; Predefined categories.
45
46 ;; For each character set.
47
;; Every predefined category, as (CATEGORY . DOCSTRING).  The entries
;; are registered with `define-category' in the order listed.
(dolist (spec
         '(;; For each character set.
           (?a . "ASCII graphic characters 32-126 (ISO646 IRV:1983[4/0])")
           (?l . "Latin")
           (?t . "Thai")
           (?g . "Greek")
           (?b . "Arabic")
           (?w . "Hebrew")
           (?y . "Cyrillic")
           (?k . "Japanese katakana")
           (?r . "Japanese roman")
           (?c . "Chinese")
           (?j . "Japanese")
           (?h . "Korean")
           (?e . "Ethiopic (Ge'ez)")
           (?v . "Vietnamese")
           (?i . "Indian")
           (?o . "Lao")
           (?q . "Tibetan")

           ;; For each group (row) of 2-byte character sets.
           (?A . "Alpha-numeric characters of 2-byte character sets")
           (?C . "Chinese (Han) characters of 2-byte character sets")
           (?G . "Greek characters of 2-byte character sets")
           (?H . "Japanese Hiragana characters of 2-byte character sets")
           (?K . "Japanese Katakana characters of 2-byte character sets")
           (?N . "Korean Hangul characters of 2-byte character sets")
           (?Y . "Cyrillic characters of 2-byte character sets")
           (?I . "Indian Glyphs")

           ;; For phonetic classifications.
           (?0 . "consonant")
           (?1 . "base (independent) vowel")
           (?2 . "upper diacritical mark (including upper vowel)")
           (?3 . "lower diacritical mark (including lower vowel)")
           (?4 . "tone mark")
           (?5 . "symbol")
           (?6 . "digit")
           (?7 . "vowel-modifying diacritical mark")
           (?8 . "vowel-signs")
           (?9 . "semivowel lower")

           ;; For filling.
           (?| . "While filling, we can break a line at this character.")

           ;; For indentation calculation.
           (?\s . "This character counts as a space for indentation purposes.")

           ;; Keep the following for `kinsoku' processing.  See comments
           ;; in kinsoku.el.
           (?> . "A character which can't be placed at beginning of line.")
           (?< . "A character which can't be placed at end of line.")

           ;; Combining
           (?^ . "Combining diacritic or mark")))
  (define-category (car spec) (cdr spec)))
104 \f
105 ;;; Setting syntax and category.
106
107 ;; ASCII
108
;; Every ASCII graphic character (codes 32 through 126) belongs to both
;; category `a' (ASCII) and category `l' (Latin).
(dotimes (offset (- 127 32))
  (let ((c (+ 32 offset)))
    (modify-category-entry c ?a)
    (modify-category-entry c ?l)))
114
115 ;; Arabic character set
116
;; Put each Emacs-internal Arabic charset into category `b' (Arabic).
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  ;; (modify-syntax-entry (make-char charset) "w")
  (modify-category-entry (make-char charset) ?b))
;; Unicode code point ranges that also get category `b' (Arabic).
;; Each entry is (FIRST . LAST), both ends inclusive.
(dolist (range '((#x600 . #x6ff)
                 (#xfb50 . #xfdff)
                 (#xfe70 . #xfefe)))
  (let ((code (car range))
        (end (cdr range)))
    (while (<= code end)
      (modify-category-entry (decode-char 'ucs code) ?b)
      (setq code (1+ code)))))
137
138 ;; Chinese character set (GB2312)
139
140 ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
;; GB2312 rows 33, 34 and 41 get symbol-constituent syntax.
(dolist (row '(33 34 41))
  (modify-syntax-entry (make-char 'chinese-gb2312 row) "_"))
;; GB2312 bracket pairs.  The first ten entries give a character
;; open-parenthesis syntax with the character in the descriptor as its
;; match; the last ten give each of those matches close-parenthesis
;; syntax pointing back at its opener.
144 (modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
145 (modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
146 (modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
147 (modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
148 (modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
149 (modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
150 (modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
151 (modify-syntax-entry ?\\e$A#(\e(B "(\e$A#)\e(B")
152 (modify-syntax-entry ?\\e$A#{\e(B "(\e$A#}\e(B")
153 (modify-syntax-entry ?\\e$A#[\e(B "(\e$A#]\e(B")
154 (modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
155 (modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
156 (modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
157 (modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
158 (modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
159 (modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
160 (modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
161 (modify-syntax-entry ?\\e$A#)\e(B ")\e$A#(\e(B")
162 (modify-syntax-entry ?\\e$A#}\e(B ")\e$A#{\e(B")
163 (modify-syntax-entry ?\\e$A#]\e(B ")\e$A#[\e(B")
164
;; Give every character of the string below punctuation syntax (".").
165 (let ((chars "\e$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d\e(B"))
166 (dotimes (i (length chars))
167 (modify-syntax-entry (aref chars i) ".")))
168
;; The charset as a whole: `c' (Chinese) and `|' (a line may be broken
;; at this character while filling).
(let ((gb-generic (make-char 'chinese-gb2312)))
  (modify-category-entry gb-generic ?c)
  (modify-category-entry gb-generic ?\|))
;; Categories of individual rows, as (ROW . CATEGORY).
(dolist (entry '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
  (modify-category-entry (make-char 'chinese-gb2312 (car entry))
                         (cdr entry)))
;; Rows 48 through 126 get category `C' (Han).
(dotimes (k (- 127 48))
  (modify-category-entry (make-char 'chinese-gb2312 (+ 48 k)) ?C))
180
;; Upper/lower case pairs inside GB2312, registered in the standard
;; case table.  Each entry is (ROW UPPER-START LOWER-START COUNT).
(let ((case-table (standard-case-table)))
  (dolist (run '((#x23 #x41 #x61 26)
                 (#x26 #x21 #x41 24)
                 (#x27 #x21 #x51 33)))
    (let ((row (nth 0 run))
          (upper (nth 1 run))
          (lower (nth 2 run)))
      (dotimes (k (nth 3 run))
        (set-case-syntax-pair (make-char 'chinese-gb2312 row (+ upper k))
                              (make-char 'chinese-gb2312 row (+ lower k))
                              case-table)))))
191
192 ;; Chinese character set (BIG5)
193
;; Syntax for runs of Big5 characters.  Each entry is
;; (FIRST-CODE LIMIT-CODE SYNTAX); the character decoded from
;; LIMIT-CODE itself is not modified.
(dolist (span '((#xA141 #xA15D ".")
                (#xA1A5 #xA1AD ".")
                (#xA1AD #xA2AF "_")))
  (let ((ch (decode-big5-char (nth 0 span)))
        (limit (decode-big5-char (nth 1 span)))
        (syntax (nth 2 span)))
    (while (< ch limit)
      (modify-syntax-entry ch syntax)
      (setq ch (1+ ch)))))
209
;; The string lists Big5 brackets as consecutive opener/closer pairs.
;; Each opener gets open-parenthesis syntax matched to its closer, and
;; each closer gets close-parenthesis syntax matched to its opener.
(let* ((brackets "\e$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
       (pair-count (/ (length brackets) 2))
       (k 0)
       opener closer)
  (while (< k pair-count)
    (setq opener (aref brackets (* 2 k))
          closer (aref brackets (1+ (* 2 k))))
    (modify-syntax-entry opener (format "(%c" closer))
    (modify-syntax-entry closer (format ")%c" opener))
    (setq k (1+ k))))
217
;; Both Big5 charsets get the categories `c' (Chinese), `C' (Han) and
;; `|' (a line may be broken at this character while filling).
(dolist (generic (list (make-char 'chinese-big5-1)
                       (make-char 'chinese-big5-2)))
  ;; (modify-syntax-entry generic "w")
  (dolist (category '(?c ?C ?\|))
    (modify-category-entry generic category)))
231
;; Upper/lower case pairs for the full-width letters of Big5,
;; registered in the standard case table.
(let ((tbl (standard-case-table)))
  ;; Latin A..V are #xA2CF..#xA2E4; their lower-case forms a..v sit 26
  ;; code points higher, at #xA2E9..#xA2FE.
  (dotimes (i 22)
    (set-case-syntax-pair (decode-big5-char (+ #xA2CF i))
                          (decode-big5-char (+ #xA2CF i 26)) tbl))
  ;; Latin W..Z.  W is the 23rd capital, i.e. #xA2CF + 22 = #xA2E5
  ;; (#xA2E4 is V).  The lower-case forms w..z continue at #xA340.
  (dotimes (i 4)
    (set-case-syntax-pair (decode-big5-char (+ #xA2E5 i))
                          (decode-big5-char (+ #xA340 i)) tbl))
  ;; Greek: 24 capitals from #xA344, lower case 24 code points later.
  (dotimes (i 24)
    (set-case-syntax-pair (decode-big5-char (+ #xA344 i))
                          (decode-big5-char (+ #xA344 i 24)) tbl)))
242
243
244 ;; Chinese character set (CNS11643)
245
;; Each CNS 11643 plane gets the categories `c' (Chinese), `C' (Han)
;; and `|' (a line may be broken at this character while filling).
(dolist (plane '(chinese-cns11643-1
                 chinese-cns11643-2
                 chinese-cns11643-3
                 chinese-cns11643-4
                 chinese-cns11643-5
                 chinese-cns11643-6
                 chinese-cns11643-7))
  (let ((generic (make-char plane)))
    ;; (modify-syntax-entry generic "w")
    (dolist (category '(?c ?C ?|))
      (modify-category-entry generic category))))
261
;; The string lists CNS 11643 brackets as consecutive opener/closer
;; pairs.  Each opener gets open-parenthesis syntax matched to its
;; closer, and each closer gets close-parenthesis syntax matched to its
;; opener.
(let* ((brackets "\e$(G!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
       (pair-count (/ (length brackets) 2))
       (k 0)
       opener closer)
  (while (< k pair-count)
    (setq opener (aref brackets (* 2 k))
          closer (aref brackets (1+ (* 2 k))))
    (modify-syntax-entry opener (format "(%c" closer))
    (modify-syntax-entry closer (format ")%c" opener))
    (setq k (1+ k))))
269
270 ;; Cyrillic character set (ISO-8859-5)
271
;; The whole cyrillic-iso8859-5 charset gets category `y' (Cyrillic).
272 (modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
273
;; Code 160 of the charset gets whitespace syntax; the three characters
;; after it get punctuation syntax.
274 (modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
275 (modify-syntax-entry ?\e,L-\e(B ".")
276 (modify-syntax-entry ?\e,Lp\e(B ".")
277 (modify-syntax-entry ?\e,L}\e(B ".")
;; Register Cyrillic upper/lower case pairs in the standard case table.
;; Each `set-case-syntax-pair' call takes the upper-case character
;; first and the lower-case character second.  The first group of
;; literals uses the `\e,L' escape prefix, the second group the `\e$,1'
;; prefix.
;; NOTE(review): the second group is presumably the Unicode form of the
;; same letters (other sections of this file label such literals
;; "Unicode equivalents") -- confirm.
278 (let ((tbl (standard-case-table)))
279 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
280 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
281 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
282 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
283 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
284 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
285 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
286 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
287 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
288 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
289 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
290 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
291 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
292 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
293 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
294 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
295 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
296 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
297 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
298 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
299 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
300 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
301 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
302 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
303 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
304 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
305 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
306 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
307 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
308 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
309 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
310 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
311 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
312 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
313 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
314 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
315 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
316 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
317 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
318 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
319 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
320 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
321 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
322 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
323 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
324 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl)
325 (set-case-syntax-pair ?\e$,1(!\e(B ?\e$,1(q\e(B tbl)
326 (set-case-syntax-pair ?\e$,1("\e(B ?\e$,1(r\e(B tbl)
327 (set-case-syntax-pair ?\e$,1(#\e(B ?\e$,1(s\e(B tbl)
328 (set-case-syntax-pair ?\e$,1($\e(B ?\e$,1(t\e(B tbl)
329 (set-case-syntax-pair ?\e$,1(%\e(B ?\e$,1(u\e(B tbl)
330 (set-case-syntax-pair ?\e$,1(&\e(B ?\e$,1(v\e(B tbl)
331 (set-case-syntax-pair ?\e$,1('\e(B ?\e$,1(w\e(B tbl)
332 (set-case-syntax-pair ?\e$,1((\e(B ?\e$,1(x\e(B tbl)
333 (set-case-syntax-pair ?\e$,1()\e(B ?\e$,1(y\e(B tbl)
334 (set-case-syntax-pair ?\e$,1(*\e(B ?\e$,1(z\e(B tbl)
335 (set-case-syntax-pair ?\e$,1(+\e(B ?\e$,1({\e(B tbl)
336 (set-case-syntax-pair ?\e$,1(,\e(B ?\e$,1(|\e(B tbl)
337 (set-case-syntax-pair ?\e$,1(.\e(B ?\e$,1(~\e(B tbl)
338 (set-case-syntax-pair ?\e$,1(/\e(B ?\e$,1(\7f\e(B tbl)
339 (set-case-syntax-pair ?\e$,1(0\e(B ?\e$,1(P\e(B tbl)
340 (set-case-syntax-pair ?\e$,1(1\e(B ?\e$,1(Q\e(B tbl)
341 (set-case-syntax-pair ?\e$,1(2\e(B ?\e$,1(R\e(B tbl)
342 (set-case-syntax-pair ?\e$,1(3\e(B ?\e$,1(S\e(B tbl)
343 (set-case-syntax-pair ?\e$,1(4\e(B ?\e$,1(T\e(B tbl)
344 (set-case-syntax-pair ?\e$,1(5\e(B ?\e$,1(U\e(B tbl)
345 (set-case-syntax-pair ?\e$,1(6\e(B ?\e$,1(V\e(B tbl)
346 (set-case-syntax-pair ?\e$,1(7\e(B ?\e$,1(W\e(B tbl)
347 (set-case-syntax-pair ?\e$,1(8\e(B ?\e$,1(X\e(B tbl)
348 (set-case-syntax-pair ?\e$,1(9\e(B ?\e$,1(Y\e(B tbl)
349 (set-case-syntax-pair ?\e$,1(:\e(B ?\e$,1(Z\e(B tbl)
350 (set-case-syntax-pair ?\e$,1(;\e(B ?\e$,1([\e(B tbl)
351 (set-case-syntax-pair ?\e$,1(<\e(B ?\e$,1(\\e(B tbl)
352 (set-case-syntax-pair ?\e$,1(=\e(B ?\e$,1(]\e(B tbl)
353 (set-case-syntax-pair ?\e$,1(>\e(B ?\e$,1(^\e(B tbl)
354 (set-case-syntax-pair ?\e$,1(?\e(B ?\e$,1(_\e(B tbl)
355 (set-case-syntax-pair ?\e$,1(@\e(B ?\e$,1(`\e(B tbl)
356 (set-case-syntax-pair ?\e$,1(A\e(B ?\e$,1(a\e(B tbl)
357 (set-case-syntax-pair ?\e$,1(B\e(B ?\e$,1(b\e(B tbl)
358 (set-case-syntax-pair ?\e$,1(C\e(B ?\e$,1(c\e(B tbl)
359 (set-case-syntax-pair ?\e$,1(D\e(B ?\e$,1(d\e(B tbl)
360 (set-case-syntax-pair ?\e$,1(E\e(B ?\e$,1(e\e(B tbl)
361 (set-case-syntax-pair ?\e$,1(F\e(B ?\e$,1(f\e(B tbl)
362 (set-case-syntax-pair ?\e$,1(G\e(B ?\e$,1(g\e(B tbl)
363 (set-case-syntax-pair ?\e$,1(H\e(B ?\e$,1(h\e(B tbl)
364 (set-case-syntax-pair ?\e$,1(I\e(B ?\e$,1(i\e(B tbl)
365 (set-case-syntax-pair ?\e$,1(J\e(B ?\e$,1(j\e(B tbl)
366 (set-case-syntax-pair ?\e$,1(K\e(B ?\e$,1(k\e(B tbl)
367 (set-case-syntax-pair ?\e$,1(L\e(B ?\e$,1(l\e(B tbl)
368 (set-case-syntax-pair ?\e$,1(M\e(B ?\e$,1(m\e(B tbl)
369 (set-case-syntax-pair ?\e$,1(N\e(B ?\e$,1(n\e(B tbl)
370 (set-case-syntax-pair ?\e$,1(O\e(B ?\e$,1(o\e(B tbl))
371
372 ;; Devanagari character set
373
374 ;;; Commented out since the categories appear not to be used anywhere
375 ;;; and word syntax is the default.
376 ;; (let ((deflist '(;; chars syntax category
377 ;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
378 ;; ; chandrabindu, anuswar, visarga
379 ;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
380 ;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
381 ;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
382 ;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
383 ;; ;; Unicode equivalents
384 ;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
385 ;; ; chandrabindu, anuswar, visarga
386 ;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
387 ;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
388 ;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
389 ;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
390 ;; ))
391 ;; elm chars len syntax category to ch i)
392 ;; (while deflist
393 ;; (setq elm (car deflist))
394 ;; (setq chars (car elm)
395 ;; len (length chars)
396 ;; syntax (nth 1 elm)
397 ;; category (nth 2 elm)
398 ;; i 0)
399 ;; (while (< i len)
400 ;; (if (= (aref chars i) ?-)
401 ;; (setq i (1+ i)
402 ;; to (aref chars i))
403 ;; (setq ch (aref chars i)
404 ;; to ch))
405 ;; (while (<= ch to)
406 ;; (modify-syntax-entry ch syntax)
407 ;; (modify-category-entry ch category)
408 ;; (setq ch (1+ ch)))
409 ;; (setq i (1+ i)))
410 ;; (setq deflist (cdr deflist))))
411
412 ;; Ethiopic character set
413
;; The `ethiopic' charset and the Unicode code points #x1200 through
;; #x137c (inclusive) get category `e' (Ethiopic).
(modify-category-entry (make-char 'ethiopic) ?e)
;; (modify-syntax-entry (make-char 'ethiopic) "w")
(let ((code #x1200))
  (while (<= code #x137c)
    (modify-category-entry (decode-char 'ucs code) ?e)
    (setq code (1+ code))))
;; Give each listed Ethiopic character punctuation syntax (".").  The
;; list holds the characters in two encodings, as its embedded comment
;; says.
418 (let ((chars '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
419 ;; Unicode equivalents of the above:
420 ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B)))
421 (while chars
422 (modify-syntax-entry (car chars) ".")
423 (setq chars (cdr chars))))
424
425 ;; Greek character set (ISO-8859-7)
426
;; The greek-iso8859-7 charset and the Unicode code points #x370
;; through #x3ff (inclusive) get category `g' (Greek).
(modify-category-entry (make-char 'greek-iso8859-7) ?g)
(dotimes (k (1+ (- #x3ff #x370)))
  (modify-category-entry (decode-char 'ucs (+ #x370 k)) ?g))
432
433 ;; (let ((c 182))
434 ;; (while (< c 255)
435 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
436 ;; (setq c (1+ c))))
437 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
;; Three Greek characters get punctuation syntax in the current syntax
;; table.
438 (modify-syntax-entry ?\e,F7\e(B ".")
439 (modify-syntax-entry ?\e,F;\e(B ".")
440 (modify-syntax-entry ?\e,F=\e(B ".")
;; The rest works on the standard case table: `set-case-syntax' gives
;; non-letters a syntax class, and `set-case-syntax-pair' registers
;; upper/lower case pairs (upper-case character first).  The literals
;; with the `\e,F' prefix come first, then the group the file labels
;; "Unicode equivalents".
441 (let ((tbl (standard-case-table)))
442 ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
443 ;; in several cases.
444 (set-case-syntax ?\e,F!\e(B "." tbl)
445 (set-case-syntax ?\e,F"\e(B "." tbl)
;; NOTE(review): the next two calls name the same character, so the
;; "." entry is immediately replaced by "_".  The first call was
;; presumably meant for a different character -- confirm which one.
446 (set-case-syntax ?\e,F&\e(B "." tbl)
447 (set-case-syntax ?\e,F&\e(B "_" tbl)
448 (set-case-syntax ?\e,F'\e(B "." tbl)
449 (set-case-syntax ?\e,F)\e(B "_" tbl)
450 (set-case-syntax ?\e,F+\e(B "." tbl)
451 (set-case-syntax ?\e,F,\e(B "_" tbl)
452 (set-case-syntax ?\e,F-\e(B "_" tbl)
453 (set-case-syntax ?\e,F/\e(B "." tbl)
454 (set-case-syntax ?\e,F0\e(B "_" tbl)
455 (set-case-syntax ?\e,F1\e(B "_" tbl)
456 ;; (set-case-syntax ?\e,F7\e(B "_" tbl)
457 ;; (set-case-syntax ?\e,F=\e(B "_" tbl)
458 (set-case-syntax-pair ?\e,FA\e(B ?\e,Fa\e(B tbl)
459 (set-case-syntax-pair ?\e,FB\e(B ?\e,Fb\e(B tbl)
460 (set-case-syntax-pair ?\e,FC\e(B ?\e,Fc\e(B tbl)
461 (set-case-syntax-pair ?\e,FD\e(B ?\e,Fd\e(B tbl)
462 (set-case-syntax-pair ?\e,FE\e(B ?\e,Fe\e(B tbl)
463 (set-case-syntax-pair ?\e,FF\e(B ?\e,Ff\e(B tbl)
464 (set-case-syntax-pair ?\e,FG\e(B ?\e,Fg\e(B tbl)
465 (set-case-syntax-pair ?\e,FH\e(B ?\e,Fh\e(B tbl)
466 (set-case-syntax-pair ?\e,FI\e(B ?\e,Fi\e(B tbl)
467 (set-case-syntax-pair ?\e,FJ\e(B ?\e,Fj\e(B tbl)
468 (set-case-syntax-pair ?\e,FK\e(B ?\e,Fk\e(B tbl)
469 (set-case-syntax-pair ?\e,FL\e(B ?\e,Fl\e(B tbl)
470 (set-case-syntax-pair ?\e,FM\e(B ?\e,Fm\e(B tbl)
471 (set-case-syntax-pair ?\e,FN\e(B ?\e,Fn\e(B tbl)
472 (set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
473 (set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
474 (set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
;; The next two calls share the same first (upper-case) argument:
;; `set-upcase-syntax' handles the first lower-case form,
;; `set-case-syntax-pair' the second.
475 (set-upcase-syntax ?\e,FS\e(B ?\e,Fr\e(B tbl)
476 (set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
477 (set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
478 (set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
479 (set-case-syntax-pair ?\e,FV\e(B ?\e,Fv\e(B tbl)
480 (set-case-syntax-pair ?\e,FW\e(B ?\e,Fw\e(B tbl)
481 (set-case-syntax-pair ?\e,FX\e(B ?\e,Fx\e(B tbl)
482 (set-case-syntax-pair ?\e,FY\e(B ?\e,Fy\e(B tbl)
483 (set-case-syntax-pair ?\e,FZ\e(B ?\e,Fz\e(B tbl)
484 (set-case-syntax-pair ?\e,F[\e(B ?\e,F{\e(B tbl)
485 (set-case-syntax-pair ?\e,F?\e(B ?\e,F~\e(B tbl)
486 (set-case-syntax-pair ?\e,F>\e(B ?\e,F}\e(B tbl)
487 (set-case-syntax-pair ?\e,F<\e(B ?\e,F|\e(B tbl)
488 (set-case-syntax-pair ?\e,F6\e(B ?\e,F\\e(B tbl)
489 (set-case-syntax-pair ?\e,F8\e(B ?\e,F]\e(B tbl)
490 (set-case-syntax-pair ?\e,F9\e(B ?\e,F^\e(B tbl)
491 (set-case-syntax-pair ?\e,F:\e(B ?\e,F_\e(B tbl)
492 ;; Unicode equivalents
493 (set-case-syntax-pair ?\e$,1&q\e(B ?\e$,1'1\e(B tbl)
494 (set-case-syntax-pair ?\e$,1&r\e(B ?\e$,1'2\e(B tbl)
495 (set-case-syntax-pair ?\e$,1&s\e(B ?\e$,1'3\e(B tbl)
496 (set-case-syntax-pair ?\e$,1&t\e(B ?\e$,1'4\e(B tbl)
497 (set-case-syntax-pair ?\e$,1&u\e(B ?\e$,1'5\e(B tbl)
498 (set-case-syntax-pair ?\e$,1&v\e(B ?\e$,1'6\e(B tbl)
499 (set-case-syntax-pair ?\e$,1&w\e(B ?\e$,1'7\e(B tbl)
500 (set-case-syntax-pair ?\e$,1&x\e(B ?\e$,1'8\e(B tbl)
501 (set-case-syntax-pair ?\e$,1&y\e(B ?\e$,1'9\e(B tbl)
502 (set-case-syntax-pair ?\e$,1&z\e(B ?\e$,1':\e(B tbl)
503 (set-case-syntax-pair ?\e$,1&{\e(B ?\e$,1';\e(B tbl)
504 (set-case-syntax-pair ?\e$,1&|\e(B ?\e$,1'<\e(B tbl)
505 (set-case-syntax-pair ?\e$,1&}\e(B ?\e$,1'=\e(B tbl)
506 (set-case-syntax-pair ?\e$,1&~\e(B ?\e$,1'>\e(B tbl)
507 (set-case-syntax-pair ?\e$,1&\7f\e(B ?\e$,1'?\e(B tbl)
508 (set-case-syntax-pair ?\e$,1' \e(B ?\e$,1'@\e(B tbl)
509 (set-case-syntax-pair ?\e$,1'!\e(B ?\e$,1'A\e(B tbl)
510 (set-upcase-syntax ?\e$,1'#\e(B ?\e$,1'B\e(B tbl)
511 (set-case-syntax-pair ?\e$,1'#\e(B ?\e$,1'C\e(B tbl)
512 (set-case-syntax-pair ?\e$,1'$\e(B ?\e$,1'D\e(B tbl)
513 (set-case-syntax-pair ?\e$,1'%\e(B ?\e$,1'E\e(B tbl)
514 (set-case-syntax-pair ?\e$,1'&\e(B ?\e$,1'F\e(B tbl)
515 (set-case-syntax-pair ?\e$,1''\e(B ?\e$,1'G\e(B tbl)
516 (set-case-syntax-pair ?\e$,1'(\e(B ?\e$,1'H\e(B tbl)
517 (set-case-syntax-pair ?\e$,1')\e(B ?\e$,1'I\e(B tbl)
518 (set-case-syntax-pair ?\e$,1'*\e(B ?\e$,1'J\e(B tbl)
519 (set-case-syntax-pair ?\e$,1'+\e(B ?\e$,1'K\e(B tbl)
520 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
521 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
522 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
523 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
524 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
525 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
526 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl))
527
528 ;; Hebrew character set (ISO-8859-8)
529
;; The hebrew-iso8859-8 charset and the Unicode code points #x591
;; through #x5f4 (inclusive) get category `w' (Hebrew).
(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
(dotimes (k (1+ (- #x5f4 #x591)))
  (modify-category-entry (decode-char 'ucs (+ #x591 k)) ?w))

;; Hebrew punctuation marks, first by hebrew-iso8859-8 code, then by
;; Unicode code point.
(dolist (code '(208                     ; PASEQ
                211))                   ; SOF PASUQ
  (modify-syntax-entry (make-char 'hebrew-iso8859-8 code) "."))
(dolist (code '(#x5be                   ; MAQAF
                #x5c0                   ; PASEQ
                #x5c3                   ; SOF PASUQ
                #x5f3                   ; GERESH
                #x5f4))                 ; GERSHAYIM
  (modify-syntax-entry (decode-char 'ucs code) "."))
543
544 ;; (let ((c 224))
545 ;; (while (< c 251)
546 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
547 ;; (setq c (1+ c))))
548 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
549
550 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
551
;; Charset-wide categories, as (CHARSET . CATEGORY).
(dolist (entry '((indian-is13194 . ?i)
                 (indian-2-column . ?I)
                 (indian-glyph . ?I)))
  (modify-category-entry (make-char (car entry)) (cdr entry)))
;; Unicode Devanagari block: #x901 through #x970 (inclusive) get `i'.
(dotimes (k (1+ (- #x970 #x901)))
  (modify-category-entry (decode-char 'ucs (+ #x901 k)) ?i))
560
;; Phonetic categories for nine 128-code-point blocks starting at
;; #x900.  RANGES holds offsets relative to a block's first code point
;; as (FIRST LAST CATEGORY), both ends inclusive; the same offsets are
;; applied to every block.
(let ((block-bases '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00))
      (ranges '((#x01 #x03 ?7)          ; vowel modifier
                (#x05 #x14 ?1)          ; base vowel
                (#x15 #x39 ?0)          ; consonants
                (#x3e #x4d ?8)          ; vowel modifier
                (#x51 #x54 ?4)          ; stress/tone mark
                (#x58 #x5f ?0)          ; consonants
                (#x60 #x61 ?1)          ; base vowel
                (#x62 #x63 ?8)          ; vowel modifier
                (#x66 #x6f ?6))))       ; digits
  (dolist (base block-bases)
    (dolist (range ranges)
      (let ((code (+ base (nth 0 range)))
            (end (+ base (nth 1 range)))
            (category (nth 2 range)))
        (while (<= code end)
          (modify-category-entry (decode-char 'ucs code) category)
          (setq code (1+ code)))))))
580
581 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
582
;; Charset-wide categories, as (CHARSET CATEGORY...).
(dolist (entry '((katakana-jisx0201 ?k ?j ?\|)
                 (latin-jisx0201 ?r)
                 (japanese-jisx0208 ?j ?\|)
                 (japanese-jisx0212 ?j ?\|)))
  (let ((generic (make-char (car entry))))
    (dolist (category (cdr entry))
      (modify-category-entry generic category))))
591
592 ;; Unicode equivalents of JISX0201-kana
;; Categories for Unicode kana ranges.  Each entry is
;; (FIRST LAST CATEGORY...), both ends inclusive.
;;  - Katakana block: ?K is double width, ?k isn't specified, so ?k
;;    is used.
;;  - Hiragana block: ?H is actually defined to be double width; ?j is
;;    deliberately not assigned there.
(dolist (spec '((#xff61 #xff9f ?k ?j ?\|) ; Unicode equivalents of JISX0201-kana
                (#x30a0 #x30ff ?k ?j ?\|) ; Katakana block
                (#x3040 #x309f ?H ?\|)))  ; Hiragana block
  (let ((code (nth 0 spec))
        (end (nth 1 spec))
        (categories (nthcdr 2 spec)))
    (while (<= code end)
      (dolist (category categories)
        (modify-category-entry (decode-char 'ucs code) category))
      (setq code (1+ code)))))
617
618 ;; JISX0208
619 ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
;; JISX0208 rows 33, 34 and 40 get symbol-constituent syntax.
(dolist (row '(33 34 40))
  (modify-syntax-entry (make-char 'japanese-jisx0208 row) "_"))
;; Give each listed JISX0208 character word syntax ("w").
623 (let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
624 (while chars
625 (modify-syntax-entry (car chars) "w")
626 (setq chars (cdr chars))))
;; The string lists JISX0208 brackets as consecutive opener/closer
;; pairs.  Each opener gets open-parenthesis syntax matched to its
;; closer, and each closer gets close-parenthesis syntax matched to its
;; opener.
(let* ((brackets "\e$B!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![\e(B" )
       (pair-count (/ (length brackets) 2))
       (k 0)
       opener closer)
  (while (< k pair-count)
    (setq opener (aref brackets (* 2 k))
          closer (aref brackets (1+ (* 2 k))))
    (modify-syntax-entry opener (format "(%c" closer))
    (modify-syntax-entry closer (format ")%c" opener))
    (setq k (1+ k))))
634
;; Categories of individual JISX0208 rows, as (ROW . CATEGORY).
(dolist (entry '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
  (modify-category-entry (make-char 'japanese-jisx0208 (car entry))
                         (cdr entry)))
;; Rows 48 through 126 get category `C' (Han).
(dotimes (k (- 127 48))
  (modify-category-entry (make-char 'japanese-jisx0208 (+ 48 k)) ?C))
;; Per-character category additions: the first character gets `K'; the
;; next two get both `K' and `H'; the last nine get `C'.
644 (modify-category-entry ?\e$B!<\e(B ?K)
645 (let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B)))
646 (while chars
647 (modify-category-entry (car chars) ?K)
648 (modify-category-entry (car chars) ?H)
649 (setq chars (cdr chars))))
650 (let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
651 (while chars
652 (modify-category-entry (car chars) ?C)
653 (setq chars (cdr chars))))
654
;; Upper/lower case pairs inside JISX0208, registered in the standard
;; case table.  Each entry is (ROW UPPER-START LOWER-START COUNT).
(let ((case-table (standard-case-table)))
  (dolist (run '((#x23 #x41 #x61 26)
                 (#x26 #x21 #x41 24)
                 (#x27 #x21 #x51 33)))
    (let ((row (nth 0 run))
          (upper (nth 1 run))
          (lower (nth 2 run)))
      (dotimes (k (nth 3 run))
        (set-case-syntax-pair (make-char 'japanese-jisx0208 row (+ upper k))
                              (make-char 'japanese-jisx0208 row (+ lower k))
                              case-table)))))
665
666 ;; JISX0212
667 ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
;; JISX0212 rows 33, 34 and 35 get symbol-constituent syntax.
(dolist (row '(33 34 35))
  (modify-syntax-entry (make-char 'japanese-jisx0212 row) "_"))

;; The charset as a whole gets category `C'.
(modify-category-entry (make-char 'japanese-jisx0212) ?C)
673
674 ;; JISX0201-Kana
675 ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
;; Give each listed character punctuation syntax (".").  The list holds
;; three characters in `\e(I' form followed by three the file labels as
;; their Unicode forms.
676 (let ((chars '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
677 ;; Unicode:
678 ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B)))
679 (while chars
680 (modify-syntax-entry (car chars) ".")
681 (setq chars (cdr chars))))
682
;; The JISX0201 kana corner brackets form a matched pair: the first
;; character opens and is matched by the second, so the second must
;; carry close-parenthesis syntax (")") pointing back at the first.
(modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
(modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B")
685
686 ;; Korean character set (KSC5601)
687
688 ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
;; KSC5601 rows that get symbol-constituent syntax.
(dolist (row '(33 34 38 39 40 41))
  (modify-syntax-entry (make-char 'korean-ksc5601 row) "_"))

;; The charset as a whole gets category `h' (Korean); individual rows
;; get further categories, listed as (ROW . CATEGORY).
(modify-category-entry (make-char 'korean-ksc5601) ?h)
(dolist (entry '((35 . ?A) (37 . ?G) (42 . ?H) (43 . ?K) (44 . ?Y)))
  (modify-category-entry (make-char 'korean-ksc5601 (car entry))
                         (cdr entry)))
702
;; The string lists KSC5601 brackets as consecutive opener/closer
;; pairs.  Each opener gets open-parenthesis syntax matched to its
;; closer, and each closer gets close-parenthesis syntax matched to its
;; opener.
(let* ((brackets "\e$(C!2!3!4!5!6!7!8!9!:!;!<!=#(#)#[#]#{#}\e(B" )
       (pair-count (/ (length brackets) 2))
       (k 0)
       opener closer)
  (while (< k pair-count)
    (setq opener (aref brackets (* 2 k))
          closer (aref brackets (1+ (* 2 k))))
    (modify-syntax-entry opener (format "(%c" closer))
    (modify-syntax-entry closer (format ")%c" opener))
    (setq k (1+ k))))
710
;; Upper/lower case pairs inside KSC5601, registered in the standard
;; case table.  `set-case-syntax-pair' takes the upper-case character
;; first and the lower-case character second.
(let ((tbl (standard-case-table)))
  ;; Full-width Latin letters, row #x23.
  (dotimes (i 26)
    (set-case-syntax-pair (make-char 'korean-ksc5601 #x23 (+ #x41 i))
                          (make-char 'korean-ksc5601 #x23 (+ #x61 i)) tbl))
  ;; Roman numerals, row #x25.  The capital numerals start at column
  ;; #x30 and the small ones at column #x21, so the #x30 character is
  ;; the upper-case member of each pair.
  (dotimes (i 10)
    (set-case-syntax-pair (make-char 'korean-ksc5601 #x25 (+ #x30 i))
                          (make-char 'korean-ksc5601 #x25 (+ #x21 i)) tbl))
  ;; Greek letters, row #x25.
  (dotimes (i 24)
    (set-case-syntax-pair (make-char 'korean-ksc5601 #x25 (+ #x41 i))
                          (make-char 'korean-ksc5601 #x25 (+ #x61 i)) tbl))
  ;; Cyrillic letters, row #x2C.
  (dotimes (i 33)
    (set-case-syntax-pair (make-char 'korean-ksc5601 #x2C (+ #x21 i))
                          (make-char 'korean-ksc5601 #x2C (+ #x51 i)) tbl)))
724
725 ;; Latin character set (latin-1,2,3,4,5,8,9)
726
;; Every listed Latin charset gets category `l' (Latin), and its code
;; 160 gets the space category (counts as a space for indentation).
(let ((latin-charsets '(latin-iso8859-1
                        latin-iso8859-2
                        latin-iso8859-3
                        latin-iso8859-4
                        latin-iso8859-9
                        latin-iso8859-14
                        latin-iso8859-15)))
  (dolist (charset latin-charsets)
    (modify-category-entry (make-char charset) ?l))
  (dolist (charset latin-charsets)
    (modify-category-entry (make-char charset 160) ?\s)))
742
;; Lao character set

;; Category `o' for the `lao' charset and for every UCS code point in
;; the inclusive range #xe80..#xeff.
(modify-category-entry (make-char 'lao) ?o)
(let ((code #xe80))
  (while (<= code #xeff)
    (modify-category-entry (decode-char 'ucs code) ?o)
    (setq code (1+ code))))
748
;; Per-character syntax and category for Lao.
;; Every spec is (CHARS SYNTAX CATEGORY).  CHARS lists characters one by
;; one; a `-' means "continue from the character after the previous one
;; up to and including the character that follows the `-'".  SYNTAX is
;; applied unless it is "w"; CATEGORY is always added.
(let (text limit syn cat pos cur upto)
  (dolist (spec '(;; chars	syntax	category
                  ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
                  ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
                  ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
                  ("\e(1XY\e(B" "w" ?3) ; vowel lower
                  ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
                  ("\e(1\\e(B" "w" ?9) ; semivowel lower
                  ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
                  ("\e(1Of\e(B" "_" ?5) ; symbol
                  ;; Unicode equivalents
                  ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
                  ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
                  ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
                  ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
                  ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark
                  ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower
                  ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
                  ("\e$,1DODf\e(B" "_" ?5) ; symbol
                  ))
    (setq text (car spec)
          limit (length text)
          syn (nth 1 spec)
          cat (nth 2 spec)
          pos 0)
    (while (< pos limit)
      (if (/= (aref text pos) ?-)
          ;; Ordinary character: a range of exactly one.
          (setq cur (aref text pos)
                upto cur)
        ;; Range marker: CUR already sits one past the previous
        ;; character, so only the upper bound has to be fetched.
        (setq pos (1+ pos)
              upto (aref text pos)))
      (while (<= cur upto)
        (or (string-equal syn "w")
            (modify-syntax-entry cur syn))
        (modify-category-entry cur cat)
        (setq cur (1+ cur)))
      (setq pos (1+ pos)))))
789
;; Thai character set (TIS620)

;; Category `t' for the `thai-tis620' charset and for every UCS code
;; point in the inclusive range #xe00..#xe7f.
(modify-category-entry (make-char 'thai-tis620) ?t)
(let ((code #xe00))
  (while (<= code #xe7f)
    (modify-category-entry (decode-char 'ucs code) ?t)
    (setq code (1+ code))))
795
;; Per-character syntax and category for Thai.
;; Every spec is (CHARS SYNTAX CATEGORY).  CHARS lists characters one by
;; one; a `-' means "continue from the character after the previous one
;; up to and including the character that follows the `-'".  SYNTAX is
;; applied unless it is "w"; CATEGORY is always added.
(let (text limit syn cat pos cur upto)
  (dolist (spec '(;; chars	syntax	category
                  ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
                  ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
                  ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
                  ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
                  ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
                  ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
                  ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
                  ;; Unicode equivalents
                  ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
                  ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
                  ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
                  ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
                  ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
                  ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
                  ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
                  ))
    (setq text (car spec)
          limit (length text)
          syn (nth 1 spec)
          cat (nth 2 spec)
          pos 0)
    (while (< pos limit)
      (if (/= (aref text pos) ?-)
          ;; Ordinary character: a range of exactly one.
          (setq cur (aref text pos)
                upto cur)
        ;; Range marker: CUR already sits one past the previous
        ;; character, so only the upper bound has to be fetched.
        (setq pos (1+ pos)
              upto (aref text pos)))
      (while (<= cur upto)
        (or (string-equal syn "w")
            (modify-syntax-entry cur syn))
        (modify-category-entry cur cat)
        (setq cur (1+ cur)))
      (setq pos (1+ pos)))))
834
;; Tibetan character set

;; Category `q' for both Tibetan charsets and for every UCS code point
;; in the inclusive range #xf00..#xfff.
(dolist (charset '(tibetan tibetan-1-column))
  (modify-category-entry (make-char charset) ?q))
(let ((code #xf00))
  (while (<= code #xfff)
    (modify-category-entry (decode-char 'ucs code) ?q)
    (setq code (1+ code))))
841
;; Per-character syntax and category for Tibetan.
;; Every spec is (CHARS SYNTAX CATEGORY).  CHARS lists characters one by
;; one; a `-' means "continue from the character after the previous one
;; up to and including the character that follows the `-'".  SYNTAX is
;; applied unless it is "w"; CATEGORY is always added.
(let (text limit syn cat pos cur upto)
  (dolist (spec '(;; chars	syntax	category
                  ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
                  ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
                  ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
                  ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
                  ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
                  ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
                  ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
                  ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
                  ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
                  ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
                  ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
                  ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
                  ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
                  ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others

                  ;; Unicode version (not complete)
                  ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
                  ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ;
                  ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
                  ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
                  ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lower vowel/modifier
                  ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
                  ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
                  ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
                  ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
                  ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
                  ))
    (setq text (car spec)
          limit (length text)
          syn (nth 1 spec)
          cat (nth 2 spec)
          pos 0)
    (while (< pos limit)
      (if (/= (aref text pos) ?-)
          ;; Ordinary character: a range of exactly one.
          (setq cur (aref text pos)
                upto cur)
        ;; Range marker: CUR already sits one past the previous
        ;; character, so only the upper bound has to be fetched.
        (setq pos (1+ pos)
              upto (aref text pos)))
      (while (<= cur upto)
        (or (string-equal syn "w")
            (modify-syntax-entry cur syn))
        (modify-category-entry cur cat)
        (setq cur (1+ cur)))
      (setq pos (1+ pos)))))
891
;; Vietnamese character set

;; Both VISCII halves join category `v', and also category `l' so that
;; they form words together with Latin characters.  (Word syntax is
;; deliberately not set here; the original calls were commented out.)
(dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
  (let ((whole (make-char charset)))
    (modify-category-entry whole ?v)
    (modify-category-entry whole ?l)))
903
;; Pair every position code 32..127 of vietnamese-viscii-upper with the
;; same position code of vietnamese-viscii-lower in the standard case
;; table.
(let ((case-tbl (standard-case-table)))
  (dotimes (offset (- 128 32))
    (let ((code (+ 32 offset)))
      (set-case-syntax-pair (make-char 'vietnamese-viscii-upper code)
                            (make-char 'vietnamese-viscii-lower code)
                            case-tbl))))
911
912 ;; Unicode (mule-unicode-0100-24ff)
913
914 (let ((tbl (standard-case-table)) c)
915
;; Latin Extended-A, Latin Extended-B, IPA
;; Every UCS code point in #x0100..#x02AF joins category `l'.
(dotimes (offset (1+ (- #x02AF #x0100)))
  (modify-category-entry (decode-char 'ucs (+ #x0100 offset)) ?l))
921
;; Latin Extended-A case pairs that follow the "capital, then small at
;; the next code point" layout.  Each run is (FIRST-CAPITAL
;; LAST-CAPITAL); capitals sit two code points apart.  The middle run
;; has its capitals on odd code points, the other two on even ones.
(dolist (run '((#x0100 #x012e) (#x0139 #x0147) (#x014a #x0176)))
  (let ((capital (car run))
        (final (nth 1 run)))
    (while (<= capital final)
      (set-case-syntax-pair (decode-char 'ucs capital)
                            (decode-char 'ucs (1+ capital))
                            tbl)
      (setq capital (+ capital 2)))))
935
936
937 ;; In some languages, such as Turkish, U+0049 LATIN CAPITAL LETTER I
938 ;; and U+0131 LATIN SMALL LETTER DOTLESS I make a case pair, and so
939 ;; do U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN
940 ;; SMALL LETTER I.
941
942 ;; We used to set up half of those correspondences unconditionally,
943 ;; but that makes searches slow. So now we don't set up either half
944 ;; of these correspondences by default.
945
946 ;; (set-downcase-syntax ?\e$,1 P\e(B ?i tbl)
947 ;; (set-upcase-syntax ?I ?\e$,1 Q\e(B tbl)
948
;; Case pairs registered one by one, as (UPPER LOWER), in addition to
;; those produced by the loop above.
(dolist (pair '((?\e$,1 R\e(B ?\e$,1 S\e(B)
                (?\e$,1 T\e(B ?\e$,1 U\e(B)
                (?\e$,1 V\e(B ?\e$,1 W\e(B)
                (?\e$,1!8\e(B ?\e,A\7f\e(B)
                (?\e$,1!9\e(B ?\e$,1!:\e(B)
                (?\e$,1!;\e(B ?\e$,1!<\e(B)
                (?\e$,1!=\e(B ?\e$,1!>\e(B)))
  (set-case-syntax-pair (car pair) (nth 1 pair) tbl))
956
;; Latin Extended-B
;; Each call registers its first argument as the upper-case form of its
;; second argument in TBL.
(set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
(set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
(set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
(set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
(set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
(set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
(set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
(set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
(set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
(set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
(set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
(set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
(set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
(set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
(set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
(set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
(set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
(set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
(set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
(set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
(set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
(set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
(set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
(set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
(set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
(set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
(set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
(set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
(set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
(set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
(set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
(set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
(set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
(set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
(set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
(set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
;; Digraphs with three case forms (U+01C4..U+01CC).  The titlecase
;; forms U+01C5, U+01C8 and U+01CB must downcase to the small digraph,
;; but upcasing the small digraph must yield the all-capital form.
;; `set-case-syntax-pair' would overwrite the small digraph's upcase
;; entry with the titlecase form, so the titlecase forms are registered
;; with `set-downcase-syntax', which leaves that entry alone.
(set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
(set-downcase-syntax ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
(set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
(set-downcase-syntax ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
(set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
(set-downcase-syntax ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
(set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
(set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
(set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
(set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
(set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
(set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
(set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
(set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
(set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
(set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
(set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
(set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
(set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
(set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
(set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
(set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
(set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
;; U+01F1..U+01F3 is another three-form digraph; U+01F2 is the
;; titlecase form and is handled like the ones above.
(set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
(set-downcase-syntax ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
(set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
(set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
(set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
(set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
(set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
(set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
(set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
(set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
(set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
(set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
(set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
(set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
(set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
(set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
(set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
(set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
(set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
(set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
(set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
(set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
(set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
(set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
(set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
(set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
(set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
(set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
(set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
(set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
(set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
(set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
(set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
(set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
1052
;; Latin Extended Additional
;; Every code point in #x1e00..#x1ef9 joins category `l'.  Even code
;; points are paired with the next code point as upper/lower, except in
;; the gap strictly between #x1e94 and #x1ea0.
(dotimes (offset (1+ (- #x1ef9 #x1e00)))
  (let ((code (+ #x1e00 offset)))
    (modify-category-entry (decode-char 'ucs code) ?l)
    (when (and (= (% code 2) 0)
               (not (and (> code #x1e94) (< code #x1ea0))))
      (set-case-syntax-pair (decode-char 'ucs code)
                            (decode-char 'ucs (1+ code))
                            tbl))))
1062
;; Greek
;; Every code point in #x0370..#x03ff joins category `g'.
;; Capitals #x0391..#x03a1 and #x03a3..#x03ab pair with the code point
;; 32 above; even code points in #x03da..#x03ee pair with the next one.
(dotimes (offset (1+ (- #x03ff #x0370)))
  (let ((code (+ #x0370 offset)))
    (modify-category-entry (decode-char 'ucs code) ?g)
    (when (or (and (>= code #x0391) (<= code #x03a1))
              (and (>= code #x03a3) (<= code #x03ab)))
      (set-case-syntax-pair (decode-char 'ucs code)
                            (decode-char 'ucs (+ code 32))
                            tbl))
    (when (and (>= code #x03da)
               (<= code #x03ee)
               (zerop (% code 2)))
      (set-case-syntax-pair (decode-char 'ucs code)
                            (decode-char 'ucs (1+ code))
                            tbl))))
;; Pairs that do not follow either regular layout, as (UPPER LOWER).
(dolist (pair '((?\e$,1&f\e(B ?\e$,1',\e(B)
                (?\e$,1&h\e(B ?\e$,1'-\e(B)
                (?\e$,1&i\e(B ?\e$,1'.\e(B)
                (?\e$,1&j\e(B ?\e$,1'/\e(B)
                (?\e$,1&l\e(B ?\e$,1'L\e(B)
                (?\e$,1&n\e(B ?\e$,1'M\e(B)
                (?\e$,1&o\e(B ?\e$,1'N\e(B)))
  (set-case-syntax-pair (car pair) (nth 1 pair) tbl))
1084
;; Armenian
;; Each code point in #x531..#x556 pairs with the code point #x30 above.
(dotimes (offset (1+ (- #x556 #x531)))
  (let ((upper (+ #x531 offset)))
    (set-case-syntax-pair (decode-char 'ucs upper)
                          (decode-char 'ucs (+ upper #x30))
                          tbl)))
1091
;; Greek Extended
;; Every code point in #x1f00..#x1fff joins category `g'.
;; For rows up to #x1fa7, the code point C in the low half of a row
;; (low nibble 0..7) is the lower-case partner of C+8, except:
;;  - #x1f50, #x1f52, #x1f54 and #x1f56, which are skipped, and
;;  - the whole row #x1f70..#x1f77, whose partners are not at C+8 but
;;    are listed explicitly below.
(setq c #x1f00)
(while (<= c #x1fff)
  (modify-category-entry (decode-char 'ucs c) ?g)
  (and (<= (logand c #x000f) 7)
       (<= c #x1fa7)
       (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
       ;; The masked value is a multiple of 16, so it has to be compared
       ;; with #x70; comparing it with 7 was always true and let the
       ;; #x1f7x row through.
       (/= (logand c #x00f0) #x70)
       (set-case-syntax-pair
        (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
  (setq c (1+ c)))
;; Pairs that do not follow the C/C+8 layout, as UPPER LOWER.
(set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
(set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
(set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
(set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
(set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
(set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
(set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
(set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
(set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
(set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
(set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
(set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
(set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
(set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
(set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
(set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
(set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
(set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
(set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
(set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
(set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
(set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
(set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
(set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1127
;; Cyrillic
;; Every code point in #x0400..#x04ff joins category `y'.  Case pairs:
;;   #x0400..#x040f  -> partner 80 above
;;   #x0410..#x042f  -> partner 32 above
;;   even code points in #x0460..#x0480, #x048c..#x04be and
;;   #x04d0..#x04f4 -> partner is the next code point
(dotimes (offset (1+ (- #x04ff #x0400)))
  (let ((code (+ #x0400 offset)))
    (modify-category-entry (decode-char 'ucs code) ?y)
    ;; The three cases cover disjoint code ranges, so at most one fires.
    (cond ((<= code #x040f)
           (set-case-syntax-pair (decode-char 'ucs code)
                                 (decode-char 'ucs (+ code 80))
                                 tbl))
          ((<= code #x042f)
           (set-case-syntax-pair (decode-char 'ucs code)
                                 (decode-char 'ucs (+ code 32))
                                 tbl))
          ((and (zerop (% code 2))
                (or (and (>= code #x0460) (<= code #x0480))
                    (and (>= code #x048c) (<= code #x04be))
                    (and (>= code #x04d0) (<= code #x04f4))))
           (set-case-syntax-pair (decode-char 'ucs code)
                                 (decode-char 'ucs (1+ code))
                                 tbl)))))
;; Pairs outside the regular layouts, as (UPPER LOWER).
(dolist (pair '((?\e$,1*!\e(B ?\e$,1*"\e(B)
                (?\e$,1*#\e(B ?\e$,1*$\e(B)
                (?\e$,1*'\e(B ?\e$,1*(\e(B)
                (?\e$,1*+\e(B ?\e$,1*,\e(B)
                (?\e$,1*X\e(B ?\e$,1*Y\e(B)))
  (set-case-syntax-pair (car pair) (nth 1 pair) tbl))
1152
;; General punctuation
;; Each run is (FIRST LAST SYNTAX): every UCS code point from FIRST to
;; LAST inclusive gets SYNTAX through `set-case-syntax'.
(dolist (run '((#x2000 #x200b " ")
               (#x2010 #x2016 "_")
               ;; Punctuation syntax for quotation marks (like `)
               (#x2017 #x201f ".")
               (#x2020 #x2027 "_")))
  (let ((code (car run))
        (final (nth 1 run))
        (syntax (nth 2 run)))
    (while (<= code final)
      (set-case-syntax (decode-char 'ucs code) syntax tbl)
      (setq code (1+ code)))))
1169
;; Roman numerals
;; Each code point in #x2160..#x216f pairs with the code point #x10 above.
(dotimes (offset (1+ (- #x216f #x2160)))
  (let ((upper (+ #x2160 offset)))
    (set-case-syntax-pair (decode-char 'ucs upper)
                          (decode-char 'ucs (+ upper #x10))
                          tbl)))
1176
;; Circled Latin
;; Each code point in #x24b6..#x24cf pairs with the code point 26 above;
;; both members of the pair join category `l'.
(dotimes (offset (1+ (- #x24cf #x24b6)))
  (let ((upper (decode-char 'ucs (+ #x24b6 offset)))
        (lower (decode-char 'ucs (+ #x24b6 offset 26))))
    (set-case-syntax-pair upper lower tbl)
    (modify-category-entry upper ?l)
    (modify-category-entry lower ?l)))
1185
;; Fullwidth Latin
;; Each code point in #xff21..#xff3a pairs with the code point #x20
;; above; both members of the pair join category `l'.
(dotimes (offset (1+ (- #xff3a #xff21)))
  (let ((upper (decode-char 'ucs (+ #xff21 offset)))
        (lower (decode-char 'ucs (+ #xff21 offset #x20))))
    (set-case-syntax-pair upper lower tbl)
    (modify-category-entry upper ?l)
    (modify-category-entry lower ?l)))
1194
;; Combining diacritics (#x300..#x362) and combining marks
;; (#x20d0..#x20e3): every code point joins category `^'.
(dolist (range '((#x300 . #x362) (#x20d0 . #x20e3)))
  (let ((code (car range)))
    (while (<= code (cdr range))
      (modify-category-entry (decode-char 'ucs code) ?^)
      (setq code (1+ code)))))
1206
1207 ;; Fixme: syntax for symbols &c
1208 )
1209
;; Unicode bracket pairs.  Every string holds an opener followed by its
;; closer; the opener gets "(" syntax matching the closer and the closer
;; gets ")" syntax matching the opener.
(dolist (pair '("\e$,1sEsF\e(B" ; U+2045 U+2046
                "\e$,1s}s~\e(B" ; U+207D U+207E
                "\e$,1t-t.\e(B" ; U+208D U+208E
                "\e$,1{){*\e(B" ; U+2329 U+232A
                "\e$,1|T|U\e(B" ; U+23B4 U+23B5
                "\e$,2&H&I\e(B" ; U+2768 U+2769
                "\e$,2&J&K\e(B" ; U+276A U+276B
                "\e$,2&L&M\e(B" ; U+276C U+276D
                "\e$,2&P&Q\e(B" ; U+2770 U+2771
                "\e$,2&R&S\e(B" ; U+2772 U+2773
                "\e$,2&T&U\e(B" ; U+2774 U+2775
                "\e$,2'f'g\e(B" ; U+27E6 U+27E7
                "\e$,2'h'i\e(B" ; U+27E8 U+27E9
                "\e$,2'j'k\e(B" ; U+27EA U+27EB
                "\e$,2,#,$\e(B" ; U+2983 U+2984
                "\e$,2,%,&\e(B" ; U+2985 U+2986
                "\e$,2,',(\e(B" ; U+2987 U+2988
                "\e$,2,),*\e(B" ; U+2989 U+298A
                "\e$,2,+,,\e(B" ; U+298B U+298C
                "\e$,2,-,.\e(B" ; U+298D U+298E
                "\e$,2,/,0\e(B" ; U+298F U+2990
                "\e$,2,1,2\e(B" ; U+2991 U+2992
                "\e$,2,3,4\e(B" ; U+2993 U+2994
                "\e$,2,5,6\e(B" ; U+2995 U+2996
                "\e$,2,7,8\e(B" ; U+2997 U+2998
                "\e$,2-<-=\e(B" ; U+29FC U+29FD
                "\e$,2=H=I\e(B" ; U+3008 U+3009
                "\e$,2=J=K\e(B" ; U+300A U+300B
                "\e$,2=L=M\e(B" ; U+300C U+300D
                "\e$,2=N=O\e(B" ; U+300E U+300F
                "\e$,2=P=Q\e(B" ; U+3010 U+3011
                "\e$,2=T=U\e(B" ; U+3014 U+3015
                "\e$,2=V=W\e(B" ; U+3016 U+3017
                "\e$,2=X=Y\e(B" ; U+3018 U+3019
                "\e$,2=Z=[\e(B" ; U+301A U+301B
                "\e$,3m~m\7f\e(B" ; U+FD3E U+FD3F
                "\e$,3pUpV\e(B" ; U+FE35 U+FE36
                "\e$,3pWpX\e(B" ; U+FE37 U+FE38
                "\e$,3pYpZ\e(B" ; U+FE39 U+FE3A
                "\e$,3p[p\\e(B" ; U+FE3B U+FE3C
                "\e$,3p]p^\e(B" ; U+FE3D U+FE3E
                "\e$,3p_p`\e(B" ; U+FE3F U+FE40
                "\e$,3papb\e(B" ; U+FE41 U+FE42
                "\e$,3pcpd\e(B" ; U+FE43 U+FE44
                "\e$,3pypz\e(B" ; U+FE59 U+FE5A
                "\e$,3p{p|\e(B" ; U+FE5B U+FE5C
                "\e$,3p}p~\e(B" ; U+FE5D U+FE5E
                "\e$,3rhri\e(B" ; U+FF08 U+FF09
                "\e$,3s;s=\e(B" ; U+FF3B U+FF3D
                "\e$,3s[s]\e(B" ; U+FF5B U+FF5D
                "\e$,3s_s`\e(B" ; U+FF5F U+FF60
                "\e$,3sbsc\e(B" ; U+FF62 U+FF63
                ))
  (let ((opener (aref pair 0))
        (closer (aref pair 1)))
    (modify-syntax-entry opener (string ?\( closer))
    (modify-syntax-entry closer (string ?\) opener))))
1267
1268 \f
1269 ;;; Setting word boundary.
1270
;; Category pairs listed in `word-combining-categories'.
(setq word-combining-categories
      (list (cons ?l ?l)
            (cons ?C ?C)
            (cons ?C ?H)
            (cons ?C ?K)))
1276
;; Category pairs listed in `word-separating-categories'
;; (2-byte character sets).
(setq word-separating-categories
      (list (cons ?A ?K)                ; Alpha numeric - Katakana
            (cons ?A ?C)                ; Alpha numeric - Chinese
            (cons ?H ?A)                ; Hiragana - Alpha numeric
            (cons ?H ?K)                ; Hiragana - Katakana
            (cons ?H ?C)                ; Hiragana - Chinese
            (cons ?K ?A)                ; Katakana - Alpha numeric
            (cons ?K ?C)                ; Katakana - Chinese
            (cons ?C ?A)                ; Chinese - Alpha numeric
            (cons ?C ?K)))              ; Chinese - Katakana
1288
1289 \f
;; Record, for each character set, the coding system best suited to
;; encoding it, as the charset's `preferred-coding-system' property.
;; Entries are (CHARSET . CODING-SYSTEM).
(dolist (entry '((latin-iso8859-1 . iso-latin-1)
                 (latin-iso8859-2 . iso-latin-2)
                 (latin-iso8859-3 . iso-latin-3)
                 (latin-iso8859-4 . iso-latin-4)
                 (thai-tis620 . thai-tis620)
                 (greek-iso8859-7 . greek-iso-8bit)
                 (arabic-iso8859-6 . iso-2022-7bit)
                 (hebrew-iso8859-8 . hebrew-iso-8bit)
                 (katakana-jisx0201 . japanese-shift-jis)
                 (latin-jisx0201 . japanese-shift-jis)
                 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                 (latin-iso8859-9 . iso-latin-5)
                 (japanese-jisx0208-1978 . iso-2022-jp)
                 (chinese-gb2312 . cn-gb-2312)
                 (japanese-jisx0208 . iso-2022-jp)
                 (korean-ksc5601 . iso-2022-kr)
                 (japanese-jisx0212 . iso-2022-jp)
                 (chinese-cns11643-1 . iso-2022-cn)
                 (chinese-cns11643-2 . iso-2022-cn)
                 (chinese-big5-1 . chinese-big5)
                 (chinese-big5-2 . chinese-big5)
                 (chinese-sisheng . iso-2022-7bit)
                 (ipa . iso-2022-7bit)
                 (vietnamese-viscii-lower . vietnamese-viscii)
                 (vietnamese-viscii-upper . vietnamese-viscii)
                 (arabic-digit . iso-2022-7bit)
                 (arabic-1-column . iso-2022-7bit)
                 (ascii-right-to-left . iso-2022-7bit)
                 (lao . lao)
                 (arabic-2-column . iso-2022-7bit)
                 (indian-is13194 . devanagari)
                 (indian-glyph . devanagari)
                 (tibetan-1-column . tibetan)
                 (ethiopic . iso-2022-7bit)
                 (chinese-cns11643-3 . iso-2022-cn)
                 (chinese-cns11643-4 . iso-2022-cn)
                 (chinese-cns11643-5 . iso-2022-cn)
                 (chinese-cns11643-6 . iso-2022-cn)
                 (chinese-cns11643-7 . iso-2022-cn)
                 (indian-2-column . devanagari)
                 (tibetan . tibetan)
                 (latin-iso8859-14 . iso-latin-8)
                 (latin-iso8859-15 . iso-latin-9)))
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))
1339
1340 \f
;; Mark the charsets that should trigger auto-filling in
;; `auto-fill-chars' (SPACE and NEWLINE are already set), and give each
;; of them the `nospace-between-words' charset property.
(dolist (charset '(katakana-jisx0201
                   japanese-jisx0208 japanese-jisx0212
                   chinese-gb2312 chinese-big5-1 chinese-big5-2))
  (aset auto-fill-chars (make-char charset) t)
  (put-charset-property charset 'nospace-between-words t))
1351
1352 \f
;; Put `utf-translate-cjk-mode' back to the value saved when this file
;; started loading, and discard the temporary variable that held it.
(let ((original-mode saved-utf-translate-cjk-mode))
  (makunbound 'saved-utf-translate-cjk-mode)
  (setq utf-translate-cjk-mode original-mode))
1355
1356 ;; Local Variables:
1357 ;; coding: iso-2022-7bit
1358 ;; End:
1359
1360 ;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
1361 ;;; characters.el ends here