]> code.delx.au - gnu-emacs/blob - lisp/international/ucs-normalize.el
Merge from emacs-23; up to 2010-06-03T05:41:49Z!rgm@gnu.org.
[gnu-emacs] / lisp / international / ucs-normalize.el
1 ;;; ucs-normalize.el --- Unicode normalization NFC/NFD/NFKD/NFKC
2
3 ;; Copyright (C) 2009-2011 Free Software Foundation, Inc.
4
5 ;; Author: Taichi Kawabata <kawabata.taichi@gmail.com>
6 ;; Keywords: unicode, normalization
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
22
23 ;;; Commentary:
24 ;;
25 ;; This program has passed the NormalizationTest-5.2.0.txt.
26 ;;
27 ;; References:
28 ;; http://www.unicode.org/reports/tr15/
29 ;; http://www.unicode.org/review/pr-29.html
30 ;;
31 ;; HFS-Normalization:
32 ;; Reference:
33 ;; http://developer.apple.com/technotes/tn/tn1150.html
34 ;;
35 ;; HFS Normalization excludes following area for decomposition.
36 ;;
37 ;; U+02000 .. U+02FFF :: Punctuation, symbols, dingbats, arrows, etc.
38 ;; (Characters in this region will be composed.)
39 ;; U+0F900 .. U+0FAFF :: CJK compatibility Ideographs.
40 ;; U+2F800 .. U+2FFFF :: CJK compatibility Ideographs.
41 ;;
42 ;; HFS-Normalization is useful for normalizing text involving CJK Ideographs.
43 ;;
44 ;;;
45 ;;; Implementation Notes on NFC/HFS-NFC.
46 ;;;
47 ;;
48 ;; <Stages> Decomposition Composition
49 ;; NFD: 'nfd nil
50 ;; NFC: 'nfd t
51 ;; NFKD: 'nfkd nil
52 ;; NFKC: 'nfkd t
53 ;; HFS-NFD: 'hfs-nfd 'hfs-nfd-comp-p
54 ;; HFS-NFC: 'hfs-nfd t
55 ;;
56 ;; Algorithm for Normalization
57 ;;
58 ;; Before normalization, following data will be prepared.
59 ;;
60 ;; 1. quick-check-list
61 ;;
62 ;; `quick-check-list' consists of characters that will be decomposed
63 ;; during normalization. It includes composition-exclusions,
64 ;; singletons, non-starter-decompositions and decomposable
65 ;; characters.
66 ;;
67 ;; `quick-check-regexp' will search the above characters plus
68 ;; combining characters.
69 ;;
70 ;; 2. decomposition-translation
71 ;;
72 ;; `decomposition-translation' is a translation table that will be
73 ;; used to decompose the characters.
74 ;;
75 ;;
76 ;; Normalization Process
77 ;;
78 ;; A. Searching (`ucs-normalize-region')
79 ;;
80 ;; Region is searched for `quick-check-regexp' to find possibly
81 ;; normalizable point.
82 ;;
83 ;; B. Identification of Normalization Block
84 ;;
85 ;; (1) start of the block
86 ;; If the searched character is a starter and not combining
87 ;; with previous character, then the beginning of the block is
88 ;; the searched character. If searched character is combining
89 ;; character, then previous character will be the target
90 ;; character
91 ;; (2) end of the block
92 ;; Block ends at non-composable starter character.
93 ;;
94 ;; C. Decomposition (`ucs-normalize-block')
95 ;;
96 ;; The entire block will be decomposed by
97 ;; `decomposition-translation' table.
98 ;;
99 ;; D. Sorting and Composition of Smaller Blocks (`ucs-normalize-block-compose-chars')
100 ;;
101 ;; The block will be split to multiple samller blocks by starter
102 ;; characters. Each block is sorted, and composed if necessary.
103 ;;
104 ;; E. Composition of Entire Block (`ucs-normalize-compose-chars')
105 ;;
106 ;; Composed blocks are collected and again composed.
107
108 ;;; Code:
109
110 (defconst ucs-normalize-version "1.2")
111
112 (eval-when-compile (require 'cl))
113
114 (declare-function nfd "ucs-normalize" (char))
115
116 (eval-when-compile
117
118 (defconst ucs-normalize-composition-exclusions
119 '(#x0958 #x0959 #x095A #x095B #x095C #x095D #x095E #x095F
120 #x09DC #x09DD #x09DF #x0A33 #x0A36 #x0A59 #x0A5A #x0A5B
121 #x0A5E #x0B5C #x0B5D #x0F43 #x0F4D #x0F52 #x0F57 #x0F5C
122 #x0F69 #x0F76 #x0F78 #x0F93 #x0F9D #x0FA2 #x0FA7 #x0FAC
123 #x0FB9 #xFB1D #xFB1F #xFB2A #xFB2B #xFB2C #xFB2D #xFB2E
124 #xFB2F #xFB30 #xFB31 #xFB32 #xFB33 #xFB34 #xFB35 #xFB36
125 #xFB38 #xFB39 #xFB3A #xFB3B #xFB3C #xFB3E #xFB40 #xFB41
126 #xFB43 #xFB44 #xFB46 #xFB47 #xFB48 #xFB49 #xFB4A #xFB4B
127 #xFB4C #xFB4D #xFB4E #x2ADC #x1D15E #x1D15F #x1D160 #x1D161
128 #x1D162 #x1D163 #x1D164 #x1D1BB #x1D1BC #x1D1BD #x1D1BE
129 #x1D1BF #x1D1C0)
130 "Composition Exclusion List.
131 This list is taken from
132 http://www.unicode.org/Public/UNIDATA/5.2/CompositionExclusions.txt")
133
134 ;; Unicode ranges that decompositions & combinings are defined.
135 (defvar check-range nil)
136 (setq check-range '((#x00a0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x110ff) (#x1d000 . #x1dfff) (#x1f100 . #x1f2ff) (#x2f800 . #x2faff)))
137
138 ;; Basic normalization functions
139 (defun nfd (char)
140 (let ((decomposition
141 (get-char-code-property char 'decomposition)))
142 (if (and decomposition (numberp (car decomposition)))
143 decomposition)))
144
145 (defun nfkd (char)
146 (let ((decomposition
147 (get-char-code-property char 'decomposition)))
148 (if (symbolp (car decomposition)) (cdr decomposition)
149 decomposition)))
150
151 (defun hfs-nfd (char)
152 (when (or (and (>= char 0) (< char #x2000))
153 (and (>= char #x3000) (< char #xf900))
154 (and (>= char #xfb00) (< char #x2f800))
155 (>= char #x30000))
156 (nfd char))))
157
158 (eval-and-compile
159 (defun ucs-normalize-hfs-nfd-comp-p (char)
160 (and (>= char #x2000) (< char #x3000)))
161
162 (defsubst ucs-normalize-ccc (char)
163 (get-char-code-property char 'canonical-combining-class))
164 )
165
166 ;; Data common to all normalizations
167
168 (eval-when-compile
169
170 (defvar combining-chars nil)
171 (setq combining-chars nil)
172 (defvar decomposition-pair-to-composition nil)
173 (setq decomposition-pair-to-composition nil)
174 (defvar non-starter-decompositions nil)
175 (setq non-starter-decompositions nil)
176 (let ((char 0) ccc decomposition)
177 (mapc
178 (lambda (start-end)
179 (do ((char (car start-end) (+ char 1))) ((> char (cdr start-end)))
180 (setq ccc (ucs-normalize-ccc char))
181 (setq decomposition (get-char-code-property
182 char 'decomposition))
183 (if (and ccc (/= 0 ccc)) (add-to-list 'combining-chars char))
184 (if (and (numberp (car decomposition))
185 (/= (ucs-normalize-ccc (car decomposition))
186 0))
187 (add-to-list 'non-starter-decompositions char))
188 (when (numberp (car decomposition))
189 (if (and (= 2 (length decomposition))
190 (null (memq char ucs-normalize-composition-exclusions))
191 (null (memq char non-starter-decompositions)))
192 (setq decomposition-pair-to-composition
193 (cons (cons decomposition char)
194 decomposition-pair-to-composition)))
195 ;; If not singleton decomposition, second and later characters in
196 ;; decomposition will be the subject of combining characters.
197 (if (cdr decomposition)
198 (dolist (char (cdr decomposition))
199 (add-to-list 'combining-chars char))))))
200 check-range))
201
202 (setq combining-chars
203 (append combining-chars
204 '(?ᅡ ?ᅢ ?ᅣ ?ᅤ ?ᅥ ?ᅦ ?ᅧ ?ᅨ ?ᅩ ?ᅪ
205 ?ᅫ ?ᅬ ?ᅭ ?ᅮ ?ᅯ ?ᅰ ?ᅱ ?ᅲ ?ᅳ ?ᅴ ?ᅵ
206 ?ᆨ ?ᆩ ?ᆪ ?ᆫ ?ᆬ ?ᆭ ?ᆮ ?ᆯ ?ᆰ ?ᆱ ?ᆲ ?ᆳ ?ᆴ
207 ?ᆵ ?ᆶ ?ᆷ ?ᆸ ?ᆹ ?ᆺ ?ᆻ ?ᆼ ?ᆽ ?ᆾ ?ᆿ ?ᇀ ?ᇁ ?ᇂ)))
208 )
209
210 (eval-and-compile
211 (defun ucs-normalize-make-hash-table-from-alist (alist)
212 (let ((table (make-hash-table :test 'equal :size 2000)))
213 (mapc (lambda (x) (puthash (car x) (cdr x) table)) alist)
214 table))
215
216 (defvar ucs-normalize-decomposition-pair-to-primary-composite nil
217 "Hashtable of decomposed pair to primary composite.
218 Note that Hangul are excluded.")
219 (setq ucs-normalize-decomposition-pair-to-primary-composite
220 (ucs-normalize-make-hash-table-from-alist
221 (eval-when-compile decomposition-pair-to-composition)))
222
223 (defun ucs-normalize-primary-composite (decomposition-pair composition-predicate)
224 "Convert DECOMPOSITION-PAIR to primay composite using COMPOSITION-PREDICATE."
225 (let ((char (or (gethash decomposition-pair
226 ucs-normalize-decomposition-pair-to-primary-composite)
227 (and (<= #x1100 (car decomposition-pair))
228 (< (car decomposition-pair) #x1113)
229 (<= #x1161 (cadr decomposition-pair))
230 (< (car decomposition-pair) #x1176)
231 (let ((lindex (- (car decomposition-pair) #x1100))
232 (vindex (- (cadr decomposition-pair) #x1161)))
233 (+ #xAC00 (* (+ (* lindex 21) vindex) 28))))
234 (and (<= #xac00 (car decomposition-pair))
235 (< (car decomposition-pair) #xd7a4)
236 (<= #x11a7 (cadr decomposition-pair))
237 (< (cadr decomposition-pair) #x11c3)
238 (= 0 (% (- (car decomposition-pair) #xac00) 28))
239 (let ((tindex (- (cadr decomposition-pair) #x11a7)))
240 (+ (car decomposition-pair) tindex))))))
241 (if (and char
242 (functionp composition-predicate)
243 (null (funcall composition-predicate char)))
244 nil char)))
245 )
246
247 (defvar ucs-normalize-combining-chars nil)
248 (setq ucs-normalize-combining-chars (eval-when-compile combining-chars))
249
250 (defvar ucs-normalize-combining-chars-regexp nil
251 "Regular expression to match sequence of combining characters.")
252 (setq ucs-normalize-combining-chars-regexp
253 (eval-when-compile (concat (regexp-opt (mapcar 'char-to-string combining-chars)) "+")))
254
255 (declare-function decomposition-translation-alist "ucs-normalize"
256 (decomposition-function))
257 (declare-function decomposition-char-recursively "ucs-normalize"
258 (char decomposition-function))
259 (declare-function alist-list-to-vector "ucs-normalize" (alist))
260
261 (eval-when-compile
262
263 (defun decomposition-translation-alist (decomposition-function)
264 (let (decomposition alist)
265 (mapc
266 (lambda (start-end)
267 (do ((char (car start-end) (+ char 1))) ((> char (cdr start-end)))
268 (setq decomposition (funcall decomposition-function char))
269 (if decomposition
270 (setq alist (cons (cons char
271 (apply 'append
272 (mapcar (lambda (x)
273 (decomposition-char-recursively
274 x decomposition-function))
275 decomposition)))
276 alist)))))
277 check-range)
278 alist))
279
280 (defun decomposition-char-recursively (char decomposition-function)
281 (let ((decomposition (funcall decomposition-function char)))
282 (if decomposition
283 (apply 'append
284 (mapcar (lambda (x)
285 (decomposition-char-recursively x decomposition-function))
286 decomposition))
287 (list char))))
288
289 (defun alist-list-to-vector (alist)
290 (mapcar (lambda (x) (cons (car x) (apply 'vector (cdr x)))) alist))
291
292 (defvar nfd-alist nil)
293 (setq nfd-alist (alist-list-to-vector (decomposition-translation-alist 'nfd)))
294 (defvar nfkd-alist nil)
295 (setq nfkd-alist (alist-list-to-vector (decomposition-translation-alist 'nfkd)))
296 (defvar hfs-nfd-alist nil)
297 (setq hfs-nfd-alist (alist-list-to-vector (decomposition-translation-alist 'hfs-nfd)))
298 )
299
300 (eval-and-compile
301 (defvar ucs-normalize-hangul-translation-alist nil)
302 (setq ucs-normalize-hangul-translation-alist
303 (let ((i 0) entries)
304 (while (< i 11172)
305 (setq entries
306 (cons (cons (+ #xac00 i)
307 (if (= 0 (% i 28))
308 (vector (+ #x1100 (/ i 588))
309 (+ #x1161 (/ (% i 588) 28)))
310 (vector (+ #x1100 (/ i 588))
311 (+ #x1161 (/ (% i 588) 28))
312 (+ #x11a7 (% i 28)))))
313 entries)
314 i (1+ i))) entries))
315
316 (defun ucs-normalize-make-translation-table-from-alist (alist)
317 (make-translation-table-from-alist
318 (append alist ucs-normalize-hangul-translation-alist)))
319
320 (define-translation-table 'ucs-normalize-nfd-table
321 (ucs-normalize-make-translation-table-from-alist (eval-when-compile nfd-alist)))
322 (define-translation-table 'ucs-normalize-nfkd-table
323 (ucs-normalize-make-translation-table-from-alist (eval-when-compile nfkd-alist)))
324 (define-translation-table 'ucs-normalize-hfs-nfd-table
325 (ucs-normalize-make-translation-table-from-alist (eval-when-compile hfs-nfd-alist)))
326
327 (defun ucs-normalize-sort (chars)
328 "Sort by canonical combining class of CHARS."
329 (sort chars
330 (lambda (ch1 ch2)
331 (< (ucs-normalize-ccc ch1) (ucs-normalize-ccc ch2)))))
332
333 (defun ucs-normalize-compose-chars (chars composition-predicate)
334 "Compose CHARS by COMPOSITION-PREDICATE.
335 CHARS must be sorted and normalized in starter-combining pairs."
336 (if composition-predicate
337 (let* ((starter (car chars))
338 remain result prev-ccc
339 (target-chars (cdr chars))
340 target target-ccc
341 primary-composite)
342 (while target-chars
343 (setq target (car target-chars)
344 target-ccc (ucs-normalize-ccc target))
345 (if (and (or (null prev-ccc)
346 (< prev-ccc target-ccc))
347 (setq primary-composite
348 (ucs-normalize-primary-composite (list starter target)
349 composition-predicate)))
350 ;; case 1: composable
351 (setq starter primary-composite
352 prev-ccc nil)
353 (if (= 0 target-ccc)
354 ;; case 2: move starter
355 (setq result (nconc result (cons starter (nreverse remain)))
356 starter target
357 remain nil)
358 ;; case 3: move target
359 (setq prev-ccc target-ccc
360 remain (cons target remain))))
361 (setq target-chars (cdr target-chars)))
362 (nconc result (cons starter (nreverse remain))))
363 chars))
364
365 (defun ucs-normalize-block-compose-chars (chars composition-predicate)
366 "Try composing CHARS by COMPOSITION-PREDICATE.
367 If COMPOSITION-PREDICATE is not given, then do nothing."
368 (let ((chars (ucs-normalize-sort chars)))
369 (if composition-predicate
370 (ucs-normalize-compose-chars chars composition-predicate)
371 chars)))
372 )
373
374 (declare-function quick-check-list "ucs-normalize"
375 (decomposition-translation &optional composition-predicate))
376 (declare-function quick-check-list-to-regexp "ucs-normalize" (quick-check-list))
377
378 (eval-when-compile
379
380 (defun quick-check-list (decomposition-translation
381 &optional composition-predicate)
382 "Quick-Check List for DECOMPOSITION-TRANSLATION and COMPOSITION-PREDICATE.
383 It includes Singletons, CompositionExclusions, and Non-Starter
384 decomposition."
385 (let (entries decomposition composition)
386 (mapc
387 (lambda (start-end)
388 (do ((i (car start-end) (+ i 1))) ((> i (cdr start-end)))
389 (setq decomposition
390 (string-to-list
391 (with-temp-buffer
392 (insert i)
393 (translate-region 1 2 decomposition-translation)
394 (buffer-string))))
395 (setq composition
396 (ucs-normalize-block-compose-chars decomposition composition-predicate))
397 (when (not (equal composition (list i)))
398 (setq entries (cons i entries)))))
399 check-range)
400 ;;(remove-duplicates
401 (append entries
402 ucs-normalize-composition-exclusions
403 non-starter-decompositions)))
404 ;;)
405
406 (defvar nfd-quick-check-list nil)
407 (setq nfd-quick-check-list (quick-check-list 'ucs-normalize-nfd-table ))
408 (defvar nfc-quick-check-list nil)
409 (setq nfc-quick-check-list (quick-check-list 'ucs-normalize-nfd-table t ))
410 (defvar nfkd-quick-check-list nil)
411 (setq nfkd-quick-check-list (quick-check-list 'ucs-normalize-nfkd-table ))
412 (defvar nfkc-quick-check-list nil)
413 (setq nfkc-quick-check-list (quick-check-list 'ucs-normalize-nfkd-table t ))
414 (defvar hfs-nfd-quick-check-list nil)
415 (setq hfs-nfd-quick-check-list (quick-check-list 'ucs-normalize-hfs-nfd-table
416 'ucs-normalize-hfs-nfd-comp-p))
417 (defvar hfs-nfc-quick-check-list nil)
418 (setq hfs-nfc-quick-check-list (quick-check-list 'ucs-normalize-hfs-nfd-table t ))
419
420 (defun quick-check-list-to-regexp (quick-check-list)
421 (regexp-opt (mapcar 'char-to-string (append quick-check-list combining-chars))))
422
423 (defun quick-check-decomposition-list-to-regexp (quick-check-list)
424 (concat (quick-check-list-to-regexp quick-check-list) "\\|[가-힣]"))
425
426 (defun quick-check-composition-list-to-regexp (quick-check-list)
427 (concat (quick-check-list-to-regexp quick-check-list) "\\|[ᅡ-ᅵᆨ-ᇂ]"))
428 )
429
430
431 ;; NFD/NFC
432 (defvar ucs-normalize-nfd-quick-check-regexp nil)
433 (setq ucs-normalize-nfd-quick-check-regexp
434 (eval-when-compile (quick-check-decomposition-list-to-regexp nfd-quick-check-list)))
435 (defvar ucs-normalize-nfc-quick-check-regexp nil)
436 (setq ucs-normalize-nfc-quick-check-regexp
437 (eval-when-compile (quick-check-composition-list-to-regexp nfc-quick-check-list)))
438
439 ;; NFKD/NFKC
440 (defvar ucs-normalize-nfkd-quick-check-regexp nil)
441 (setq ucs-normalize-nfkd-quick-check-regexp
442 (eval-when-compile (quick-check-decomposition-list-to-regexp nfkd-quick-check-list)))
443 (defvar ucs-normalize-nfkc-quick-check-regexp nil)
444 (setq ucs-normalize-nfkc-quick-check-regexp
445 (eval-when-compile (quick-check-composition-list-to-regexp nfkc-quick-check-list)))
446
447 ;; HFS-NFD/HFS-NFC
448 (defvar ucs-normalize-hfs-nfd-quick-check-regexp nil)
449 (setq ucs-normalize-hfs-nfd-quick-check-regexp
450 (eval-when-compile (concat (quick-check-decomposition-list-to-regexp hfs-nfd-quick-check-list))))
451 (defvar ucs-normalize-hfs-nfc-quick-check-regexp nil)
452 (setq ucs-normalize-hfs-nfc-quick-check-regexp
453 (eval-when-compile (quick-check-composition-list-to-regexp hfs-nfc-quick-check-list)))
454
455 ;;------------------------------------------------------------------------------------------
456
457 ;; Normalize local region.
458
459 (defun ucs-normalize-block
460 (from to &optional decomposition-translation-table composition-predicate)
461 "Normalize region FROM TO, by sorting the region with canonical-cc.
462 If DECOMPOSITION-TRANSLATION-TABLE is given, translate region
463 before sorting. If COMPOSITION-PREDICATE is given, then compose
464 the region by using it."
465 (save-restriction
466 (narrow-to-region from to)
467 (goto-char (point-min))
468 (if decomposition-translation-table
469 (translate-region from to decomposition-translation-table))
470 (goto-char (point-min))
471 (let ((start (point)) chars); ccc)
472 (while (not (eobp))
473 (forward-char)
474 (when (or (eobp)
475 (= 0 (ucs-normalize-ccc (char-after (point)))))
476 (setq chars
477 (nconc chars
478 (ucs-normalize-block-compose-chars
479 (string-to-list (buffer-substring start (point)))
480 composition-predicate))
481 start (point)))
482 ;;(unless ccc (error "Undefined character can not be normalized!"))
483 )
484 (delete-region (point-min) (point-max))
485 (apply 'insert
486 (ucs-normalize-compose-chars
487 chars composition-predicate)))))
488
489 (defun ucs-normalize-region
490 (from to quick-check-regexp translation-table composition-predicate)
491 "Normalize region from FROM to TO.
492 QUICK-CHECK-REGEXP is applied for searching the region.
493 TRANSLATION-TABLE will be used to decompose region.
494 COMPOSITION-PREDICATE will be used to compose region."
495 (save-excursion
496 (save-restriction
497 (narrow-to-region from to)
498 (goto-char (point-min))
499 (let (start-pos starter)
500 (while (re-search-forward quick-check-regexp nil t)
501 (setq starter (string-to-char (match-string 0)))
502 (setq start-pos (match-beginning 0))
503 (ucs-normalize-block
504 ;; from
505 (if (or (= start-pos (point-min))
506 (and (= 0 (ucs-normalize-ccc starter))
507 (not (memq starter ucs-normalize-combining-chars))))
508 start-pos (1- start-pos))
509 ;; to
510 (if (looking-at ucs-normalize-combining-chars-regexp)
511 (match-end 0) (1+ start-pos))
512 translation-table composition-predicate))))))
513
514 ;; --------------------------------------------------------------------------------
515
516 (defmacro ucs-normalize-string (ucs-normalize-region)
517 `(with-temp-buffer
518 (insert str)
519 (,ucs-normalize-region (point-min) (point-max))
520 (buffer-string)))
521
522 ;;;###autoload
523 (defun ucs-normalize-NFD-region (from to)
524 "Normalize the current region by the Unicode NFD."
525 (interactive "r")
526 (ucs-normalize-region from to
527 ucs-normalize-nfd-quick-check-regexp
528 'ucs-normalize-nfd-table nil))
529 ;;;###autoload
530 (defun ucs-normalize-NFD-string (str)
531 "Normalize the string STR by the Unicode NFD."
532 (ucs-normalize-string ucs-normalize-NFD-region))
533
534 ;;;###autoload
535 (defun ucs-normalize-NFC-region (from to)
536 "Normalize the current region by the Unicode NFC."
537 (interactive "r")
538 (ucs-normalize-region from to
539 ucs-normalize-nfc-quick-check-regexp
540 'ucs-normalize-nfd-table t))
541 ;;;###autoload
542 (defun ucs-normalize-NFC-string (str)
543 "Normalize the string STR by the Unicode NFC."
544 (ucs-normalize-string ucs-normalize-NFC-region))
545
546 ;;;###autoload
547 (defun ucs-normalize-NFKD-region (from to)
548 "Normalize the current region by the Unicode NFKD."
549 (interactive "r")
550 (ucs-normalize-region from to
551 ucs-normalize-nfkd-quick-check-regexp
552 'ucs-normalize-nfkd-table nil))
553 ;;;###autoload
554 (defun ucs-normalize-NFKD-string (str)
555 "Normalize the string STR by the Unicode NFKD."
556 (ucs-normalize-string ucs-normalize-NFKD-region))
557
558 ;;;###autoload
559 (defun ucs-normalize-NFKC-region (from to)
560 "Normalize the current region by the Unicode NFKC."
561 (interactive "r")
562 (ucs-normalize-region from to
563 ucs-normalize-nfkc-quick-check-regexp
564 'ucs-normalize-nfkd-table t))
565 ;;;###autoload
566 (defun ucs-normalize-NFKC-string (str)
567 "Normalize the string STR by the Unicode NFKC."
568 (ucs-normalize-string ucs-normalize-NFKC-region))
569
570 ;;;###autoload
571 (defun ucs-normalize-HFS-NFD-region (from to)
572 "Normalize the current region by the Unicode NFD and Mac OS's HFS Plus."
573 (interactive "r")
574 (ucs-normalize-region from to
575 ucs-normalize-hfs-nfd-quick-check-regexp
576 'ucs-normalize-hfs-nfd-table
577 'ucs-normalize-hfs-nfd-comp-p))
578 ;;;###autoload
579 (defun ucs-normalize-HFS-NFD-string (str)
580 "Normalize the string STR by the Unicode NFD and Mac OS's HFS Plus."
581 (ucs-normalize-string ucs-normalize-HFS-NFD-region))
582 ;;;###autoload
583 (defun ucs-normalize-HFS-NFC-region (from to)
584 "Normalize the current region by the Unicode NFC and Mac OS's HFS Plus."
585 (interactive "r")
586 (ucs-normalize-region from to
587 ucs-normalize-hfs-nfc-quick-check-regexp
588 'ucs-normalize-hfs-nfd-table t))
589 ;;;###autoload
590 (defun ucs-normalize-HFS-NFC-string (str)
591 "Normalize the string STR by the Unicode NFC and Mac OS's HFS Plus."
592 (ucs-normalize-string ucs-normalize-HFS-NFC-region))
593
594 ;; Post-read-conversion function for `utf-8-hfs'.
595 (defun ucs-normalize-hfs-nfd-post-read-conversion (len)
596 (save-excursion
597 (save-restriction
598 (narrow-to-region (point) (+ (point) len))
599 (ucs-normalize-HFS-NFC-region (point-min) (point-max))
600 (- (point-max) (point-min)))))
601
602 ;; Pre-write conversion for `utf-8-hfs'.
603 (defun ucs-normalize-hfs-nfd-pre-write-conversion (from to)
604 (let ((old-buf (current-buffer)))
605 (set-buffer (generate-new-buffer " *temp*"))
606 (if (stringp from)
607 (insert from)
608 (insert-buffer-substring old-buf from to))
609 (ucs-normalize-HFS-NFD-region (point-min) (point-max))
610 nil))
611
612 ;;; coding-system definition
613 (define-coding-system 'utf-8-hfs
614 "UTF-8 based coding system for MacOS HFS file names.
615 The singleton characters in HFS normalization exclusion will not
616 be decomposed."
617 :coding-type 'utf-8
618 :mnemonic ?U
619 :charset-list '(unicode)
620 :post-read-conversion 'ucs-normalize-hfs-nfd-post-read-conversion
621 :pre-write-conversion 'ucs-normalize-hfs-nfd-pre-write-conversion
622 )
623
624 (provide 'ucs-normalize)
625
626 ;; Local Variables:
627 ;; coding: utf-8
628 ;; End:
629
630 ;;; ucs-normalize.el ends here