]> code.delx.au - gnu-emacs/blob - lisp/international/ucs-normalize.el
Add 2012 to FSF copyright years for Emacs files (do not merge to trunk)
[gnu-emacs] / lisp / international / ucs-normalize.el
1 ;;; ucs-normalize.el --- Unicode normalization NFC/NFD/NFKD/NFKC
2
3 ;; Copyright (C) 2009, 2010, 2011, 2012
4 ;; Free Software Foundation, Inc.
5
6 ;; Author: Taichi Kawabata <kawabata.taichi@gmail.com>
7 ;; Keywords: unicode, normalization
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software: you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation, either version 3 of the License, or
14 ;; (at your option) any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23
24 ;;; Commentary:
25 ;;
26 ;; This program has passed the NormalizationTest-5.2.0.txt.
27 ;;
28 ;; References:
29 ;; http://www.unicode.org/reports/tr15/
30 ;; http://www.unicode.org/review/pr-29.html
31 ;;
32 ;; HFS-Normalization:
33 ;; Reference:
34 ;; http://developer.apple.com/technotes/tn/tn1150.html
35 ;;
36 ;; HFS Normalization excludes following area for decomposition.
37 ;;
38 ;; U+02000 .. U+02FFF :: Punctuation, symbols, dingbats, arrows, etc.
39 ;; (Characters in this region will be composed.)
40 ;; U+0F900 .. U+0FAFF :: CJK compatibility Ideographs.
41 ;; U+2F800 .. U+2FFFF :: CJK compatibility Ideographs.
42 ;;
43 ;; HFS-Normalization is useful for normalizing text involving CJK Ideographs.
44 ;;
45 ;;;
46 ;;; Implementation Notes on NFC/HFS-NFC.
47 ;;;
48 ;;
49 ;; <Stages> Decomposition Composition
50 ;; NFD: 'nfd nil
51 ;; NFC: 'nfd t
52 ;; NFKD: 'nfkd nil
53 ;; NFKC: 'nfkd t
54 ;; HFS-NFD: 'hfs-nfd 'hfs-nfd-comp-p
55 ;; HFS-NFC: 'hfs-nfd t
56 ;;
57 ;; Algorithm for Normalization
58 ;;
59 ;; Before normalization, following data will be prepared.
60 ;;
61 ;; 1. quick-check-list
62 ;;
63 ;; `quick-check-list' consists of characters that will be decomposed
64 ;; during normalization. It includes composition-exclusions,
65 ;; singletons, non-starter-decompositions and decomposable
66 ;; characters.
67 ;;
68 ;; `quick-check-regexp' will search the above characters plus
69 ;; combining characters.
70 ;;
71 ;; 2. decomposition-translation
72 ;;
73 ;; `decomposition-translation' is a translation table that will be
74 ;; used to decompose the characters.
75 ;;
76 ;;
77 ;; Normalization Process
78 ;;
79 ;; A. Searching (`ucs-normalize-region')
80 ;;
81 ;; Region is searched for `quick-check-regexp' to find possibly
82 ;; normalizable point.
83 ;;
84 ;; B. Identification of Normalization Block
85 ;;
86 ;; (1) start of the block
87 ;; If the searched character is a starter and not combining
88 ;; with previous character, then the beginning of the block is
89 ;; the searched character. If searched character is combining
90 ;; character, then previous character will be the target
91 ;; character
92 ;; (2) end of the block
93 ;; Block ends at non-composable starter character.
94 ;;
95 ;; C. Decomposition (`ucs-normalize-block')
96 ;;
97 ;; The entire block will be decomposed by
98 ;; `decomposition-translation' table.
99 ;;
100 ;; D. Sorting and Composition of Smaller Blocks (`ucs-normalize-block-compose-chars')
101 ;;
102 ;; The block will be split to multiple samller blocks by starter
103 ;; characters. Each block is sorted, and composed if necessary.
104 ;;
105 ;; E. Composition of Entire Block (`ucs-normalize-compose-chars')
106 ;;
107 ;; Composed blocks are collected and again composed.
108
109 ;;; Code:
110
111 (defconst ucs-normalize-version "1.2")
112
113 (eval-when-compile (require 'cl))
114
115 (declare-function nfd "ucs-normalize" (char))
116
117 (eval-when-compile
118
119 (defconst ucs-normalize-composition-exclusions
120 '(#x0958 #x0959 #x095A #x095B #x095C #x095D #x095E #x095F
121 #x09DC #x09DD #x09DF #x0A33 #x0A36 #x0A59 #x0A5A #x0A5B
122 #x0A5E #x0B5C #x0B5D #x0F43 #x0F4D #x0F52 #x0F57 #x0F5C
123 #x0F69 #x0F76 #x0F78 #x0F93 #x0F9D #x0FA2 #x0FA7 #x0FAC
124 #x0FB9 #xFB1D #xFB1F #xFB2A #xFB2B #xFB2C #xFB2D #xFB2E
125 #xFB2F #xFB30 #xFB31 #xFB32 #xFB33 #xFB34 #xFB35 #xFB36
126 #xFB38 #xFB39 #xFB3A #xFB3B #xFB3C #xFB3E #xFB40 #xFB41
127 #xFB43 #xFB44 #xFB46 #xFB47 #xFB48 #xFB49 #xFB4A #xFB4B
128 #xFB4C #xFB4D #xFB4E #x2ADC #x1D15E #x1D15F #x1D160 #x1D161
129 #x1D162 #x1D163 #x1D164 #x1D1BB #x1D1BC #x1D1BD #x1D1BE
130 #x1D1BF #x1D1C0)
131 "Composition Exclusion List.
132 This list is taken from
133 http://www.unicode.org/Public/UNIDATA/5.2/CompositionExclusions.txt")
134
135 ;; Unicode ranges that decompositions & combinings are defined.
136 (defvar check-range nil)
137 (setq check-range '((#x00a0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x110ff) (#x1d000 . #x1dfff) (#x1f100 . #x1f2ff) (#x2f800 . #x2faff)))
138
139 ;; Basic normalization functions
140 (defun nfd (char)
141 (let ((decomposition
142 (get-char-code-property char 'decomposition)))
143 (if (and decomposition (numberp (car decomposition)))
144 decomposition)))
145
146 (defun nfkd (char)
147 (let ((decomposition
148 (get-char-code-property char 'decomposition)))
149 (if (symbolp (car decomposition)) (cdr decomposition)
150 decomposition)))
151
152 (defun hfs-nfd (char)
153 (when (or (and (>= char 0) (< char #x2000))
154 (and (>= char #x3000) (< char #xf900))
155 (and (>= char #xfb00) (< char #x2f800))
156 (>= char #x30000))
157 (nfd char))))
158
159 (eval-and-compile
160 (defun ucs-normalize-hfs-nfd-comp-p (char)
161 (and (>= char #x2000) (< char #x3000)))
162
163 (defsubst ucs-normalize-ccc (char)
164 (get-char-code-property char 'canonical-combining-class))
165 )
166
167 ;; Data common to all normalizations
168
169 (eval-when-compile
170
171 (defvar combining-chars nil)
172 (setq combining-chars nil)
173 (defvar decomposition-pair-to-composition nil)
174 (setq decomposition-pair-to-composition nil)
175 (defvar non-starter-decompositions nil)
176 (setq non-starter-decompositions nil)
177 (let ((char 0) ccc decomposition)
178 (mapc
179 (lambda (start-end)
180 (do ((char (car start-end) (+ char 1))) ((> char (cdr start-end)))
181 (setq ccc (ucs-normalize-ccc char))
182 (setq decomposition (get-char-code-property
183 char 'decomposition))
184 (if (and ccc (/= 0 ccc)) (add-to-list 'combining-chars char))
185 (if (and (numberp (car decomposition))
186 (/= (ucs-normalize-ccc (car decomposition))
187 0))
188 (add-to-list 'non-starter-decompositions char))
189 (when (numberp (car decomposition))
190 (if (and (= 2 (length decomposition))
191 (null (memq char ucs-normalize-composition-exclusions))
192 (null (memq char non-starter-decompositions)))
193 (setq decomposition-pair-to-composition
194 (cons (cons decomposition char)
195 decomposition-pair-to-composition)))
196 ;; If not singleton decomposition, second and later characters in
197 ;; decomposition will be the subject of combining characters.
198 (if (cdr decomposition)
199 (dolist (char (cdr decomposition))
200 (add-to-list 'combining-chars char))))))
201 check-range))
202
203 (setq combining-chars
204 (append combining-chars
205 '(?ᅡ ?ᅢ ?ᅣ ?ᅤ ?ᅥ ?ᅦ ?ᅧ ?ᅨ ?ᅩ ?ᅪ
206 ?ᅫ ?ᅬ ?ᅭ ?ᅮ ?ᅯ ?ᅰ ?ᅱ ?ᅲ ?ᅳ ?ᅴ ?ᅵ
207 ?ᆨ ?ᆩ ?ᆪ ?ᆫ ?ᆬ ?ᆭ ?ᆮ ?ᆯ ?ᆰ ?ᆱ ?ᆲ ?ᆳ ?ᆴ
208 ?ᆵ ?ᆶ ?ᆷ ?ᆸ ?ᆹ ?ᆺ ?ᆻ ?ᆼ ?ᆽ ?ᆾ ?ᆿ ?ᇀ ?ᇁ ?ᇂ)))
209 )
210
211 (eval-and-compile
212 (defun ucs-normalize-make-hash-table-from-alist (alist)
213 (let ((table (make-hash-table :test 'equal :size 2000)))
214 (mapc (lambda (x) (puthash (car x) (cdr x) table)) alist)
215 table))
216
217 (defvar ucs-normalize-decomposition-pair-to-primary-composite nil
218 "Hashtable of decomposed pair to primary composite.
219 Note that Hangul are excluded.")
220 (setq ucs-normalize-decomposition-pair-to-primary-composite
221 (ucs-normalize-make-hash-table-from-alist
222 (eval-when-compile decomposition-pair-to-composition)))
223
224 (defun ucs-normalize-primary-composite (decomposition-pair composition-predicate)
225 "Convert DECOMPOSITION-PAIR to primay composite using COMPOSITION-PREDICATE."
226 (let ((char (or (gethash decomposition-pair
227 ucs-normalize-decomposition-pair-to-primary-composite)
228 (and (<= #x1100 (car decomposition-pair))
229 (< (car decomposition-pair) #x1113)
230 (<= #x1161 (cadr decomposition-pair))
231 (< (car decomposition-pair) #x1176)
232 (let ((lindex (- (car decomposition-pair) #x1100))
233 (vindex (- (cadr decomposition-pair) #x1161)))
234 (+ #xAC00 (* (+ (* lindex 21) vindex) 28))))
235 (and (<= #xac00 (car decomposition-pair))
236 (< (car decomposition-pair) #xd7a4)
237 (<= #x11a7 (cadr decomposition-pair))
238 (< (cadr decomposition-pair) #x11c3)
239 (= 0 (% (- (car decomposition-pair) #xac00) 28))
240 (let ((tindex (- (cadr decomposition-pair) #x11a7)))
241 (+ (car decomposition-pair) tindex))))))
242 (if (and char
243 (functionp composition-predicate)
244 (null (funcall composition-predicate char)))
245 nil char)))
246 )
247
248 (defvar ucs-normalize-combining-chars nil)
249 (setq ucs-normalize-combining-chars (eval-when-compile combining-chars))
250
251 (defvar ucs-normalize-combining-chars-regexp nil
252 "Regular expression to match sequence of combining characters.")
253 (setq ucs-normalize-combining-chars-regexp
254 (eval-when-compile (concat (regexp-opt (mapcar 'char-to-string combining-chars)) "+")))
255
256 (declare-function decomposition-translation-alist "ucs-normalize"
257 (decomposition-function))
258 (declare-function decomposition-char-recursively "ucs-normalize"
259 (char decomposition-function))
260 (declare-function alist-list-to-vector "ucs-normalize" (alist))
261
262 (eval-when-compile
263
264 (defun decomposition-translation-alist (decomposition-function)
265 (let (decomposition alist)
266 (mapc
267 (lambda (start-end)
268 (do ((char (car start-end) (+ char 1))) ((> char (cdr start-end)))
269 (setq decomposition (funcall decomposition-function char))
270 (if decomposition
271 (setq alist (cons (cons char
272 (apply 'append
273 (mapcar (lambda (x)
274 (decomposition-char-recursively
275 x decomposition-function))
276 decomposition)))
277 alist)))))
278 check-range)
279 alist))
280
281 (defun decomposition-char-recursively (char decomposition-function)
282 (let ((decomposition (funcall decomposition-function char)))
283 (if decomposition
284 (apply 'append
285 (mapcar (lambda (x)
286 (decomposition-char-recursively x decomposition-function))
287 decomposition))
288 (list char))))
289
290 (defun alist-list-to-vector (alist)
291 (mapcar (lambda (x) (cons (car x) (apply 'vector (cdr x)))) alist))
292
293 (defvar nfd-alist nil)
294 (setq nfd-alist (alist-list-to-vector (decomposition-translation-alist 'nfd)))
295 (defvar nfkd-alist nil)
296 (setq nfkd-alist (alist-list-to-vector (decomposition-translation-alist 'nfkd)))
297 (defvar hfs-nfd-alist nil)
298 (setq hfs-nfd-alist (alist-list-to-vector (decomposition-translation-alist 'hfs-nfd)))
299 )
300
301 (eval-and-compile
302 (defvar ucs-normalize-hangul-translation-alist nil)
303 (setq ucs-normalize-hangul-translation-alist
304 (let ((i 0) entries)
305 (while (< i 11172)
306 (setq entries
307 (cons (cons (+ #xac00 i)
308 (if (= 0 (% i 28))
309 (vector (+ #x1100 (/ i 588))
310 (+ #x1161 (/ (% i 588) 28)))
311 (vector (+ #x1100 (/ i 588))
312 (+ #x1161 (/ (% i 588) 28))
313 (+ #x11a7 (% i 28)))))
314 entries)
315 i (1+ i))) entries))
316
317 (defun ucs-normalize-make-translation-table-from-alist (alist)
318 (make-translation-table-from-alist
319 (append alist ucs-normalize-hangul-translation-alist)))
320
321 (define-translation-table 'ucs-normalize-nfd-table
322 (ucs-normalize-make-translation-table-from-alist (eval-when-compile nfd-alist)))
323 (define-translation-table 'ucs-normalize-nfkd-table
324 (ucs-normalize-make-translation-table-from-alist (eval-when-compile nfkd-alist)))
325 (define-translation-table 'ucs-normalize-hfs-nfd-table
326 (ucs-normalize-make-translation-table-from-alist (eval-when-compile hfs-nfd-alist)))
327
328 (defun ucs-normalize-sort (chars)
329 "Sort by canonical combining class of CHARS."
330 (sort chars
331 (lambda (ch1 ch2)
332 (< (ucs-normalize-ccc ch1) (ucs-normalize-ccc ch2)))))
333
334 (defun ucs-normalize-compose-chars (chars composition-predicate)
335 "Compose CHARS by COMPOSITION-PREDICATE.
336 CHARS must be sorted and normalized in starter-combining pairs."
337 (if composition-predicate
338 (let* ((starter (car chars))
339 remain result prev-ccc
340 (target-chars (cdr chars))
341 target target-ccc
342 primary-composite)
343 (while target-chars
344 (setq target (car target-chars)
345 target-ccc (ucs-normalize-ccc target))
346 (if (and (or (null prev-ccc)
347 (< prev-ccc target-ccc))
348 (setq primary-composite
349 (ucs-normalize-primary-composite (list starter target)
350 composition-predicate)))
351 ;; case 1: composable
352 (setq starter primary-composite
353 prev-ccc nil)
354 (if (= 0 target-ccc)
355 ;; case 2: move starter
356 (setq result (nconc result (cons starter (nreverse remain)))
357 starter target
358 remain nil)
359 ;; case 3: move target
360 (setq prev-ccc target-ccc
361 remain (cons target remain))))
362 (setq target-chars (cdr target-chars)))
363 (nconc result (cons starter (nreverse remain))))
364 chars))
365
366 (defun ucs-normalize-block-compose-chars (chars composition-predicate)
367 "Try composing CHARS by COMPOSITION-PREDICATE.
368 If COMPOSITION-PREDICATE is not given, then do nothing."
369 (let ((chars (ucs-normalize-sort chars)))
370 (if composition-predicate
371 (ucs-normalize-compose-chars chars composition-predicate)
372 chars)))
373 )
374
375 (declare-function quick-check-list "ucs-normalize"
376 (decomposition-translation &optional composition-predicate))
377 (declare-function quick-check-list-to-regexp "ucs-normalize" (quick-check-list))
378
379 (eval-when-compile
380
381 (defun quick-check-list (decomposition-translation
382 &optional composition-predicate)
383 "Quick-Check List for DECOMPOSITION-TRANSLATION and COMPOSITION-PREDICATE.
384 It includes Singletons, CompositionExclusions, and Non-Starter
385 decomposition."
386 (let (entries decomposition composition)
387 (mapc
388 (lambda (start-end)
389 (do ((i (car start-end) (+ i 1))) ((> i (cdr start-end)))
390 (setq decomposition
391 (string-to-list
392 (with-temp-buffer
393 (insert i)
394 (translate-region 1 2 decomposition-translation)
395 (buffer-string))))
396 (setq composition
397 (ucs-normalize-block-compose-chars decomposition composition-predicate))
398 (when (not (equal composition (list i)))
399 (setq entries (cons i entries)))))
400 check-range)
401 ;;(remove-duplicates
402 (append entries
403 ucs-normalize-composition-exclusions
404 non-starter-decompositions)))
405 ;;)
406
407 (defvar nfd-quick-check-list nil)
408 (setq nfd-quick-check-list (quick-check-list 'ucs-normalize-nfd-table ))
409 (defvar nfc-quick-check-list nil)
410 (setq nfc-quick-check-list (quick-check-list 'ucs-normalize-nfd-table t ))
411 (defvar nfkd-quick-check-list nil)
412 (setq nfkd-quick-check-list (quick-check-list 'ucs-normalize-nfkd-table ))
413 (defvar nfkc-quick-check-list nil)
414 (setq nfkc-quick-check-list (quick-check-list 'ucs-normalize-nfkd-table t ))
415 (defvar hfs-nfd-quick-check-list nil)
416 (setq hfs-nfd-quick-check-list (quick-check-list 'ucs-normalize-hfs-nfd-table
417 'ucs-normalize-hfs-nfd-comp-p))
418 (defvar hfs-nfc-quick-check-list nil)
419 (setq hfs-nfc-quick-check-list (quick-check-list 'ucs-normalize-hfs-nfd-table t ))
420
421 (defun quick-check-list-to-regexp (quick-check-list)
422 (regexp-opt (mapcar 'char-to-string (append quick-check-list combining-chars))))
423
424 (defun quick-check-decomposition-list-to-regexp (quick-check-list)
425 (concat (quick-check-list-to-regexp quick-check-list) "\\|[가-힣]"))
426
427 (defun quick-check-composition-list-to-regexp (quick-check-list)
428 (concat (quick-check-list-to-regexp quick-check-list) "\\|[ᅡ-ᅵᆨ-ᇂ]"))
429 )
430
431
432 ;; NFD/NFC
433 (defvar ucs-normalize-nfd-quick-check-regexp nil)
434 (setq ucs-normalize-nfd-quick-check-regexp
435 (eval-when-compile (quick-check-decomposition-list-to-regexp nfd-quick-check-list)))
436 (defvar ucs-normalize-nfc-quick-check-regexp nil)
437 (setq ucs-normalize-nfc-quick-check-regexp
438 (eval-when-compile (quick-check-composition-list-to-regexp nfc-quick-check-list)))
439
440 ;; NFKD/NFKC
441 (defvar ucs-normalize-nfkd-quick-check-regexp nil)
442 (setq ucs-normalize-nfkd-quick-check-regexp
443 (eval-when-compile (quick-check-decomposition-list-to-regexp nfkd-quick-check-list)))
444 (defvar ucs-normalize-nfkc-quick-check-regexp nil)
445 (setq ucs-normalize-nfkc-quick-check-regexp
446 (eval-when-compile (quick-check-composition-list-to-regexp nfkc-quick-check-list)))
447
448 ;; HFS-NFD/HFS-NFC
449 (defvar ucs-normalize-hfs-nfd-quick-check-regexp nil)
450 (setq ucs-normalize-hfs-nfd-quick-check-regexp
451 (eval-when-compile (concat (quick-check-decomposition-list-to-regexp hfs-nfd-quick-check-list))))
452 (defvar ucs-normalize-hfs-nfc-quick-check-regexp nil)
453 (setq ucs-normalize-hfs-nfc-quick-check-regexp
454 (eval-when-compile (quick-check-composition-list-to-regexp hfs-nfc-quick-check-list)))
455
456 ;;------------------------------------------------------------------------------------------
457
458 ;; Normalize local region.
459
460 (defun ucs-normalize-block
461 (from to &optional decomposition-translation-table composition-predicate)
462 "Normalize region FROM TO, by sorting the region with canonical-cc.
463 If DECOMPOSITION-TRANSLATION-TABLE is given, translate region
464 before sorting. If COMPOSITION-PREDICATE is given, then compose
465 the region by using it."
466 (save-restriction
467 (narrow-to-region from to)
468 (goto-char (point-min))
469 (if decomposition-translation-table
470 (translate-region from to decomposition-translation-table))
471 (goto-char (point-min))
472 (let ((start (point)) chars); ccc)
473 (while (not (eobp))
474 (forward-char)
475 (when (or (eobp)
476 (= 0 (ucs-normalize-ccc (char-after (point)))))
477 (setq chars
478 (nconc chars
479 (ucs-normalize-block-compose-chars
480 (string-to-list (buffer-substring start (point)))
481 composition-predicate))
482 start (point)))
483 ;;(unless ccc (error "Undefined character can not be normalized!"))
484 )
485 (delete-region (point-min) (point-max))
486 (apply 'insert
487 (ucs-normalize-compose-chars
488 chars composition-predicate)))))
489
490 (defun ucs-normalize-region
491 (from to quick-check-regexp translation-table composition-predicate)
492 "Normalize region from FROM to TO.
493 QUICK-CHECK-REGEXP is applied for searching the region.
494 TRANSLATION-TABLE will be used to decompose region.
495 COMPOSITION-PREDICATE will be used to compose region."
496 (save-excursion
497 (save-restriction
498 (narrow-to-region from to)
499 (goto-char (point-min))
500 (let (start-pos starter)
501 (while (re-search-forward quick-check-regexp nil t)
502 (setq starter (string-to-char (match-string 0)))
503 (setq start-pos (match-beginning 0))
504 (ucs-normalize-block
505 ;; from
506 (if (or (= start-pos (point-min))
507 (and (= 0 (ucs-normalize-ccc starter))
508 (not (memq starter ucs-normalize-combining-chars))))
509 start-pos (1- start-pos))
510 ;; to
511 (if (looking-at ucs-normalize-combining-chars-regexp)
512 (match-end 0) (1+ start-pos))
513 translation-table composition-predicate))))))
514
515 ;; --------------------------------------------------------------------------------
516
517 (defmacro ucs-normalize-string (ucs-normalize-region)
518 `(with-temp-buffer
519 (insert str)
520 (,ucs-normalize-region (point-min) (point-max))
521 (buffer-string)))
522
523 ;;;###autoload
524 (defun ucs-normalize-NFD-region (from to)
525 "Normalize the current region by the Unicode NFD."
526 (interactive "r")
527 (ucs-normalize-region from to
528 ucs-normalize-nfd-quick-check-regexp
529 'ucs-normalize-nfd-table nil))
530 ;;;###autoload
531 (defun ucs-normalize-NFD-string (str)
532 "Normalize the string STR by the Unicode NFD."
533 (ucs-normalize-string ucs-normalize-NFD-region))
534
535 ;;;###autoload
536 (defun ucs-normalize-NFC-region (from to)
537 "Normalize the current region by the Unicode NFC."
538 (interactive "r")
539 (ucs-normalize-region from to
540 ucs-normalize-nfc-quick-check-regexp
541 'ucs-normalize-nfd-table t))
542 ;;;###autoload
543 (defun ucs-normalize-NFC-string (str)
544 "Normalize the string STR by the Unicode NFC."
545 (ucs-normalize-string ucs-normalize-NFC-region))
546
547 ;;;###autoload
548 (defun ucs-normalize-NFKD-region (from to)
549 "Normalize the current region by the Unicode NFKD."
550 (interactive "r")
551 (ucs-normalize-region from to
552 ucs-normalize-nfkd-quick-check-regexp
553 'ucs-normalize-nfkd-table nil))
554 ;;;###autoload
555 (defun ucs-normalize-NFKD-string (str)
556 "Normalize the string STR by the Unicode NFKD."
557 (ucs-normalize-string ucs-normalize-NFKD-region))
558
559 ;;;###autoload
560 (defun ucs-normalize-NFKC-region (from to)
561 "Normalize the current region by the Unicode NFKC."
562 (interactive "r")
563 (ucs-normalize-region from to
564 ucs-normalize-nfkc-quick-check-regexp
565 'ucs-normalize-nfkd-table t))
566 ;;;###autoload
567 (defun ucs-normalize-NFKC-string (str)
568 "Normalize the string STR by the Unicode NFKC."
569 (ucs-normalize-string ucs-normalize-NFKC-region))
570
571 ;;;###autoload
572 (defun ucs-normalize-HFS-NFD-region (from to)
573 "Normalize the current region by the Unicode NFD and Mac OS's HFS Plus."
574 (interactive "r")
575 (ucs-normalize-region from to
576 ucs-normalize-hfs-nfd-quick-check-regexp
577 'ucs-normalize-hfs-nfd-table
578 'ucs-normalize-hfs-nfd-comp-p))
579 ;;;###autoload
580 (defun ucs-normalize-HFS-NFD-string (str)
581 "Normalize the string STR by the Unicode NFD and Mac OS's HFS Plus."
582 (ucs-normalize-string ucs-normalize-HFS-NFD-region))
583 ;;;###autoload
584 (defun ucs-normalize-HFS-NFC-region (from to)
585 "Normalize the current region by the Unicode NFC and Mac OS's HFS Plus."
586 (interactive "r")
587 (ucs-normalize-region from to
588 ucs-normalize-hfs-nfc-quick-check-regexp
589 'ucs-normalize-hfs-nfd-table t))
590 ;;;###autoload
591 (defun ucs-normalize-HFS-NFC-string (str)
592 "Normalize the string STR by the Unicode NFC and Mac OS's HFS Plus."
593 (ucs-normalize-string ucs-normalize-HFS-NFC-region))
594
595 ;; Post-read-conversion function for `utf-8-hfs'.
596 (defun ucs-normalize-hfs-nfd-post-read-conversion (len)
597 (save-excursion
598 (save-restriction
599 (narrow-to-region (point) (+ (point) len))
600 (ucs-normalize-HFS-NFC-region (point-min) (point-max))
601 (- (point-max) (point-min)))))
602
603 ;; Pre-write conversion for `utf-8-hfs'.
604 (defun ucs-normalize-hfs-nfd-pre-write-conversion (from to)
605 (let ((old-buf (current-buffer)))
606 (set-buffer (generate-new-buffer " *temp*"))
607 (if (stringp from)
608 (insert from)
609 (insert-buffer-substring old-buf from to))
610 (ucs-normalize-HFS-NFD-region (point-min) (point-max))
611 nil))
612
613 ;;; coding-system definition
614 (define-coding-system 'utf-8-hfs
615 "UTF-8 based coding system for MacOS HFS file names.
616 The singleton characters in HFS normalization exclusion will not
617 be decomposed."
618 :coding-type 'utf-8
619 :mnemonic ?U
620 :charset-list '(unicode)
621 :post-read-conversion 'ucs-normalize-hfs-nfd-post-read-conversion
622 :pre-write-conversion 'ucs-normalize-hfs-nfd-pre-write-conversion
623 )
624
625 (provide 'ucs-normalize)
626
627 ;; Local Variables:
628 ;; coding: utf-8
629 ;; End:
630
631 ;; arch-tag: cef65ae7-71ad-4e19-8da8-56ab4d42aaa4
632 ;;; ucs-normalize.el ends here