]> code.delx.au - gnu-emacs/blob - lisp/international/ucs-normalize.el
(ucs-normalize-version): Changed to 1.1.
[gnu-emacs] / lisp / international / ucs-normalize.el
1 ;;; ucs-normalize.el --- Unicode normalization NFC/NFD/NFKD/NFKC
2
3 ;; Copyright (C) 2009
4 ;; Free Software Foundation, Inc.
5
6 ;; Author: Taichi Kawabata <kawabata.taichi@gmail.com>
7 ;; Keywords: unicode, normalization
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software: you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation, either version 3 of the License, or
14 ;; (at your option) any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23
24 ;;; Commentary:
25 ;;
26 ;; This program has passed the NormalizationTest-5.1.0.txt.
27 ;;
28 ;; References:
29 ;; http://www.unicode.org/reports/tr15/
30 ;; http://www.unicode.org/review/pr-29.html
31 ;;
32 ;; HFS-Normalization:
33 ;; Reference:
34 ;; http://developer.apple.com/technotes/tn/tn1150.html
35 ;;
36 ;; HFS Normalization excludes following area for decomposition.
37 ;;
38 ;; U+02000 .. U+02FFF :: Punctuation, symbols, dingbats, arrows, etc.
39 ;; (Characters in this region will be composed.)
40 ;; U+0F900 .. U+0FAFF :: CJK compatibility Ideographs.
41 ;; U+2F800 .. U+2FFFF :: CJK compatibility Ideographs.
42 ;;
43 ;; HFS-Normalization is useful for normalizing text involving CJK Ideographs.
44 ;;
45 ;;;
46 ;;; Implementation Notes on NFC/HFS-NFC.
47 ;;;
48 ;;
49 ;; <Stages> Decomposition Composition
50 ;; NFD: 'nfd nil
51 ;; NFC: 'nfd t
52 ;; NFKD: 'nfkd nil
53 ;; NFKC: 'nfkd t
54 ;; HFS-NFD: 'hfs-nfd 'hfs-nfd-comp-p
55 ;; HFS-NFC: 'hfs-nfd t
56 ;;
57 ;; Algorithm for Normalization
58 ;;
59 ;; Before normalization, following data will be prepared.
60 ;;
61 ;; 1. quick-check-list
62 ;;
63 ;; `quick-check-list' consists of characters that will be decomposed
64 ;; during normalization. It includes composition-exclusions,
65 ;; singletons, non-starter-decompositions and decomposable
66 ;; characters.
67 ;;
68 ;; `quick-check-regexp' will search the above characters plus
69 ;; combining characters.
70 ;;
71 ;; 2. decomposition-translation
72 ;;
73 ;; `decomposition-translation' is a translation table that will be
74 ;; used to decompose the characters.
75 ;;
76 ;;
77 ;; Normalization Process
78 ;;
79 ;; A. Searching (`ucs-normalize-region')
80 ;;
81 ;; Region is searched for `quick-check-regexp' to find possibly
82 ;; normalizable point.
83 ;;
84 ;; B. Identification of Normalization Block
85 ;;
86 ;; (1) start of the block
87 ;; If the searched character is a starter and not combining
88 ;; with previous character, then the beginning of the block is
89 ;; the searched character. If searched character is combining
90 ;; character, then previous character will be the target
91 ;; character
92 ;; (2) end of the block
93 ;; Block ends at non-composable starter character.
94 ;;
95 ;; C. Decomposition (`ucs-normalize-block')
96 ;;
97 ;; The entire block will be decomposed by
98 ;; `decomposition-translation' table.
99 ;;
100 ;; D. Sorting and Composition of Smaller Blocks (`ucs-normalize-block-compose-chars')
101 ;;
102 ;; The block will be split to multiple samller blocks by starter
103 ;; charcters. Each block is sorted, and composed if necessary.
104 ;;
105 ;; E. Composition of Entire Block (`ucs-normalize-compose-chars')
106 ;;
107 ;; Composed blocks are collected and again composed.
108
109 ;;; Code:
110
111 (defconst ucs-normalize-version "1.1")
112
113 (eval-when-compile (require 'cl))
114
115 (eval-when-compile
116
117 (defconst ucs-normalize-composition-exclusions
118 '(#x0958 #x0959 #x095A #x095B #x095C #x095D #x095E #x095F
119 #x09DC #x09DD #x09DF #x0A33 #x0A36 #x0A59 #x0A5A #x0A5B
120 #x0A5E #x0B5C #x0B5D #x0F43 #x0F4D #x0F52 #x0F57 #x0F5C
121 #x0F69 #x0F76 #x0F78 #x0F93 #x0F9D #x0FA2 #x0FA7 #x0FAC
122 #x0FB9 #xFB1D #xFB1F #xFB2A #xFB2B #xFB2C #xFB2D #xFB2E
123 #xFB2F #xFB30 #xFB31 #xFB32 #xFB33 #xFB34 #xFB35 #xFB36
124 #xFB38 #xFB39 #xFB3A #xFB3B #xFB3C #xFB3E #xFB40 #xFB41
125 #xFB43 #xFB44 #xFB46 #xFB47 #xFB48 #xFB49 #xFB4A #xFB4B
126 #xFB4C #xFB4D #xFB4E #x2ADC #x1D15E #x1D15F #x1D160 #x1D161
127 #x1D162 #x1D163 #x1D164 #x1D1BB #x1D1BC #x1D1BD #x1D1BE
128 #x1D1BF #x1D1C0)
129 "Composition Exclusion List.
130 This list is taken from
131 http://www.unicode.org/Public/UNIDATA/CompositionExclusions-5.1.0.txt")
132
133 ;; Unicode ranges that decompositions & combinings are defined.
134 (defvar check-range nil)
135 (setq check-range '((#x00a0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x10fff) (#x1d000 . #x1dfff) (#x2f800 . #x2faff)))
136
137 ;; Basic normalization functions
138 (defun nfd (char)
139 (let ((decomposition
140 (get-char-code-property char 'decomposition)))
141 (if (and decomposition (numberp (car decomposition)))
142 decomposition)))
143
144 (defun nfkd (char)
145 (let ((decomposition
146 (get-char-code-property char 'decomposition)))
147 (if (symbolp (car decomposition)) (cdr decomposition)
148 decomposition)))
149
150 (defun hfs-nfd (char)
151 (when (or (and (>= char 0) (< char #x2000))
152 (and (>= char #x3000) (< char #xf900))
153 (and (>= char #xfb00) (< char #x2f800))
154 (>= char #x30000))
155 (nfd char))))
156
157 (eval-and-compile
158 (defun ucs-normalize-hfs-nfd-comp-p (char)
159 (and (>= char #x2000) (< char #x3000)))
160
161 (defsubst ucs-normalize-ccc (char)
162 (get-char-code-property char 'canonical-combining-class))
163 )
164
165 ;; Data common to all normalizations
166
167 (eval-when-compile
168
169 (defvar combining-chars nil)
170 (setq combining-chars nil)
171 (defvar decomposition-pair-to-composition nil)
172 (setq decomposition-pair-to-composition nil)
173 (defvar non-starter-decompositions nil)
174 (setq non-starter-decompositions nil)
175 (let ((char 0) ccc decomposition)
176 (mapc
177 (lambda (start-end)
178 (do ((char (car start-end) (+ char 1))) ((> char (cdr start-end)))
179 (setq ccc (ucs-normalize-ccc char))
180 (setq decomposition (get-char-code-property
181 char 'decomposition))
182 (if (and ccc (/= 0 ccc)) (add-to-list 'combining-chars char))
183 (if (and (numberp (car decomposition))
184 (/= (ucs-normalize-ccc (car decomposition))
185 0))
186 (add-to-list 'non-starter-decompositions char))
187 (when (numberp (car decomposition))
188 (if (and (= 2 (length decomposition))
189 (null (memq char ucs-normalize-composition-exclusions))
190 (null (memq char non-starter-decompositions)))
191 (setq decomposition-pair-to-composition
192 (cons (cons decomposition char)
193 decomposition-pair-to-composition)))
194 ;; If not singleton decomposition, second and later characters in
195 ;; decomposition will be the subject of combining characters.
196 (if (cdr decomposition)
197 (dolist (char (cdr decomposition))
198 (add-to-list 'combining-chars char))))))
199 check-range))
200
201 (setq combining-chars
202 (append combining-chars
203 '(?ᅡ ?ᅢ ?ᅣ ?ᅤ ?ᅥ ?ᅦ ?ᅧ ?ᅨ ?ᅩ ?ᅪ
204 ?ᅫ ?ᅬ ?ᅭ ?ᅮ ?ᅯ ?ᅰ ?ᅱ ?ᅲ ?ᅳ ?ᅴ ?ᅵ
205 ?ᆨ ?ᆩ ?ᆪ ?ᆫ ?ᆬ ?ᆭ ?ᆮ ?ᆯ ?ᆰ ?ᆱ ?ᆲ ?ᆳ ?ᆴ
206 ?ᆵ ?ᆶ ?ᆷ ?ᆸ ?ᆹ ?ᆺ ?ᆻ ?ᆼ ?ᆽ ?ᆾ ?ᆿ ?ᇀ ?ᇁ ?ᇂ)))
207 )
208
209 (eval-and-compile
210 (defun ucs-normalize-make-hash-table-from-alist (alist)
211 (let ((table (make-hash-table :test 'equal :size 2000)))
212 (mapc (lambda (x) (puthash (car x) (cdr x) table)) alist)
213 table))
214
215 (defvar ucs-normalize-decomposition-pair-to-primary-composite nil
216 "Hashtable of decomposed pair to primary composite.
217 Note that Hangul are excluded.")
218 (setq ucs-normalize-decomposition-pair-to-primary-composite
219 (ucs-normalize-make-hash-table-from-alist
220 (eval-when-compile decomposition-pair-to-composition)))
221
222 (defun ucs-normalize-primary-composite (decomposition-pair composition-predicate)
223 "Convert DECOMPOSITION-PAIR to primay composite using COMPOSITION-PREDICATE."
224 (let ((char (or (gethash decomposition-pair
225 ucs-normalize-decomposition-pair-to-primary-composite)
226 (and (<= #x1100 (car decomposition-pair))
227 (< (car decomposition-pair) #x1113)
228 (<= #x1161 (cadr decomposition-pair))
229 (< (car decomposition-pair) #x1176)
230 (let ((lindex (- (car decomposition-pair) #x1100))
231 (vindex (- (cadr decomposition-pair) #x1161)))
232 (+ #xAC00 (* (+ (* lindex 21) vindex) 28))))
233 (and (<= #xac00 (car decomposition-pair))
234 (< (car decomposition-pair) #xd7a4)
235 (<= #x11a7 (cadr decomposition-pair))
236 (< (cadr decomposition-pair) #x11c3)
237 (= 0 (% (- (car decomposition-pair) #xac00) 28))
238 (let ((tindex (- (cadr decomposition-pair) #x11a7)))
239 (+ (car decomposition-pair) tindex))))))
240 (if (and char
241 (functionp composition-predicate)
242 (null (funcall composition-predicate char)))
243 nil char)))
244 )
245
246 (defvar ucs-normalize-combining-chars nil)
247 (setq ucs-normalize-combining-chars (eval-when-compile combining-chars))
248
249 (defvar ucs-normalize-combining-chars-regexp nil
250 "Regular expression to match sequence of combining characters.")
251 (setq ucs-normalize-combining-chars-regexp
252 (eval-when-compile (concat (regexp-opt (mapcar 'char-to-string combining-chars)) "+")))
253
254 (eval-when-compile
255
256 (defun decomposition-translation-alist (decomposition-function)
257 (let (decomposition alist)
258 (mapc
259 (lambda (start-end)
260 (do ((char (car start-end) (+ char 1))) ((> char (cdr start-end)))
261 (setq decomposition (funcall decomposition-function char))
262 (if decomposition
263 (setq alist (cons (cons char
264 (apply 'append
265 (mapcar (lambda (x)
266 (decomposition-char-recursively
267 x decomposition-function))
268 decomposition)))
269 alist)))))
270 check-range)
271 alist))
272
273 (defun decomposition-char-recursively (char decomposition-function)
274 (let ((decomposition (funcall decomposition-function char)))
275 (if decomposition
276 (apply 'append
277 (mapcar (lambda (x)
278 (decomposition-char-recursively x decomposition-function))
279 decomposition))
280 (list char))))
281
282 (defun alist-list-to-vector (alist)
283 (mapcar (lambda (x) (cons (car x) (apply 'vector (cdr x)))) alist))
284
285 (defvar nfd-alist nil)
286 (setq nfd-alist (alist-list-to-vector (decomposition-translation-alist 'nfd)))
287 (defvar nfkd-alist nil)
288 (setq nfkd-alist (alist-list-to-vector (decomposition-translation-alist 'nfkd)))
289 (defvar hfs-nfd-alist nil)
290 (setq hfs-nfd-alist (alist-list-to-vector (decomposition-translation-alist 'hfs-nfd)))
291 )
292
293 (eval-and-compile
294 (defvar ucs-normalize-hangul-translation-alist nil)
295 (setq ucs-normalize-hangul-translation-alist
296 (let ((i 0) entries)
297 (while (< i 11172)
298 (setq entries
299 (cons (cons (+ #xac00 i)
300 (if (= 0 (% i 28))
301 (vector (+ #x1100 (/ i 588))
302 (+ #x1161 (/ (% i 588) 28)))
303 (vector (+ #x1100 (/ i 588))
304 (+ #x1161 (/ (% i 588) 28))
305 (+ #x11a7 (% i 28)))))
306 entries)
307 i (1+ i))) entries))
308
309 (defun ucs-normalize-make-translation-table-from-alist (alist)
310 (make-translation-table-from-alist
311 (append alist ucs-normalize-hangul-translation-alist)))
312
313 (define-translation-table 'ucs-normalize-nfd-table
314 (ucs-normalize-make-translation-table-from-alist (eval-when-compile nfd-alist)))
315 (define-translation-table 'ucs-normalize-nfkd-table
316 (ucs-normalize-make-translation-table-from-alist (eval-when-compile nfkd-alist)))
317 (define-translation-table 'ucs-normalize-hfs-nfd-table
318 (ucs-normalize-make-translation-table-from-alist (eval-when-compile hfs-nfd-alist)))
319
320 (defun ucs-normalize-sort (chars)
321 "Sort by canonical combining class of chars."
322 (sort chars
323 (lambda (ch1 ch2)
324 (< (ucs-normalize-ccc ch1) (ucs-normalize-ccc ch2)))))
325
326 (defun ucs-normalize-compose-chars (chars composition-predicate)
327 "Compose CHARS by COMPOSITION-PREDICATE.
328 CHARS must be sorted and normalized in starter-combining pairs."
329 (if composition-predicate
330 (let* ((starter (car chars))
331 remain result prev-ccc
332 (target-chars (cdr chars))
333 target target-ccc
334 primary-composite)
335 (while target-chars
336 (setq target (car target-chars)
337 target-ccc (ucs-normalize-ccc target))
338 (if (and (or (null prev-ccc)
339 (< prev-ccc target-ccc))
340 (setq primary-composite
341 (ucs-normalize-primary-composite (list starter target)
342 composition-predicate)))
343 ;; case 1: composable
344 (setq starter primary-composite
345 prev-ccc nil)
346 (if (= 0 target-ccc)
347 ;; case 2: move starter
348 (setq result (nconc result (cons starter (nreverse remain)))
349 starter target
350 remain nil)
351 ;; case 3: move target
352 (setq prev-ccc target-ccc
353 remain (cons target remain))))
354 (setq target-chars (cdr target-chars)))
355 (nconc result (cons starter (nreverse remain))))
356 chars))
357
358 (defun ucs-normalize-block-compose-chars (chars composition-predicate)
359 "Try composing CHARS by COMPOSITION-PREDICATE.
360 If COMPOSITION-PREDICATE is not given, then do nothing."
361 (let ((chars (ucs-normalize-sort chars)))
362 (if composition-predicate
363 (ucs-normalize-compose-chars chars composition-predicate)
364 chars)))
365 )
366
367 (eval-when-compile
368
369 (defun quick-check-list (decomposition-translation
370 &optional composition-predicate)
371 "Quick-Check List for DECOMPOSITION-TRANSLATION and COMPOSITION-PREDICATE.
372 It includes Singletons, CompositionExclusions, and Non-Starter
373 decomposition. "
374 (let (entries decomposition composition)
375 (mapc
376 (lambda (start-end)
377 (do ((i (car start-end) (+ i 1))) ((> i (cdr start-end)))
378 (setq decomposition
379 (string-to-list
380 (with-temp-buffer
381 (insert i)
382 (translate-region 1 2 decomposition-translation)
383 (buffer-string))))
384 (setq composition
385 (ucs-normalize-block-compose-chars decomposition composition-predicate))
386 (when (not (equal composition (list i)))
387 (setq entries (cons i entries)))))
388 check-range)
389 ;;(remove-duplicates
390 (append entries
391 ucs-normalize-composition-exclusions
392 non-starter-decompositions)))
393 ;;)
394
395 (defvar nfd-quick-check-list nil)
396 (setq nfd-quick-check-list (quick-check-list 'ucs-normalize-nfd-table ))
397 (defvar nfc-quick-check-list nil)
398 (setq nfc-quick-check-list (quick-check-list 'ucs-normalize-nfd-table t ))
399 (defvar nfkd-quick-check-list nil)
400 (setq nfkd-quick-check-list (quick-check-list 'ucs-normalize-nfkd-table ))
401 (defvar nfkc-quick-check-list nil)
402 (setq nfkc-quick-check-list (quick-check-list 'ucs-normalize-nfkd-table t ))
403 (defvar hfs-nfd-quick-check-list nil)
404 (setq hfs-nfd-quick-check-list (quick-check-list 'ucs-normalize-hfs-nfd-table
405 'ucs-normalize-hfs-nfd-comp-p))
406 (defvar hfs-nfc-quick-check-list nil)
407 (setq hfs-nfc-quick-check-list (quick-check-list 'ucs-normalize-hfs-nfd-table t ))
408
409 (defun quick-check-list-to-regexp (quick-check-list)
410 (regexp-opt (mapcar 'char-to-string (append quick-check-list combining-chars))))
411
412 (defun quick-check-decomposition-list-to-regexp (quick-check-list)
413 (concat (quick-check-list-to-regexp quick-check-list) "\\|[가-힣]"))
414
415 (defun quick-check-composition-list-to-regexp (quick-check-list)
416 (concat (quick-check-list-to-regexp quick-check-list) "\\|[ᅡ-ᅵᆨ-ᇂ]"))
417 )
418
419
420 ;; NFD/NFC
421 (defvar ucs-normalize-nfd-quick-check-regexp nil)
422 (setq ucs-normalize-nfd-quick-check-regexp
423 (eval-when-compile (quick-check-decomposition-list-to-regexp nfd-quick-check-list)))
424 (defvar ucs-normalize-nfc-quick-check-regexp nil)
425 (setq ucs-normalize-nfc-quick-check-regexp
426 (eval-when-compile (quick-check-composition-list-to-regexp nfc-quick-check-list)))
427
428 ;; NFKD/NFKC
429 (defvar ucs-normalize-nfkd-quick-check-regexp nil)
430 (setq ucs-normalize-nfkd-quick-check-regexp
431 (eval-when-compile (quick-check-decomposition-list-to-regexp nfkd-quick-check-list)))
432 (defvar ucs-normalize-nfkc-quick-check-regexp nil)
433 (setq ucs-normalize-nfkc-quick-check-regexp
434 (eval-when-compile (quick-check-composition-list-to-regexp nfkc-quick-check-list)))
435
436 ;; HFS-NFD/HFS-NFC
437 (defvar ucs-normalize-hfs-nfd-quick-check-regexp nil)
438 (setq ucs-normalize-hfs-nfd-quick-check-regexp
439 (eval-when-compile (concat (quick-check-decomposition-list-to-regexp hfs-nfd-quick-check-list))))
440 (defvar ucs-normalize-hfs-nfc-quick-check-regexp nil)
441 (setq ucs-normalize-hfs-nfc-quick-check-regexp
442 (eval-when-compile (quick-check-composition-list-to-regexp hfs-nfc-quick-check-list)))
443
444 ;;------------------------------------------------------------------------------------------
445
446 ;; Normalize local region.
447
448 (defun ucs-normalize-block
449 (from to &optional decomposition-translation-table composition-predicate)
450 "Normalize region FROM TO, by sorting the region with canonical-cc.
451 If DECOMPOSITION-TRANSLATION-TABLE is given, translate region
452 before sorting. If COMPOSITION-PREDICATE is given, then compose
453 the region by using it."
454 (save-restriction
455 (narrow-to-region from to)
456 (goto-char (point-min))
457 (if decomposition-translation-table
458 (translate-region from to decomposition-translation-table))
459 (goto-char (point-min))
460 (let ((start (point)) chars); ccc)
461 (while (not (eobp))
462 (forward-char)
463 (when (or (eobp)
464 (= 0 (ucs-normalize-ccc (char-after (point)))))
465 (setq chars
466 (nconc chars
467 (ucs-normalize-block-compose-chars
468 (string-to-list (buffer-substring start (point)))
469 composition-predicate))
470 start (point)))
471 ;;(unless ccc (error "Undefined character can not be normalized!"))
472 )
473 (delete-region (point-min) (point-max))
474 (apply 'insert
475 (ucs-normalize-compose-chars
476 chars composition-predicate)))))
477
478 (defun ucs-normalize-region
479 (from to quick-check-regexp translation-table composition-predicate)
480 "Normalize region from FROM to TO.
481 QUICK-CHECK-REGEXP is applied for searching the region.
482 TRANSLATION-TABLE will be used to decompose region.
483 COMPOSITION-PREDICATE will be used to compose region."
484 (save-excursion
485 (save-restriction
486 (narrow-to-region from to)
487 (goto-char (point-min))
488 (let (start-pos starter)
489 (while (re-search-forward quick-check-regexp nil t)
490 (setq starter (string-to-char (match-string 0)))
491 (setq start-pos (match-beginning 0))
492 (ucs-normalize-block
493 ;; from
494 (if (or (= start-pos (point-min))
495 (and (= 0 (ucs-normalize-ccc starter))
496 (not (memq starter ucs-normalize-combining-chars))))
497 start-pos (1- start-pos))
498 ;; to
499 (if (looking-at ucs-normalize-combining-chars-regexp)
500 (match-end 0) (1+ start-pos))
501 translation-table composition-predicate))))))
502
503 ;; --------------------------------------------------------------------------------
504
505 (defmacro ucs-normalize-string (ucs-normalize-region)
506 `(with-temp-buffer
507 (insert str)
508 (,ucs-normalize-region (point-min) (point-max))
509 (buffer-string)))
510
511 ;;;###autoload
512 (defun ucs-normalize-NFD-region (from to)
513 "Normalize the current region by the Unicode NFD."
514 (interactive "r")
515 (ucs-normalize-region from to
516 ucs-normalize-nfd-quick-check-regexp
517 'ucs-normalize-nfd-table nil))
518 ;;;###autoload
519 (defun ucs-normalize-NFD-string (str)
520 "Normalize the string STR by the Unicode NFD."
521 (ucs-normalize-string ucs-normalize-NFD-region))
522
523 ;;;###autoload
524 (defun ucs-normalize-NFC-region (from to)
525 "Normalize the current region by the Unicode NFC."
526 (interactive "r")
527 (ucs-normalize-region from to
528 ucs-normalize-nfc-quick-check-regexp
529 'ucs-normalize-nfd-table t))
530 ;;;###autoload
531 (defun ucs-normalize-NFC-string (str)
532 "Normalize the string STR by the Unicode NFC."
533 (ucs-normalize-string ucs-normalize-NFC-region))
534
535 ;;;###autoload
536 (defun ucs-normalize-NFKD-region (from to)
537 "Normalize the current region by the Unicode NFKD."
538 (interactive "r")
539 (ucs-normalize-region from to
540 ucs-normalize-nfkd-quick-check-regexp
541 'ucs-normalize-nfkd-table nil))
542 ;;;###autoload
543 (defun ucs-normalize-NFKD-string (str)
544 "Normalize the string STR by the Unicode NFKD."
545 (ucs-normalize-string ucs-normalize-NFKD-region))
546
547 ;;;###autoload
548 (defun ucs-normalize-NFKC-region (from to)
549 "Normalize the current region by the Unicode NFKC."
550 (interactive "r")
551 (ucs-normalize-region from to
552 ucs-normalize-nfkc-quick-check-regexp
553 'ucs-normalize-nfkd-table t))
554 ;;;###autoload
555 (defun ucs-normalize-NFKC-string (str)
556 "Normalize the string STR by the Unicode NFKC."
557 (ucs-normalize-string ucs-normalize-NFKC-region))
558
559 ;;;###autoload
560 (defun ucs-normalize-HFS-NFD-region (from to)
561 "Normalize the current region by the Unicode NFD and Mac OS's HFS Plus."
562 (interactive "r")
563 (ucs-normalize-region from to
564 ucs-normalize-hfs-nfd-quick-check-regexp
565 'ucs-normalize-hfs-nfd-table
566 'ucs-normalize-hfs-nfd-comp-p))
567 ;;;###autoload
568 (defun ucs-normalize-HFS-NFD-string (str)
569 "Normalize the string STR by the Unicode NFD and Mac OS's HFS Plus."
570 (ucs-normalize-string ucs-normalize-HFS-NFD-region))
571 ;;;###autoload
572 (defun ucs-normalize-HFS-NFC-region (from to)
573 "Normalize the current region by the Unicode NFC and Mac OS's HFS Plus."
574 (interactive "r")
575 (ucs-normalize-region from to
576 ucs-normalize-hfs-nfc-quick-check-regexp
577 'ucs-normalize-hfs-nfd-table t))
578 ;;;###autoload
579 (defun ucs-normalize-HFS-NFC-string (str)
580 "Normalize the string STR by the Unicode NFC and Mac OS's HFS Plus."
581 (ucs-normalize-string ucs-normalize-HFS-NFC-region))
582
583 ;; Post-read-conversion function for `utf-8-hfs'.
584 (defun ucs-normalize-hfs-nfd-post-read-conversion (len)
585 (save-excursion
586 (save-restriction
587 (narrow-to-region (point) (+ (point) len))
588 (let ((buffer-modified-p (buffer-modified-p)))
589 (ucs-normalize-HFS-NFC-region (point-min) (point-max))
590 (- (point-max) (point-min))))))
591
592 ;; Pre-write conversion for `utf-8-hfs'.
593 (defun ucs-normalize-hfs-nfd-pre-write-conversion (from to)
594 (let ((old-buf (current-buffer)))
595 (set-buffer (generate-new-buffer " *temp*"))
596 (if (stringp from)
597 (insert from)
598 (insert-buffer-substring old-buf from to))
599 (ucs-normalize-HFS-NFD-region (point-min) (point-max))
600 nil))
601
602 ;;; coding-system definition
603 (define-coding-system 'utf-8-hfs
604 "UTF-8 based coding system for MacOS HFS file names.
605 The singleton characters in HFS normalization exclusion will not
606 be decomposed."
607 :coding-type 'utf-8
608 :mnemonic ?U
609 :charset-list '(unicode)
610 :post-read-conversion 'ucs-normalize-hfs-nfd-post-read-conversion
611 :pre-write-conversion 'ucs-normalize-hfs-nfd-pre-write-conversion
612 )
613
614 (provide 'ucs-normalize)
615
616 ;; Local Variables:
617 ;; coding: utf-8
618 ;; End:
619
620 ;; arch-tag: cef65ae7-71ad-4e19-8da8-56ab4d42aaa4
621 ;;; ucs-normalize.el ends here