X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/73b0cd50031a714347109169ceb8bacae338612a..eab3f9e76b8188b5f99139b220a272fe3f2debaf:/lisp/international/ucs-normalize.el diff --git a/lisp/international/ucs-normalize.el b/lisp/international/ucs-normalize.el index 1782e0b2bd..74978ce38a 100644 --- a/lisp/international/ucs-normalize.el +++ b/lisp/international/ucs-normalize.el @@ -1,7 +1,6 @@ ;;; ucs-normalize.el --- Unicode normalization NFC/NFD/NFKD/NFKC -;; Copyright (C) 2009-2011 -;; Free Software Foundation, Inc. +;; Copyright (C) 2009-2016 Free Software Foundation, Inc. ;; Author: Taichi Kawabata ;; Keywords: unicode, normalization @@ -110,7 +109,7 @@ (defconst ucs-normalize-version "1.2") -(eval-when-compile (require 'cl)) +(eval-when-compile (require 'cl-lib)) (declare-function nfd "ucs-normalize" (char)) @@ -132,7 +131,7 @@ This list is taken from http://www.unicode.org/Public/UNIDATA/5.2/CompositionExclusions.txt") - ;; Unicode ranges that decompositions & combinings are defined. + ;; Unicode ranges that decompositions & combining characters are defined. (defvar check-range nil) (setq check-range '((#x00a0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x110ff) (#x1d000 . #x1dfff) (#x1f100 . #x1f2ff) (#x2f800 . #x2faff))) @@ -140,14 +139,17 @@ (defun nfd (char) (let ((decomposition (get-char-code-property char 'decomposition))) - (if (and decomposition (numberp (car decomposition))) + (if (and decomposition (numberp (car decomposition)) + (or (> (length decomposition) 1) + (/= (car decomposition) char))) decomposition))) (defun nfkd (char) (let ((decomposition (get-char-code-property char 'decomposition))) (if (symbolp (car decomposition)) (cdr decomposition) - decomposition))) + (if (or (> (length decomposition) 1) + (/= (car decomposition) char)) decomposition)))) (defun hfs-nfd (char) (when (or (and (>= char 0) (< char #x2000)) @@ -174,13 +176,23 @@ (setq decomposition-pair-to-composition nil) (defvar non-starter-decompositions nil) (setq non-starter-decompositions nil) + ;; This file needs to access these 2 Unicode properties, but when we + ;; compile it during bootstrap, charprop.el was not built yet, and + ;; therefore is not yet loaded into bootstrap-emacs, so + ;; char-code-property-alist is nil, and get-char-code-property + ;; always returns nil, something the code here doesn't like. + (define-char-code-property 'decomposition "uni-decomposition.el") + (define-char-code-property 'canonical-combining-class "uni-combining.el") (let ((char 0) ccc decomposition) (mapc (lambda (start-end) - (do ((char (car start-end) (+ char 1))) ((> char (cdr start-end))) + (cl-do ((char (car start-end) (+ char 1))) ((> char (cdr start-end))) (setq ccc (ucs-normalize-ccc char)) (setq decomposition (get-char-code-property char 'decomposition)) + (if (and (= (length decomposition) 1) + (= (car decomposition) char)) + (setq decomposition nil)) (if (and ccc (/= 0 ccc)) (add-to-list 'combining-chars char)) (if (and (numberp (car decomposition)) (/= (ucs-normalize-ccc (car decomposition)) @@ -222,7 +234,7 @@ Note that Hangul are excluded.") (eval-when-compile decomposition-pair-to-composition))) (defun ucs-normalize-primary-composite (decomposition-pair composition-predicate) - "Convert DECOMPOSITION-PAIR to primay composite using COMPOSITION-PREDICATE." + "Convert DECOMPOSITION-PAIR to primary composite using COMPOSITION-PREDICATE." (let ((char (or (gethash decomposition-pair ucs-normalize-decomposition-pair-to-primary-composite) (and (<= #x1100 (car decomposition-pair)) @@ -265,7 +277,7 @@ Note that Hangul are excluded.") (let (decomposition alist) (mapc (lambda (start-end) - (do ((char (car start-end) (+ char 1))) ((> char (cdr start-end))) + (cl-do ((char (car start-end) (+ char 1))) ((> char (cdr start-end))) (setq decomposition (funcall decomposition-function char)) (if decomposition (setq alist (cons (cons char @@ -386,7 +398,7 @@ decomposition." (let (entries decomposition composition) (mapc (lambda (start-end) - (do ((i (car start-end) (+ i 1))) ((> i (cdr start-end))) + (cl-do ((i (car start-end) (+ i 1))) ((> i (cdr start-end))) (setq decomposition (string-to-list (with-temp-buffer @@ -622,6 +634,10 @@ be decomposed." :pre-write-conversion 'ucs-normalize-hfs-nfd-pre-write-conversion ) +;; This is tested in dired.c:file_name_completion in order to reject +;; false positives due to comparison of encoded file names. +(coding-system-put 'utf-8-hfs 'decomposed-characters 't) + (provide 'ucs-normalize) ;; Local Variables: