X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/73b0cd50031a714347109169ceb8bacae338612a..e333157cba3b4ffd7c25f8210a6aa5a21ae10de7:/lisp/international/ucs-normalize.el diff --git a/lisp/international/ucs-normalize.el b/lisp/international/ucs-normalize.el index 1782e0b2bd..ac2a0d9e77 100644 --- a/lisp/international/ucs-normalize.el +++ b/lisp/international/ucs-normalize.el @@ -1,7 +1,6 @@ ;;; ucs-normalize.el --- Unicode normalization NFC/NFD/NFKD/NFKC -;; Copyright (C) 2009-2011 -;; Free Software Foundation, Inc. +;; Copyright (C) 2009-2016 Free Software Foundation, Inc. ;; Author: Taichi Kawabata ;; Keywords: unicode, normalization @@ -110,7 +109,7 @@ (defconst ucs-normalize-version "1.2") -(eval-when-compile (require 'cl)) +(eval-when-compile (require 'cl-lib)) (declare-function nfd "ucs-normalize" (char)) @@ -132,7 +131,7 @@ This list is taken from http://www.unicode.org/Public/UNIDATA/5.2/CompositionExclusions.txt") - ;; Unicode ranges that decompositions & combinings are defined. + ;; Unicode ranges that decompositions & combining characters are defined. (defvar check-range nil) (setq check-range '((#x00a0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x110ff) (#x1d000 . #x1dfff) (#x1f100 . #x1f2ff) (#x2f800 . #x2faff))) @@ -140,14 +139,17 @@ (defun nfd (char) (let ((decomposition (get-char-code-property char 'decomposition))) - (if (and decomposition (numberp (car decomposition))) + (if (and decomposition (numberp (car decomposition)) + (or (> (length decomposition) 1) + (/= (car decomposition) char))) decomposition))) (defun nfkd (char) (let ((decomposition (get-char-code-property char 'decomposition))) (if (symbolp (car decomposition)) (cdr decomposition) - decomposition))) + (if (or (> (length decomposition) 1) + (/= (car decomposition) char)) decomposition)))) (defun hfs-nfd (char) (when (or (and (>= char 0) (< char #x2000)) @@ -174,13 +176,23 @@ (setq decomposition-pair-to-composition nil) (defvar non-starter-decompositions nil) (setq non-starter-decompositions nil) + ;; This file needs to access these 2 Unicode properties, but when we + ;; compile it during bootstrap, charprop.el was not built yet, and + ;; therefore is not yet loaded into bootstrap-emacs, so + ;; char-code-property-alist is nil, and get-char-code-property + ;; always returns nil, something the code here doesn't like. + (define-char-code-property 'decomposition "uni-decomposition.el") + (define-char-code-property 'canonical-combining-class "uni-combining.el") (let ((char 0) ccc decomposition) (mapc (lambda (start-end) - (do ((char (car start-end) (+ char 1))) ((> char (cdr start-end))) + (cl-do ((char (car start-end) (+ char 1))) ((> char (cdr start-end))) (setq ccc (ucs-normalize-ccc char)) (setq decomposition (get-char-code-property char 'decomposition)) + (if (and (= (length decomposition) 1) + (= (car decomposition) char)) + (setq decomposition nil)) (if (and ccc (/= 0 ccc)) (add-to-list 'combining-chars char)) (if (and (numberp (car decomposition)) (/= (ucs-normalize-ccc (car decomposition)) @@ -222,7 +234,7 @@ Note that Hangul are excluded.") (eval-when-compile decomposition-pair-to-composition))) (defun ucs-normalize-primary-composite (decomposition-pair composition-predicate) - "Convert DECOMPOSITION-PAIR to primay composite using COMPOSITION-PREDICATE." + "Convert DECOMPOSITION-PAIR to primary composite using COMPOSITION-PREDICATE." (let ((char (or (gethash decomposition-pair ucs-normalize-decomposition-pair-to-primary-composite) (and (<= #x1100 (car decomposition-pair)) @@ -251,7 +263,7 @@ Note that Hangul are excluded.") (defvar ucs-normalize-combining-chars-regexp nil "Regular expression to match sequence of combining characters.") (setq ucs-normalize-combining-chars-regexp - (eval-when-compile (concat (regexp-opt (mapcar 'char-to-string combining-chars)) "+"))) + (eval-when-compile (concat (regexp-opt-charset combining-chars) "+"))) (declare-function decomposition-translation-alist "ucs-normalize" (decomposition-function)) @@ -265,7 +277,7 @@ Note that Hangul are excluded.") (let (decomposition alist) (mapc (lambda (start-end) - (do ((char (car start-end) (+ char 1))) ((> char (cdr start-end))) + (cl-do ((char (car start-end) (+ char 1))) ((> char (cdr start-end))) (setq decomposition (funcall decomposition-function char)) (if decomposition (setq alist (cons (cons char @@ -384,20 +396,22 @@ If COMPOSITION-PREDICATE is not given, then do nothing." It includes Singletons, CompositionExclusions, and Non-Starter decomposition." (let (entries decomposition composition) - (mapc - (lambda (start-end) - (do ((i (car start-end) (+ i 1))) ((> i (cdr start-end))) - (setq decomposition - (string-to-list - (with-temp-buffer - (insert i) - (translate-region 1 2 decomposition-translation) - (buffer-string)))) - (setq composition - (ucs-normalize-block-compose-chars decomposition composition-predicate)) - (when (not (equal composition (list i))) - (setq entries (cons i entries))))) - check-range) + (with-temp-buffer + (mapc + (lambda (start-end) + (cl-do ((i (car start-end) (+ i 1))) ((> i (cdr start-end))) + (setq decomposition + (string-to-list + (progn + (erase-buffer) + (insert i) + (translate-region 1 2 decomposition-translation) + (buffer-string)))) + (setq composition + (ucs-normalize-block-compose-chars decomposition composition-predicate)) + (when (not (equal composition (list i))) + (setq entries (cons i entries))))) + check-range)) ;;(remove-duplicates (append entries ucs-normalize-composition-exclusions @@ -419,7 +433,7 @@ decomposition." (setq hfs-nfc-quick-check-list (quick-check-list 'ucs-normalize-hfs-nfd-table t )) (defun quick-check-list-to-regexp (quick-check-list) - (regexp-opt (mapcar 'char-to-string (append quick-check-list combining-chars)))) + (regexp-opt-charset (append quick-check-list combining-chars))) (defun quick-check-decomposition-list-to-regexp (quick-check-list) (concat (quick-check-list-to-regexp quick-check-list) "\\|[가-힣]")) @@ -601,14 +615,9 @@ COMPOSITION-PREDICATE will be used to compose region." (- (point-max) (point-min))))) ;; Pre-write conversion for `utf-8-hfs'. -(defun ucs-normalize-hfs-nfd-pre-write-conversion (from to) - (let ((old-buf (current-buffer))) - (set-buffer (generate-new-buffer " *temp*")) - (if (stringp from) - (insert from) - (insert-buffer-substring old-buf from to)) - (ucs-normalize-HFS-NFD-region (point-min) (point-max)) - nil)) +;; _from and _to are legacy arguments (see `define-coding-system'). +(defun ucs-normalize-hfs-nfd-pre-write-conversion (_from _to) + (ucs-normalize-HFS-NFD-region (point-min) (point-max))) ;;; coding-system definition (define-coding-system 'utf-8-hfs @@ -622,6 +631,10 @@ be decomposed." :pre-write-conversion 'ucs-normalize-hfs-nfd-pre-write-conversion ) +;; This is tested in dired.c:file_name_completion in order to reject +;; false positives due to comparison of encoded file names. +(coding-system-put 'utf-8-hfs 'decomposed-characters 't) + (provide 'ucs-normalize) ;; Local Variables: