- (unless (or (and (eq char (car decomp))
- (not (cdr decomp))))
- (let ((dec-aux decomp)
- (fold-decomp t)
- char-aux found)
- (while (and dec-aux (not found))
- (setq char-aux (pop dec-aux))
- ;; Is char-aux a number or letter, per unicode standard?
- (setq found (memq (get-char-code-property char-aux 'general-category)
- '(Lu Ll Lt Lm Lo Nd Nl No))))
- (if found
- ;; Check if the decomp has more than one letter,
- ;; because then we don't want the first letter to
- ;; match the decomposition. This is because we
- ;; want 'f' to match 'ﬀ' but not 'ff'.
- (dolist (char-aux dec-aux)
- (when (and fold-decomp
- (memq (get-char-code-property char-aux 'general-category)
- '(Lu Ll Lt Lm Lo Nd Nl No)))
- (setq fold-decomp nil)))
- ;; If there's no number or letter on the
- ;; decomp, take the first character in it.
- (setq found (car-safe decomp)))
- ;; Finally, we only fold multi-char decomp if at
- ;; least one of the chars is non-spacing (combining).
- (when fold-decomp
- (setq fold-decomp nil)
- (dolist (char-aux decomp)
- (when (and (not fold-decomp)
- (> (get-char-code-property char-aux 'canonical-combining-class) 0))
- (setq fold-decomp t))))
- ;; Add char to the list of characters that char-aux can
- ;; represent. Also possibly add its decomp, so we can
- ;; match multi-char representations like (format "a%c" 769)
- (when (and found (not (eq char char-aux)))
- (let ((chars (cons (char-to-string char) (aref equiv char-aux))))
- (aset equiv char-aux
- (if fold-decomp
- (cons (apply #'string decomp) chars)
- chars))))))))
+ (unless (and (not (cdr decomp))
+              (eq char (car decomp)))
+   ;; DECOMP is CHAR's decomposition: a list of characters, optionally
+   ;; preceded by a symbol naming a formatting tag.  The guard above
+   ;; skips characters whose decomposition is just themselves.
+   (if (symbolp (car decomp))
+       ;; Discard a possible formatting tag.
+       (setq decomp (cdr decomp))
+     ;; If there's no formatting tag, ensure that char matches
+     ;; its decomp exactly.  This is because we want the
+     ;; precomposed 'ä' (U+00E4) to match the decomposed sequence
+     ;; 'a' + U+0308, but we don't want '¹' (U+00B9) to match '1'.
+     (aset equiv char
+           (cons (apply #'string decomp)
+                 (aref equiv char))))
+
+   ;; Allow the entire decomp to match char.  If decomp has
+   ;; multiple characters, this is done by adding an entry
+   ;; to the alist of the first character in decomp.  This
+   ;; allows the two letters 'ff' to match the ligature 'ﬀ'
+   ;; (U+FB00), the sequence 'a' + U+0308 to match the precomposed
+   ;; 'ä' (U+00E4), and '1' to match '¹' (U+00B9).
+   (let ((make-decomp-match-char
+          ;; Register CHAR as a match for the character list DECOMP.
+          ;; A multi-character DECOMP is stored in `equiv-multi' under
+          ;; its first character, as a cons of (REST-OF-DECOMP-STRING
+          ;; . REGEXP-QUOTED-CHAR); a single-character DECOMP just
+          ;; gets CHAR (as a string) added to that character's
+          ;; `equiv' list.
+          (lambda (decomp char)
+            (if (cdr decomp)
+                (aset equiv-multi (car decomp)
+                      (cons (cons (apply #'string (cdr decomp))
+                                  (regexp-quote (string char)))
+                            (aref equiv-multi (car decomp))))
+              (aset equiv (car decomp)
+                    (cons (char-to-string char)
+                          (aref equiv (car decomp))))))))
+     (funcall make-decomp-match-char decomp char)
+     ;; Do it again, without the non-spacing characters.
+     ;; This allows 'a' to match the precomposed 'ä' (U+00E4).
+     (let ((simpler-decomp nil)
+           (found-one nil))
+       (dolist (c decomp)
+         (if (> (get-char-code-property c 'canonical-combining-class) 0)
+             (setq found-one t)
+           (push c simpler-decomp)))
+       ;; `push' accumulated the spacing characters back to front.
+       ;; Restore their original order, otherwise a decomp with
+       ;; several spacing characters would be keyed on its last
+       ;; character instead of its first.  The list is freshly
+       ;; consed here, so reversing it destructively is safe.
+       (setq simpler-decomp (nreverse simpler-decomp))
+       (when (and simpler-decomp found-one)
+         (funcall make-decomp-match-char simpler-decomp char)
+         ;; Finally, if the decomp only had one spacing
+         ;; character, we allow this character to match the
+         ;; decomp.  This is to let 'a' match the decomposed
+         ;; sequence 'a' + U+0308.
+         (unless (cdr simpler-decomp)
+           (aset equiv (car simpler-decomp)
+                 (cons (apply #'string decomp)
+                       (aref equiv (car simpler-decomp)))))))))))