X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/7258f9caa46b374afecc1788dbc79a0412977308..937640a621a4ce2e5e56eaecca37a2a28a584318:/lisp/language/tibet-util.el diff --git a/lisp/language/tibet-util.el b/lisp/language/tibet-util.el index efa07fa1fa..3b9e6afbce 100644 --- a/lisp/language/tibet-util.el +++ b/lisp/language/tibet-util.el @@ -1,4 +1,4 @@ -;;; tibet-util.el --- Support for inputting Tibetan characters +;;; tibet-util.el --- utilities for Tibetan -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. ;; Licensed to the Free Software Foundation. @@ -26,13 +26,32 @@ ;; Created: Feb. 17. 1997 -;; History: +;;; History: ;; 1997.03.13 Modification in treatment of text properties; ;; Support for some special signs and punctuations. ;; 1999.10.25 Modification for a new composition way by K.Handa. +;;; Commentary: + ;;; Code: +(defconst tibetan-obsolete-glyphs + `(("$(7!=(B" . "$(8!=(B") ; 2 col <-> 1 col + ("$(7!?(B" . "$(8!?(B") + ("$(7!@(B" . "$(8!@(B") + ("$(7!A(B" . "$(8!A(B") + ("$(7"`(B" . "$(8"`(B") + ("$(7!;(B" . "$(8!;(B") + ("$(7!D(B" . "$(8!D(B") + ;; Yes these are dirty. But ... + ("$(7!>(B $(7!>(B" . ,(compose-string "$(7!>(B $(7!>(B" 0 3 [?$(7!>(B (Br . Bl) ? (Br . Bl) ?$(7!>(B])) + ("$(7!4!5!5(B" . ,(compose-string + "$(7#R#S#S#S(B" 0 4 + [?$(7#R(B (Br . Bl) ?$(7#S(B (Br . Bl) ?$(7#S(B (Br . Bl) ?$(7#S(B])) + ("$(7!4!5(B" . ,(compose-string "$(7#R#S#S(B" 0 3 [?$(7#R(B (Br . Bl) ?$(7#S(B (Br . Bl) ?$(7#S(B])) + ("$(7!6(B" . ,(compose-string "$(7#R#S!I(B" 0 3 [?$(7#R(B (Br . Bl) ?$(7#S(B (br . tr) ?$(7!I(B])) + ("$(7!4(B" . ,(compose-string "$(7#R#S(B" 0 2 [?$(7#R(B (Br . Bl) ?$(7#S(B])))) + ;;;###autoload (defun tibetan-char-p (ch) "Check if char CH is Tibetan character. @@ -50,7 +69,7 @@ Returns non-nil if CH is Tibetan. Otherwise, returns nil." (i 0) ch this-trans) (while (< i len) - (let ((idx (string-match tibetan-precomposition-rule-alist str i))) + (let ((idx (string-match tibetan-precomposition-rule-regexp str i))) (if (eq idx i) ;; Ith character and the followings matches precomposable ;; Tibetan sequence. @@ -116,13 +135,13 @@ The returned string has no composition information." ;;; (Sanskrit visarga, though it is a vowel modifier, is considered ;;; to be a punctuation.) ;;; -;;; Here are examples of the words "bsgrubs" and "h'uM" +;;; Here are examples of the words "bsgrubs" and "hfauM" ;;; -;;; 4$(7"70"714%qx!"U0"G###C"U14"70"714"G0"G1(B 4$(7"Hx!"Rx!"Ur'"_0"H"A"U"_1(B +;;; 4$(7"70"714%qx!"U0"G###C"U14"70"714"G0"G1(B 4$(7"Hx!"Rx!"Ur'"_0"H"R"U"_1(B ;;; ;;; M ;;; b s b s h -;;; g ' +;;; g fa ;;; r u ;;; u ;;; @@ -144,7 +163,7 @@ The returned string has no composition information." ;; If 'a follows a consonant, turn it into the subjoined form. ;; * Disabled by Tomabechi 2000/06/09 * ;; Because in Unicode, $(7"A(B may follow directly a consonant without - ;; any intervening vowel, as in 4$(7"90"914""0"""Q14"A0"A1!;(B=4$(7"90"91(B 4$(7""0""1(B 4$(7"A0"A1(B not 4$(7"90"91(B 4$(7""0""1(B $(7"Q(B 4$(7"A0"A1(B + ;; any intervening vowel, as in 4$(7"90"914""0"""Q14"A0"A1!;(B=4$(7"90"91(B 4$(7""0""1(B 4$(7"A0"A1(B not 4$(7"90"91(B 4$(7""0""1(B $(7"Q(B 4$(7"A0"A1(B ;;(if (and (= char ?$(7"A(B) ;; (aref (char-category-set (car last)) ?0)) ;; (setq char ?$(7"R(B)) ;; modified for new font by Tomabechi 1999/12/10 @@ -153,8 +172,9 @@ The returned string has no composition information." ;; Added by Tomabechi 2000/06/08 (if (memq char '(?$(7"T(B ?$(7"V(B ?$(7"W(B ?$(7"X(B ?$(7"Y(B ?$(7"Z(B ?$(7"b(B)) (setq comp-vowel - (cddr (assoc (char-to-string char) - tibetan-composite-vowel-alist)) + (copy-sequence + (cddr (assoc (char-to-string char) + tibetan-composite-vowel-alist))) char (cadr (assoc (char-to-string char) tibetan-composite-vowel-alist)))) @@ -258,7 +278,7 @@ The returned string has no composition information." (defun tibetan-decompose-region (from to) "Decompose Tibetan text in the region FROM and TO. This is different from decompose-region because precomposed Tibetan characters -are decomposed into normal Tiebtan character sequences." +are decomposed into normal Tibetan character sequences." (interactive "r") (save-restriction (narrow-to-region from to) @@ -278,7 +298,7 @@ are decomposed into normal Tiebtan character sequences." (defun tibetan-decompose-string (str) "Decompose Tibetan string STR. This is different from decompose-string because precomposed Tibetan characters -are decomposed into normal Tiebtan character sequences." +are decomposed into normal Tibetan character sequences." (let ((new "") (len (length str)) (idx 0) @@ -347,6 +367,65 @@ See also docstring of the function tibetan-compose-region." ;; Should return nil as annotations. nil)) + +;;; +;;; Unicode-related definitions. +;;; + +(defvar tibetan-canonicalize-for-unicode-alist + '(("$(7"Q(B" . "") ;; remove vowel a + ("$(7"T(B" . "$(7"R"S(B") ;; decompose vowels whose use is ``discouraged'' in Unicode 3.0 + ("$(7"V(B" . "$(7"R"U(B") + ("$(7"W(B" . "$(7#C"a(B") + ("$(7"X(B" . "$(7#C"R"a(B") + ("$(7"Y(B" . "$(7#D"a(B") + ("$(7"Z(B" . "$(7#D"R"a(B") + ("$(7"b(B" . "$(7"R"a(B")) + "Rules for canonicalizing Tibetan vowels for Unicode.") + +(defvar tibetan-canonicalize-for-unicode-regexp + "[$(7"Q"T"V"W"X"Y"Z"b(B]" + "Regexp for Tibetan vowels to be canonicalized in Unicode.") + +(defun tibetan-canonicalize-for-unicode-region (from to) + (save-restriction + (narrow-to-region from to) + (goto-char from) + (while (re-search-forward tibetan-canonicalize-for-unicode-regexp nil t) + (let ( + ;;(from (match-beginning 0)) + ;;(to (match-end 0)) + (canonical-form + (cdr (assoc (match-string 0) + tibetan-canonicalize-for-unicode-alist)))) + ;;(goto-char from) + ;;(delete-region from to) + ;;(insert canonical-form) + (replace-match canonical-form) + )))) + +(defvar tibetan-strict-unicode t + "*Flag to control Tibetan canonicalizing for Unicode. + +If non-nil, the vowel a is removed and composite vowels are decomposed +before writing buffer in Unicode. See also +`tibetan-canonicalize-for-unicode-regexp' and +`tibetan-canonicalize-for-unicode-alist'.") + +;;;###autoload +(defun tibetan-pre-write-canonicalize-for-unicode (from to) + (let ((old-buf (current-buffer)) + (strict-unicode tibetan-strict-unicode)) + (set-buffer (generate-new-buffer " *temp*")) + (if (stringp from) + (insert from) + (insert-buffer-substring old-buf from to)) + (if strict-unicode + (tibetan-canonicalize-for-unicode-region (point-min) (point-max))) + ;; Should return nil as annotations. + nil)) + (provide 'tibet-util) -;;; language/tibet-util.el ends here. +;;; arch-tag: 7a7333e8-1584-446c-b39c-a02b9def265d +;;; tibet-util.el ends here