-;;; tibet-util.el --- utilities for Tibetan -*- coding: iso-2022-7bit; -*-
+;;; tibet-util.el --- utilities for Tibetan -*- coding: utf-8-emacs; -*-
-;; Copyright (C) 1997, 2001-2012 Free Software Foundation, Inc.
+;; Copyright (C) 1997, 2001-2016 Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
;; National Institute of Advanced Industrial Science and Technology (AIST)
;;; Code:
(defconst tibetan-obsolete-glyphs
- `(("\e$(7!=\e(B" . "\e$(7!=\e(B") ; 2 col <-> 1 col
- ("\e$(7!?\e(B" . "\e$(7!?\e(B")
- ("\e$(7!@\e(B" . "\e$(7!@\e(B")
- ("\e$(7!A\e(B" . "\e$(7!A\e(B")
- ("\e$(7"`\e(B" . "\e$(7"`\e(B")
- ("\e$(7!;\e(B" . "\e$(7!;\e(B")
- ("\e$(7!D\e(B" . "\e$(7!D\e(B")
+ `(("།" . "།") ; 2 col <-> 1 col
+ ("༏" . "༏")
+ ("༐" . "༐")
+ ("༑" . "༑")
+ ("ཿ" . "ཿ")
+ ("་" . "་")
+ ("༔" . "༔")
;; Yes these are dirty. But ...
- ("\e$(7!>\e(B \e$(7!>\e(B" . ,(compose-string "\e$(7!>\e(B \e$(7!>\e(B" 0 3 [?\e$(7!>\e(B (Br . Bl) ? (Br . Bl) ?\e$(7!>\e(B]))
- ("\e$(7!4!5!5\e(B" . ,(compose-string
- "\e$(7#R#S#S#S\e(B" 0 4
- [?\e$(7#R\e(B (Br . Bl) ?\e$(7#S\e(B (Br . Bl) ?\e$(7#S\e(B (Br . Bl) ?\e$(7#S\e(B]))
- ("\e$(7!4!5\e(B" . ,(compose-string "\e$(7#R#S#S\e(B" 0 3 [?\e$(7#R\e(B (Br . Bl) ?\e$(7#S\e(B (Br . Bl) ?\e$(7#S\e(B]))
- ("\e$(7!6\e(B" . ,(compose-string "\e$(7#R#S!I\e(B" 0 3 [?\e$(7#R\e(B (Br . Bl) ?\e$(7#S\e(B (br . tr) ?\e$(7!I\e(B]))
- ("\e$(7!4\e(B" . ,(compose-string "\e$(7#R#S\e(B" 0 2 [?\e$(7#R\e(B (Br . Bl) ?\e$(7#S\e(B]))))
+ ("༎ ༎" . ,(compose-string "༎ ༎" 0 3 [?༎ (Br . Bl) ? (Br . Bl) ?༎]))
+ ("༄༅༅" . ,(compose-string
+ "࿁࿂࿂࿂" 0 4
+ [?࿁ (Br . Bl) ?࿂ (Br . Bl) ?࿂ (Br . Bl) ?࿂]))
+ ("༄༅" . ,(compose-string "࿁࿂࿂" 0 3 [?࿁ (Br . Bl) ?࿂ (Br . Bl) ?࿂]))
+ ("༆" . ,(compose-string "࿁࿂༙" 0 3 [?࿁ (Br . Bl) ?࿂ (br . tr) ?༙]))
+ ("༄" . ,(compose-string "࿁࿂" 0 2 [?࿁ (Br . Bl) ?࿂]))))
;;;###autoload
(defun tibetan-char-p (ch)
;;;
;;; Here are examples of the words "bsgrubs" and "hfauM"
;;;
-;;; \e$(7"7"G###C"U"7"G\e(B \e$(7"H"R"U"_\e(B
+;;; བསྒྲུབས ཧཱུཾ
;;;
;;; M
;;; b s b s h
;;; r u
;;; u
;;;
-;;; Consonants `'' (\e$(7"A\e(B), `w' (\e$(7">\e(B), `y' (\e$(7"B\e(B), `r' (\e$(7"C\e(B) take special
+;;; Consonants `'' (འ), `w' (ཝ), `y' (ཡ), `r' (ར) take special
;;; forms when they are used as subjoined consonants. Consonant `r'
;;; takes another special form when it is superjoined, in such a case
;;; as "rka", while it does not change its form when conjoined with
;; Special treatment for 'a chung.
;; If 'a follows a consonant, turn it into the subjoined form.
;; * Disabled by Tomabechi 2000/06/09 *
- ;; Because in Unicode, \e$(7"A\e(B may follow directly a consonant without
- ;; any intervening vowel, as in \e$(7"9"""Q"A!;\e(B=\e$(7"9\e(B \e$(7""\e(B \e$(7"A\e(B not \e$(7"9\e(B \e$(7""\e(B \e$(7"Q\e(B \e$(7"A\e(B
- ;;(if (and (= char ?\e$(7"A\e(B)
+ ;; Because in Unicode, འ may follow directly a consonant without
+ ;; any intervening vowel, as in མཁའ་=མ ཁ འ not མ ཁ འ
+ ;;(if (and (= char ?འ)
;; (aref (char-category-set (car last)) ?0))
- ;; (setq char ?\e$(7"R\e(B)) ;; modified for new font by Tomabechi 1999/12/10
+ ;; (setq char ?ཱ)) ;; modified for new font by Tomabechi 1999/12/10
;; Composite vowel signs are decomposed before being added
;; Added by Tomabechi 2000/06/08
- (if (memq char '(?\e$(7"T\e(B ?\e$(7"V\e(B ?\e$(7"W\e(B ?\e$(7"X\e(B ?\e$(7"Y\e(B ?\e$(7"Z\e(B ?\e$(7"b\e(B))
+ (if (memq char '(?ཱི ?ཱུ ?ྲྀ ?ཷ ?ླྀ ?ཹ ?ཱྀ))
(setq comp-vowel
(copy-sequence
(cddr (assoc (char-to-string char)
;; Compose a lower vowel sign vertically underneath.
((aref (char-category-set char) ?3)
- (if (or (eq char ?\e$(7"Q\e(B) ;; `\e$(7"Q\e(B' and `\e$,1FP\e(B' should not visible when composed.
+ (if (or (eq char ?) ;; `' and `' should not visible when composed.
(eq char #xF70))
(setq rule nil)
(setq rule stack-under)))
;; Transform ra-mgo (superscribed r) if followed by a subjoined
;; consonant other than w, ', y, r.
- ((and (= (car last) ?\e$(7"C\e(B)
- (not (memq char '(?\e$(7#>\e(B ?\e$(7"R\e(B ?\e$(7#B\e(B ?\e$(7#C\e(B))))
- (setcar last ?\e$(7!"\e(B) ;; modified for newfont by Tomabechi 1999/12/10
+ ((and (= (car last) ?ར)
+ (not (memq char '(?ྭ ?ཱ ?ྱ ?ྲ))))
+ (setcar last ?) ;; modified for newfont by Tomabechi 1999/12/10
(setq rule stack-under))
;; Transform initial base consonant if followed by a subjoined
;; consonant other than 'a.
(t
(let ((laststr (char-to-string (car last))))
- (if (and (/= char ?\e$(7"R\e(B) ;; modified for new font by Tomabechi
- (string-match "[\e$(7"!\e(B-\e$(7"="?"@"D\e(B-\e$(7"J"K\e(B]" laststr))
+ (if (and (/= char ?ཱ) ;; modified for new font by Tomabechi
+ (string-match "[ཀ-ཛྷཞཟལ-ཀྵཪ]" laststr))
(setcar last (string-to-char
(cdr (assoc (char-to-string (car last))
tibetan-base-to-subjoined-alist)))))
(defun tibetan-compose-string (str)
"Compose Tibetan string STR."
(let ((idx 0))
- ;; `\e$(7"A\e(B' is included in the pattern for subjoined consonants
+ ;; `འ' is included in the pattern for subjoined consonants
;; because we treat it specially in tibetan-add-components.
;; (This feature was removed by Tomabechi 2000/06/08)
(while (setq idx (string-match tibetan-composable-pattern str idx))
(save-restriction
(narrow-to-region beg end)
(goto-char (point-min))
- ;; `\e$(7"A\e(B' is included in the pattern for subjoined consonants
+ ;; `འ' is included in the pattern for subjoined consonants
;; because we treat it specially in tibetan-add-components.
;; (This feature was removed by Tomabechi 2000/06/08)
(while (re-search-forward tibetan-composable-pattern nil t)
;;;
(defvar tibetan-canonicalize-for-unicode-alist
- '(("\e$(7"Q\e(B" . "") ;; remove vowel a
- ("\e$(7"T\e(B" . "\e$(7"R"S\e(B") ;; decompose vowels whose use is ``discouraged'' in Unicode 3.0
- ("\e$(7"V\e(B" . "\e$(7"R"U\e(B")
- ("\e$(7"W\e(B" . "\e$(7#C"a\e(B")
- ("\e$(7"X\e(B" . "\e$(7#C"R"a\e(B")
- ("\e$(7"Y\e(B" . "\e$(7#D"a\e(B")
- ("\e$(7"Z\e(B" . "\e$(7#D"R"a\e(B")
- ("\e$(7"b\e(B" . "\e$(7"R"a\e(B"))
+ '(("" . "") ;; remove vowel a
+ ("ཱི" . "ཱི") ;; decompose vowels whose use is ``discouraged'' in Unicode 3.0
+ ("ཱུ" . "ཱུ")
+ ("ྲྀ" . "ྲྀ")
+ ("ཷ" . "ྲཱྀ")
+ ("ླྀ" . "ླྀ")
+ ("ཹ" . "ླཱྀ")
+ ("ཱྀ" . "ཱྀ"))
"Rules for canonicalizing Tibetan vowels for Unicode.")
(defvar tibetan-canonicalize-for-unicode-regexp
- "[\e$(7"Q"T"V"W"X"Y"Z"b\e(B]"
+ "[ཱཱིུྲྀཷླྀཹཱྀ]"
"Regexp for Tibetan vowels to be canonicalized in Unicode.")
(defun tibetan-canonicalize-for-unicode-region (from to)