1 ;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*-
3 ;; Copyright (C) 2001 Free Software Foundation, Inc.
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6 ;; Keywords: multilingual, Indian, Tamil
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
25 ;; Created: Nov. 08. 2002
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
30 ;; composition of Tamil script characters.
34 ;; Tamil Composable Pattern
44 ;; 2. syllable : only patterns that form a ligature are composed.
50 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
;; Regexp character alternative matching one character in the range
;; delimited by the two encoded characters below.  The endpoints are the
;; characters that `tml-char-glyph' maps to the first and last glyphs
;; listed in `tml-consonants-regexp'.
;; NOTE(review): no use of this constant is visible in this part of the
;; file -- confirm whether code elsewhere depends on it.
52 (defconst tamil-consonant
53 "[
\e$,1<5
\e(B-
\e$,1<Y
\e(B]")
55 (defconst tamil-composable-pattern
57 "\\([
\e$,1<%
\e(B-
\e$,1<4
\e(B]\\)\\|"
58 "[
\e$,1<"<#
\e(B]\\|" ;; vowel modifier considered independent
59 "\\(\\(?:\\(?:
\e$,1<5<m<W
\e(B\\)\\|[
\e$,1<5
\e(B-
\e$,1<Y
\e(B]\\)[
\e$,1<m<^
\e(B-
\e$,1<l
\e(B]?\\)\\|"
60 "\\(
\e$,1<W<m<P<`
\e(B\\)")
61 "Regexp matching a composable sequence of Tamil characters.")
64 (defun tamil-compose-region (from to)
68 (narrow-to-region from to)
69 (goto-char (point-min))
70 (while (re-search-forward tamil-composable-pattern nil t)
71 (tamil-compose-syllable-region (match-beginning 0)
73 (defun tamil-compose-string (string)
75 (insert (decompose-string string))
76 (tamil-compose-region (point-min) (point-max))
80 (defun tamil-post-read-conversion (len)
83 (let ((buffer-modified-p (buffer-modified-p)))
84 (narrow-to-region (point) (+ (point) len))
85 (tamil-compose-region (point-min) (point-max))
86 (set-buffer-modified-p buffer-modified-p)
87 (- (point-max) (point-min))))))
89 (defun tamil-range (from to)
90 "Make the list of the integers of range FROM to TO."
92 (while (<= from to) (setq result (cons to result) to (1- to))) result))
94 (defun tamil-regexp-of-hashtbl-keys (hashtbl)
95 "Return a regular expression that matches all keys in hashtable HASHTBL."
96 (let ((max-specpdl-size 1000))
100 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl)
102 (function (lambda (x y) (> (length x) (length y))))))))
106 (defun tamil-composition-function (from to pattern &optional string)
107 "Compose Tamil characters in REGION, or STRING if specified.
108 Assume that the REGION or STRING must fully match the composable
110 (if string (tamil-compose-syllable-string string)
111 (tamil-compose-syllable-region from to))
114 ;; Register a function to compose Tamil characters.
116 (function (lambda (ucs)
117 (aset composition-function-table (decode-char 'ucs ucs)
118 (list (cons tamil-composable-pattern
119 'tamil-composition-function)))))
120 (nconc '(#x0b82 #x0b83) (tamil-range #x0b85 #x0bb9)))
122 ;; Notes on conversion steps.
124 ;; 1. chars to glyphs
125 ;; Characters (or character sequences) are simply replaced by their glyphs.
127 ;; 2. glyphs reordering.
128 ;; The glyphs "
\e$,4)j
\e(B", "
\e$,4)k
\e(B", "
\e$,4)l
\e(B" are moved in front of the consonant they follow.
130 ;; 3. glyphs to glyphs
131 ;; reordered vowels are ligatured to consonants.
134 ;; Left modifiers are attached on the left;
135 ;; all other glyphs are attached on the right.
137 (defvar tml-char-glyph
139 ;;("
\e$,1<"
\e(B" . "")
140 ("
\e$,1<#
\e(B" . "
\e$,4*G
\e(B")
141 ;; Independent Vowels
142 ("
\e$,1<%
\e(B" . "
\e$,4*<
\e(B")
143 ("
\e$,1<&
\e(B" . "
\e$,4*=
\e(B")
144 ("
\e$,1<'
\e(B" . "
\e$,4*>
\e(B")
145 ("
\e$,1<(
\e(B" . "
\e$,4*?
\e(B")
146 ("
\e$,1<)
\e(B" . "
\e$,4*@
\e(B")
147 ("
\e$,1<*
\e(B" . "
\e$,4*A
\e(B")
148 ("
\e$,1<.
\e(B" . "
\e$,4*B
\e(B")
149 ("
\e$,1</
\e(B" . "
\e$,4*C
\e(B")
150 ("
\e$,1<0
\e(B" . "
\e$,4*D
\e(B")
151 ("
\e$,1<2
\e(B" . "
\e$,4*E
\e(B")
152 ("
\e$,1<3
\e(B" . "
\e$,4*F
\e(B")
153 ("
\e$,1<4
\e(B" . "
\e$,4*E*W
\e(B")
155 ("
\e$,1<5<m<W<m
\e(B" . "
\e$,4):
\e(B") ; ks.
156 ("
\e$,1<5<m<W
\e(B" . "
\e$,4*^
\e(B") ; ks
157 ("
\e$,1<5
\e(B" . "
\e$,4*H
\e(B")
159 ("
\e$,1<9
\e(B" . "
\e$,4*I
\e(B")
160 ("
\e$,1<:
\e(B" . "
\e$,4*J
\e(B")
161 ("
\e$,1<<
\e(B" . "
\e$,4*\
\e(B")
162 ("
\e$,1<<<m
\e(B" . "
\e$,4)8
\e(B")
163 ("
\e$,1<>
\e(B" . "
\e$,4*K
\e(B")
164 ("
\e$,1<?
\e(B" . "
\e$,4*L
\e(B")
165 ("
\e$,1<C
\e(B" . "
\e$,4*M
\e(B")
166 ("
\e$,1<D
\e(B" . "
\e$,4*N
\e(B")
167 ("
\e$,1<H
\e(B" . "
\e$,4*O
\e(B")
168 ("
\e$,1<I
\e(B" . "
\e$,4*Y
\e(B")
169 ("
\e$,1<I<m
\e(B" . "
\e$,4)a
\e(B")
170 ("
\e$,1<J
\e(B" . "
\e$,4*P
\e(B")
171 ("
\e$,1<N
\e(B" . "
\e$,4*Q
\e(B")
172 ("
\e$,1<O
\e(B" . "
\e$,4*R
\e(B")
173 ("
\e$,1<P
\e(B" . "
\e$,4*S
\e(B")
174 ("
\e$,1<Q
\e(B" . "
\e$,4*X
\e(B")
175 ("
\e$,1<R
\e(B" . "
\e$,4*T
\e(B")
176 ("
\e$,1<S
\e(B" . "
\e$,4*W
\e(B")
177 ("
\e$,1<T
\e(B" . "
\e$,4*V
\e(B")
178 ("
\e$,1<U
\e(B" . "
\e$,4*U
\e(B")
179 ("
\e$,1<W
\e(B" . "
\e$,4*[
\e(B")
180 ("
\e$,1<W<m
\e(B" . "
\e$,4)7
\e(B")
181 ("
\e$,1<W<m<P<`
\e(B" . "
\e$,4*_
\e(B")
182 ("
\e$,1<X
\e(B" . "
\e$,4*Z
\e(B")
183 ("
\e$,1<X<m
\e(B" . "
\e$,4)6
\e(B")
184 ("
\e$,1<Y
\e(B" . "
\e$,4*]
\e(B")
185 ("
\e$,1<Y<m
\e(B" . "
\e$,4)9
\e(B")
187 ;; Dependent vowel signs
188 ("
\e$,1<^
\e(B" . "
\e$,4)c
\e(B")
189 ("
\e$,1<_
\e(B" . "
\e$,4)d
\e(B")
190 ("
\e$,1<`
\e(B" . "
\e$,4)f
\e(B")
191 ("
\e$,1<a
\e(B" . "
\e$,4)g
\e(B")
192 ("
\e$,1<b
\e(B" . "
\e$,4)h
\e(B")
193 ("
\e$,1<f
\e(B" . "
\e$,4)j
\e(B")
194 ("
\e$,1<g
\e(B" . "
\e$,4)k
\e(B")
195 ("
\e$,1<h
\e(B" . "
\e$,4)l
\e(B")
196 ("
\e$,1<j
\e(B" . "
\e$,4)j)c
\e(B")
197 ("
\e$,1<k
\e(B" . "
\e$,4)k)c
\e(B")
198 ("
\e$,1<l
\e(B" . "
\e$,4)j*W
\e(B")
201 ("
\e$,1<m
\e(B" . "
\e$,4)b
\e(B")
202 ("
\e$,1<w
\e(B" . "nil") ;; not supported?
205 (defvar tml-char-glyph-hash
206 (let* ((hash (make-hash-table :test 'equal)))
207 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
;; Regexp produced by `tamil-regexp-of-hashtbl-keys' from the keys of
;; `tml-char-glyph-hash'.  `tamil-compose-syllable-region' searches the
;; buffer with it; each match is looked up in `tml-char-glyph-hash' to get
;; the glyph string for that character sequence.
211 (defvar tml-char-glyph-regexp
212 (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash))
214 ;; Some Tamil glyphs need to be reordered before ligatures are formed.
;; Character alternative matching a single consonant glyph, i.e. one of
;; the glyph codes that `tml-char-glyph' assigns to consonants.  It forms
;; the first group of the regexp in `tml-glyph-reordering-regexp-list'.
216 (defvar tml-consonants-regexp
217 "[
\e$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*]
\e(B]")
;; Character alternative matching the three glyphs that trigger
;; reordering.  These glyphs appear in the `tml-char-glyph' values for
;; dependent vowel signs.  `tamil-compose-syllable-region' tests the glyph
;; string against this before applying the reordering regexp.
219 (defvar tml-glyph-reorder-key-glyphs "[
\e$,4)j)k)l
\e(B]")
221 (defvar tml-glyph-reordering-regexp-list
223 (concat "\\(" tml-consonants-regexp "\\)\\([
\e$,4)j)k)l
\e(B]\\)") "\\2\\1"))
225 ;; Consonant + vowel-sign glyph pairs to be replaced by a single ligature glyph.
226 (defvar tml-glyph-glyph
228 ("
\e$,4*H)d
\e(B" . "
\e$,4(a
\e(B") ; ki
229 ("
\e$,4*^)d
\e(B" . "
\e$,4(v
\e(B") ; ksi
230 ("
\e$,4*^)f
\e(B" . "
\e$,4)2
\e(B") ; ksi~
231 ("
\e$,4*I)d
\e(B" . "
\e$,4(b
\e(B") ; n^i
232 ("
\e$,4*J)d
\e(B" . "
\e$,4(c
\e(B") ; ci
233 ("
\e$,4*K)d
\e(B" . "
\e$,4(d
\e(B") ; n~i
234 ("
\e$,4*L)d
\e(B" . "
\e$,4)n
\e(B") ; t.i
235 ("
\e$,4*M)d
\e(B" . "
\e$,4(e
\e(B") ; n.i
236 ("
\e$,4*N)d
\e(B" . "
\e$,4(f
\e(B") ; ti
237 ("
\e$,4*O)d
\e(B" . "
\e$,4(g
\e(B") ; ni
238 ("
\e$,4*P)d
\e(B" . "
\e$,4(h
\e(B") ; pi
239 ("
\e$,4*Q)d
\e(B" . "
\e$,4(i
\e(B") ; mi
240 ("
\e$,4*R)d
\e(B" . "
\e$,4(j
\e(B") ; yi
241 ("
\e$,4*S)d
\e(B" . "
\e$,4(k
\e(B") ; ri
242 ("
\e$,4*T)d
\e(B" . "
\e$,4(l
\e(B") ; li
243 ("
\e$,4*U)d
\e(B" . "
\e$,4(m
\e(B") ; vi
244 ("
\e$,4*V)d
\e(B" . "
\e$,4(n
\e(B") ; l_i
245 ("
\e$,4*W)d
\e(B" . "
\e$,4(o
\e(B") ; l.i
246 ("
\e$,4*X)d
\e(B" . "
\e$,4(p
\e(B") ; r_i
247 ("
\e$,4*Y)d
\e(B" . "
\e$,4(q
\e(B") ; n_i
248 ("
\e$,4*Z)d
\e(B" . "
\e$,4(r
\e(B") ; si
249 ("
\e$,4*[)d
\e(B" . "
\e$,4(s
\e(B") ; s'i
250 ("
\e$,4*\)d
\e(B" . "
\e$,4(t
\e(B") ; ji
251 ("
\e$,4*])d
\e(B" . "
\e$,4(u
\e(B") ; hi
253 ("
\e$,4*H)f
\e(B" . "
\e$,4(w
\e(B") ; ki~
254 ("
\e$,4*I)f
\e(B" . "
\e$,4(x
\e(B") ; n^i~
255 ("
\e$,4*J)f
\e(B" . "
\e$,4(y
\e(B") ; ci~
256 ("
\e$,4*K)f
\e(B" . "
\e$,4(z
\e(B") ; n~i~
257 ("
\e$,4*L)f
\e(B" . "
\e$,4)o
\e(B") ; t.i~
258 ("
\e$,4*M)f
\e(B" . "
\e$,4)!
\e(B") ; n.i~
259 ("
\e$,4*N)f
\e(B" . "
\e$,4)"
\e(B") ; ti~
260 ("
\e$,4*O)f
\e(B" . "
\e$,4)#
\e(B") ; ni~
261 ("
\e$,4*P)f
\e(B" . "
\e$,4)$
\e(B") ; pi~
262 ("
\e$,4*Q)f
\e(B" . "
\e$,4)%
\e(B") ; mi~
263 ("
\e$,4*R)f
\e(B" . "
\e$,4)&
\e(B") ; yi~
264 ("
\e$,4*S)f
\e(B" . "
\e$,4)'
\e(B") ; ri~
265 ("
\e$,4*T)f
\e(B" . "
\e$,4)(
\e(B") ; li~
266 ("
\e$,4*U)f
\e(B" . "
\e$,4))
\e(B") ; vi~
267 ("
\e$,4*V)f
\e(B" . "
\e$,4)*
\e(B") ; l_i~
268 ("
\e$,4*W)f
\e(B" . "
\e$,4)+
\e(B") ; l.i~
269 ("
\e$,4*X)f
\e(B" . "
\e$,4),
\e(B") ; r_i~
270 ("
\e$,4*Y)f
\e(B" . "
\e$,4)-
\e(B") ; n_i~
271 ("
\e$,4*Z)f
\e(B" . "
\e$,4).
\e(B") ; si~
272 ("
\e$,4*[)f
\e(B" . "
\e$,4)/
\e(B") ; s'i~
273 ("
\e$,4*\)f
\e(B" . "
\e$,4)0
\e(B") ; ji~
274 ("
\e$,4*])f
\e(B" . "
\e$,4)1
\e(B") ; hi~
276 ("
\e$,4*H)g
\e(B" . "
\e$,4)p
\e(B") ; ku
277 ("
\e$,4*I)g
\e(B" . "
\e$,4)q
\e(B") ; n^u
278 ("
\e$,4*J)g
\e(B" . "
\e$,4)r
\e(B") ; cu
279 ("
\e$,4*K)g
\e(B" . "
\e$,4)s
\e(B") ; n~u
280 ("
\e$,4*L)g
\e(B" . "
\e$,4)t
\e(B") ; t.u
281 ("
\e$,4*M)g
\e(B" . "
\e$,4)u
\e(B") ; n.u
282 ("
\e$,4*N)g
\e(B" . "
\e$,4)v
\e(B") ; tu
283 ("
\e$,4*O)g
\e(B" . "
\e$,4)x
\e(B") ; nu
284 ("
\e$,4*P)g
\e(B" . "
\e$,4)y
\e(B") ; pu
285 ("
\e$,4*Q)g
\e(B" . "
\e$,4)z
\e(B") ; mu
286 ("
\e$,4*R)g
\e(B" . "
\e$,4){
\e(B") ; yu
287 ("
\e$,4*S)g
\e(B" . "
\e$,4)|
\e(B") ; ru
288 ("
\e$,4*T)g
\e(B" . "
\e$,4)}
\e(B") ; lu
289 ("
\e$,4*U)g
\e(B" . "
\e$,4)~
\e(B") ; vu
290 ("
\e$,4*V)g
\e(B" . "
\e$,4)
\7f\e(B") ; l_u
291 ("
\e$,4*W)g
\e(B" . "
\e$,4*
\e(B") ; l.u
292 ("
\e$,4*X)g
\e(B" . "
\e$,4*!
\e(B") ; r_u
293 ("
\e$,4*Y)g
\e(B" . "
\e$,4*"
\e(B") ; n_u
295 ("
\e$,4*H)h
\e(B" . "
\e$,4*#
\e(B") ; ku~
296 ("
\e$,4*I)h
\e(B" . "
\e$,4*$
\e(B") ; n^u~
297 ("
\e$,4*J)h
\e(B" . "
\e$,4*%
\e(B") ; cu~
298 ("
\e$,4*K)h
\e(B" . "
\e$,4*&
\e(B") ; n~u~
299 ("
\e$,4*L)h
\e(B" . "
\e$,4*'
\e(B") ; t.u~
300 ("
\e$,4*M)h
\e(B" . "
\e$,4*(
\e(B") ; n.u~
301 ("
\e$,4*N)h
\e(B" . "
\e$,4*)
\e(B") ; tu~
302 ("
\e$,4*O)h
\e(B" . "
\e$,4*+
\e(B") ; nu~
303 ("
\e$,4*P)h
\e(B" . "
\e$,4*,
\e(B") ; pu~
304 ("
\e$,4*Q)h
\e(B" . "
\e$,4*-
\e(B") ; mu~
305 ("
\e$,4*R)h
\e(B" . "
\e$,4*.
\e(B") ; yu~
306 ("
\e$,4*S)h
\e(B" . "
\e$,4*/
\e(B") ; ru~
307 ("
\e$,4*T)h
\e(B" . "
\e$,4*6
\e(B") ; lu~
308 ("
\e$,4*U)h
\e(B" . "
\e$,4*7
\e(B") ; vu~
309 ("
\e$,4*V)h
\e(B" . "
\e$,4*8
\e(B") ; l_u~
310 ("
\e$,4*W)h
\e(B" . "
\e$,4*9
\e(B") ; l.u~
311 ("
\e$,4*X)h
\e(B" . "
\e$,4*:
\e(B") ; r_u~
312 ("
\e$,4*Y)h
\e(B" . "
\e$,4*;
\e(B") ; n_u~
315 (defvar tml-glyph-glyph-hash
316 (let* ((hash (make-hash-table :test 'equal)))
317 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
;; Regexp produced by `tamil-regexp-of-hashtbl-keys' from the keys of
;; `tml-glyph-glyph-hash' (presumably the glyph pairs listed in
;; `tml-glyph-glyph' -- confirm against the hash definition).
;; `tamil-compose-syllable-region' matches it against the glyph string and
;; replaces the match with the value stored in the hash.
321 (defvar tml-glyph-glyph-regexp
322 (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash))
324 (defun tamil-compose-syllable-string (string)
326 (insert (decompose-string string))
327 (tamil-compose-syllable-region (point-min) (point-max))
330 (defun tamil-compose-syllable-region (from to)
331 "Compose tamil syllable in region FROM to TO."
332 (let (glyph-str match-str glyph-reorder-regexps)
335 (narrow-to-region from to)
336 (goto-char (point-min))
337 ;; char-glyph-conversion
338 (while (re-search-forward tml-char-glyph-regexp nil t)
339 (setq match-str (match-string 0))
341 (concat glyph-str (gethash match-str tml-char-glyph-hash))))
343 (when (string-match tml-glyph-reorder-key-glyphs glyph-str)
344 (if (string-match (car tml-glyph-reordering-regexp-list)
347 (replace-match (cdr tml-glyph-reordering-regexp-list)
348 nil nil glyph-str))))
349 ;; glyph-glyph-conversion
350 (when (string-match tml-glyph-glyph-regexp glyph-str)
351 (setq match-str (match-string 0 glyph-str))
353 (replace-match (gethash match-str tml-glyph-glyph-hash)
355 ;; concatenate and attach reference-points.
362 (lambda (x) (list '(5 . 3) x))) ;; default ref. point.
364 (compose-region from to glyph-str)))))
368 ;;; arch-tag: 4d1c9737-e7b1-44cf-a040-4f64c50e773e
369 ;;; tml-util.el ends here