]> code.delx.au - gnu-emacs/blob - lisp/language/devan-util.el
Switch license to GPLv3 or later.
[gnu-emacs] / lisp / language / devan-util.el
1 ;;; devan-util.el --- Support for composing Devanagari characters
2
3 ;; Copyright (C) 1997, 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H14PRO021
8
9 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
10 ;; Keywords: multilingual, Devanagari
11
12 ;; This file is part of GNU Emacs.
13
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 3 or (at your option)
17 ;; any later version.
18
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
23
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27 ;; Boston, MA 02110-1301, USA.
28
29 ;; Created: Feb. 17. 2001
30
31 ;;; Commentary:
32
33 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
34 ;; composition of Devanagari script characters.
35
36 ;;; Code:
37
38 ;;;###autoload
39
40 ;; Devanagari Composable Pattern
41 ;; C .. Consonants
42 ;; V .. Vowel
43 ;; H .. Halant
44 ;; M .. Matra
45 ;; V .. Vowel
46 ;; A .. Anuswar
47 ;; D .. Chandrabindu
48 ;; (N .. Zerowidth Non Joiner)
49 ;; (J .. Zerowidth Joiner. )
50 ;; 1. vowel
51 ;; V(A/D)?
52 ;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya)
53 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
54
55 (defconst devanagari-consonant
56 "[\e$,15U\e(B-\e$,15y68\e(B-\e$,16?\e(B]")
57
58 (defconst devanagari-composable-pattern
59 (concat
60 "\\([\e$,15E\e(B-\e$,15T6@6A\e(B][\e$,15A5B\e(B]?\\)\\|[\e$,15C6D\e(B]"
61 "\\|\\("
62 "\\(?:\\(?:[\e$,15U\e(B-\e$,15y68\e(B-\e$,16?\e(B]\e$,16-\e(B\\)?\\(?:[\e$,15U\e(B-\e$,15y68\e(B-\e$,16?\e(B]\e$,16-\e(B\\)?\\(?:[\e$,15U\e(B-\e$,15y68\e(B-\e$,16?\e(B]\e$,16-\e(B\\)?[\e$,15U\e(B-\e$,15y68\e(B-\e$,16?\e(B]\e$,16-\e(B\\)?"
63 "[\e$,15U\e(B-\e$,15y68\e(B-\e$,16?\e(B]\\(?:\e$,16-\e(B\\|[\e$,15~\e(B-\e$,16-6B6C\e(B]?[\e$,15B5A\e(B]?\\)?"
64 "\\)")
65 "Regexp matching a composable sequence of Devanagari characters.")
66
67 ;;;###autoload
68 (defun devanagari-compose-region (from to)
69 (interactive "r")
70 (save-excursion
71 (save-restriction
72 (narrow-to-region from to)
73 (goto-char (point-min))
74 (while (re-search-forward devanagari-composable-pattern nil t)
75 (devanagari-compose-syllable-region (match-beginning 0)
76 (match-end 0))))))
77 (defun devanagari-compose-string (string)
78 (with-temp-buffer
79 (insert (decompose-string string))
80 (devanagari-compose-region (point-min) (point-max))
81 (buffer-string)))
82
83 ;;;###autoload
84 (defun devanagari-post-read-conversion (len)
85 (save-excursion
86 (save-restriction
87 (let ((buffer-modified-p (buffer-modified-p)))
88 (narrow-to-region (point) (+ (point) len))
89 (devanagari-compose-region (point-min) (point-max))
90 (set-buffer-modified-p buffer-modified-p)
91 (- (point-max) (point-min))))))
92
93 (defun devanagari-range (from to)
94 "Make the list of the integers of range FROM to TO."
95 (let (result)
96 (while (<= from to) (setq result (cons to result) to (1- to))) result))
97
98 (defun devanagari-regexp-of-hashtbl-keys (hashtbl)
99 "Return a regular expression that matches all keys in hashtable HASHTBL."
100 (let ((max-specpdl-size 1000))
101 (regexp-opt
102 (sort
103 (let (dummy)
104 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl)
105 dummy)
106 (function (lambda (x y) (> (length x) (length y))))))))
107
108 (defun devanagari-composition-function (from to pattern &optional string)
109 "Compose Devanagari characters in REGION, or STRING if specified.
110 Assume that the REGION or STRING must fully match the composable
111 PATTERN regexp."
112 (if string (devanagari-compose-syllable-string string)
113 (devanagari-compose-syllable-region from to))
114 (- to from))
115
116 ;; Register a function to compose Devanagari characters.
117 (mapc
118 (function (lambda (ucs)
119 (aset composition-function-table (decode-char 'ucs ucs)
120 (list (cons devanagari-composable-pattern
121 'devanagari-composition-function)))))
122 (nconc '(#x0903) (devanagari-range #x0905 #x0939) (devanagari-range #x0958 #x0961)))
123
124 ;; Notes on conversion steps.
125
126 ;; 1. chars to glyphs
127 ;;
128 ;; Rules will not be applied to the halant appeared at the end of the
129 ;; text. Also, the preceding/following "r" will be treated as special case.
130
131 ;; 2. glyphs reordering.
132 ;;
133 ;; The glyphs are split by halant, and each glyph groups are
134 ;; re-ordered in the following order.
135 ;;
136 ;; Note that `consonant-glyph' mentioned here does not contain the
137 ;; vertical bar (right modifier) attached at the right of the
138 ;; consonant.
139 ;;
140 ;; If the glyph-group contains right modifier,
141 ;; (1) consonant-glyphs/vowels, with nukta sign
142 ;; (2) spacing
143 ;; (3) right modifier (may be matra)
144 ;; (4) top matra
145 ;; (5) preceding "r"
146 ;; (6) anuswar
147 ;; (7) following "r"
148 ;; (8) bottom matra or halant.
149 ;;
150 ;; Otherwise,
151 ;; (1) consonant-glyph/vowels, with nukta sign
152 ;; (3) left matra
153 ;; (4) top matra
154 ;; (5) preceding "r"
155 ;; (6) anuswar
156 ;; (7) following "r"
157 ;; (8) bottom matra or halant.
158 ;; (2) spacing
159
160 ;; 3. glyph to glyph
161 ;;
162 ;; For better display, some glyph display would be tuned.
163
164 ;; 4. Composition.
165 ;;
166 ;; left modifiers will be attached at the left.
167 ;; others will be attached right.
168
169 ;; Problem::
170 ;; Can we generalize this methods to other Indian scripts?
171
172 (defvar dev-char-glyph
173 '(("\e$,15E\e(B" . "\e$,4 K\e(B")
174 ("\e$,15F\e(B" . "\e$,4 K")\e(B")
175 ("\e$,15~\e(B" . "\e$,4")\e(B")
176 ("\e$,15G\e(B" . "\e$,4 \\e(B")
177 ("\e$,15\7f\e(B" . "\e$,4"*\e(B")
178 ("\e$,15\7f5A\e(B" . "\e$,4"*\e(B\\e$,4"&\e(B")
179 ("\e$,15H\e(B" . "\e$,4 \"'\e(B")
180 ("\e$,15H5A\e(B" . "\e$,4 \"'"&\e(B")
181 ("\e$,16 \e(B" . "\e$,4"2\e(B")
182 ("\e$,16 5A\e(B" . "\e$,4"2"&\e(B")
183 ("\e$,15I\e(B" . "\e$,4 ]\e(B")
184 ("\e$,16!\e(B" . "\e$,4"6\e(B")
185 ("\e$,15J\e(B" . "\e$,4 ^"P\e(B")
186 ("\e$,16"\e(B" . "\e$,4":\e(B")
187 ("\e$,15K\e(B" . "\e$,4 `"Q\e(B")
188 ("\e$,16#\e(B" . "\e$,4">\e(B")
189 ;;("\e$,15L\e(B" . nil) ; not implemented.
190 ("\e$,16$\e(B" . "\e$,4"?\e(B")
191 ("\e$,15M\e(B" . "\e$,4 b"L\e(B")
192 ("\e$,15M5A\e(B" . "\e$,4 b"$\e(B")
193 ("\e$,15M5B\e(B" . "\e$,4 b"$\e(B")
194 ("\e$,16%\e(B" . "\\e$,4"L\e(B")
195 ("\e$,15N\e(B" . "\e$,4 b"@\e(B")
196 ("\e$,15N5A\e(B" . "\e$,4 b"@"&\e(B")
197 ("\e$,16&\e(B" . "\\e$,4"@\e(B")
198 ("\e$,16&5A\e(B" . "\\e$,4"@\e(B\\e$,4"&\e(B")
199 ("\e$,15O\e(B" . "\e$,4 b\e(B")
200 ("\e$,16'\e(B" . "\\e$,4"D\e(B")
201 ("\e$,16'5A\e(B" . "\\e$,4"D\e(B\\e$,4"&\e(B")
202 ("\e$,15P\e(B" . "\e$,4 b"D\e(B")
203 ("\e$,15P5A\e(B" . "\e$,4 b"D"&\e(B")
204 ("\e$,16(\e(B" . "\\e$,4"H\e(B")
205 ("\e$,16(5A\e(B" . "\\e$,4"H\e(B\\e$,4"&\e(B")
206 ("\e$,15Q\e(B" . "\e$,4 K")"L\e(B") ;; special rule for reodering.
207 ("\e$,15Q5A\e(B" . "\e$,4 K")"$\e(B")
208 ("\e$,15Q5B\e(B" . "\e$,4 K")"$\e(B")
209 ("\e$,16)\e(B" . "\\e$,4")"L\e(B")
210 ("\e$,16)5A\e(B" . "\\e$,4")"$\e(B")
211 ("\e$,16)5B\e(B" . "\\e$,4")"$\e(B")
212 ("\e$,15R\e(B" . "\e$,4 K")"@\e(B")
213 ("\e$,15R5A\e(B" . "\e$,4 K")"@"&\e(B")
214 ("\e$,16*\e(B" . "\\e$,4")"@\e(B")
215 ("\e$,16*5A\e(B" . "\\e$,4")"@"&\e(B")
216 ("\e$,15S\e(B" . "\e$,4 K")"D\e(B")
217 ("\e$,15S5A\e(B" . "\e$,4 K")"D"&\e(B")
218 ("\e$,16+\e(B" . "\\e$,4")"D\e(B")
219 ("\e$,16+5A\e(B" . "\\e$,4")"D"&\e(B")
220 ("\e$,15T\e(B" . "\e$,4 K")"H\e(B")
221 ("\e$,15T5A\e(B" . "\e$,4 K")"H"&\e(B")
222 ("\e$,16,\e(B" . "\\e$,4")"H\e(B")
223 ("\e$,16,5A\e(B" . "\\e$,4")"H"&\e(B")
224 ("\e$,16@\e(B" . "\e$,4 a"Q\e(B")
225 ;;("\e$,16B\e(B" . nil)
226 ;;("\e$,16A\e(B" . nil)
227 ;;("\e$,16C\e(B" . nil)
228
229 ;; GRUTTALS
230 ("\e$,15U\e(B" . "\e$,4 e"R\e(B")
231 ("\e$,15U6-\e(B" . "\e$,4 c\e(B")
232 ("\e$,15U6-5p\e(B" . "\e$,4 g"R\e(B")
233 ("\e$,15U6-5d\e(B" . "\e$,4 h"R\e(B")
234 ("\e$,15U6-5w\e(B" . "\e$,4 i")\e(B")
235 ("\e$,15U6-5w6-\e(B" . "\e$,4 i\e(B")
236
237 ("\e$,15V\e(B" . "\e$,4 j")\e(B")
238 ("\e$,15V6-\e(B" . "\e$,4 j\e(B")
239 ("\e$,15V6-5p\e(B" . "\e$,4 l")\e(B")
240 ("\e$,15V6-5p6-\e(B" . "\e$,4 l\e(B")
241
242 ("\e$,15W\e(B" . "\e$,4 m")\e(B")
243 ("\e$,15W6-\e(B" . "\e$,4 m\e(B")
244 ("\e$,15W6-5p\e(B" . "\e$,4 o")\e(B")
245 ("\e$,15W6-5p6-\e(B" . "\e$,4 o\e(B")
246
247 ("\e$,15X\e(B" . "\e$,4 p")\e(B")
248 ("\e$,15X6-\e(B" . "\e$,4 p\e(B")
249 ("\e$,15X6-5p\e(B" . "\e$,4 q")\e(B")
250 ("\e$,15X6-5p6-\e(B" . "\e$,4 q\e(B")
251
252 ("\e$,15Y\e(B" . "\e$,4 r"S\e(B")
253 ;; PALATALS
254 ("\e$,15Z\e(B" . "\e$,4 s")\e(B")
255 ("\e$,15Z6-\e(B" . "\e$,4 s\e(B")
256 ("\e$,15Z6-5p\e(B" . "\e$,4 t")\e(B")
257 ("\e$,15Z6-5p6-\e(B" . "\e$,4 t\e(B")
258
259 ("\e$,15[\e(B" . "\e$,4 u"T\e(B")
260
261 ("\e$,15\\e(B" . "\e$,4 v")\e(B")
262 ("\e$,15\6-\e(B" . "\e$,4 v\e(B")
263 ("\e$,15\6-5p\e(B" . "\e$,4 x")\e(B")
264 ("\e$,15\6-5p6-\e(B" . "\e$,4 x\e(B")
265 ("\e$,15\6-5^\e(B" . "\e$,4 y")\e(B")
266 ("\e$,15\6-5^6-\e(B" . "\e$,4 y\e(B")
267
268 ("\e$,15]\e(B" . "\e$,4 z")\e(B")
269 ("\e$,15]6-\e(B" . "\e$,4 z\e(B")
270 ("\e$,15]6-5p\e(B" . "\e$,4 {")\e(B")
271 ("\e$,15]6-5p6-\e(B" . "\e$,4 {\e(B")
272
273 ("\e$,15^\e(B" . "\e$,4 |")\e(B")
274 ("\e$,15^6-\e(B" . "\e$,4 |\e(B")
275 ;; CEREBRALS
276 ("\e$,15_\e(B" . "\e$,4 }"U\e(B")
277 ("\e$,15_6-5_\e(B" . "\e$,4 ~"U\e(B")
278 ("\e$,15_6-5`\e(B" . "\e$,4 \7f"U\e(B")
279
280 ("\e$,15`\e(B" . "\e$,4! "V\e(B")
281 ("\e$,15`6-5`\e(B" . "\e$,4!!"V\e(B")
282
283 ("\e$,15a\e(B" . "\e$,4!""W\e(B")
284 ("\e$,15a6-5a\e(B" . "\e$,4!$"W\e(B")
285 ("\e$,15a6-5b\e(B" . "\e$,4!%"W\e(B")
286
287 ("\e$,15b\e(B" . "\e$,4!&"X\e(B")
288
289 ("\e$,15c\e(B" . "\e$,4!(")\e(B")
290 ("\e$,15c6-\e(B" . "\e$,4!(\e(B")
291 ;; DENTALS
292 ("\e$,15d\e(B" . "\e$,4!)")\e(B")
293 ("\e$,15d6-\e(B" . "\e$,4!)\e(B")
294 ("\e$,15d6-5p\e(B" . "\e$,4!*")\e(B")
295 ("\e$,15d6-5p6-\e(B" . "\e$,4!*\e(B")
296 ("\e$,15d6-5d\e(B" . "\e$,4!+")\e(B")
297 ("\e$,15d6-5d6-\e(B" . "\e$,4!+\e(B")
298
299 ("\e$,15e\e(B" . "\e$,4!,")\e(B")
300 ("\e$,15e6-\e(B" . "\e$,4!,\e(B")
301 ("\e$,15e6-5p\e(B" . "\e$,4!-")\e(B")
302 ("\e$,15e6-5p6-\e(B" . "\e$,4!-\e(B")
303
304 ("\e$,15f\e(B" . "\e$,4!."Y\e(B")
305 ("\e$,15f6#\e(B" . "\e$,4!/"Y\e(B")
306 ("\e$,15f6-5p\e(B" . "\e$,4!0"Y\e(B")
307 ("\e$,15f6-5f\e(B" . "\e$,4!1"Y\e(B")
308 ("\e$,15f6-5g\e(B" . "\e$,4!2"Y\e(B")
309 ("\e$,15f6-5n\e(B" . "\e$,4!3\e(B")
310 ("\e$,15f6-5o\e(B" . "\e$,4!4\e(B")
311 ("\e$,15f6-5u\e(B" . "\e$,4!5"Y\e(B")
312
313 ("\e$,15g\e(B" . "\e$,4!6")\e(B")
314 ("\e$,15g6-\e(B" . "\e$,4!6\e(B")
315 ("\e$,15g6-5p\e(B" . "\e$,4!7")\e(B")
316 ("\e$,15g6-5p6-\e(B" . "\e$,4!7\e(B")
317
318 ("\e$,15h\e(B" . "\e$,4!8")\e(B")
319 ("\e$,15h6-\e(B" . "\e$,4!8\e(B")
320 ("\e$,15h6-5p\e(B" . "\e$,4!9")\e(B")
321 ("\e$,15h6-5p6-\e(B" . "\e$,4!9")\e(B")
322 ("\e$,15h6-5h\e(B" . "\e$,4!:")\e(B")
323 ("\e$,15h6-5h6-\e(B" . "\e$,4!:\e(B")
324
325 ("\e$,15i\e(B" . "\e$,4!8"#")\e(B")
326 ;; LABIALS
327 ("\e$,15j\e(B" . "\e$,4!;")\e(B")
328 ("\e$,15j6-\e(B" . "\e$,4!;\e(B")
329 ("\e$,15j6-5p\e(B" . "\e$,4!<")\e(B")
330 ("\e$,15j6-5p6-\e(B" . "\e$,4!<\e(B")
331
332 ("\e$,15k\e(B" . "\e$,4!a"[\e(B")
333 ("\e$,15k6-\e(B" . "\e$,4!=\e(B")
334 ("\e$,15k6-5p\e(B" . "\e$,4!c"[\e(B")
335
336 ("\e$,15l\e(B" . "\e$,4!d")\e(B")
337 ("\e$,15l6-\e(B" . "\e$,4!d\e(B")
338 ("\e$,15l6-5p\e(B" . "\e$,4!e")\e(B")
339 ("\e$,15l6-5p6-\e(B" . "\e$,4!e\e(B")
340
341 ("\e$,15m\e(B" . "\e$,4!f")\e(B")
342 ("\e$,15m6-\e(B" . "\e$,4!f\e(B")
343 ("\e$,15m6-5p\e(B" . "\e$,4!g")\e(B")
344 ("\e$,15m6-5p6-\e(B" . "\e$,4!g\e(B")
345
346 ("\e$,15n\e(B" . "\e$,4!h")\e(B")
347 ("\e$,15n6-\e(B" . "\e$,4!h\e(B")
348 ("\e$,15n6-5p\e(B" . "\e$,4!i")\e(B")
349 ("\e$,15n6-5p6-\e(B" . "\e$,4!i\e(B")
350 ;; SEMIVOWELS
351 ("\e$,15o\e(B" . "\e$,4!j")\e(B")
352 ("\e$,15o6-\e(B" . "\e$,4!j\e(B")
353 ("\e$,15o6-5p\e(B" . "\e$,4!k")\e(B")
354 ("\e$,15o6-5p6-\e(B" . "\e$,4!k\e(B")
355 ("\e$,16-5o\e(B" . "\e$,4!l\e(B") ;; when every ohter lig. fails.
356
357 ("\e$,15p\e(B" . "\e$,4!n"W\e(B")
358 ;; ("\e$,15p6-\e(B" . "\\e$,4"'\e(B") ;; special case. only the topmost pos.
359 ("\e$,15q\e(B" . "\e$,4!n"#"W\e(B")
360 ("\e$,15q6-\e(B" . "\e$,4!m\e(B") ;; IS 13194 speical rule.
361 ("\e$,15p6!\e(B" . "\e$,4!o"[\e(B")
362 ("\e$,15p6"\e(B" . "\e$,4!p"\\e(B")
363
364 ("\e$,15r\e(B" . "\e$,4!q")\e(B")
365 ("\e$,15r6-\e(B" . "\e$,4!q\e(B")
366 ("\e$,15s\e(B" . "\e$,4!s\e(B")
367 ("\e$,15s6-\e(B" . "\e$,4!r\e(B")
368 ("\e$,15t\e(B" . "\e$,4!s"#\e(B")
369 ("\e$,15t6-\e(B" . "\e$,4!r"#\e(B")
370
371 ("\e$,15u\e(B" . "\e$,4!t")\e(B")
372 ("\e$,15u6-\e(B" . "\e$,4!t\e(B")
373 ("\e$,15u6-5p\e(B" . "\e$,4!u")\e(B")
374 ("\e$,15u6-5p6-\e(B" . "\e$,4!u\e(B")
375 ;; SIBILANTS
376 ("\e$,15v\e(B" . "\e$,4!v")\e(B")
377 ("\e$,15v6-\e(B" . "\e$,4!v\e(B")
378 ("\e$,15v6-5u\e(B" . "\e$,4!w")\e(B")
379 ("\e$,15v6-5u6-\e(B" . "\e$,4!w\e(B")
380 ("\e$,15v6-5p\e(B" . "\e$,4!x")\e(B")
381 ("\e$,15v6-5p6-\e(B" . "\e$,4!x\e(B")
382
383 ("\e$,15w\e(B" . "\e$,4!y")\e(B")
384 ("\e$,15w6-\e(B" . "\e$,4!y\e(B")
385 ("\e$,15x\e(B" . "\e$,4!z")\e(B")
386 ("\e$,15x6-\e(B" . "\e$,4!z\e(B")
387 ("\e$,15x6-5p\e(B" . "\e$,4!{")\e(B")
388 ("\e$,15x6-5p6-\e(B" . "\e$,4!{\e(B")
389
390 ("\e$,15y\e(B" . "\e$,4!}\e(B")
391 ("\e$,15y6-\e(B" . "\e$,4!|\e(B")
392 ("\e$,15y6#\e(B" . "\e$,4!~\e(B")
393 ("\e$,15y6-5p\e(B" . "\e$,4!\7f\e(B")
394 ("\e$,15y6-5n\e(B" . "\e$,4" \e(B")
395 ("\e$,15y6-5o\e(B" . "\e$,4"!\e(B")
396 ;; NUKTAS
397 ("\e$,168\e(B" . "\e$,4 f"R"S\e(B")
398 ("\e$,1686-\e(B" . "\e$,4 d\e(B")
399 ("\e$,169\e(B" . "\e$,4 k")\e(B")
400 ("\e$,1696-\e(B" . "\e$,4 k\e(B")
401 ("\e$,16:\e(B" . "\e$,4 n")\e(B")
402 ("\e$,16:6-\e(B" . "\e$,4 n\e(B")
403 ("\e$,16;\e(B" . "\e$,4 w")\e(B")
404 ("\e$,16;6-\e(B" . "\e$,4 w\e(B")
405 ("\e$,16<\e(B" . "\e$,4!#"W\e(B")
406 ("\e$,16=\e(B" . "\e$,4!'"X\e(B")
407 ("\e$,16>\e(B" . "\e$,4!b"[\e(B")
408 ("\e$,16>6-\e(B" . "\e$,4!>\e(B")
409 ("\e$,16?\e(B" . "\e$,4!j"#")\e(B")
410 ;; misc modifiers.
411 ("\e$,15A\e(B" . "\\e$,4"$\e(B")
412 ("\e$,15B\e(B" . "\\e$,4"&\e(B")
413 ("\e$,15C\e(B" . "\e$,4 F\e(B")
414 ("\e$,15|\e(B" . "\e$,4"#\e(B")
415 ("\e$,15}\e(B" . "\e$,4 E\e(B")
416 ("\e$,16-\e(B" . "\e$,4""\e(B")
417 ("\e$,16-5p\e(B" . "\e$,4"%\e(B") ;; following "r"
418 ;; ("\e$,160\e(B" . "\e$,4 D\e(B")
419 ("\e$,16D\e(B" . "\e$,4 J\e(B")
420 ;; ("\e$,16F\e(B" . "")
421 ;; ("\e$,16G\e(B" . "")
422 ;; ("\e$,16H\e(B" . "")
423 ;; ("\e$,16I\e(B" . "")
424 ;; ("\e$,16J\e(B" . "")
425 ;; ("\e$,16K\e(B" . "")
426 ;; ("\e$,16L\e(B" . "")
427 ;; ("\e$,16M\e(B" . "")
428 ;; ("\e$,16N\e(B" . "")
429 ;; ("\e$,16O\e(B" . "")
430 )
431 "Devanagari characters to glyphs conversion table.
432 Default value contains only the basic rules. You may add your own
433 preferred rule from the sanskrit fonts." )
434
435 (defvar dev-char-glyph-hash
436 (let* ((hash (make-hash-table :test 'equal)))
437 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
438 dev-char-glyph)
439 hash))
440
441 (defvar dev-char-glyph-regexp
442 (devanagari-regexp-of-hashtbl-keys dev-char-glyph-hash))
443
444 ;; glyph-to-glyph conversion table.
445 ;; it is supposed that glyphs are ordered in
446 ;; [consonant/nukta] - [matra/halant] - [preceding-r] - [anuswar].
447
448 (defvar dev-glyph-glyph
449 '(("\\e$,4"'\e(B\\e$,4"&\e(B" . "\\e$,4"(\e(B")
450 ("\\e$,4"'\e(B\\e$,4"$\e(B" . "\\e$,4"(\e(B")
451 ("\e$,4"*\e(B\\e$,4"&\e(B" . "\e$,4"+\e(B")
452 ("\e$,4"*\e(B\\e$,4"'\e(B" . "\e$,4",\e(B")
453 ("\e$,4"*\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\e$,4"-\e(B")
454 ("\e$,4"2\e(B\\e$,4"&\e(B" . "\e$,4"3\e(B")
455 ("\e$,4"2\e(B\\e$,4"'\e(B" . "\e$,4"4\e(B")
456 ("\e$,4"2\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\e$,4"5\e(B")
457 ("\e$,4"#\e(B\\e$,4"6\e(B" . "\e$,4"7\e(B")
458 ("\e$,4"%\e(B\\e$,4"6\e(B" . "\e$,4"8\e(B")
459 ;;("\e$,4"6\e(B" . "\e$,4"9\e(B")
460 ("\e$,4"#\e(B\\e$,4":\e(B" . "\e$,4";\e(B")
461 ("\e$,4"%\e(B\\e$,4":\e(B" . "\e$,4"<\e(B")
462 ;;("\e$,4":\e(B" . "\e$,4"=\e(B")
463 ("\\e$,4"@\e(B\\e$,4"&\e(B" . "\\e$,4"A\e(B")
464 ("\\e$,4"@\e(B\\e$,4"'\e(B" . "\\e$,4"B\e(B")
465 ("\\e$,4"@\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\\e$,4"C\e(B")
466 ("\\e$,4"D\e(B\\e$,4"&\e(B" . "\\e$,4"E\e(B")
467 ("\\e$,4"D\e(B\\e$,4"'\e(B" . "\\e$,4"F\e(B")
468 ("\\e$,4"D\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\\e$,4"G\e(B")
469 ("\\e$,4"H\e(B\\e$,4"&\e(B" . "\\e$,4"I\e(B")
470 ("\\e$,4"H\e(B\\e$,4"'\e(B" . "\\e$,4"J\e(B")
471 ("\\e$,4"H\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\\e$,4"K\e(B")
472 ("\\e$,4"L\e(B\\e$,4"&\e(B" . "\\e$,4"M\e(B")
473 ("\\e$,4"L\e(B\\e$,4"'\e(B" . "\\e$,4"N\e(B")
474 ("\\e$,4"L\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\\e$,4"O\e(B")
475 ))
476 (defvar dev-glyph-glyph-hash
477 (let* ((hash (make-hash-table :test 'equal)))
478 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
479 dev-glyph-glyph)
480 hash))
481 (defvar dev-glyph-glyph-regexp
482 (devanagari-regexp-of-hashtbl-keys dev-glyph-glyph-hash))
483
484
485 ;; yet another glyph-to-glyph conversions.
486 (defvar dev-glyph-glyph-2
487 '(("\e$,4"*\e(B" . "\e$,4".\e(B")
488 ("\e$,4"+\e(B" . "\e$,4"/\e(B")
489 ("\e$,4",\e(B" . "\e$,4"0\e(B")
490 ("\e$,4"-\e(B" . "\e$,4"1\e(B")))
491 (defvar dev-glyph-glyph-2-hash
492 (let* ((hash (make-hash-table :test 'equal)))
493 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
494 dev-glyph-glyph-2)
495 hash))
496 (defvar dev-glyph-glyph-2-regexp
497 (devanagari-regexp-of-hashtbl-keys dev-glyph-glyph-2-hash))
498
499
500 (defun dev-charseq (from &optional to)
501 (if (null to) (setq to from))
502 (mapcar (function (lambda (x) (indian-glyph-char x 'devanagari)))
503 (devanagari-range from to)))
504
505 (defvar dev-glyph-cvn
506 (append
507 (dev-charseq #x2b)
508 (dev-charseq #x3c #xc1)
509 (dev-charseq #xc3))
510 "Devanagari Consonants/Vowels/Nukta Glyphs")
511
512 (defvar dev-glyph-space
513 (dev-charseq #xf0 #xfe)
514 "Devanagari Spacing Glyphs")
515
516 (defvar dev-glyph-right-modifier
517 (append
518 (dev-charseq #xc9)
519 (dev-charseq #xd2 #xd5))
520 "Devanagari Modifiers attached at the right side.")
521
522 (defvar dev-glyph-right-modifier-regexp
523 (concat "[" dev-glyph-right-modifier "]"))
524
525 (defvar dev-glyph-left-matra
526 (dev-charseq #xca #xd1)
527 "Devanagari Matras attached at the left side.")
528
529 (defvar dev-glyph-top-matra
530 (dev-charseq #xe0 #xef)
531 "Devanagari Matras attached at the top side.")
532
533 (defvar dev-glyph-bottom-modifier
534 (append
535 (dev-charseq #xd6 #xdf)
536 (dev-charseq #xc2))
537 "Devanagari Modifiers attached at the bottom.")
538
539 (defvar dev-glyph-order
540 `((,dev-glyph-cvn . 1)
541 (,dev-glyph-space . 2)
542 (,dev-glyph-right-modifier . 3)
543 (,dev-glyph-left-matra . 3) ;; processed by reference point.
544 (,dev-glyph-top-matra . 4)
545 (,(dev-charseq #xc7 #xc8) . 5)
546 (,(dev-charseq #xc4) . 6)
547 (,(dev-charseq #xc6) . 6)
548 (,(dev-charseq #xc5) . 7)
549 (,dev-glyph-bottom-modifier . 8)))
550
551 (mapc
552 (function (lambda (x)
553 (mapc
554 (function (lambda (y)
555 (put-char-code-property y 'composition-order (cdr x))))
556 (car x))))
557 dev-glyph-order)
558
559 (mapc
560 (function (lambda (x)
561 (put-char-code-property x 'reference-point '(3 . 5))))
562 dev-glyph-left-matra)
563
564 (defun devanagari-compose-syllable-string (string)
565 (with-temp-buffer
566 (insert (decompose-string string))
567 (devanagari-compose-syllable-region (point-min) (point-max))
568 (buffer-string)))
569
570 (defun devanagari-compose-syllable-region (from to)
571 "Compose devanagari syllable in region FROM to TO."
572 (let ((glyph-str nil) (cons-num 0) glyph-str-list
573 (last-halant nil) (preceding-r nil) (last-modifier nil)
574 (last-char (char-before to)) match-str
575 glyph-block split-pos)
576 (save-excursion
577 (save-restriction
578 ;;; *** char-to-glyph conversion ***
579 ;; Special rule 1. -- Last halant must be preserved.
580 (if (eq last-char ?\e$,16-\e(B)
581 (progn
582 (setq last-halant t)
583 (narrow-to-region from (1- to)))
584 (narrow-to-region from to)
585 ;; note if the last char is modifier.
586 (if (or (eq last-char ?\e$,15A\e(B) (eq last-char ?\e$,15B\e(B))
587 (setq last-modifier t)))
588 (goto-char (point-min))
589 ;; Special rule 2. -- preceding "r halant" must be modifier.
590 (when (looking-at "\e$,15p6-\e(B.")
591 (setq preceding-r t)
592 (goto-char (+ 2 (point))))
593 ;; translate the rest characters into glyphs
594 (while (re-search-forward dev-char-glyph-regexp nil t)
595 (setq match-str (match-string 0))
596 (setq glyph-str
597 (concat glyph-str
598 (gethash match-str dev-char-glyph-hash)))
599 ;; count the number of consonant-glyhs.
600 (if (string-match devanagari-consonant match-str)
601 (setq cons-num (1+ cons-num))))
602 ;; preceding-r must be attached before the anuswar if exists.
603 (if preceding-r
604 (if last-modifier
605 (setq glyph-str (concat (substring glyph-str 0 -1)
606 "\e$,4"'\e(B" (substring glyph-str -1)))
607 (setq glyph-str (concat glyph-str "\e$,4"'\e(B"))))
608 (if last-halant (setq glyph-str (concat glyph-str "\e$,4""\e(B")))
609 ;;; *** glyph-to-glyph conversion ***
610 (when (string-match dev-glyph-glyph-regexp glyph-str)
611 (setq glyph-str
612 (replace-match (gethash (match-string 0 glyph-str)
613 dev-glyph-glyph-hash)
614 nil t glyph-str))
615 (if (and (> cons-num 1)
616 (string-match dev-glyph-glyph-2-regexp glyph-str))
617 (setq glyph-str
618 (replace-match (gethash (match-string 0 glyph-str)
619 dev-glyph-glyph-2-hash)
620 nil t glyph-str))))
621 ;;; *** glyph reordering ***
622 (while (setq split-pos (string-match "\e$,4""\e(B\\|.$" glyph-str))
623 (setq glyph-block (substring glyph-str 0 (1+ split-pos)))
624 (setq glyph-str (substring glyph-str (1+ split-pos)))
625 (setq
626 glyph-block
627 (if (string-match dev-glyph-right-modifier-regexp glyph-block)
628 (sort (string-to-list glyph-block)
629 (function (lambda (x y)
630 (< (get-char-code-property x 'composition-order)
631 (get-char-code-property y 'composition-order)))))
632 (sort (string-to-list glyph-block)
633 (function (lambda (x y)
634 (let ((xo (get-char-code-property x 'composition-order))
635 (yo (get-char-code-property y 'composition-order)))
636 (if (= xo 2) nil (if (= yo 2) t (< xo yo)))))))))
637 (setq glyph-str-list (nconc glyph-str-list glyph-block)))
638 ;; concatenate and attach reference-points.
639 (setq glyph-str
640 (cdr
641 (apply
642 'nconc
643 (mapcar
644 (function (lambda (x)
645 (list
646 (or (get-char-code-property x 'reference-point)
647 '(5 . 3) ;; default reference point.
648 )
649 x)))
650 glyph-str-list))))))
651 (compose-region from to glyph-str)))
652
653 (provide 'devan-util)
654
655 ;;; arch-tag: 9bc4d6e3-f2b9-4110-886e-ff9b66b7eebc
656 ;;; devan-util.el ends here