]> code.delx.au - gnu-emacs/blob - lisp/language/mlm-util.el
Add a provide statement.
[gnu-emacs] / lisp / language / mlm-util.el
1 ;;; mlm-util.el --- support for composing malayalam characters -*-coding: iso-2022-7bit;-*-
2
3 ;; Copyright (C) 2003 Free Software Foundation, Inc.
4
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6 ;; Keywords: multilingual, Malayalam
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;; Created: Feb. 11. 2003
26
27 ;;; Commentary:
28
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
30 ;; composition of Malayalam script characters.
31
32 ;;; Code:
33
34 ;; Malayalam Composable Pattern
35 ;; C .. Consonants
36 ;; V .. Vowel
37 ;; H .. Halant
38 ;; M .. Matra
39 ;; V .. Vowel
40 ;; A .. Anuswar
41 ;; D .. Chandrabindu
42 ;; (N .. Zerowidth Non Joiner)
43 ;; (J .. Zerowidth Joiner. )
44 ;; 1. vowel
45 ;; V(A|visargam)?
46 ;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya)
47 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
48
49 (defconst malayalam-consonant
50 "[\e$,1@5\e(B-\e$,1@Y\e(B]")
51
52 (defconst malayalam-composable-pattern
53 (concat
54 "\\([\e$,1@%\e(B-\e$,1@4\e(B][\e$,1@"\e(B]?\\)\\|\e$,1@#\e(B"
55 "\\|\\("
56 "\\(?:\\(?:[\e$,1@5\e(B-\e$,1@Y\e(B]\e$,1@m\e(B\\)?\\(?:[\e$,1@5\e(B-\e$,1@Y\e(B]\e$,1@m\e(B\\)?\\(?:[\e$,1@5\e(B-\e$,1@Y\e(B]\e$,1@m\e(B\\)?[\e$,1@5\e(B-\e$,1@Y\e(B]\e$,1@m\e(B\\)?"
57 "[\e$,1@5\e(B-\e$,1@Y\e(B]\\(?:\e$,1@m\e(B\\|[\e$,1@^\e(B-\e$,1@c@f@g@h@j@j@k@l\e(B]?[\e$,1@"@m\e(B]?\\)?"
58 "\\)")
59 "Regexp matching a composable sequence of Malayalam characters.")
60
61 ;;;###autoload
62 (defun malayalam-compose-region (from to)
63 (interactive "r")
64 (save-excursion
65 (save-restriction
66 (narrow-to-region from to)
67 (goto-char (point-min))
68 (while (re-search-forward malayalam-composable-pattern nil t)
69 (malayalam-compose-syllable-region (match-beginning 0)
70 (match-end 0))))))
71 (defun malayalam-compose-string (string)
72 (with-temp-buffer
73 (insert (decompose-string string))
74 (malayalam-compose-region (point-min) (point-max))
75 (buffer-string)))
76
77 ;;;###autoload
78 (defun malayalam-post-read-conversion (len)
79 (save-excursion
80 (save-restriction
81 (let ((buffer-modified-p (buffer-modified-p)))
82 (narrow-to-region (point) (+ (point) len))
83 (malayalam-compose-region (point-min) (point-max))
84 (set-buffer-modified-p buffer-modified-p)
85 (- (point-max) (point-min))))))
86
87 (defun malayalam-range (from to)
88 "Make the list of the integers of range FROM to TO."
89 (let (result)
90 (while (<= from to) (setq result (cons to result) to (1- to))) result))
91
92 (defun malayalam-regexp-of-hashtbl-keys (hashtbl)
93 "Return a regular expression that matches all keys in hashtable HASHTBL."
94 (let ((max-specpdl-size 1000))
95 (regexp-opt
96 (sort
97 (let (dummy)
98 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl)
99 dummy)
100 (function (lambda (x y) (> (length x) (length y))))))))
101
102
103 ;;;###autoload
104 (defun malayalam-composition-function (from to pattern &optional string)
105 "Compose Malayalam characters in REGION, or STRING if specified.
106 Assume that the REGION or STRING must fully match the composable
107 PATTERN regexp."
108 (if string (malayalam-compose-syllable-string string)
109 (malayalam-compose-syllable-region from to))
110 (- to from))
111
112 ;; Register a function to compose Malayalam characters.
113 (mapc
114 (function (lambda (ucs)
115 (aset composition-function-table (decode-char 'ucs ucs)
116 (list (cons malayalam-composable-pattern
117 'malayalam-composition-function)))))
118 (nconc '(#x0d02 #x0d03) (malayalam-range #x0d05 #x0d39)))
119
120 ;; Notes on conversion steps.
121
122 ;; 1. chars to glyphs
123 ;;
124 ;; Simple replacement of characters to glyphs is done.
125
126 ;; 2. glyphs reordering.
127 ;;
128 ;; Two special reordering rule takes place.
129 ;; a. following "\e$,46[\e(B" goes to the front.
130 ;; b. following "\e$,46S6S\e(B", "\e$,46S\e(B" or "\e$,46T\e(B" goes to the front.
131 ;; This reordering occurs only to the last cluster of consonants.
132 ;; Preceding consonants with halant characters are not affected.
133
134 ;; 3. Composition.
135 ;;
136 ;; left modifiers will be attached at the left.
137 ;; others will be attached right.
138
139 (defvar mlm-char-glyph
140 '(;; various signs
141 ("\e$,1@"\e(B" . "\e$,46W\e(B")
142 ("\e$,1@#\e(B" . "\e$,46X\e(B")
143 ;; Independent Vowels
144 ("\e$,1@%\e(B" . "\e$,46!\e(B")
145 ("\e$,1@&\e(B" . "\e$,46"\e(B")
146 ("\e$,1@'\e(B" . "\e$,46#\e(B")
147 ("\e$,1@(\e(B" . "\e$,46#6U\e(B")
148 ("\e$,1@)\e(B" . "\e$,46$\e(B")
149 ("\e$,1@*\e(B" . "\e$,46$6U\e(B")
150 ("\e$,1@+\e(B" . "\e$,46%\e(B")
151 ("\e$,1@,\e(B" . "nil") ;; not in present use, not supported.
152 ("\e$,1@.\e(B" . "\e$,46&\e(B")
153 ("\e$,1@/\e(B" . "\e$,46'\e(B")
154 ("\e$,1@0\e(B" . "\e$,46S6&\e(B")
155 ("\e$,1@2\e(B" . "\e$,46(\e(B")
156 ("\e$,1@3\e(B" . "\e$,46(6M\e(B")
157 ("\e$,1@4\e(B" . "\e$,46(6U\e(B")
158 ;; Consonants
159 ("\e$,1@5\e(B" . "\e$,46)\e(B")
160 ("\e$,1@5@m@5\e(B" . "\e$,47!\e(B")
161 ("\e$,1@5@m@S\e(B" . "\e$,47"\e(B")
162 ("\e$,1@5@m@W\e(B" . "\e$,47#\e(B")
163 ("\e$,1@5@m@?\e(B" . "\e$,47N\e(B")
164 ("\e$,1@5@m@D\e(B" . "\e$,47`\e(B")
165 ("\e$,1@5@a\e(B" . "\e$,47f\e(B")
166 ("\e$,1@5@m@5@a\e(B" . "\e$,47g\e(B")
167 ("\e$,1@5@a\e(B" . "\e$,47f\e(B")
168 ("\e$,1@5@m@5@a\e(B" . "\e$,47g\e(B")
169
170 ("\e$,1@6\e(B" . "\e$,46*\e(B")
171
172 ("\e$,1@7\e(B" . "\e$,46+\e(B")
173 ("\e$,1@7@m@7\e(B" . "\e$,47$\e(B")
174 ("\e$,1@7@m@R\e(B" . "\e$,47%\e(B")
175 ("\e$,1@7@m@N\e(B" . "\e$,47\\e(B")
176 ("\e$,1@7@m@H\e(B" . "\e$,47a\e(B")
177
178 ("\e$,1@8\e(B" . "\e$,46,\e(B")
179
180 ("\e$,1@9\e(B" . "\e$,46-\e(B")
181 ("\e$,1@9@m@5\e(B" . "\e$,47&\e(B")
182 ("\e$,1@9@m@9\e(B" . "\e$,47'\e(B")
183 ("\e$,1@9@m@5@a\e(B" . "\e$,47h\e(B")
184
185 ("\e$,1@:\e(B" . "\e$,46.\e(B")
186 ("\e$,1@:@m@:\e(B" . "\e$,47(\e(B") ;; duplicate
187 ("\e$,1@:@m@;\e(B" . "\e$,47Q\e(B")
188
189 ("\e$,1@;\e(B" . "\e$,46/\e(B")
190
191 ("\e$,1@<\e(B" . "\e$,460\e(B")
192 ("\e$,1@<@m@<\e(B" . "\e$,47V\e(B")
193 ("\e$,1@<@m@>\e(B" . "\e$,47Z\e(B")
194
195 ("\e$,1@=\e(B" . "\e$,461\e(B")
196
197 ("\e$,1@>\e(B" . "\e$,462\e(B")
198 ("\e$,1@>@m@:\e(B" . "\e$,47)\e(B")
199 ("\e$,1@>@m@>\e(B" . "\e$,47*\e(B")
200
201 ("\e$,1@?\e(B" . "\e$,463\e(B")
202 ("\e$,1@?@m@?\e(B" . "\e$,47+\e(B")
203
204 ("\e$,1@@\e(B" . "\e$,464\e(B")
205 ("\e$,1@A\e(B" . "\e$,465\e(B")
206 ("\e$,1@A@m@A\e(B" . "\e$,47M\e(B")
207 ("\e$,1@B\e(B" . "\e$,466\e(B")
208
209 ("\e$,1@C\e(B" . "\e$,467\e(B")
210 ("\e$,1@C@a@m\e(B" . "\e$,47,\e(B") ;; half consonant
211 ("\e$,1@C@m@?\e(B" . "\e$,47-\e(B")
212 ("\e$,1@C@m@C\e(B" . "\e$,47.\e(B")
213 ("\e$,1@C@m@N\e(B" . "\e$,47W\e(B")
214 ("\e$,1@C@m@A\e(B" . "\e$,47^\e(B")
215 ("\e$,1@C@a\e(B" . "\e$,47i\e(B")
216
217 ("\e$,1@D\e(B" . "\e$,468\e(B")
218 ("\e$,1@D@m@D\e(B" . "\e$,47/\e(B")
219 ("\e$,1@D@m@E\e(B" . "\e$,470\e(B")
220 ("\e$,1@D@m@X\e(B" . "\e$,47U\e(B")
221 ("\e$,1@D@m@M\e(B" . "\e$,47[\e(B")
222 ("\e$,1@D@m@N\e(B" . "\e$,47_\e(B")
223
224 ("\e$,1@E\e(B" . "\e$,469\e(B")
225
226 ("\e$,1@F\e(B" . "\e$,46:\e(B")
227 ("\e$,1@F@m@F\e(B" . "\e$,471\e(B")
228 ("\e$,1@F@m@G\e(B" . "\e$,472\e(B")
229
230 ("\e$,1@G\e(B" . "\e$,46;\e(B")
231
232 ("\e$,1@H\e(B" . "\e$,46<\e(B")
233 ("\e$,1@H@a@m\e(B" . "\e$,473\e(B") ;; half consonant
234 ("\e$,1@H@m@D\e(B" . "\e$,474\e(B")
235 ("\e$,1@H@m@F\e(B" . "\e$,475\e(B")
236 ("\e$,1@H@m@H\e(B" . "\e$,476\e(B")
237 ("\e$,1@H@m@N\e(B" . "\e$,477\e(B")
238 ("\e$,1@H@m@G\e(B" . "\e$,47T\e(B")
239 ("\e$,1@H@m@E\e(B" . "\e$,47Y\e(B")
240 ("\e$,1@H@m@Q\e(B" . "\e$,47b\e(B")
241 ("\e$,1@H@a\e(B" . "\e$,47k\e(B")
242 ("\e$,1@H@m@H@a\e(B" . "\e$,47l\e(B")
243
244 ("\e$,1@J\e(B" . "\e$,46=\e(B")
245 ("\e$,1@J@m@J\e(B" . "\e$,478\e(B") ;; duplicate
246 ("\e$,1@J@m@R\e(B" . "\e$,479\e(B") ;; lakar
247
248 ("\e$,1@K\e(B" . "\e$,46>\e(B")
249
250 ("\e$,1@L\e(B" . "\e$,46?\e(B")
251 ("\e$,1@L@m@L\e(B" . "\e$,47:\e(B") ;; duplicate
252 ("\e$,1@L@m@R\e(B" . "\e$,47;\e(B") ;; lakar
253 ("\e$,1@L@m@G\e(B" . "\e$,47O\e(B")
254 ("\e$,1@L@m@F\e(B" . "\e$,47P\e(B")
255
256 ("\e$,1@M\e(B" . "\e$,46@\e(B")
257
258 ("\e$,1@N\e(B" . "\e$,46A\e(B")
259 ("\e$,1@N@m@J\e(B" . "\e$,47<\e(B")
260 ("\e$,1@N@m@N\e(B" . "\e$,47=\e(B")
261 ("\e$,1@N@m@R\e(B" . "\e$,47>\e(B") ;; lakar
262
263 ("\e$,1@O\e(B" . "\e$,46B\e(B")
264 ("\e$,1@O@m@O\e(B" . "\e$,47?\e(B") ;; duplicate
265 ("\e$,1@O@m@5@m@5\e(B" . "\e$,47m\e(B")
266
267 ("\e$,1@P\e(B" . "\e$,46C\e(B")
268 ("\e$,1@P@a@m\e(B" . "\e$,47@\e(B")
269 ("\e$,1@P@a\e(B" . "\e$,47j\e(B")
270
271 ("\e$,1@Q\e(B" . "\e$,46D\e(B")
272 ("\e$,1@Q@m\e(B" . "\e$,47@\e(B") ;; same glyph as "\e$,1@P@m\e(B"
273 ("\e$,1@Q@a@m\e(B" . "\e$,47@\e(B") ;; same glyph as "\e$,1@P@m\e(B"
274 ;;("\e$,1@Q@m@Q\e(B" . "\e$,47A\e(B")
275 ("\e$,1@Q@m@Q\e(B" . "\e$,47d\e(B")
276
277 ("\e$,1@R\e(B" . "\e$,46E\e(B")
278 ("\e$,1@R@a@m\e(B" . "\e$,47B\e(B")
279 ("\e$,1@R@m@R\e(B" . "\e$,47C\e(B") ;; lakar
280 ("\e$,1@R@m@J\e(B" . "\e$,47e\e(B")
281
282 ("\e$,1@S\e(B" . "\e$,46F\e(B")
283 ("\e$,1@S@a@m\e(B" . "\e$,47D\e(B")
284 ("\e$,1@S@m@S\e(B" . "\e$,47E\e(B")
285
286 ("\e$,1@T\e(B" . "\e$,46G\e(B")
287
288 ("\e$,1@U\e(B" . "\e$,46H\e(B")
289 ("\e$,1@U@m@U\e(B" . "\e$,47F\e(B")
290
291 ("\e$,1@V\e(B" . "\e$,46I\e(B")
292 ("\e$,1@V@m@R\e(B" . "\e$,47G\e(B")
293 ("\e$,1@V@m@V\e(B" . "\e$,47H\e(B")
294 ("\e$,1@V@m@:\e(B" . "\e$,47]\e(B")
295
296 ("\e$,1@W\e(B" . "\e$,46J\e(B")
297 ("\e$,1@W@m@?\e(B" . "\e$,47c\e(B")
298
299 ("\e$,1@X\e(B" . "\e$,46K\e(B")
300 ("\e$,1@X@m@R\e(B" . "\e$,47I\e(B")
301 ("\e$,1@X@m@X\e(B" . "\e$,47J\e(B")
302 ("\e$,1@X@m@Q@m@Q\e(B" . "\e$,47L\e(B")
303 ("\e$,1@X@m@E\e(B" . "\e$,47X\e(B")
304
305 ("\e$,1@Y\e(B" . "\e$,46L\e(B")
306 ("\e$,1@Y@m@R\e(B" . "\e$,47K\e(B")
307 ("\e$,1@Y@m@N\e(B" . "\e$,47R\e(B")
308 ("\e$,1@Y@m@H\e(B" . "\e$,47S\e(B")
309
310 ;; Dependent vowel signs
311 ("\e$,1@^\e(B" . "\e$,46M\e(B")
312 ("\e$,1@_\e(B" . "\e$,46N\e(B")
313 ("\e$,1@`\e(B" . "\e$,46O\e(B")
314 ("\e$,1@a\e(B" . "\e$,46P\e(B")
315 ("\e$,1@b\e(B" . "\e$,46Q\e(B")
316 ("\e$,1@c\e(B" . "\e$,46R\e(B")
317 ("\e$,1@f\e(B" . "\e$,46S\e(B")
318 ("\e$,1@g\e(B" . "\e$,46T\e(B")
319 ("\e$,1@h\e(B" . "\e$,46S6S\e(B")
320 ("\e$,1@j\e(B" . "\e$,46S6M\e(B")
321 ("\e$,1@k\e(B" . "\e$,46T6M\e(B")
322 ("\e$,1@l\e(B" . "\e$,46U\e(B")
323 ;; Various signs
324 ("\e$,1@m\e(B" . "\e$,46V\e(B")
325 ("\e$,1@m@O\e(B" . "\e$,46Y\e(B") ;; yakar
326 ("\e$,1@m@O@a\e(B" . "\e$,46\\e(B") ;; yakar + u
327 ("\e$,1@m@O@b\e(B" . "\e$,46]\e(B") ;; yakar + uu
328 ("\e$,1@m@U\e(B" . "\e$,46Z\e(B") ;; vakar modifier
329 ("\e$,1@m@P\e(B" . "\e$,46[\e(B") ;; rakar modifier is the same to rra modifier.
330 ("\e$,1@m@P@m\e(B" . "\e$,46R\e(B") ;; halant + rakar + halant
331 ("\e$,1@m@Q\e(B" . "\e$,46[\e(B") ;; rrakar modifier
332 ("\e$,1@m@Q@m\e(B" . "\e$,46R\e(B") ;; halant + rrakar + halant
333 ("\e$,1@m@m\e(B" . "\e$,46V\e(B") ;; double omission sign to stop forming half consonant.
334 ("\e$,1@w\e(B" . "\e$,46U\e(B") ;; not in present use, already at 0D4C.
335 ))
336
337 (defvar mlm-char-glyph-hash
338 (let* ((hash (make-hash-table :test 'equal)))
339 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
340 mlm-char-glyph)
341 hash))
342
343 (defvar mlm-char-glyph-regexp
344 (malayalam-regexp-of-hashtbl-keys mlm-char-glyph-hash))
345
346 ;; Malayalam languages needed to be reordered in a complex mannar.
347
348 (defvar mlm-consonants
349 (concat
350 "\e$,46)6*6+6,6-6.6/606162636465666768696:6;6<6=6>6?6@6A6B6C6D6E6F6G6H6I6J6K6L\e(B"
351 "\e$,47!7"7#7$7%7&7'7(7)7*7+7,7-7.7/707172737475767778797:7;7<7=7>7?7@7A7B7C7D7E7F7G7H7I7J7K7L7M7N7O7P7Q7R7S7T7U7V7W7X7Y7Z7[7\7]7^7_7`7a7b7c7d7e\e(B"
352 ))
353
354 (defvar mlm-consonants-regexp
355 (concat "\\(\e$,46[\e(B?[" mlm-consonants "][\e$,46Y6Z\e(B]?\\)"))
356
357 (defvar mlm-glyph-reorder-key-glyphs "[\e$,46[6S6T\e(B]")
358
359 (defvar mlm-glyph-reordering-regexp-list
360 `((,(concat "\\([" mlm-consonants "][\e$,46Y6Z\e(B]?\\)\e$,46[\e(B") . "\e$,46[\e(B\\1")
361 (,(concat mlm-consonants-regexp "\e$,46S6S\e(B") . "\e$,46S6S\e(B\\1")
362 (,(concat mlm-consonants-regexp "\e$,46S\e(B") . "\e$,46S\e(B\\1")
363 (,(concat mlm-consonants-regexp "\e$,46T\e(B") . "\e$,46T\e(B\\1")))
364
365 (defun malayalam-compose-syllable-string (string)
366 (with-temp-buffer
367 (insert (decompose-string string))
368 (malayalam-compose-syllable-region (point-min) (point-max))
369 (buffer-string)))
370
371 (defun malayalam-compose-syllable-region (from to)
372 "Compose malayalam syllable in region FROM to TO."
373 (let (glyph-str
374 match-str
375 glyph-reorder-regexps
376 glyph-reorder-replace
377 glyph-reorder-regexp)
378 (save-excursion
379 (save-restriction
380 (narrow-to-region from to)
381 (goto-char (point-min))
382 ;; char-glyph-conversion
383 (while (re-search-forward mlm-char-glyph-regexp nil t)
384 (setq match-str (match-string 0))
385 (setq glyph-str
386 (concat glyph-str (gethash match-str mlm-char-glyph-hash))))
387 (when (string-match mlm-glyph-reorder-key-glyphs glyph-str)
388 ;; glyph reordering
389 (setq glyph-reorder-regexps mlm-glyph-reordering-regexp-list)
390 (while glyph-reorder-regexps
391 (setq glyph-reorder-regexp (caar glyph-reorder-regexps))
392 (setq glyph-reorder-replace (cdar glyph-reorder-regexps))
393 (setq glyph-reorder-regexps (cdr glyph-reorder-regexps))
394 (if (string-match glyph-reorder-regexp glyph-str)
395 (setq glyph-str
396 (replace-match glyph-reorder-replace nil nil
397 glyph-str)))))
398 ;; concatenate and attach reference-points.
399 (setq glyph-str
400 (cdr
401 (apply
402 'nconc
403 (mapcar
404 (function
405 (lambda (x) (list '(5 . 3) x))) ;; default ref. point.
406 glyph-str))))
407 (compose-region from to glyph-str)))))
408
409 (provide 'mlm-util)
410
411 ;;; arch-tag: 7f25ee67-8f9d-49f2-837b-35c412c00eba
412 ;;; devan-util.el ends here