1 ;;; ethio-util.el --- utilities for Ethiopic -*- coding: utf-8; -*-
3 ;; Copyright (C) 1997, 1998, 2002, 2003, 2004, 2005, 2006, 2007
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H14PRO021
8 ;; Copyright (C) 2005, 2006
9 ;; National Institute of Advanced Industrial Science and Technology (AIST)
10 ;; Registration Number: H15PRO110
12 ;; Keywords: mule, multilingual, Ethiopic
14 ;; This file is part of GNU Emacs.
16 ;; GNU Emacs is free software; you can redistribute it and/or modify
17 ;; it under the terms of the GNU General Public License as published by
18 ;; the Free Software Foundation; either version 3, or (at your option)
21 ;; GNU Emacs is distributed in the hope that it will be useful,
22 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 ;; GNU General Public License for more details.
26 ;; You should have received a copy of the GNU General Public License
27 ;; along with GNU Emacs; see the file COPYING. If not, write to the
28 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
29 ;; Boston, MA 02110-1301, USA.
31 ;; Author: TAKAHASHI Naoto <ntakahas@m17n.org>
39 (defvar rmail-current-message)
40 (defvar rmail-message-vector)
42 ;; Information for exiting Ethiopic environment.
43 (defvar exit-ethiopic-environment-data nil)
;; Install the Ethiopic global key bindings and file hooks.
;; For each (KEY . COMMAND) pair in `key-bindings', the key's current
;; global binding is first recorded as (KEY . OLD-BINDING) at the front
;; of `exit-ethiopic-environment-data', and only then is KEY rebound to
;; COMMAND.  `exit-ethiopic-environment' consumes that list to undo the
;; changes.
46 (defun setup-ethiopic-environment-internal ()
47 (let ((key-bindings '((" " . ethio-insert-space)
48 ([?\S- ] . ethio-insert-ethio-space)
49 ;; ([?\C-'] . ethio-gemination)
50 ([f3] . ethio-fidel-to-sera-buffer)
51 ([S-f3] . ethio-fidel-to-sera-region)
52 ([C-f3] . ethio-fidel-to-sera-marker)
53 ([f4] . ethio-sera-to-fidel-buffer)
54 ([S-f4] . ethio-sera-to-fidel-region)
55 ([C-f4] . ethio-sera-to-fidel-marker)
;; NOTE(review): no definition of `ethio-toggle-punctuation' is visible
;; in this file view -- confirm the command exists before binding it.
56 ([S-f5] . ethio-toggle-punctuation)
57 ([S-f6] . ethio-modify-vowel)
58 ([S-f7] . ethio-replace-space)
59 ;; ([S-f8] . ethio-input-special-character) ; deprecated
60 ([C-f9] . ethio-toggle-space)
;; S-f9 deliberately duplicates the S-f7 binding.
61 ([S-f9] . ethio-replace-space) ; as requested
;; Take the key of the head pair, remember its present global binding,
;; install the new command, then advance to the next pair.
65 (setq kb (car (car key-bindings)))
66 (setq exit-ethiopic-environment-data
67 (cons (cons kb (global-key-binding kb))
68 exit-ethiopic-environment-data))
69 (global-set-key kb (cdr (car key-bindings)))
70 (setq key-bindings (cdr key-bindings))))
;; `ethio-find-file' runs both when a file is visited and after each
;; save; `ethio-write-file' runs from `write-file-functions'.
72 (add-hook 'find-file-hook 'ethio-find-file)
73 (add-hook 'write-file-functions 'ethio-write-file)
74 (add-hook 'after-save-hook 'ethio-find-file))
(defun exit-ethiopic-environment ()
  "Exit Ethiopic language environment.
Restore each global key binding recorded in
`exit-ethiopic-environment-data', emptying that list along the way,
and remove the Ethiopic functions from the file hooks."
  ;; Every element is (KEY . SAVED-BINDING).  An element is dropped
  ;; from the list only after its key has been rebound.
  (while exit-ethiopic-environment-data
    (let ((saved (car exit-ethiopic-environment-data)))
      (global-set-key (car saved) (cdr saved)))
    (setq exit-ethiopic-environment-data
          (cdr exit-ethiopic-environment-data)))
  ;; Undo the hook registrations made when the environment was set up.
  (remove-hook 'find-file-hook #'ethio-find-file)
  (remove-hook 'write-file-functions #'ethio-write-file)
  (remove-hook 'after-save-hook #'ethio-find-file))
89 ;; ETHIOPIC UTILITY FUNCTIONS
92 ;; If the filename ends in ".sera", editing is done in fidel
93 ;; but file I/O is done in SERA.
95 ;; If the filename ends in ".java", editing is done in fidel
96 ;; but file I/O is done in the \uXXXX style, where XXXX is
97 ;; the Unicode codepoint for the Ethiopic character.
99 ;; If the filename ends in ".tex", editing is done in fidel
100 ;; but file I/O is done in EthioTeX format.
106 (defvar ethio-primary-language 'tigrigna
107 "*Symbol that defines the primary language in SERA --> FIDEL conversion.
108 The value should be one of: `tigrigna', `amharic' or `english'.")
110 (defvar ethio-secondary-language 'english
111 "*Symbol that defines the secondary language in SERA --> FIDEL conversion.
112 The value should be one of: `tigrigna', `amharic' or `english'.")
114 (defvar ethio-use-colon-for-colon nil
115 "*Non-nil means associate ASCII colon with Ethiopic colon.
116 If nil, associate ASCII colon with Ethiopic word separator, i.e., two
117 vertically stacked dots. All SERA <--> FIDEL converters refer this
120 (defvar ethio-use-three-dot-question nil
121 "*Non-nil means associate ASCII question mark with Ethiopic old style question mark (three vertically stacked dots).
122 If nil, associate ASCII question mark with Ethiopic stylized question
123 mark. All SERA <--> FIDEL converters refer this variable.")
125 (defvar ethio-quote-vowel-always nil
126 "*Non-nil means always put an apostrophe before an isolated vowel (except at word initial) in FIDEL --> SERA conversion.
127 If nil, put an apostrophe only between a 6th-form consonant and an
130 (defvar ethio-W-sixth-always nil
131 "*Non-nil means convert the Wu-form of a 12-form consonant to \"W'\" instead of \"Wu\" in FIDEL --> SERA conversion.")
133 (defvar ethio-numeric-reduction 0
134 "*Degree of reduction in converting Ethiopic digits into Arabic digits.
136 For example, ({10}{9}{100}{80}{7}) is converted into:
137 `10`9`100`80`7 if `ethio-numeric-reduction' is 0,
138 `109100807 if `ethio-numeric-reduction' is 1,
139 `10900807 if `ethio-numeric-reduction' is 2.")
141 (defvar ethio-java-save-lowercase nil
142 "*Non-nil means save Ethiopic characters in lowercase hex numbers to Java files.
143 If nil, use uppercases.")
(defun ethio-prefer-amharic-p ()
  "Return t if Amharic is preferred to Tigrigna, nil otherwise.
Amharic is preferred when `ethio-primary-language' is `amharic', or
when the primary language is not `tigrigna' and
`ethio-secondary-language' is `amharic'."
  (cond
   ((eq ethio-primary-language 'amharic) t)
   ((eq ethio-primary-language 'tigrigna) nil)
   ;; Primary language is neither: the secondary language decides.
   (t (eq ethio-secondary-language 'amharic))))
150 (defun ethio-prefer-amharic (arg)
153 (robin-modify-package "ethiopic-sera" "'a" ?አ)
154 (robin-modify-package "ethiopic-sera" "a" "አ")
155 (robin-modify-package "ethiopic-sera" "'A" ?ኣ)
156 (robin-modify-package "ethiopic-sera" "A" "ኣ"))
157 (robin-modify-package "ethiopic-sera" "'A" ?አ)
158 (robin-modify-package "ethiopic-sera" "A" "አ")
159 (robin-modify-package "ethiopic-sera" "'a" ?ኣ)
160 (robin-modify-package "ethiopic-sera" "a" "ኣ")))
162 (defun ethio-use-colon-for-colon (arg)
165 (robin-modify-package "ethiopic-sera" ":" ?፥)
166 (robin-modify-package "ethiopic-sera" "`:" ?፡))
167 (robin-modify-package "ethiopic-sera" " : " ?፡)
168 (robin-modify-package "ethiopic-sera" ":" "፡")
169 (robin-modify-package "ethiopic-sera" "-:" ?፥)))
171 (defun ethio-use-three-dot-question (arg)
174 (robin-modify-package "ethiopic-sera" "?" ?፧)
175 (robin-modify-package "ethiopic-sera" "`?" ??))
176 (robin-modify-package "ethiopic-sera" "?" ??)
177 (robin-modify-package "ethiopic-sera" "`?" ?፧)))
(defun ethio-adjust-robin ()
  "Apply the current user preferences to the \"ethiopic-sera\" robin package.
The Amharic/Tigrigna preference comes from `ethio-prefer-amharic-p';
the colon and question-mark preferences come from the variables
`ethio-use-colon-for-colon' and `ethio-use-three-dot-question'."
  (let ((amharic-p (ethio-prefer-amharic-p)))
    (ethio-prefer-amharic amharic-p))
  ;; Each of these names is both an option variable and the function
  ;; that applies it to the package.
  (ethio-use-colon-for-colon ethio-use-colon-for-colon)
  (ethio-use-three-dot-question ethio-use-three-dot-question))

;; Re-apply the preferences every time a robin package is activated.
(add-hook 'robin-activate-hook #'ethio-adjust-robin)
191 (defun ethio-sera-to-fidel-buffer (&optional secondary force)
192 "Convert the current buffer from SERA to FIDEL.
194 The variable `ethio-primary-language' specifies the primary
195 language and `ethio-secondary-language' specifies the secondary.
197 If the 1st optional argument SECONDARY is non-nil, assume the
198 buffer begins with the secondary language; otherwise with the
201 If the 2nd optional argument FORCE is non-nil, perform conversion
202 even if the buffer is read-only.
204 See also the descriptions of the variables
205 `ethio-use-colon-for-colon' and `ethio-use-three-dot-question'."
208 (ethio-sera-to-fidel-region (point-min) (point-max) secondary force))
210 ;; To avoid byte-compiler warnings. It should never be set globally.
211 (defvar ethio-sera-being-called-by-w3)
212 ;; This variable will be bound by some third-party package.
213 (defvar sera-being-called-by-w3)
216 (defun ethio-sera-to-fidel-region (begin end &optional secondary force)
217 "Convert the characters in region from SERA to FIDEL.
219 The variable `ethio-primary-language' specifies the primary
220 language and `ethio-secondary-language' specifies the secondary.
222 If the 3rd argument SECONDARY is given and non-nil, assume the
223 region begins with the secondary language; otherwise with the
226 If the 4th argument FORCE is given and non-nil, perform
227 conversion even if the buffer is read-only.
229 See also the descriptions of the variables
230 `ethio-use-colon-for-colon' and `ethio-use-three-dot-question'."
233 (if (and buffer-read-only
235 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
238 (let ((ethio-primary-language ethio-primary-language)
239 (ethio-secondary-language ethio-secondary-language)
240 ;; The above two variables may be changed temporarily by tilde
241 ;; escapes during conversion. We bind them to the variables
242 ;; of the same names so that the original values are restored
243 ;; when this function exits.
244 (buffer-read-only nil)
245 (lang (if secondary ethio-secondary-language ethio-primary-language))
248 (ethio-use-colon-for-colon ethio-use-colon-for-colon)
249 (ethio-use-three-dot-question ethio-use-three-dot-question)
252 (narrow-to-region begin end)
253 (goto-char (point-min))
258 (ethio-prefer-amharic t)
259 (ethio-sera-to-fidel-region-ethio 'amharic))
261 (ethio-prefer-amharic nil)
262 (ethio-sera-to-fidel-region-ethio 'tigrigna))
264 (ethio-sera-to-fidel-region-noethio))))
267 (if (eq lang ethio-primary-language)
268 ethio-secondary-language
269 ethio-primary-language)
272 ;; Restore user's preference.
273 (ethio-adjust-robin))
275 (defun ethio-sera-to-fidel-region-noethio ()
276 "Return next language as symbol: amharic, tigrigna, toggle or nil."
280 ;; No more "\", i.e. nothing to do.
281 ((not (search-forward "\\" nil 0))
284 ;; Hereafter point is put after a "\".
285 ;; First delete that "\", then check the following chars.
288 ((progn (delete-char -1) (setq lflag (ethio-process-language-flag)))
291 ;; "\\" : leave the second "\" and continue in the same language.
292 ((= (following-char) ?\\)
296 ;; "\ " : delete the following " " and toggle the language.
297 ((= (following-char) 32)
301 ;; A "\" but not a special sequence: simply toggle the language.
305 (defun ethio-sera-to-fidel-region-ethio (lang)
306 "Return next language as symbol: amharic, tigrigna, toggle or nil."
310 (if (re-search-forward "\\(`[1-9][0-9]*\\)\\|[\\<&]" nil t)
313 (robin-convert-region (point-min) (point-max) "ethiopic-sera")
314 (goto-char (point-max)))
318 ((= (following-char) ?`)
320 (ethio-process-digits)
324 (if (or (and (boundp 'ethio-sera-being-called-by-w3)
325 ethio-sera-being-called-by-w3)
326 (and (boundp 'sera-being-called-by-w3)
327 sera-being-called-by-w3))
328 (search-forward (if (= (following-char) ?<) ">" ";") nil 0)
335 ;; Now we must be looking at a "\".
336 ;; First delete that "\", then check the following chars.
338 ((progn (delete-char 1) (= (following-char) 32))
342 ((looking-at "[,.;:'`?\\]+")
343 (goto-char (match-end 0))
346 ((/= (following-char) ?~)
349 ;; Now we must be looking at a "~".
351 ((setq lflag (ethio-process-language-flag))
354 ;; Delete the following "~" and check the following chars.
356 ((progn (delete-char 1) (looking-at "! ?"))
358 (if (re-search-forward "\\\\~! ?" nil 0)
364 (ethio-use-colon-for-colon t)
369 (ethio-use-colon-for-colon nil)
374 (ethio-use-three-dot-question t)
377 ((looking-at "\\? ?")
379 (ethio-use-three-dot-question nil)
382 ;; Unknown tilde escape. Recover the deleted chars.
387 (defun ethio-process-language-flag nil
388 "Process a language flag of the form \"~lang\" or \"~lang1~lang2\".
390 If looking at \"~lang1~lang2\", set `ethio-primary-language' and
391 `ethio-secondary-language' based on \"lang1\" and \"lang2\".
392 Then delete the language flag \"~lang1~lang2\" from the buffer.
393 Return value is the new primary language.
395 If looking at \"~lang\", delete that language flag \"~lang\" from
396 the buffer and return that language. In this case
397 `ethio-primary-language' and `ethio-secondary-language' are left
400 If an unsupported language flag is found, just return nil without
408 "~\\([a-z][a-z][a-z]?\\)~\\([a-z][a-z][a-z]?\\)[ \t\n\\]")
409 (setq lang1 (ethio-flag-to-language (match-string 1)))
410 (setq lang2 (ethio-flag-to-language (match-string 2))))
411 (setq ethio-primary-language lang1
412 ethio-secondary-language lang2)
413 (delete-region (point) (match-end 2))
414 (if (= (following-char) 32)
416 ethio-primary-language)
419 ((and (looking-at "~\\([a-z][a-z][a-z]?\\)[ \t\n\\]")
420 (setq lang1 (ethio-flag-to-language (match-string 1))))
421 (delete-region (point) (match-end 1))
422 (if (= (following-char) 32)
430 (defun ethio-flag-to-language (flag)
432 ((or (string= flag "en") (string= flag "eng")) 'english)
433 ((or (string= flag "ti") (string= flag "tir")) 'tigrigna)
434 ((or (string= flag "am") (string= flag "amh")) 'amharic)
437 (defun ethio-process-digits nil
438 "Convert Arabic digits to Ethiopic digits."
440 (while (and (>= (setq ch (following-char)) ?1)
444 ;; count up following zeros
446 (while (= (following-char) ?0)
452 ;; first digit is 10, 20, ..., or 90
454 (insert (aref [?፲ ?፳ ?፴ ?፵ ?፶ ?፷ ?፸ ?፹ ?፺] (- ch ?1)))
457 ;; first digit is 2, 3, ..., or 9
459 (insert (aref [?፪ ?፫ ?፬ ?፭ ?፮ ?፯ ?፰ ?፱] (- ch ?2))))
470 (insert-char ?፼ (/ z 4)))))
473 (defun ethio-sera-to-fidel-marker (&optional force)
474 "Convert the regions surrounded by \"<sera>\" and \"</sera>\" from SERA to FIDEL.
475 Assume that each region begins with `ethio-primary-language'.
476 The markers \"<sera>\" and \"</sera>\" themselves are not deleted."
478 (if (and buffer-read-only
480 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
483 (goto-char (point-min))
484 (while (search-forward "<sera>" nil t)
485 (ethio-sera-to-fidel-region
487 (if (search-forward "</sera>" nil t)
497 (defun ethio-language-to-flag (lang)
499 ((eq lang 'english) "eng")
500 ((eq lang 'tigrigna) "tir")
501 ((eq lang 'amharic) "amh")
505 (defun ethio-fidel-to-sera-buffer (&optional secondary force)
506 "Replace all the FIDEL characters in the current buffer to the SERA format.
507 The variable `ethio-primary-language' specifies the primary
508 language and `ethio-secondary-language' specifies the secondary.
510 If the 1st optional argument SECONDARY is non-nil, try to convert the
511 region so that it begins with the secondary language; otherwise with the
514 If the 2nd optional argument FORCE is non-nil, convert even if the
517 See also the descriptions of the variables
518 `ethio-use-colon-for-colon', `ethio-use-three-dot-question',
519 `ethio-quote-vowel-always' and `ethio-numeric-reduction'."
522 (ethio-fidel-to-sera-region (point-min) (point-max) secondary force))
;; Convert FIDEL text between BEGIN and END into SERA.  The conversion
;; tracks a `mode' that is either `english' or `ethiopic', and inserts
;; a language flag ("\~eng ", "\~amh " or "\~tir ") into the buffer.
;; Runs of Ethiopic text are narrowed to and passed through
;; `robin-invert-region', then post-processed according to the
;; `ethio-quote-vowel-always', `ethio-W-sixth-always' and
;; `ethio-numeric-reduction' options.
525 (defun ethio-fidel-to-sera-region (begin end &optional secondary force)
526 "Replace all the FIDEL characters in the region to the SERA format.
528 The variable `ethio-primary-language' specifies the primary
529 language and `ethio-secondary-language' specifies the secondary.
531 If the 3rd argument SECONDARY is given and non-nil, convert
532 the region so that it begins with the secondary language; otherwise with
533 the primary language.
535 If the 4th argument FORCE is given and non-nil, convert even if the
538 See also the descriptions of the variables
539 `ethio-use-colon-for-colon', `ethio-use-three-dot-question',
540 `ethio-quote-vowel-always' and `ethio-numeric-reduction'."
;; Ask for confirmation before touching a read-only buffer.
543 (if (and buffer-read-only
545 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
549 (narrow-to-region begin end)
;; `buffer-read-only' is bound to nil for the duration of the conversion.
551 (let ((buffer-read-only nil)
553 ethio-secondary-language
554 ethio-primary-language))
;; Flag text inserted whenever the output switches to Ethiopic.
555 (flag (if (ethio-prefer-amharic-p) "\\~amh " "\\~tir "))
558 (goto-char (point-min))
;; Anything other than `english' is treated as `ethiopic'; the mode is
;; then corrected to agree with the category of the first character
;; ("\\ce" = Ethiopic category, "\\Ce" = not in it) before the initial
;; flag is inserted.
560 (unless (eq mode 'english)
561 (setq mode 'ethiopic))
562 (if (and (eq mode 'english) (looking-at "\\ce"))
563 (setq mode 'ethiopic))
564 (if (and (eq mode 'ethiopic) (looking-at "\\Ce"))
565 (setq mode 'english))
566 (insert (if (eq mode 'english) "\\~eng " flag))
570 (if (eq mode 'english)
;; In English mode, look for the next Ethiopic character or backslash.
572 (if (re-search-forward "\\(\\ce\\|\\\\\\)" nil 0)
575 ((eq (following-char) ?\\)
580 (setq mode 'ethiopic))))
582 ;; If we reach here, mode is ethiopic.
;; Look for the next ASCII character that needs special handling.
584 (if (re-search-forward "[a-z,.;:'`?\\<&]" nil 0)
587 (narrow-to-region p (point))
588 (robin-invert-region (point-min) (point-max) "ethiopic-sera")
590 ;; ethio-quote-vowel-always
591 (goto-char (point-min))
592 (while (re-search-forward "'[eauio]" nil t)
595 (setq ch (preceding-char))
596 (if (or (and (>= ch ?a) (<= ch ?z))
597 (and (>= ch ?A) (<= ch ?Z)))
598 (if (and (not ethio-quote-vowel-always)
599 (memq ch '(?e ?a ?u ?i ?o ?E ?A ?I)))
603 ;; ethio-W-sixth-always
604 (unless ethio-W-sixth-always
605 (goto-char (point-min))
606 (while (search-forward "W'" nil t)
610 ;; ethio-numeric-reduction
;; Level 1 and above: drop the "`" separating two adjacent digits.
611 (when (> ethio-numeric-reduction 0)
612 (goto-char (point-min))
613 (while (re-search-forward "\\([0-9]\\)`\\([0-9]\\)" nil t)
614 (replace-match "\\1\\2")
;; Level 2 only: additionally drop a "1" that sits between a digit and
;; a run of zeros.
616 (when (= ethio-numeric-reduction 2)
617 (goto-char (point-min))
618 (while (re-search-forward "\\([0-9]\\)1\\(0+\\)" nil t)
619 (replace-match "\\1\\2")))
621 (goto-char (point-max)))
;; A lowercase ASCII letter switches the mode to English.
624 ((looking-at "[a-z]")
626 (setq mode 'english))
627 ((looking-at "[,.;:'`\\]+")
;; NOTE(review): the 1+ presumably compensates for one character
;; inserted before the match by a line not visible here -- confirm.
629 (goto-char (1+ (match-end 0))))
630 ((= (following-char) ??)
631 (if ethio-use-three-dot-question
;; Remaining case: the character is not "<" is tested below, so this
;; branch presumably handles "<" and "&".
;; NOTE(review): when a w3 flag is set, the non-"<" case searches for
;; "&".  If point is on the "&" itself, that search only steps over
;; that one character.  The corresponding code in
;; `ethio-sera-to-fidel-region-ethio' searches for ";" instead, which
;; skips a whole entity.  Presumably ";" is intended here too -- confirm.
635 (if (or (and (boundp 'ethio-sera-being-called-by-w3)
636 ethio-sera-being-called-by-w3)
637 (and (boundp 'sera-being-called-by-w3)
638 sera-being-called-by-w3))
639 (search-forward (if (= (following-char) ?<) ">" "&") nil 0)
640 (forward-char 1)))))))))
643 (defun ethio-fidel-to-sera-marker (&optional force)
644 "Convert the regions surrounded by \"<sera>\" and \"</sera>\" from FIDEL to SERA.
645 The markers \"<sera>\" and \"</sera>\" themselves are not deleted."
648 (if (and buffer-read-only
650 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
653 (goto-char (point-min))
654 (while (search-forward "<sera>" nil t)
655 (ethio-fidel-to-sera-region
657 (if (search-forward "</sera>" nil t)
664 ;; vowel modification
668 (defun ethio-modify-vowel nil
669 "Modify the vowel of the FIDEL that is under the cursor."
672 (let ((consonant (ethio-get-consonant (following-char)))
675 (error "") ; not an Ethiopic char
676 (setq vowel (read-char "Modify vowel to: "))
678 (if (and (string= consonant "'") (= vowel ?W))
681 (narrow-to-region (point) (point))
682 (insert consonant vowel)
683 (robin-convert-region (point-min) (point-max) "ethiopic-sera"))))))
685 (defun ethio-get-consonant (ch)
686 "Return the consonant part of CH's SERA spelling in ethiopic-sera."
687 (let ((sera (get-char-code-property ch 'ethiopic-sera)))
690 ((= ch ?ኧ) "'") ; Only this has two vowel letters.
693 (if (memq (preceding-char) '(?e ?u ?i ?a ?o ?E ?I ?A ?'))
695 (buffer-substring (point-min) (point-max)))))))
702 (defun ethio-replace-space (ch begin end)
703 "Replace ASCII spaces with Ethiopic word separators in the region.
705 In the specified region, replace word separators surrounded by two
706 Ethiopic characters, depending on the first argument CH, which should
709 If CH = 1, word separator will be replaced with an ASCII space.
710 If CH = 2, with two ASCII spaces.
711 If CH = 3, with the Ethiopic colon-like word separator.
713 The 2nd and 3rd arguments BEGIN and END specify the region."
715 (interactive "*cReplace spaces to: 1 (sg col), 2 (dbl col), 3 (Ethiopic)\nr")
716 (if (not (memq ch '(?1 ?2 ?3)))
720 (narrow-to-region begin end)
724 ;; an Ethiopic word separator --> an ASCII space
725 (goto-char (point-min))
726 (while (search-forward "፡" nil t)
729 ;; two ASCII spaces between Ethiopic characters --> an ASCII space
730 (goto-char (point-min))
731 (while (re-search-forward "\\(\\ce\\) \\(\\ce\\)" nil t)
732 (replace-match "\\1 \\2")
736 ;; An Ethiopic word separator --> two ASCII spaces
737 (goto-char (point-min))
738 (while (search-forward "፡" nil t)
741 ;; An ASCII space between Ethiopic characters --> two ASCII spaces
742 (goto-char (point-min))
743 (while (re-search-forward "\\(\\ce\\) \\(\\ce\\)" nil t)
744 (replace-match "\\1 \\2")
748 ;; One or two ASCII spaces between Ethiopic characters
749 ;; --> An Ethiopic word separator
750 (goto-char (point-min))
751 (while (re-search-forward "\\(\\ce\\) ?\\(\\ce\\)" nil t)
752 (replace-match "\\1፡\\2")
755 ;; Three or more ASCII spaces between Ethiopic characters
756 ;; --> An Ethiopic word separator + (N - 2) ASCII spaces
757 (goto-char (point-min))
758 (while (re-search-forward "\\(\\ce\\) \\( +\\ce\\)" nil t)
759 (replace-match "\\1፡\\2")
760 (forward-char -1)))))))
766 ;; This function is deprecated.
768 (defun ethio-input-special-character (arg)
769 "This function is deprecated."
770 (interactive "*cInput number: 1. 2. 3. 4. 5.")
790 (defun ethio-fidel-to-tex-buffer nil
791 "Convert each fidel characters in the current buffer into a fidel-tex command."
793 (let ((buffer-read-only nil)
796 ;; Special treatment for geminated characters.
797 ;; Geminated characters la", etc. change into \geminateG{\laG}, etc.
798 (goto-char (point-min))
799 (while (re-search-forward "፟\\|" nil t)
800 (setq comp (find-composition (match-beginning 0)))
802 (replace-match "\\\\geminateG{}" t)
803 (decompose-region (car comp) (cadr comp))
806 (insert "\\geminateG{")
810 ;; Special Ethiopic punctuations.
811 (goto-char (point-min))
812 (while (re-search-forward "\\ce[»\\.\\?]\\|«\\ce" nil t)
814 ((= (setq ch (preceding-char)) ?\»)
816 (insert "\\rquoteG"))
829 ;; Ethiopic characters to TeX macros
830 (robin-invert-region (point-min) (point-max) "ethiopic-tex")
832 (goto-char (point-min))
833 (set-buffer-modified-p nil)))
836 (defun ethio-tex-to-fidel-buffer nil
837 "Convert fidel-tex commands in the current buffer into fidel chars."
839 (let ((buffer-read-only nil)
842 ;; TeX macros to Ethiopic characters
843 (robin-convert-region (point-min) (point-max) "ethiopic-tex")
845 ;; compose geminated characters
846 (goto-char (point-min))
847 (while (re-search-forward "\\\\geminateG{\\(\\ce?\\)}" nil t)
848 (replace-match "\\1፟"))
850 ;; remove redundant braces, if any
851 (goto-char (point-min))
852 (while (re-search-forward "{\\(\\ce\\)}" nil t)
853 (replace-match "\\1"))
855 (goto-char (point-min))
856 (set-buffer-modified-p nil)))
863 (defun ethio-fidel-to-java-buffer nil
864 "Convert Ethiopic characters into the Java escape sequences.
866 Each escape sequence is of the form \\uXXXX, where XXXX is the
867 character's codepoint (in hex) in Unicode.
869 If `ethio-java-save-lowercase' is non-nil, use [0-9a-f].
870 Otherwise, [0-9A-F]."
873 (goto-char (point-min))
874 (while (re-search-forward "[ሀ-፼]" nil t)
875 (setq ucode (preceding-char))
876 (delete-backward-char 1)
878 (format (if ethio-java-save-lowercase "\\u%4x" "\\u%4X")
;; Scan the buffer from its beginning for escapes of the form \uXXXX
;; (four hex digits) and replace those that denote a code point in the
;; range #x1200..#x137f with the character itself.
882 (defun ethio-java-to-fidel-buffer nil
883 "Convert the Java escape sequences into corresponding Ethiopic characters."
;; With `case-fold-search' bound to t, the lowercase-only pattern below
;; also matches uppercase hex digits.
884 (let ((case-fold-search t)
886 (goto-char (point-min))
887 (while (re-search-forward "\\\\u\\([0-9a-f][0-9a-f][0-9a-f][0-9a-f]\\)" nil t)
;; Parse the four hex digits by handing "#xNNNN" to the Lisp reader.
888 (setq ucode (read (concat "#x" (match-string 1))))
;; Escapes outside #x1200..#x137f are left in the buffer unchanged.
889 (when (and (>= ucode #x1200) (<= ucode #x137f))
890 (replace-match (char-to-string ucode))))))
;; Hook function: pick a converter from the suffix of the visited file
;; name and convert the buffer contents to fidel.  Every branch ends by
;; clearing the buffer-modified flag, so the conversion itself does not
;; mark the buffer as modified.
897 (defun ethio-find-file nil
898 "Transliterate file content into Ethiopic depending on filename suffix."
;; *.sera: convert the whole buffer, forcing the conversion.
901 ((string-match "\\.sera$" (buffer-file-name))
903 (ethio-sera-to-fidel-buffer nil 'force)
904 (set-buffer-modified-p nil)))
;; *.html: convert only the regions handled by
;; `ethio-sera-to-fidel-marker', with the w3 flag bound to t, then turn
;; "&laquo;" / "&raquo;" entities into guillemet characters.
906 ((string-match "\\.html$" (buffer-file-name))
907 (let ((ethio-sera-being-called-by-w3 t))
909 (ethio-sera-to-fidel-marker 'force)
910 (goto-char (point-min))
911 (while (re-search-forward "&[lr]aquo;" nil t)
;; The character right after "&" tells the left entity from the right.
912 (if (= (char-after (1+ (match-beginning 0))) ?l)
914 (replace-match "»")))
915 (set-buffer-modified-p nil))))
;; *.tex: convert via `ethio-tex-to-fidel-buffer'.
917 ((string-match "\\.tex$" (buffer-file-name))
919 (ethio-tex-to-fidel-buffer)
920 (set-buffer-modified-p nil)))
;; *.java: convert via `ethio-java-to-fidel-buffer'.
922 ((string-match "\\.java$" (buffer-file-name))
924 (ethio-java-to-fidel-buffer)
925 (set-buffer-modified-p nil)))
;; Hook function: pick a converter from the suffix of the visited file
;; name and convert the fidel buffer contents to the on-disk format.
;; Every branch ends by clearing the buffer-modified flag.
931 (defun ethio-write-file nil
932 "Transliterate Ethiopic characters in ASCII depending on the file extension."
;; *.sera: convert the whole buffer, then insert the user-preference
;; flags at the beginning of the buffer.
935 ((string-match "\\.sera$" (buffer-file-name))
937 (ethio-fidel-to-sera-buffer nil 'force)
938 (goto-char (point-min))
939 (ethio-record-user-preference)
940 (set-buffer-modified-p nil)))
;; *.html: convert only the regions handled by
;; `ethio-fidel-to-sera-marker', with the w3 flag bound to t.
942 ((string-match "\\.html$" (buffer-file-name))
944 (let ((ethio-sera-being-called-by-w3 t))
945 (ethio-fidel-to-sera-marker 'force)
946 (goto-char (point-min))
;; NOTE(review): as written, each guillemet is replaced by itself.
;; `ethio-find-file' turns "&laquo;" / "&raquo;" into guillemets, so
;; the replacements here were presumably meant to be those entities --
;; confirm against the upstream source.
947 (while (re-search-forward "[«»]" nil t)
948 (replace-match (if (= (preceding-char) ?«) "«" "»")))
949 (goto-char (point-min))
;; The preference flags are inserted right after the first "<sera>"
;; marker, if there is one.
950 (if (search-forward "<sera>" nil t)
951 (ethio-record-user-preference))
952 (set-buffer-modified-p nil))))
;; *.tex: convert via `ethio-fidel-to-tex-buffer'.
954 ((string-match "\\.tex$" (buffer-file-name))
956 (ethio-fidel-to-tex-buffer)
957 (set-buffer-modified-p nil)))
;; *.java: convert via `ethio-fidel-to-java-buffer'.
959 ((string-match "\\.java$" (buffer-file-name))
961 (ethio-fidel-to-java-buffer)
962 (set-buffer-modified-p nil)))
(defun ethio-record-user-preference ()
  "Insert tilde escapes recording the punctuation preferences at point.
The first escape reflects `ethio-use-colon-for-colon', the second
`ethio-use-three-dot-question'."
  (let ((colon-escape (if ethio-use-colon-for-colon
                          "\\~-: "
                        "\\~`: "))
        (question-escape (if ethio-use-three-dot-question
                             "\\~`| "
                           "\\~? ")))
    (insert colon-escape question-escape)))
972 ;; Ethiopic word separator vs. ASCII space
975 (defvar ethio-prefer-ascii-space t)
976 (make-variable-buffer-local 'ethio-prefer-ascii-space)
978 (defun ethio-toggle-space nil
979 "Toggle ASCII space and Ethiopic separator for keyboard input."
981 (setq ethio-prefer-ascii-space
982 (not ethio-prefer-ascii-space)))
984 (defun ethio-insert-space (arg)
985 "Insert ASCII spaces or Ethiopic word separators depending on context.
987 If the current word separator (indicated in mode-line) is the ASCII space,
988 insert an ASCII space. With ARG, insert that many ASCII spaces.
990 If the current word separator is the colon-like Ethiopic word
991 separator and the point is preceded by `an Ethiopic punctuation mark
992 followed by zero or more ASCII spaces', then insert also an ASCII
993 space. With ARG, insert that many ASCII spaces.
995 Otherwise, insert a colon-like Ethiopic word separator. With ARG, insert that
996 many Ethiopic word separators."
1000 (ethio-prefer-ascii-space
1001 (insert-char 32 arg))
1003 (skip-chars-backward " ")
1004 (memq (preceding-char)
1005 '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨ ? ? ? ? ?)))
1006 (insert-char 32 arg))
1008 (insert-char ?፡ arg))))
1011 (defun ethio-insert-ethio-space (arg)
1012 "Insert the Ethiopic word delimiter (the colon-like character).
1013 With ARG, insert that many delimiters."
1015 (insert-char ?፡ arg))
1022 (defun ethio-composition-function (pos to font-object string)
1024 (let ((pattern "\\ce\\(፟\\|\\)"))
1027 (eq (string-match pattern string pos) pos))
1028 (prog1 (match-end 0)
1029 (compose-string string pos (match-end 0))))
1030 (if (>= pos (point-min))
1033 (if (looking-at pattern)
1034 (prog1 (match-end 0)
1035 (compose-region pos (match-end 0)))))))))
1037 ;; This function is not used any more.
1038 (defun ethio-gemination nil
1039 "Compose the character before the point with the Ethiopic gemination mark.
1040 If the character is already composed, decompose it and remove the gemination
1043 (let ((ch (preceding-char)))
1045 ((and (= ch ?) (find-composition (1- (point))))
1046 (decompose-region (- (point) 2) (point)))
1047 ((and (>= ch #x1200) (<= ch #x137f))
1049 (compose-region (- (point) 2) (point)))
1057 (robin-define-package "ethiopic-sera"
1058 "SERA transliteration system for Ethiopic."
1064 ("hE" ?ሄ) ("hee" "ሄ")
1068 ("le" ?ለ) ("Le" "ለ")
1069 ("lu" ?ሉ) ("Lu" "ሉ")
1070 ("li" ?ሊ) ("Li" "ሊ")
1071 ("la" ?ላ) ("La" "ላ")
1072 ("lE" ?ሌ) ("LE" "ሌ") ("lee" "ሌ") ("Lee" "ሌ")
1074 ("lo" ?ሎ) ("Lo" "ሎ")
1075 ("lWa" ?ሏ) ("LWa" "ሏ") ("lW" "ሏ") ("LW" "ሏ")
1081 ("HE" ?ሔ) ("Hee" "ሔ")
1084 ("HWa" ?ሗ) ("HW" "ሗ")
1086 ("me" ?መ) ("Me" "መ")
1087 ("mu" ?ሙ) ("Mu" "ሙ")
1088 ("mi" ?ሚ) ("Mi" "ሚ")
1089 ("ma" ?ማ) ("Ma" "ማ")
1090 ("mE" ?ሜ) ("ME" "ሜ") ("mee" "ሜ") ("Mee" "ሜ")
1092 ("mo" ?ሞ) ("Mo" "ሞ")
1093 ("mWa" ?ሟ) ("MWa" "ሟ") ("mW" "ሟ") ("MW" "ሟ")
1095 ("`se" ?ሠ) ("sse" "ሠ") ("s2e" "ሠ")
1096 ("`su" ?ሡ) ("ssu" "ሡ") ("s2u" "ሡ")
1097 ("`si" ?ሢ) ("ssi" "ሢ") ("s2i" "ሢ")
1098 ("`sa" ?ሣ) ("ssa" "ሣ") ("s2a" "ሣ")
1099 ("`sE" ?ሤ) ("ssE" "ሤ") ("s2E" "ሤ")
1100 ("`see" "ሤ") ("ssee" "ሤ") ("s2ee" "ሤ")
1101 ("`s" ?ሥ) ("ss" "ሥ") ("s2" "ሥ")
1102 ("`so" ?ሦ) ("sso" "ሦ") ("s2o" "ሦ")
1103 ("`sWa" ?ሧ) ("ssWa" "ሧ") ("s2Wa" "ሧ")
1104 ("`sW" "ሧ") ("ssW" "ሧ") ("s2W" "ሧ")
1106 ("re" ?ረ) ("Re" "ረ")
1107 ("ru" ?ሩ) ("Ru" "ሩ")
1108 ("ri" ?ሪ) ("Ri" "ሪ")
1109 ("ra" ?ራ) ("Ra" "ራ")
1110 ("rE" ?ሬ) ("RE" "ሬ") ("ree" "ሬ") ("Ree" "ሬ")
1112 ("ro" ?ሮ) ("Ro" "ሮ")
1113 ("rWa" ?ሯ) ("RWa" "ሯ") ("rW" "ሯ") ("RW" "ሯ")
1119 ("sE" ?ሴ) ("see" "ሴ")
1122 ("sWa" ?ሷ) ("sW" "ሷ")
1128 ("xE" ?ሼ) ("xee" "ሼ")
1131 ("xWa" ?ሿ) ("xW" "ሿ")
1137 ("qE" ?ቄ) ("qee" "ቄ")
1142 ("qWa" ?ቋ) ("qW" "ቋ")
1143 ("qWE" ?ቌ) ("qWee" "ቌ")
1144 ("qW'" ?ቍ) ("qWu" "ቍ")
1150 ("QE" ?ቔ) ("Qee" "ቔ")
1155 ("QWa" ?ቛ) ("QW" "ቛ")
1156 ("QWE" ?ቜ) ("QWee" "ቜ")
1157 ("QW'" ?ቝ) ("QWu" "ቝ")
1159 ("be" ?በ) ("Be" "በ")
1160 ("bu" ?ቡ) ("Bu" "ቡ")
1161 ("bi" ?ቢ) ("Bi" "ቢ")
1162 ("ba" ?ባ) ("Ba" "ባ")
1163 ("bE" ?ቤ) ("BE" "ቤ") ("bee" "ቤ") ("Bee" "ቤ")
1165 ("bo" ?ቦ) ("Bo" "ቦ")
1166 ("bWa" ?ቧ) ("BWa" "ቧ") ("bW" "ቧ") ("BW" "ቧ")
1168 ("ve" ?ቨ) ("Ve" "ቨ")
1169 ("vu" ?ቩ) ("Vu" "ቩ")
1170 ("vi" ?ቪ) ("Vi" "ቪ")
1171 ("va" ?ቫ) ("Va" "ቫ")
1172 ("vE" ?ቬ) ("VE" "ቬ") ("vee" "ቬ") ("Vee" "ቬ")
1174 ("vo" ?ቮ) ("Vo" "ቮ")
1175 ("vWa" ?ቯ) ("VWa" "ቯ") ("vW" "ቯ") ("VW" "ቯ")
1181 ("tE" ?ቴ) ("tee" "ቴ")
1184 ("tWa" ?ቷ) ("tW" "ቷ")
1190 ("cE" ?ቼ) ("cee" "ቼ")
1193 ("cWa" ?ቿ) ("cW" "ቿ")
1195 ("`he" ?ኀ) ("hhe" "ኀ") ("h2e" "ኀ")
1196 ("`hu" ?ኁ) ("hhu" "ኁ") ("h2u" "ኁ")
1197 ("`hi" ?ኂ) ("hhi" "ኂ") ("h2i" "ኂ")
1198 ("`ha" ?ኃ) ("hha" "ኃ") ("h2a" "ኃ")
1199 ("`hE" ?ኄ) ("hhE" "ኄ") ("h2E" "ኄ")
1200 ("`hee" "ኄ") ("hhee" "ኄ") ("h2ee" "ኄ")
1201 ("`h" ?ኅ) ("hh" "ኅ") ("h2" "ኅ")
1202 ("`ho" ?ኆ) ("hho" "ኆ") ("h2o" "ኆ")
1203 ("`hWe" ?ኈ) ("hhWe" "ኈ") ("h2We" "ኈ") ("hWe" "ኈ")
1204 ("`hWi" ?ኊ) ("hhWi" "ኊ") ("h2Wi" "ኊ") ("hWi" "ኊ")
1205 ("`hWa" ?ኋ) ("hhWa" "ኋ") ("h2Wa" "ኋ") ("hWa" "ኋ")
1206 ("`hW" "ኋ") ("hhW" "ኋ") ("h2W" "ኋ")
1207 ("`hWE" ?ኌ) ("hhWE" "ኌ") ("h2WE" "ኌ") ("hWE" "ኌ")
1208 ("`hWee" "ኌ") ("hhWee" "ኌ") ("h2Wee" "ኌ") ("hWee" "ኌ")
1209 ("`hW'" ?ኍ) ("hhW'" "ኍ") ("h2W'" "ኍ") ("hW'" "ኍ")
1210 ("`hWu" "ኍ") ("hhWu" "ኍ") ("h2Wu" "ኍ") ("hWu" "ኍ")
1216 ("nE" ?ኔ) ("nee" "ኔ")
1219 ("nWa" ?ኗ) ("nW" "ኗ")
1225 ("NE" ?ኜ) ("Nee" "ኜ")
1228 ("NWa" ?ኟ) ("NW" "ኟ")
1231 ("'u" ?ኡ) ("u" "ኡ") ("'U" "ኡ") ("U" "ኡ")
1235 ("'I" ?እ) ("I" "እ") ("'e" "እ") ("e" "እ")
1236 ("'o" ?ኦ) ("o" "ኦ") ("'O" "ኦ") ("O" "ኦ")
1237 ("'ea" ?ኧ) ("ea" "ኧ")
1243 ("kE" ?ኬ) ("kee" "ኬ")
1248 ("kWa" ?ኳ) ("kW" "ኳ")
1249 ("kWE" ?ኴ) ("kWee" "ኴ")
1250 ("kW'" ?ኵ) ("kWu" "ኵ")
1256 ("KE" ?ኼ) ("Kee" "ኼ")
1261 ("KWa" ?ዃ) ("KW" "ዃ")
1262 ("KWE" ?ዄ) ("KWee" "ዄ")
1263 ("KW'" ?ዅ) ("KWu" "ዅ")
1269 ("wE" ?ዌ) ("wee" "ዌ")
1273 ("`e" ?ዐ) ("ae" "ዐ") ("aaa" "ዐ") ("e2" "ዐ")
1274 ("`u" ?ዑ) ("uu" "ዑ") ("u2" "ዑ") ("`U" "ዑ") ("UU" "ዑ") ("U2" "ዑ")
1275 ("`i" ?ዒ) ("ii" "ዒ") ("i2" "ዒ")
1276 ("`a" ?ዓ) ("aa" "ዓ") ("a2" "ዓ") ("`A" "ዓ") ("AA" "ዓ") ("A2" "ዓ")
1277 ("`E" ?ዔ) ("EE" "ዔ") ("E2" "ዔ")
1278 ("`I" ?ዕ) ("II" "ዕ") ("I2" "ዕ") ("ee" "ዕ")
1279 ("`o" ?ዖ) ("oo" "ዖ") ("o2" "ዖ") ("`O" "ዖ") ("OO" "ዖ") ("O2" "ዖ")
1285 ("zE" ?ዜ) ("zee" "ዜ")
1288 ("zWa" ?ዟ) ("zW" "ዟ")
1294 ("ZE" ?ዤ) ("Zee" "ዤ")
1297 ("ZWa" ?ዧ) ("ZW" "ዧ")
1299 ("ye" ?የ) ("Ye" "የ")
1300 ("yu" ?ዩ) ("Yu" "ዩ")
1301 ("yi" ?ዪ) ("Yi" "ዪ")
1302 ("ya" ?ያ) ("Ya" "ያ")
1303 ("yE" ?ዬ) ("YE" "ዬ") ("yee" "ዬ") ("Yee" "ዬ")
1305 ("yo" ?ዮ) ("Yo" "ዮ")
1311 ("dE" ?ዴ) ("dee" "ዴ")
1314 ("dWa" ?ዷ) ("dW" "ዷ")
1320 ("DE" ?ዼ) ("Dee" "ዼ")
1323 ("DWa" ?ዿ) ("DW" "ዿ")
1325 ("je" ?ጀ) ("Je" "ጀ")
1326 ("ju" ?ጁ) ("Ju" "ጁ")
1327 ("ji" ?ጂ) ("Ji" "ጂ")
1328 ("ja" ?ጃ) ("Ja" "ጃ")
1329 ("jE" ?ጄ) ("JE" "ጄ") ("jee" "ጄ") ("Jee" "ጄ")
1331 ("jo" ?ጆ) ("Jo" "ጆ")
1332 ("jWa" ?ጇ) ("jW" "ጇ") ("JWa" "ጇ") ("JW" "ጇ")
1338 ("gE" ?ጌ) ("gee" "ጌ")
1343 ("gWa" ?ጓ) ("gW" "ጓ")
1344 ("gWE" ?ጔ) ("gWee" "ጔ")
1345 ("gW'" ?ጕ) ("gWu" "ጕ")
1351 ("GE" ?ጜ) ("Gee" "ጜ")
1359 ("TE" ?ጤ) ("Tee" "ጤ")
1362 ("TWa" ?ጧ) ("TW" "ጧ")
1368 ("CE" ?ጬ) ("Cee" "ጬ")
1371 ("CWa" ?ጯ) ("CW" "ጯ")
1377 ("PE" ?ጴ) ("Pee" "ጴ")
1380 ("PWa" ?ጷ) ("PW" "ጷ")
1386 ("SE" ?ጼ) ("See" "ጼ")
1389 ("SWa" ?ጿ) ("`SWa" "ጿ") ("SSWa" "ጿ") ("S2Wa" "ጿ")
1390 ("SW" "ጿ") ("`SW" "ጿ") ("SSW" "ጿ") ("S2W" "ጿ")
1392 ("`Se" ?ፀ) ("SSe" "ፀ") ("S2e" "ፀ")
1393 ("`Su" ?ፁ) ("SSu" "ፁ") ("S2u" "ፁ")
1394 ("`Si" ?ፂ) ("SSi" "ፂ") ("S2i" "ፂ")
1395 ("`Sa" ?ፃ) ("SSa" "ፃ") ("S2a" "ፃ")
1396 ("`SE" ?ፄ) ("SSE" "ፄ") ("S2E" "ፄ")
1397 ("`See" "ፄ") ("SSee" "ፄ") ("S2ee" "ፄ")
1398 ("`S" ?ፅ) ("SS" "ፅ") ("S2" "ፅ")
1399 ("`So" ?ፆ) ("SSo" "ፆ") ("S2o" "ፆ")
1401 ("fe" ?ፈ) ("Fe" "ፈ")
1402 ("fu" ?ፉ) ("Fu" "ፉ")
1403 ("fi" ?ፊ) ("Fi" "ፊ")
1404 ("fa" ?ፋ) ("Fa" "ፋ")
1405 ("fE" ?ፌ) ("FE" "ፌ") ("fee" "ፌ") ("Fee" "ፌ")
1407 ("fo" ?ፎ) ("Fo" "ፎ")
1408 ("fWa" ?ፏ) ("FWa" "ፏ") ("fW" "ፏ") ("FW" "ፏ")
1414 ("pE" ?ፔ) ("pee" "ፔ")
1417 ("pWa" ?ፗ) ("pW" "ፗ")
1419 ("rYa" ?ፘ) ("RYa" "ፘ") ("rY" "ፘ") ("RY" "ፘ")
1420 ("mYa" ?ፙ) ("MYa" "ፙ") ("mY" "ፙ") ("MY" "ፙ")
1421 ("fYa" ?ፚ) ("FYa" "ፚ") ("fY" "ፚ") ("FY" "ፚ")
1423 (" : " ?፡) (":" "፡") ("`:" "፡")
1429 ("`?" ?፧) ("??" "፧")
1430 (":|:" ?፨) ("**" "፨")
1432 ;; Explicit syllable delimiter
1435 ;; Quick ASCII input
1500 (register-input-method
1501 "ethiopic-sera" "Ethiopic"
1502 'robin-use-package "et" "An input method for Ethiopic.")
1504 (robin-define-package "ethiopic-tex"
1505 "TeX transliteration system for Ethiopic."
1507 ("\\heG" ?ሀ) ; U+1200 ..
1524 ("\\HeG" ?ሐ) ; U+1210 ..
1541 ("\\sseG" ?ሠ) ; U+1220 ..
1558 ("\\seG" ?ሰ) ; U+1230 ..
1575 ("\\qeG" ?ቀ) ; U+1240 ..
1592 ("\\QeG" ?ቐ) ; U+1250 ..
1609 ("\\beG" ?በ) ; U+1260 ..
1626 ("\\teG" ?ተ) ; U+1270 ..
1643 ("\\hheG" ?ኀ) ; U+1280 ..
1660 ("\\neG" ?ነ) ; U+1290 ..
1677 ("\\eG" ?አ) ; U+12A0 ..
1694 ("\\kWeG" ?ኰ) ; U+12B0 ..
1711 ("\\KWeG" ?ዀ) ; U+12C0 ..
1728 ("\\eeG" ?ዐ) ; U+12D0 ..
1745 ("\\ZeG" ?ዠ) ; U+12E0 ..
1762 ("\\deG" ?ደ) ; U+12F0 ..
1779 ("\\jeG" ?ጀ) ; U+1300 ..
1796 ("\\gWeG" ?ጐ) ; U+1310 ..
1813 ("\\TeG" ?ጠ) ; U+1320 ..
1830 ("\\PeG" ?ጰ) ; U+1330 ..
1847 ("\\SSeG" ?ፀ) ; U+1340 ..
1864 ("\\peG" ?ፐ) ; U+1350 ..
1881 ;; reserved ; U+1360 ..
1898 ("\\smntG" ?፰) ; U+1370 ..
1916 ;; private extension
1919 ("\\yWaG" ?) ; U+1A00EF (was U+12EF)
1921 ("\\GWaG" ?) ; U+1A011F (was U+131F)
1923 ("\\qqeG" ?) ; U+1A0180 .. (was U+1380 ..)
1940 ("\\kkeG" ?) ; U+1A0190 .. (was U+1390 ..)
1957 ("\\XeG" ?) ; U+1A01A0 .. (was U+13A0 ..)
1974 ("\\ggeG" ?) ; U+1A01B0 .. (was U+13B0 ..)
1991 ("\\ornamentG" ?) ; U+1A01C0 .. (was U+FDF0 ..)
2001 ;; Gemination () is handled in a special way.
2004 ;; Assign reverse conversion to Fidel chars.
2005 ;; Then override forward conversion with ASCII chars.
2006 ;; ASCII chars should not have reverse conversions.
2007 ("\\dotG" ?) ("\\dotG" ".")
2008 ("\\lquoteG" ?) ("\\lquoteG" "«")
2009 ("\\rquoteG" ?) ("\\rquoteG" "»")
2010 ("\\qmarkG" ?) ("\\qmarkG" "?")
2013 ;; New characters in Unicode 4.1.
2015 ;; In forward conversion, these characters override the old private
2016 ;; extensions above. The old private extensions still keep their
2017 ;; reverse conversion.
2073 ;; The ethiopic-tex package is not used for keyboard input, therefore
2074 ;; not registered with the register-input-method function.
2076 (provide 'ethio-util)
2078 ;;; ethio-util.el ends here
2080 ;;; arch-tag: c8feb3d6-39bf-4b0a-b6ef-26f03fbc8140
2081 ;;; ethio-util.el ends here