1 ;;; ethio-util.el --- utilities for Ethiopic -*- coding: utf-8-emacs; -*-
3 ;; Copyright (C) 1997, 2001 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number: H15PRO 110
9 ;; Keywords: mule, multilingual, Ethiopic
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 2, or (at your option)
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26 ;; Boston, MA 02111-1307, USA.
28 ;; Author: TAKAHASHI Naoto <ntakahas@m17n.org>
36 ;; Information for exiting Ethiopic environment.
;; A list of (KEY . BINDING) pairs, one for each global key that
;; `setup-ethiopic-environment-internal' rebinds; BINDING is whatever
;; `global-key-binding' returned for KEY just before the rebinding.
;; `exit-ethiopic-environment' re-installs the pairs and empties the list.
37 (defvar exit-ethiopic-environment-data nil)
(defun setup-ethiopic-environment-internal ()
  "Install the Ethiopic global key bindings and file conversion hooks.
Before a key is rebound, its current global binding is pushed onto
`exit-ethiopic-environment-data' as a (KEY . OLD-BINDING) pair so that
`exit-ethiopic-environment' can put it back later."
  (dolist (binding '((" " . ethio-insert-space)
                     ([?\S- ] . ethio-insert-ethio-space)
                     ;; ([?\C-'] . ethio-gemination)
                     ([f3] . ethio-fidel-to-sera-buffer)
                     ([S-f3] . ethio-fidel-to-sera-region)
                     ([C-f3] . ethio-fidel-to-sera-marker)
                     ([f4] . ethio-sera-to-fidel-buffer)
                     ([S-f4] . ethio-sera-to-fidel-region)
                     ([C-f4] . ethio-sera-to-fidel-marker)
                     ([S-f5] . ethio-toggle-punctuation)
                     ([S-f6] . ethio-modify-vowel)
                     ([S-f7] . ethio-replace-space)
                     ;; ([S-f8] . ethio-input-special-character) ; deprecated
                     ([C-f9] . ethio-toggle-space)
                     ([S-f9] . ethio-replace-space) ; as requested
                     ))
    (let ((key (car binding)))
      ;; Remember what KEY did before we take it over.
      (setq exit-ethiopic-environment-data
            (cons (cons key (global-key-binding key))
                  exit-ethiopic-environment-data))
      (global-set-key key (cdr binding))))

  ;; Convert on visiting and on writing, and convert back again once the
  ;; transliterated text has been saved.
  (add-hook 'find-file-hook 'ethio-find-file)
  (add-hook 'write-file-functions 'ethio-write-file)
  (add-hook 'after-save-hook 'ethio-find-file))
(defun exit-ethiopic-environment ()
  "Exit Ethiopic language environment.
Re-install every global key binding recorded in
`exit-ethiopic-environment-data', emptying that list along the way,
then take the Ethiopic conversion functions off the file hooks."
  (while exit-ethiopic-environment-data
    (let ((saved (car exit-ethiopic-environment-data)))
      ;; SAVED is a (KEY . BINDING) pair.
      (global-set-key (car saved) (cdr saved)))
    (setq exit-ethiopic-environment-data
          (cdr exit-ethiopic-environment-data)))

  (remove-hook 'find-file-hook 'ethio-find-file)
  (remove-hook 'write-file-functions 'ethio-write-file)
  (remove-hook 'after-save-hook 'ethio-find-file))
83 ;; ETHIOPIC UTILITY FUNCTIONS
86 ;; If the filename ends in ".sera", editing is done in fidel
87 ;; but file I/O is done in SERA.
89 ;; If the filename ends in ".java", editing is done in fidel
90 ;; but file I/O is done in the \uXXXX style, where XXXX is
91 ;; the Unicode codepoint for the Ethiopic character.
93 ;; If the filename ends in ".tex", editing is done in fidel
94 ;; but file I/O is done in EthioTeX format.
100 (defvar ethio-primary-language 'tigrigna
101 "*Symbol that defines the primary language in SERA --> FIDEL conversion.
102 The value should be one of: `tigrigna', `amharic' or `english'.")
104 (defvar ethio-secondary-language 'english
105 "*Symbol that defines the secondary language in SERA --> FIDEL conversion.
106 The value should be one of: `tigrigna', `amharic' or `english'.")
108 (defvar ethio-use-colon-for-colon nil
109 "*Non-nil means associate ASCII colon with Ethiopic colon.
110 If nil, associate ASCII colon with Ethiopic word separator, i.e., two
111 vertically stacked dots. All SERA <--> FIDEL converters refer this
;; `ethio-adjust-robin' passes this value to the function of the same name.
114 (defvar ethio-use-three-dot-question nil
115 "*Non-nil means associate ASCII question mark with Ethiopic old style question mark (three vertically stacked dots).
116 If nil, associate ASCII question mark with Ethiopic stylised question
117 mark. All SERA <--> FIDEL converters refer to this variable.")
119 (defvar ethio-quote-vowel-always nil
120 "*Non-nil means always put an apostrophe before an isolated vowel (except at word initial) in FIDEL --> SERA conversion.
121 If nil, put an apostrophe only between a 6th-form consonant and an
124 (defvar ethio-W-sixth-always nil
125 "*Non-nil means convert the Wu-form of a 12-form consonant to \"W'\" instead of \"Wu\" in FIDEL --> SERA conversion.")
127 (defvar ethio-numeric-reduction 0
128 "*Degree of reduction in converting Ethiopic digits into Arabic digits.
130 For example, ({10}{9}{100}{80}{7}) is converted into:
131 `10`9`100`80`7 if `ethio-numeric-reduction' is 0,
132 `109100807 if `ethio-numeric-reduction' is 1,
133 `10900807 if `ethio-numeric-reduction' is 2.")
135 (defvar ethio-java-save-lowercase nil
136 "*Non-nil means save Ethiopic characters in lowercase hex numbers to Java files.
137 If nil, use uppercases.")
(defun ethio-prefer-amharic-p ()
  "Return non-nil if Amharic takes precedence over Tigrigna.
That is the case when `ethio-primary-language' is `amharic', or when
it is not `tigrigna' and `ethio-secondary-language' is `amharic'."
  (cond ((eq ethio-primary-language 'amharic) t)
        ((eq ethio-primary-language 'tigrigna) nil)
        (t (eq ethio-secondary-language 'amharic))))
144 (defun ethio-prefer-amharic (arg)
147 (robin-modify-package "ethiopic-sera" "'a" ?አ)
148 (robin-modify-package "ethiopic-sera" "a" "አ")
149 (robin-modify-package "ethiopic-sera" "'A" ?ኣ)
150 (robin-modify-package "ethiopic-sera" "A" "ኣ"))
151 (robin-modify-package "ethiopic-sera" "'A" ?አ)
152 (robin-modify-package "ethiopic-sera" "A" "አ")
153 (robin-modify-package "ethiopic-sera" "'a" ?ኣ)
154 (robin-modify-package "ethiopic-sera" "a" "ኣ")))
156 (defun ethio-use-colon-for-colon (arg)
159 (robin-modify-package "ethiopic-sera" ":" ?፥)
160 (robin-modify-package "ethiopic-sera" "`:" ?፡))
161 (robin-modify-package "ethiopic-sera" " : " ?፡)
162 (robin-modify-package "ethiopic-sera" ":" "፡")
163 (robin-modify-package "ethiopic-sera" "-:" ?፥)))
165 (defun ethio-use-three-dot-question (arg)
168 (robin-modify-package "ethiopic-sera" "?" ?፧)
169 (robin-modify-package "ethiopic-sera" "`?" ??))
170 (robin-modify-package "ethiopic-sera" "?" ??)
171 (robin-modify-package "ethiopic-sera" "`?" ?፧)))
(defun ethio-adjust-robin ()
  "Re-apply the user's language and punctuation options.
Call `ethio-prefer-amharic' with the result of `ethio-prefer-amharic-p',
then call the functions `ethio-use-colon-for-colon' and
`ethio-use-three-dot-question' with the values of the variables that
share their names."
  (ethio-prefer-amharic (ethio-prefer-amharic-p))
  ;; In the next two forms the function and its argument have the same
  ;; name; `funcall' makes it explicit which one is the function.
  (funcall 'ethio-use-colon-for-colon ethio-use-colon-for-colon)
  (funcall 'ethio-use-three-dot-question ethio-use-three-dot-question))
178 (add-hook 'robin-activate-hook 'ethio-adjust-robin)
185 (defun ethio-sera-to-fidel-buffer (&optional secondary force)
186 "Convert the current buffer from SERA to FIDEL.
188 The variable `ethio-primary-language' specifies the primary
189 language and `ethio-secondary-language' specifies the secondary.
191 If the 1st optional argument SECONDARY is non-nil, assume the
192 buffer begins with the secondary language; otherwise with the
195 If the 2nd optional argument FORCE is non-nil, perform conversion
196 even if the buffer is read-only.
198 See also the descriptions of the variables
199 `ethio-use-colon-for-colon' and `ethio-use-three-dot-question'."
202 (ethio-sera-to-fidel-region (point-min) (point-max) secondary force))
204 ;; To avoid byte-compiler warnings. It should never be set globally.
205 (defvar ethio-sera-being-called-by-w3)
206 ;; This variable will be bound by some third-party package.
207 (defvar sera-being-called-by-w3)
210 (defun ethio-sera-to-fidel-region (begin end &optional secondary force)
211 "Convert the characters in region from SERA to FIDEL.
213 The variable `ethio-primary-language' specifies the primary
214 language and `ethio-secondary-language' specifies the secondary.
216 If the 3rd argument SECONDARY is given and non-nil, assume the
217 region begins with the secondary language; otherwise with the
220 If the 4th argument FORCE is given and non-nil, perform
221 conversion even if the buffer is read-only.
223 See also the descriptions of the variables
224 `ethio-use-colon-for-colon' and `ethio-use-three-dot-question'."
227 (if (and buffer-read-only
229 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
232 (let ((ethio-primary-language ethio-primary-language)
233 (ethio-secondary-language ethio-secondary-language)
234 ;; The above two variables may be changed temporarily by tilde
235 ;; escapes during conversion. We bind them to the variables
236 ;; of the same names so that the original values are restored
237 ;; when this function exits.
238 (buffer-read-only nil)
239 (lang (if secondary ethio-secondary-language ethio-primary-language))
242 (ethio-use-colon-for-colon ethio-use-colon-for-colon)
243 (ethio-use-three-dot-question ethio-use-three-dot-question)
246 (narrow-to-region begin end)
247 (goto-char (point-min))
252 (ethio-prefer-amharic t)
253 (ethio-sera-to-fidel-region-ethio 'amharic))
255 (ethio-prefer-amharic nil)
256 (ethio-sera-to-fidel-region-ethio 'tigrigna))
258 (ethio-sera-to-fidel-region-noethio))))
261 (if (eq lang ethio-primary-language)
262 ethio-secondary-language
263 ethio-primary-language)
266 ;; Restore user's preference.
267 (ethio-adjust-robin))
269 (defun ethio-sera-to-fidel-region-noethio ()
270 "Return next language as symbol: amharic, tigrigna, toggle or nil."
274 ;; No more "\", i.e. nothing to do.
275 ((not (search-forward "\\" nil 0))
278 ;; Hereafter point is put after a "\".
279 ;; First delete that "\", then check the following chars.
282 ((progn (delete-char -1) (setq lflag (ethio-process-language-flag)))
285 ;; "\\" : leave the second "\" and continue in the same language.
286 ((= (following-char) ?\\)
290 ;; "\ " : delete the following " " and toggle the language.
291 ((= (following-char) 32)
295 ;; A "\" but not a special sequence: simply toggle the language.
299 (defun ethio-sera-to-fidel-region-ethio (lang)
300 "Return next language as symbol: amharic, tigrigna, toggle or nil."
304 (if (re-search-forward "\\(`[1-9][0-9]*\\)\\|[\\<&]" nil t)
307 (robin-convert-region (point-min) (point-max) "ethiopic-sera")
308 (goto-char (point-max)))
312 ((= (following-char) ?`)
314 (ethio-process-digits)
318 (if (or (and (boundp 'ethio-sera-being-called-by-w3)
319 ethio-sera-being-called-by-w3)
320 (and (boundp 'sera-being-called-by-w3)
321 sera-being-called-by-w3))
322 (search-forward (if (= (following-char) ?<) ">" ";") nil 0)
329 ;; Now we must be looking at a "\".
330 ;; First delete that "\", then check the following chars.
332 ((progn (delete-char 1) (= (following-char) 32))
336 ((looking-at "[,.;:'`?\\]+")
337 (goto-char (match-end 0))
340 ((/= (following-char) ?~)
343 ;; Now we must be looking at a "~".
345 ((setq lflag (ethio-process-language-flag))
348 ;; Delete the following "~" and check the following chars.
350 ((progn (delete-char 1) (looking-at "! ?"))
352 (if (re-search-forward "\\\\~! ?" nil 0)
358 (ethio-use-colon-for-colon t)
363 (ethio-use-colon-for-colon nil)
368 (ethio-use-three-dot-question t)
371 ((looking-at "\\? ?")
373 (ethio-use-three-dot-question nil)
376 ;; Unknown tilde escape. Recover the deleted chars.
381 (defun ethio-process-language-flag nil
382 "Process a language flag of the form \"~lang\" or \"~lang1~lang2\".
384 If looking at \"~lang1~lang2\", set `ethio-primary-language' and
385 `ethio-secondary-language' based on \"lang1\" and \"lang2\".
386 Then delete the language flag \"~lang1~lang2\" from the buffer.
387 Return value is the new primary language.
389 If looking at \"~lang\", delete that language flag \"~lang\" from
390 the buffer and return that language. In this case
391 `ethio-primary-language' and `ethio-secondary-language' are left
394 If an unsupported language flag is found, just return nil without
402 "~\\([a-z][a-z][a-z]?\\)~\\([a-z][a-z][a-z]?\\)[ \t\n\\]")
403 (setq lang1 (ethio-flag-to-language (match-string 1)))
404 (setq lang2 (ethio-flag-to-language (match-string 2))))
405 (setq ethio-primary-language lang1
406 ethio-secondary-language lang2)
407 (delete-region (point) (match-end 2))
408 (if (= (following-char) 32)
410 ethio-primary-language)
413 ((and (looking-at "~\\([a-z][a-z][a-z]?\\)[ \t\n\\]")
414 (setq lang1 (ethio-flag-to-language (match-string 1))))
415 (delete-region (point) (match-end 1))
416 (if (= (following-char) 32)
(defun ethio-flag-to-language (flag)
  "Return the language symbol named by the SERA language flag FLAG.
FLAG is compared with `string='.  \"en\" or \"eng\" gives `english',
\"ti\" or \"tir\" gives `tigrigna', and \"am\" or \"amh\" gives `amharic'.
Return nil for any other FLAG, which callers treat as an unsupported
language flag."
  (cond
   ((or (string= flag "en") (string= flag "eng")) 'english)
   ((or (string= flag "ti") (string= flag "tir")) 'tigrigna)
   ((or (string= flag "am") (string= flag "amh")) 'amharic)
   (t nil)))
431 (defun ethio-process-digits nil
432 "Convert Arabic digits to Ethiopic digits."
434 (while (and (>= (setq ch (following-char)) ?1)
438 ;; count up following zeros
440 (while (= (following-char) ?0)
446 ;; first digit is 10, 20, ..., or 90
448 (insert (aref [?፲ ?፳ ?፴ ?፵ ?፶ ?፷ ?፸ ?፹ ?፺] (- ch ?1)))
451 ;; first digit is 2, 3, ..., or 9
453 (insert (aref [?፪ ?፫ ?፬ ?፭ ?፮ ?፯ ?፰ ?፱] (- ch ?2))))
464 (insert-char ?፼ (/ z 4)))))
467 (defun ethio-sera-to-fidel-marker (&optional force)
468 "Convert the regions surrounded by \"<sera>\" and \"</sera>\" from SERA to FIDEL.
469 Assume that each region begins with `ethio-primary-language'.
470 The markers \"<sera>\" and \"</sera>\" themselves are not deleted."
472 (if (and buffer-read-only
474 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
477 (goto-char (point-min))
478 (while (search-forward "<sera>" nil t)
479 (ethio-sera-to-fidel-region
481 (if (search-forward "</sera>" nil t)
491 (defun ethio-language-to-flag (lang)
493 ((eq lang 'english) "eng")
494 ((eq lang 'tigrigna) "tir")
495 ((eq lang 'amharic) "amh")
499 (defun ethio-fidel-to-sera-buffer (&optional secondary force)
500 "Replace all the FIDEL characters in the current buffer to the SERA format.
501 The variable `ethio-primary-language' specifies the primary
502 language and `ethio-secondary-language' specifies the secondary.
504 If the 1st optional argument SECONDARY is non-nil, try to convert the
505 region so that it begins with the secondary language; otherwise with the
508 If the 2nd optional argument FORCE is non-nil, convert even if the
511 See also the descriptions of the variables
512 `ethio-use-colon-for-colon', `ethio-use-three-dot-question',
513 `ethio-quote-vowel-always' and `ethio-numeric-reduction'."
516 (ethio-fidel-to-sera-region (point-min) (point-max) secondary force))
519 (defun ethio-fidel-to-sera-region (begin end &optional secondary force)
520 "Replace all the FIDEL characters in the region to the SERA format.
522 The variable `ethio-primary-language' specifies the primary
523 language and `ethio-secondary-language' specifies the secondary.
525 If the 3rd argument SECONDARY is given and non-nil, convert
526 the region so that it begins with the secondary language; otherwise with
527 the primary language.
529 If the 4th argument FORCE is given and non-nil, convert even if the
532 See also the descriptions of the variables
533 `ethio-use-colon-for-colon', `ethio-use-three-dot-question',
534 `ethio-quote-vowel-always' and `ethio-numeric-reduction'."
537 (if (and buffer-read-only
539 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
543 (narrow-to-region begin end)
545 (let ((buffer-read-only nil)
547 ethio-secondary-language
548 ethio-primary-language))
549 (flag (if (ethio-prefer-amharic-p) "\\~amh " "\\~tir "))
552 (goto-char (point-min))
554 (unless (eq mode 'english)
555 (setq mode 'ethiopic))
556 (if (and (eq mode 'english) (looking-at "\\ce"))
557 (setq mode 'ethiopic))
558 (if (and (eq mode 'ethiopic) (looking-at "\\Ce"))
559 (setq mode 'english))
560 (insert (if (eq mode 'english) "\\~eng " flag))
564 (if (eq mode 'english)
566 (if (re-search-forward "\\(\\ce\\|\\\\\\)" nil 0)
569 ((eq (following-char) ?\\)
574 (setq mode 'ethiopic))))
576 ;; If we reach here, mode is ethiopic.
578 (if (re-search-forward "[a-z,.;:'`?\\<&]" nil 0)
581 (narrow-to-region p (point))
582 (robin-invert-region (point-min) (point-max) "ethiopic-sera")
584 ;; ethio-quote-vowel-always
585 (goto-char (point-min))
586 (while (re-search-forward "'[eauio]" nil t)
589 (setq ch (preceding-char))
590 (if (or (and (>= ch ?a) (<= ch ?z))
591 (and (>= ch ?A) (<= ch ?Z)))
592 (if (and (not ethio-quote-vowel-always)
593 (memq ch '(?e ?a ?u ?i ?o ?E ?A ?I)))
597 ;; ethio-W-sixth-always
598 (unless ethio-W-sixth-always
599 (goto-char (point-min))
600 (while (search-forward "W'" nil t)
604 ;; ethio-numeric-reduction
605 (when (> ethio-numeric-reduction 0)
606 (goto-char (point-min))
607 (while (re-search-forward "\\([0-9]\\)`\\([0-9]\\)" nil t)
608 (replace-match "\\1\\2")
610 (when (= ethio-numeric-reduction 2)
611 (goto-char (point-min))
612 (while (re-search-forward "\\([0-9]\\)1\\(0+\\)" nil t)
613 (replace-match "\\1\\2")))
615 (goto-char (point-max)))
618 ((looking-at "[a-z]")
620 (setq mode 'english))
621 ((looking-at "[,.;:'`\\]+")
623 (goto-char (1+ (match-end 0))))
624 ((= (following-char) ??)
625 (if ethio-use-three-dot-question
629 (if (or (and (boundp 'ethio-sera-being-called-by-w3)
630 ethio-sera-being-called-by-w3)
631 (and (boundp 'sera-being-called-by-w3)
632 sera-being-called-by-w3))
633 (search-forward (if (= (following-char) ?<) ">" "&") nil 0)
634 (forward-char 1)))))))))
637 (defun ethio-fidel-to-sera-marker (&optional force)
638 "Convert the regions surrounded by \"<sera>\" and \"</sera>\" from FIDEL to SERA.
639 The markers \"<sera>\" and \"</sera>\" themselves are not deleted."
642 (if (and buffer-read-only
644 (not (y-or-n-p "Buffer is read-only. Force to convert? ")))
647 (goto-char (point-min))
648 (while (search-forward "<sera>" nil t)
649 (ethio-fidel-to-sera-region
651 (if (search-forward "</sera>" nil t)
658 ;; vowel modification
662 (defun ethio-modify-vowel nil
663 "Modify the vowel of the FIDEL that is under the cursor."
666 (let ((consonant (ethio-get-consonant (following-char)))
669 (error "") ; not an Ethiopic char
670 (setq vowel (read-char "Modify vowel to: "))
672 (if (and (string= consonant "'") (= vowel ?W))
675 (narrow-to-region (point) (point))
676 (insert consonant vowel)
677 (robin-convert-region (point-min) (point-max) "ethiopic-sera"))))))
679 (defun ethio-get-consonant (ch)
680 "Return the consonant part of CH's SERA spelling in ethiopic-sera."
681 (let ((sera (get-char-code-property ch 'ethiopic-sera)))
684 ((= ch ?ኧ) "'") ; Only this has two vowel letters.
687 (if (memq (preceding-char) '(?e ?u ?i ?a ?o ?E ?I ?A ?'))
689 (buffer-substring (point-min) (point-max)))))))
696 (defun ethio-replace-space (ch begin end)
697 "Replace ASCII spaces with Ethiopic word separators in the region.
699 In the specified region, replace word separators surrounded by two
700 Ethiopic characters, depending on the first argument CH, which should
703 If CH = 1, word separator will be replaced with an ASCII space.
704 If CH = 2, with two ASCII spaces.
705 If CH = 3, with the Ethiopic colon-like word separator.
707 The 2nd and 3rd arguments BEGIN and END specify the region."
709 (interactive "*cReplace spaces to: 1 (sg col), 2 (dbl col), 3 (Ethiopic)\nr")
710 (if (not (memq ch '(?1 ?2 ?3)))
714 (narrow-to-region begin end)
718 ;; an Ethiopic word separator --> an ASCII space
719 (goto-char (point-min))
720 (while (search-forward "፡" nil t)
723 ;; two ASCII spaces between Ethiopic characters --> an ASCII space
724 (goto-char (point-min))
725 (while (re-search-forward "\\(\\ce\\) \\(\\ce\\)" nil t)
726 (replace-match "\\1 \\2")
730 ;; An Ethiopic word separator --> two ASCII spaces
731 (goto-char (point-min))
732 (while (search-forward "፡" nil t)
735 ;; An ASCII space between Ethiopic characters --> two ASCII spaces
736 (goto-char (point-min))
737 (while (re-search-forward "\\(\\ce\\) \\(\\ce\\)" nil t)
738 (replace-match "\\1 \\2")
742 ;; One or two ASCII spaces between Ethiopic characters
743 ;; --> An Ethiopic word separator
744 (goto-char (point-min))
745 (while (re-search-forward "\\(\\ce\\) ?\\(\\ce\\)" nil t)
746 (replace-match "\\1፡\\2")
749 ;; Three or more ASCII spaces between Ethiopic characters
750 ;; --> An Ethiopic word separator + (N - 2) ASCII spaces
751 (goto-char (point-min))
752 (while (re-search-forward "\\(\\ce\\) \\( +\\ce\\)" nil t)
753 (replace-match "\\1፡\\2")
754 (forward-char -1)))))))
760 ;; This function is deprecated.
762 (defun ethio-input-special-character (arg)
763 "This function is deprecated."
764 (interactive "*cInput number: 1. 2. 3. 4. 5.")
784 (defun ethio-fidel-to-tex-buffer nil
785 "Convert each fidel characters in the current buffer into a fidel-tex command."
787 (let ((buffer-read-only nil)
790 ;; Special treatment for geminated characters.
791 ;; Geminated characters la", etc. change into \geminateG{\laG}, etc.
792 (goto-char (point-min))
793 (while (search-forward "" nil t)
794 (setq comp (find-composition (match-beginning 0)))
796 (replace-match "\\\\geminateG{}" t)
797 (decompose-region (car comp) (cadr comp))
800 (insert "\\geminateG{")
804 ;; Special Ethiopic punctuations.
805 (goto-char (point-min))
806 (while (re-search-forward "\\ce[»\\.\\?]\\|«\\ce" nil t)
808 ((= (setq ch (preceding-char)) ?\»)
810 (insert "\\rquoteG"))
823 ;; Ethiopic characters to TeX macros
824 (robin-invert-region (point-min) (point-max) "ethiopic-tex")
826 (goto-char (point-min))
827 (set-buffer-modified-p nil)))
830 (defun ethio-tex-to-fidel-buffer nil
831 "Convert fidel-tex commands in the current buffer into fidel chars."
833 (let ((buffer-read-only nil)
836 ;; TeX macros to Ethiopic characters
837 (robin-convert-region (point-min) (point-max) "ethiopic-tex")
839 ;; compose geminated characters
840 (goto-char (point-min))
841 (while (re-search-forward "\\\\geminateG{\\(\\ce?\\)}" nil t)
842 (replace-match "\\1"))
844 ;; remove redundant braces, if any
845 (goto-char (point-min))
846 (while (re-search-forward "{\\(\\ce\\)}" nil t)
847 (replace-match "\\1"))
849 (goto-char (point-min))
850 (set-buffer-modified-p nil)))
(defun ethio-fidel-to-java-buffer nil
  "Convert Ethiopic characters into the Java escape sequences.

Each escape sequence is of the form \\uXXXX, where XXXX is the
character's codepoint (in hex) in Unicode.

If `ethio-java-save-lowercase' is non-nil, use [0-9a-f].
Otherwise, [0-9A-F].

The whole buffer is scanned from its beginning and point is left
after the last replacement."
  (let ((template (if ethio-java-save-lowercase "\\u%04x" "\\u%04X"))
        ucode)
    (goto-char (point-min))
    (while (re-search-forward "[ሀ-፼]" nil t)
      ;; The character just matched is the one before point.
      (setq ucode (preceding-char))
      (delete-char -1)
      (insert (format template ucode)))))
(defun ethio-java-to-fidel-buffer nil
  "Convert the Java escape sequences into corresponding Ethiopic characters.
Only escapes whose code lies between #x1200 and #x137f are replaced;
any other escape is left as it is.  The search ignores case.
The whole buffer is scanned from its beginning."
  (let ((case-fold-search t)
        ucode)
    (goto-char (point-min))
    (while (re-search-forward "\\\\u\\([0-9a-f][0-9a-f][0-9a-f][0-9a-f]\\)" nil t)
      ;; Parse the four hex digits directly instead of handing buffer
      ;; text to the Lisp reader.
      (setq ucode (string-to-number (match-string 1) 16))
      (when (and (>= ucode #x1200) (<= ucode #x137f))
        ;; FIXEDCASE and LITERAL: insert the character exactly as is.
        (replace-match (char-to-string ucode) t t)))))
(defun ethio-find-file nil
  "Transliterate file content into Ethiopic depending on filename suffix.
The suffix of the visited file name selects the conversion:
  .sera  `ethio-sera-to-fidel-buffer', forced
  .html  `ethio-sera-to-fidel-marker', forced, with
         `ethio-sera-being-called-by-w3' bound to t; afterwards the
         entities &laquo; and &raquo; become the characters « and »
  .tex   `ethio-tex-to-fidel-buffer'
  .java  `ethio-java-to-fidel-buffer'
After a conversion the buffer is marked unmodified.  Nothing is done
for other suffixes or when the buffer visits no file."
  (let ((file (buffer-file-name)))
    (cond

     ;; No file name, nothing to decide on.
     ((null file)
      nil)

     ((string-match "\\.sera$" file)
      (save-excursion
        (ethio-sera-to-fidel-buffer nil 'force)
        (set-buffer-modified-p nil)))

     ((string-match "\\.html$" file)
      (let ((ethio-sera-being-called-by-w3 t))
        (save-excursion
          (ethio-sera-to-fidel-marker 'force)
          (goto-char (point-min))
          (while (re-search-forward "&[lr]aquo;" nil t)
            ;; The character after the "&" tells left from right.
            (if (= (char-after (1+ (match-beginning 0))) ?l)
                (replace-match "«")
              (replace-match "»")))
          (set-buffer-modified-p nil))))

     ((string-match "\\.tex$" file)
      (save-excursion
        (ethio-tex-to-fidel-buffer)
        (set-buffer-modified-p nil)))

     ((string-match "\\.java$" file)
      (save-excursion
        (ethio-java-to-fidel-buffer)
        (set-buffer-modified-p nil)))

     (t nil))))
(defun ethio-write-file nil
  "Transliterate Ethiopic characters in ASCII depending on the file extension.
The suffix of the visited file name selects the conversion:
  .sera  `ethio-fidel-to-sera-buffer', forced, then
         `ethio-record-user-preference' at the start of the buffer
  .html  `ethio-fidel-to-sera-marker', forced, with
         `ethio-sera-being-called-by-w3' bound to t; the characters
         « and » become the entities &laquo; and &raquo;, and
         `ethio-record-user-preference' is called after the first
         \"<sera>\" marker, if there is one
  .tex   `ethio-fidel-to-tex-buffer'
  .java  `ethio-fidel-to-java-buffer'
After a conversion the buffer is marked unmodified.  Nothing is done
for other suffixes or when the buffer visits no file.

Always return nil, so that when this runs from `write-file-functions'
the file is still written in the usual way."
  (let ((file (buffer-file-name)))
    (cond

     ;; No file name, nothing to decide on.
     ((null file)
      nil)

     ((string-match "\\.sera$" file)
      (save-excursion
        (ethio-fidel-to-sera-buffer nil 'force)
        (goto-char (point-min))
        (ethio-record-user-preference)
        (set-buffer-modified-p nil)))

     ((string-match "\\.html$" file)
      (save-excursion
        (let ((ethio-sera-being-called-by-w3 t))
          (ethio-fidel-to-sera-marker 'force)
          (goto-char (point-min))
          ;; Write guillemets as entities; `ethio-find-file' turns the
          ;; entities back into characters when the file is visited.
          (while (re-search-forward "[«»]" nil t)
            (replace-match (if (= (preceding-char) ?«) "&laquo;" "&raquo;")
                           t t))
          (goto-char (point-min))
          (if (search-forward "<sera>" nil t)
              (ethio-record-user-preference))
          (set-buffer-modified-p nil))))

     ((string-match "\\.tex$" file)
      (save-excursion
        (ethio-fidel-to-tex-buffer)
        (set-buffer-modified-p nil)))

     ((string-match "\\.java$" file)
      (save-excursion
        (ethio-fidel-to-java-buffer)
        (set-buffer-modified-p nil)))))

  ;; A non-nil value would tell `write-file-functions' that the file
  ;; has already been written.
  nil)
(defun ethio-record-user-preference nil
  "Insert at point the tilde escapes recording the punctuation options.
The first escape reflects `ethio-use-colon-for-colon', the second
`ethio-use-three-dot-question'."
  (let ((colon-escape (if ethio-use-colon-for-colon "\\~-: " "\\~`: "))
        (question-escape (if ethio-use-three-dot-question "\\~`| " "\\~? ")))
    (insert colon-escape question-escape)))
966 ;; Ethiopic word separator vs. ASCII space
;; When this is non-nil, `ethio-insert-space' inserts ASCII spaces.
;; `ethio-toggle-space' flips the value.
969 (defvar ethio-prefer-ascii-space t)
;; Every buffer gets its own value as soon as the variable is set there.
970 (make-variable-buffer-local 'ethio-prefer-ascii-space)
(defun ethio-toggle-space nil
  "Toggle ASCII space and Ethiopic separator for keyboard input.
Flip `ethio-prefer-ascii-space' and return its new value."
  ;; This function is installed on a key, so it has to be a command.
  (interactive)
  (setq ethio-prefer-ascii-space
        (not ethio-prefer-ascii-space)))
978 (defun ethio-insert-space (arg)
979 "Insert ASCII spaces or Ethiopic word separators depending on context.
981 If the current word separator (indicated in mode-line) is the ASCII space,
982 insert an ASCII space. With ARG, insert that many ASCII spaces.
984 If the current word separator is the colon-like Ethiopic word
985 separator and the point is preceded by `an Ethiopic punctuation mark
986 followed by zero or more ASCII spaces', then insert also an ASCII
987 space. With ARG, insert that many ASCII spaces.
989 Otherwise, insert a colon-like Ethiopic word separator. With ARG, insert that
990 many Ethiopic word separators."
994 (ethio-prefer-ascii-space
995 (insert-char 32 arg))
997 (skip-chars-backward " ")
998 (memq (preceding-char)
999 '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨ ? ? ? ? ?)))
1000 (insert-char 32 arg))
1002 (insert-char ?፡ arg))))
(defun ethio-insert-ethio-space (arg)
  "Insert the Ethiopic word delimiter (the colon-like character).
With ARG, insert that many delimiters."
  ;; This function is installed on a key, so it has to be a command.
  ;; \"*\" refuses to run in a read-only buffer; \"p\" supplies the
  ;; numeric prefix argument, which is 1 when no prefix is given.
  (interactive "*p")
  (insert-char ?፡ arg))
1016 (defun ethio-composition-function (pos &optional string)
1018 (let ((pattern "\\ce"))
1021 (eq (string-match pattern string pos) pos))
1022 (prog1 (match-end 0)
1023 (compose-string string pos (match-end 0))))
1024 (if (>= pos (point-min))
1027 (if (looking-at pattern)
1028 (prog1 (match-end 0)
1029 (compose-region pos (match-end 0)))))))))
1031 ;; This function is not used any more.
1032 (defun ethio-gemination nil
1033 "Compose the character before the point with the Ethiopic gemination mark.
1034 If the character is already composed, decompose it and remove the gemination
1037 (let ((ch (preceding-char)))
1039 ((and (= ch ?) (find-composition (1- (point))))
1040 (decompose-region (- (point) 2) (point)))
1041 ((and (>= ch #x1200) (<= ch #x137f))
1043 (compose-region (- (point) 2) (point)))
1051 (robin-define-package "ethiopic-sera"
1052 "SERA transliteration system for Ethiopic."
1058 ("hE" ?ሄ) ("hee" "ሄ")
1062 ("le" ?ለ) ("Le" "ለ")
1063 ("lu" ?ሉ) ("Lu" "ሉ")
1064 ("li" ?ሊ) ("Li" "ሊ")
1065 ("la" ?ላ) ("La" "ላ")
1066 ("lE" ?ሌ) ("LE" "ሌ") ("lee" "ሌ") ("Lee" "ሌ")
1068 ("lo" ?ሎ) ("Lo" "ሎ")
1069 ("lWa" ?ሏ) ("LWa" "ሏ") ("lW" "ሏ") ("LW" "ሏ")
1075 ("HE" ?ሔ) ("Hee" "ሔ")
1078 ("HWa" ?ሗ) ("HW" "ሗ")
1080 ("me" ?መ) ("Me" "መ")
1081 ("mu" ?ሙ) ("Mu" "ሙ")
1082 ("mi" ?ሚ) ("Mi" "ሚ")
1083 ("ma" ?ማ) ("Ma" "ማ")
1084 ("mE" ?ሜ) ("ME" "ሜ") ("mee" "ሜ") ("Mee" "ሜ")
1086 ("mo" ?ሞ) ("Mo" "ሞ")
1087 ("mWa" ?ሟ) ("MWa" "ሟ") ("mW" "ሟ") ("MW" "ሟ")
1089 ("`se" ?ሠ) ("sse" "ሠ") ("s2e" "ሠ")
1090 ("`su" ?ሡ) ("ssu" "ሡ") ("s2u" "ሡ")
1091 ("`si" ?ሢ) ("ssi" "ሢ") ("s2i" "ሢ")
1092 ("`sa" ?ሣ) ("ssa" "ሣ") ("s2a" "ሣ")
1093 ("`sE" ?ሤ) ("ssE" "ሤ") ("s2E" "ሤ")
1094 ("`see" "ሤ") ("ssee" "ሤ") ("s2ee" "ሤ")
1095 ("`s" ?ሥ) ("ss" "ሥ") ("s2" "ሥ")
1096 ("`so" ?ሦ) ("sso" "ሦ") ("s2o" "ሦ")
1097 ("`sWa" ?ሧ) ("ssWa" "ሧ") ("s2Wa" "ሧ")
1098 ("`sW" "ሧ") ("ssW" "ሧ") ("s2W" "ሧ")
1100 ("re" ?ረ) ("Re" "ረ")
1101 ("ru" ?ሩ) ("Ru" "ሩ")
1102 ("ri" ?ሪ) ("Ri" "ሪ")
1103 ("ra" ?ራ) ("Ra" "ራ")
1104 ("rE" ?ሬ) ("RE" "ሬ") ("ree" "ሬ") ("Ree" "ሬ")
1106 ("ro" ?ሮ) ("Ro" "ሮ")
1107 ("rWa" ?ሯ) ("RWa" "ሯ") ("rW" "ሯ") ("RW" "ሯ")
1113 ("sE" ?ሴ) ("see" "ሴ")
1116 ("sWa" ?ሷ) ("sW" "ሷ")
1122 ("xE" ?ሼ) ("xee" "ሼ")
1125 ("xWa" ?ሿ) ("xW" "ሿ")
1131 ("qE" ?ቄ) ("qee" "ቄ")
1136 ("qWa" ?ቋ) ("qW" "ቋ")
1137 ("qWE" ?ቌ) ("qWee" "ቌ")
1138 ("qW'" ?ቍ) ("qWu" "ቍ")
1144 ("QE" ?ቔ) ("Qee" "ቔ")
1149 ("QWa" ?ቛ) ("QW" "ቛ")
1150 ("QWE" ?ቜ) ("QWee" "ቜ")
1151 ("QW'" ?ቝ) ("QWu" "ቝ")
1153 ("be" ?በ) ("Be" "በ")
1154 ("bu" ?ቡ) ("Bu" "ቡ")
1155 ("bi" ?ቢ) ("Bi" "ቢ")
1156 ("ba" ?ባ) ("Ba" "ባ")
1157 ("bE" ?ቤ) ("BE" "ቤ") ("bee" "ቤ") ("Bee" "ቤ")
1159 ("bo" ?ቦ) ("Bo" "ቦ")
1160 ("bWa" ?ቧ) ("BWa" "ቧ") ("bW" "ቧ") ("BW" "ቧ")
1162 ("ve" ?ቨ) ("Ve" "ቨ")
1163 ("vu" ?ቩ) ("Vu" "ቩ")
1164 ("vi" ?ቪ) ("Vi" "ቪ")
1165 ("va" ?ቫ) ("Va" "ቫ")
1166 ("vE" ?ቬ) ("VE" "ቬ") ("vee" "ቬ") ("Vee" "ቬ")
1168 ("vo" ?ቮ) ("Vo" "ቮ")
1169 ("vWa" ?ቯ) ("VWa" "ቯ") ("vW" "ቯ") ("VW" "ቯ")
1175 ("tE" ?ቴ) ("tee" "ቴ")
1178 ("tWa" ?ቷ) ("tW" "ቷ")
1184 ("cE" ?ቼ) ("cee" "ቼ")
1187 ("cWa" ?ቿ) ("cW" "ቿ")
1189 ("`he" ?ኀ) ("hhe" "ኀ") ("h2e" "ኀ")
1190 ("`hu" ?ኁ) ("hhu" "ኁ") ("h2u" "ኁ")
1191 ("`hi" ?ኂ) ("hhi" "ኂ") ("h2i" "ኂ")
1192 ("`ha" ?ኃ) ("hha" "ኃ") ("h2a" "ኃ")
1193 ("`hE" ?ኄ) ("hhE" "ኄ") ("h2E" "ኄ")
1194 ("`hee" "ኄ") ("hhee" "ኄ") ("h2ee" "ኄ")
1195 ("`h" ?ኅ) ("hh" "ኅ") ("h2" "ኅ")
1196 ("`ho" ?ኆ) ("hho" "ኆ") ("h2o" "ኆ")
1197 ("`hWe" ?ኈ) ("hhWe" "ኈ") ("h2We" "ኈ") ("hWe" "ኈ")
1198 ("`hWi" ?ኊ) ("hhWi" "ኊ") ("h2Wi" "ኊ") ("hWi" "ኊ")
1199 ("`hWa" ?ኋ) ("hhWa" "ኋ") ("h2Wa" "ኋ") ("hWa" "ኋ")
1200 ("`hW" "ኋ") ("hhW" "ኋ") ("h2W" "ኋ")
1201 ("`hWE" ?ኌ) ("hhWE" "ኌ") ("h2WE" "ኌ") ("hWE" "ኌ")
1202 ("`hWee" "ኌ") ("hhWee" "ኌ") ("h2Wee" "ኌ") ("hWee" "ኌ")
1203 ("`hW'" ?ኍ) ("hhW'" "ኍ") ("h2W'" "ኍ") ("hW'" "ኍ")
1204 ("`hWu" "ኍ") ("hhWu" "ኍ") ("h2Wu" "ኍ") ("hWu" "ኍ")
1210 ("nE" ?ኔ) ("nee" "ኔ")
1213 ("nWa" ?ኗ) ("nW" "ኗ")
1219 ("NE" ?ኜ) ("Nee" "ኜ")
1222 ("NWa" ?ኟ) ("NW" "ኟ")
1225 ("'u" ?ኡ) ("u" "ኡ") ("'U" "ኡ") ("U" "ኡ")
1229 ("'I" ?እ) ("I" "እ") ("'e" "እ") ("e" "እ")
1230 ("'o" ?ኦ) ("o" "ኦ") ("'O" "ኦ") ("O" "ኦ")
1231 ("'ea" ?ኧ) ("ea" "ኧ")
1237 ("kE" ?ኬ) ("kee" "ኬ")
1242 ("kWa" ?ኳ) ("kW" "ኳ")
1243 ("kWE" ?ኴ) ("kWee" "ኴ")
1244 ("kW'" ?ኵ) ("kWu" "ኵ")
1250 ("KE" ?ኼ) ("Kee" "ኼ")
1255 ("KWa" ?ዃ) ("KW" "ዃ")
1256 ("KWE" ?ዄ) ("KWee" "ዄ")
1257 ("KW'" ?ዅ) ("KWu" "ዅ")
1263 ("wE" ?ዌ) ("wee" "ዌ")
1267 ("`e" ?ዐ) ("ae" "ዐ") ("aaa" "ዐ") ("e2" "ዐ")
1268 ("`u" ?ዑ) ("uu" "ዑ") ("u2" "ዑ") ("`U" "ዑ") ("UU" "ዑ") ("U2" "ዑ")
1269 ("`i" ?ዒ) ("ii" "ዒ") ("i2" "ዒ")
1270 ("`a" ?ዓ) ("aa" "ዓ") ("a2" "ዓ") ("`A" "ዓ") ("AA" "ዓ") ("A2" "ዓ")
1271 ("`E" ?ዔ) ("EE" "ዔ") ("E2" "ዔ")
1272 ("`I" ?ዕ) ("II" "ዕ") ("I2" "ዕ") ("ee" "ዕ")
1273 ("`o" ?ዖ) ("oo" "ዖ") ("o2" "ዖ") ("`O" "ዖ") ("OO" "ዖ") ("O2" "ዖ")
1279 ("zE" ?ዜ) ("zee" "ዜ")
1282 ("zWa" ?ዟ) ("zW" "ዟ")
1288 ("ZE" ?ዤ) ("Zee" "ዤ")
1291 ("ZWa" ?ዧ) ("ZW" "ዧ")
1293 ("ye" ?የ) ("Ye" "የ")
1294 ("yu" ?ዩ) ("Yu" "ዩ")
1295 ("yi" ?ዪ) ("Yi" "ዪ")
1296 ("ya" ?ያ) ("Ya" "ያ")
1297 ("yE" ?ዬ) ("YE" "ዬ") ("yee" "ዬ") ("Yee" "ዬ")
1299 ("yo" ?ዮ) ("Yo" "ዮ")
1305 ("dE" ?ዴ) ("dee" "ዴ")
1308 ("dWa" ?ዷ) ("dW" "ዷ")
1314 ("DE" ?ዼ) ("Dee" "ዼ")
1317 ("DWa" ?ዿ) ("DW" "ዿ")
1319 ("je" ?ጀ) ("Je" "ጀ")
1320 ("ju" ?ጁ) ("Ju" "ጁ")
1321 ("ji" ?ጂ) ("Ji" "ጂ")
1322 ("ja" ?ጃ) ("Ja" "ጃ")
1323 ("jE" ?ጄ) ("JE" "ጄ") ("jee" "ጄ") ("Jee" "ጄ")
1325 ("jo" ?ጆ) ("Jo" "ጆ")
1326 ("jWa" ?ጇ) ("jW" "ጇ") ("JWa" "ጇ") ("JW" "ጇ")
1332 ("gE" ?ጌ) ("gee" "ጌ")
1337 ("gWa" ?ጓ) ("gW" "ጓ")
1338 ("gWE" ?ጔ) ("gWee" "ጔ")
1339 ("gW'" ?ጕ) ("gWu" "ጕ")
1345 ("GE" ?ጜ) ("Gee" "ጜ")
1353 ("TE" ?ጤ) ("Tee" "ጤ")
1356 ("TWa" ?ጧ) ("TW" "ጧ")
1362 ("CE" ?ጬ) ("Cee" "ጬ")
1365 ("CWa" ?ጯ) ("CW" "ጯ")
1371 ("PE" ?ጴ) ("Pee" "ጴ")
1374 ("PWa" ?ጷ) ("PW" "ጷ")
1380 ("SE" ?ጼ) ("See" "ጼ")
1383 ("SWa" ?ጿ) ("`SWa" "ጿ") ("SSWa" "ጿ") ("S2Wa" "ጿ")
1384 ("SW" "ጿ") ("`SW" "ጿ") ("SSW" "ጿ") ("S2W" "ጿ")
1386 ("`Se" ?ፀ) ("SSe" "ፀ") ("S2e" "ፀ")
1387 ("`Su" ?ፁ) ("SSu" "ፁ") ("S2u" "ፁ")
1388 ("`Si" ?ፂ) ("SSi" "ፂ") ("S2i" "ፂ")
1389 ("`Sa" ?ፃ) ("SSa" "ፃ") ("S2a" "ፃ")
1390 ("`SE" ?ፄ) ("SSE" "ፄ") ("S2E" "ፄ")
1391 ("`See" "ፄ") ("SSee" "ፄ") ("S2ee" "ፄ")
1392 ("`S" ?ፅ) ("SS" "ፅ") ("S2" "ፅ")
1393 ("`So" ?ፆ) ("SSo" "ፆ") ("S2o" "ፆ")
1395 ("fe" ?ፈ) ("Fe" "ፈ")
1396 ("fu" ?ፉ) ("Fu" "ፉ")
1397 ("fi" ?ፊ) ("Fi" "ፊ")
1398 ("fa" ?ፋ) ("Fa" "ፋ")
1399 ("fE" ?ፌ) ("FE" "ፌ") ("fee" "ፌ") ("Fee" "ፌ")
1401 ("fo" ?ፎ) ("Fo" "ፎ")
1402 ("fWa" ?ፏ) ("FWa" "ፏ") ("fW" "ፏ") ("FW" "ፏ")
1408 ("pE" ?ፔ) ("pee" "ፔ")
1411 ("pWa" ?ፗ) ("pW" "ፗ")
1413 ("rYa" ?ፘ) ("RYa" "ፘ") ("rY" "ፘ") ("RY" "ፘ")
1414 ("mYa" ?ፙ) ("MYa" "ፙ") ("mY" "ፙ") ("MY" "ፙ")
1415 ("fYa" ?ፚ) ("FYa" "ፚ") ("fY" "ፚ") ("FY" "ፚ")
1417 (" : " ?፡) (":" "፡") ("`:" "፡")
1423 ("`?" ?፧) ("??" "፧")
1424 (":|:" ?፨) ("**" "፨")
1426 ;; Explicit syllable delimiter
1429 ;; Quick ASCII input
1494 (register-input-method
1495 "ethiopic-sera" "Ethiopic"
1496 'robin-use-package "et" "An input method for Ethiopic.")
1498 (robin-define-package "ethiopic-tex"
1499 "TeX transliteration system for Ethiopic."
1501 ("\\heG" ?ሀ) ; U+1200 ..
1518 ("\\HeG" ?ሐ) ; U+1210 ..
1535 ("\\sseG" ?ሠ) ; U+1220 ..
1552 ("\\seG" ?ሰ) ; U+1230 ..
1569 ("\\qeG" ?ቀ) ; U+1240 ..
1586 ("\\QeG" ?ቐ) ; U+1250 ..
1603 ("\\beG" ?በ) ; U+1260 ..
1620 ("\\teG" ?ተ) ; U+1270 ..
1637 ("\\hheG" ?ኀ) ; U+1280 ..
1654 ("\\neG" ?ነ) ; U+1290 ..
1671 ("\\eG" ?አ) ; U+12A0 ..
1688 ("\\kWeG" ?ኰ) ; U+12B0 ..
1705 ("\\KWeG" ?ዀ) ; U+12C0 ..
1722 ("\\eeG" ?ዐ) ; U+12D0 ..
1739 ("\\ZeG" ?ዠ) ; U+12E0 ..
1756 ("\\deG" ?ደ) ; U+12F0 ..
1773 ("\\jeG" ?ጀ) ; U+1300 ..
1790 ("\\gWeG" ?ጐ) ; U+1310 ..
1807 ("\\TeG" ?ጠ) ; U+1320 ..
1824 ("\\PeG" ?ጰ) ; U+1330 ..
1841 ("\\SSeG" ?ፀ) ; U+1340 ..
1858 ("\\peG" ?ፐ) ; U+1350 ..
1875 ;; reserved ; U+1360 ..
1892 ("\\smntG" ?፰) ; U+1370 ..
1910 ;; private extension
1913 ("\\yWaG" ?) ; U+1A00EF (was U+12EF)
1915 ("\\GWaG" ?) ; U+1A011F (was U+131F)
1917 ("\\qqeG" ?) ; U+1A0180 .. (was U+1380 ..)
1934 ("\\kkeG" ?) ; U+1A0190 .. (was U+1390 ..)
1951 ("\\XeG" ?) ; U+1A01A0 .. (was U+13A0 ..)
1968 ("\\ggeG" ?) ; U+1A01B0 .. (was U+13B0 ..)
1985 ("\\ornamentG" ?) ; U+1A01C0 .. (was U+FDF0 ..)
1995 ;; Gemination () is handled in a special way.
1998 ;; Assign reverse conversion to Fidel chars.
1999 ;; Then override forward conversion with ASCII chars.
2000 ;; ASCII chars should not have reverse conversions.
2001 ("\\dotG" ?) ("\\dotG" ".")
2002 ("\\lquoteG" ?) ("\\lquoteG" "«")
2003 ("\\rquoteG" ?) ("\\rquoteG" "»")
2004 ("\\qmarkG" ?) ("\\qmarkG" "?")
2007 ;; The ethiopic-tex package is not used for keyboard input, therefore
2008 ;; not registered with the register-input-method function.
2010 (provide 'ethio-util)
2012 ;;; ethio-util.el ends here
2014 ;;; arch-tag: c8feb3d6-39bf-4b0a-b6ef-26f03fbc8140
2015 ;;; ethio-util.el ends here