X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/51f32106f2de16e15de19e15ae714ff7f3e82b22..8a60950d6ee97101f282989169ba3972c7cb487d:/lisp/international/titdic-cnv.el diff --git a/lisp/international/titdic-cnv.el b/lisp/international/titdic-cnv.el index 004dc7b8b8..759df5fd94 100644 --- a/lisp/international/titdic-cnv.el +++ b/lisp/international/titdic-cnv.el @@ -1,4 +1,4 @@ -;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package +;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package -*- coding:iso-2022-7bit; -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. ;; Licensed to the Free Software Foundation. @@ -22,7 +22,7 @@ ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, ;; Boston, MA 02111-1307, USA. -;;; Comments: +;;; Commentary: ;; Convert cxterm dictionary (of TIT format) to quail-package. ;; @@ -73,7 +73,7 @@ ;; dictionary. The extra docstring in this alist is to add more ;; information. ;; The command describe-input-method shows the automatically generated -;; docstring, then an extra docstrings while replacing the form \ +;; docstring, then an extra docstring while replacing the form \ ;; by the value of variable VAR. For instance, the form ;; \ is replaced by a description about ;; how to select a translation from a list of candidates. @@ -84,7 +84,7 @@ ("chinese-ccdospy" "$AKuF4(B" "Pinyin base input method for Chinese charset GB2312 \(`chinese-gb2312'). -Pinyin is the standared roman transliteration method for Chinese. +Pinyin is the standard Roman transliteration method for Chinese. For the detail of Pinyin system, see the documentation of the input method `chinese-py'. @@ -92,7 +92,7 @@ This input method works almost the same way as `chinese-py'. The difference is that you type a single key for these Pinyin spelling. Pinyin: zh en eng ang ch an ao ai ong sh ing yu($A(9(B) keyseq: a f g h i j k l s u y v -For expample: +For example: Chinese: $A0!(B $A9{(B $AVP(B $AND(B $A9b(B $ASq(B $AH+(B Pinyin: a guo zhong wen guang yu quan Keyseq: a1 guo4 as1 wf4 guh1 yu..6 qvj6 @@ -102,10 +102,6 @@ For expample: For double-width GB2312 characters correponding to ASCII, use the input method `chinese-qj'.") - ("chinese-ctlau" "$AAuTA(B") - - ("chinese-ctlaub" "$(0N,Gn(B") - ("chinese-ecdict" "$(05CKH(B" "In this input method, you enter a Chinese (Big5) charactere or word by typing the corresponding English word. For example, if you type @@ -167,17 +163,17 @@ and radicals is as below: first radical: a b c d e f g h i j k l m n o p q r s t u v w x y z - $APD(B $AZ"(B $AJ,(B $AX<(B $A;p(B $A?Z(B $A^P(B $Ac_(B $AZ%(B $A\3(B $AXi(B $AD>(B $Alj(B $Ab;(B $ATB(B $Afy(B $AJ/(B $AMu(B $A0K(B $AX/(B $AHU(B $AeA(B $Aak(B $AVq(B $AR;(B $AHK(B + $APD(B $AZ"(B $AJ,(B $AX<(B $A;p(B $A?Z(B $A^P(B $Ac_(B $AZ%(B $A\3(B $AXi(B $AD>(B $Alj(B $Ab;(B $ATB(B $Afy(B $AJ/(B $AMu(B $A0K(B $AX/(B $AHU(B $AeA(B $Aak(B $AVq(B $AR;(B $AHK(B last radical: a b c d e f g h i j k l m n o p q r s t u v w x y z - $ASV(B $AI=(B $AMA(B $A56(B $AZb(B $A?Z(B $ARB(B $Aqb(B $A4s(B $A6!(B $A[L(B $Ala(B $AJ.(B $A4u(B $AXg(B $ACE(B $A=q(B $AX-(B $AE.(B $ARR(B $A`m(B $AP!(B $A3'(B $A3f(B $A_.(B $A27(B + $ASV(B $AI=(B $AMA(B $A56(B $AZb(B $A?Z(B $ARB(B $Aqb(B $A4s(B $A6!(B $A[L(B $Ala(B $AJ.(B $A4u(B $AXg(B $ACE(B $A=q(B $AX-(B $AE.(B $ARR(B $A`m(B $AP!(B $A3'(B $A3f(B $A_.(B $A27(B \\") ("chinese-tonepy" "$A5wF4(B" "Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312'). -Pinyin is the standared roman transliteration method for Chinese. +Pinyin is the standard roman transliteration method for Chinese. For the details of Pinyin system, see the documentation of the input method `chinese-py'. @@ -210,7 +206,7 @@ SPC, 6, 3, 4, or 7 specifing a tone (SPC:$(0?v(N(B, 6:$(0Dm(N(B, 3:$(0&9Vy ;; Return a value of the key in the current line. (defsubst tit-read-key-value () - (if (looking-at "[^ \t\n]+") + (if (looking-at "[^ \t\r\n]+") (car (read-from-string (concat "\"" (match-string 0) "\""))))) ;; Return an appropriate quail-package filename from FILENAME (TIT @@ -221,7 +217,7 @@ SPC, 6, 3, 4, or 7 specifing a tone (SPC:$(0?v(N(B, 6:$(0Dm(N(B, 3:$(0&9Vy dirname)) ;; This value is nil if we are processing phrase dictionary. -(defconst tit-dictionary t) +(defvar tit-dictionary t) (defvar tit-encode nil) (defvar tit-default-encode "GB") @@ -271,8 +267,8 @@ SPC, 6, 3, 4, or 7 specifing a tone (SPC:$(0?v(N(B, 6:$(0Dm(N(B, 3:$(0&9Vy (tit-keyprompt nil)) (princ ";; Quail package `") - (princ package) - (princ "' generated by the command `titdic-convert'\n;;\tDate: ") + (princ package) (princ "' -*- coding:iso-2022-7bit; -*-\n") + (princ ";; Generated by the command `titdic-convert'\n;;\tDate: ") (princ (current-time-string)) (princ "\n;;\tOriginal TIT dictionary file: ") (princ (file-name-nondirectory filename)) @@ -335,7 +331,7 @@ SPC, 6, 3, 4, or 7 specifing a tone (SPC:$(0?v(N(B, 6:$(0Dm(N(B, 3:$(0&9Vy (princ (buffer-substring pos (point))) (princ "\n") (forward-line 1))) - + (princ "\n;;; End of the header of original TIT dictionary.\n\n") (princ ";;; Code:\n\n(require 'quail)\n\n") @@ -469,50 +465,54 @@ SPC, 6, 3, 4, or 7 specifing a tone (SPC:$(0?v(N(B, 6:$(0Dm(N(B, 3:$(0&9Vy Optional argument DIRNAME if specified is the directory name under which the generated Quail package is saved." (interactive "FTIT dictionary file: ") - (with-temp-file (tit-make-quail-package-file-name filename dirname) - (set-buffer-file-coding-system 'iso-2022-7bit) - (let ((standard-output (current-buffer))) - (with-temp-buffer - (let ((coding-system-for-read 'no-conversion)) - (insert-file-contents (expand-file-name filename))) - (set-buffer-multibyte t) - - ;; Decode the buffer contents from the encoding specified by a - ;; value of the key "ENCODE:". - (if (not (search-forward "\nBEGIN" nil t)) - (error "TIT dictionary doesn't have body part")) - (let ((limit (point)) - coding-system slot) + (let ((coding-system-for-write 'iso-2022-7bit)) + (with-temp-file (tit-make-quail-package-file-name filename dirname) + (set-buffer-file-coding-system 'iso-2022-7bit) + (let ((standard-output (current-buffer))) + (with-temp-buffer + (set-buffer-multibyte nil) + (let ((coding-system-for-read 'no-conversion)) + (insert-file-contents (expand-file-name filename))) + + ;; Decode the buffer contents from the encoding specified by a + ;; value of the key "ENCODE:". + (if (not (search-forward "\nBEGIN" nil t)) + (error "TIT dictionary doesn't have body part")) + (let ((limit (point)) + coding-system slot) + (goto-char (point-min)) + (if (re-search-forward "^ENCODE:[ \t]*" limit t) + (progn + (goto-char (match-end 0)) + (setq tit-encode (tit-read-key-value))) + (setq tit-encode tit-default-encode)) + (setq slot (assoc tit-encode tit-encode-list)) + (if (not slot) + (error "Invalid ENCODE: value in TIT dictionary")) + (setq coding-system (nth 1 slot)) + (message "Decoding with coding system %s..." coding-system) + (goto-char (point-min)) + (decode-coding-region (point-min) (point-max) coding-system)) + + ;; Set point the starting position of the body part. (goto-char (point-min)) - (if (re-search-forward "^ENCODE:[ \t]*" limit t) - (progn - (goto-char (match-end 0)) - (setq tit-encode (tit-read-key-value))) - (setq tit-encode tit-default-encode)) - (setq slot (assoc tit-encode tit-encode-list)) - (if (not slot) - (error "Invalid ENCODE: value in TIT dictionary")) - (setq coding-system (nth 1 slot)) - (message "Decoding with coding system %s..." coding-system) - (goto-char (point-min)) - (decode-coding-region (point-min) (point-max) coding-system)) - - ;; Set point the starting position of the body part. - (goto-char (point-min)) - (if (not (search-forward "\nBEGIN" nil t)) - (error "TIT dictionary can't be decoded correctly")) + (if (not (search-forward "\nBEGIN" nil t)) + (error "TIT dictionary can't be decoded correctly")) - ;; Process the header part. - (forward-line 1) - (narrow-to-region (point-min) (point)) - (tit-process-header filename) - (widen) + ;; Process the header part in multibyte mode. + (with-current-buffer standard-output + (set-buffer-multibyte t)) + (set-buffer-multibyte t) + (forward-line 1) + (narrow-to-region (point-min) (point)) + (tit-process-header filename) + (widen) - ;; Process the body part. For speed, we turn off multibyte facility. - (with-current-buffer standard-output - (set-buffer-multibyte nil)) - (set-buffer-multibyte nil) - (tit-process-body))))) + ;; Process the body part. For speed, we turn off multibyte facility. + (with-current-buffer standard-output + (set-buffer-multibyte nil)) + (set-buffer-multibyte nil) + (tit-process-body)))))) ;;;###autoload (defun batch-titdic-convert (&optional force) @@ -563,7 +563,7 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"." ;; Alist of input method names and the corresponding information. ;; Each element has this form: ;; (INPUT-METHOD-NAME ;; Name of the input method. -;; INPUT-METHOD-TITLE ;; Title string of the input method +;; INPUT-METHOD-TITLE ;; Title string of the input method ;; DICFILE ;; Name of the source dictionary file. ;; CODING ;; Coding system of the dictionary file. ;; QUAILFILE ;; Name of the Quail package file. @@ -573,7 +573,7 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"." (defvar quail-misc-package-ext-info '(("chinese-b5-tsangchi" "$(06A(BB" - "cangjie-table.b5" big5 "tsang-b5.el" + "cangjie-table.b5" big5 "tsang-b5.el" tsang-b5-converter "\ ;; # Copyright 2001 Christian Wittern @@ -583,7 +583,7 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"." ;; # this notice is preserved.") ("chinese-b5-quick" "$(0X|(BB" - "cangjie-table.b5" big5 "quick-b5.el" + "cangjie-table.b5" big5 "quick-b5.el" quick-b5-converter "\ ;; # Copyright 2001 Christian Wittern @@ -624,21 +624,21 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"." ;; ;; ;; Copyright (C) 1999, Rui He, herui@cs.duke.edu -;; -;; +;; +;; ;; CCE(Console Chinese Environment) 0.32 -;; -;; CCE is free software; you can redistribute it and/or modify it under the -;; terms of the GNU General Public License as published by the Free Software -;; Foundation; either version 1, or (at your option) any later version. -;; -;; CCE is distributed in the hope that it will be useful, but WITHOUT ANY -;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -;; details. -;; +;; +;; CCE is free software; you can redistribute it and/or modify it under the +;; terms of the GNU General Public License as published by the Free Software +;; Foundation; either version 1, or (at your option) any later version. +;; +;; CCE is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +;; details. +;; ;; You should have received a copy of the GNU General Public License along with -;; CCE; see the file COPYING. If not, write to the Free Software Foundation, +;; CCE; see the file COPYING. If not, write to the Free Software Foundation, ;; 675 Mass Ave, Cambridge, MA 02139, USA.") ("chinese-ziranma" "$AWTH;(B" @@ -653,22 +653,74 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"." ;; ;; ;; Copyright (C) 1999, Rui He, herui@cs.duke.edu -;; -;; +;; +;; ;; CCE(Console Chinese Environment) 0.32 -;; -;; CCE is free software; you can redistribute it and/or modify it under the -;; terms of the GNU General Public License as published by the Free Software -;; Foundation; either version 1, or (at your option) any later version. -;; -;; CCE is distributed in the hope that it will be useful, but WITHOUT ANY -;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -;; details. -;; +;; +;; CCE is free software; you can redistribute it and/or modify it under the +;; terms of the GNU General Public License as published by the Free Software +;; Foundation; either version 1, or (at your option) any later version. +;; +;; CCE is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +;; details. +;; ;; You should have received a copy of the GNU General Public License along with -;; CCE; see the file COPYING. If not, write to the Free Software Foundation, +;; CCE; see the file COPYING. If not, write to the Free Software Foundation, ;; 675 Mass Ave, Cambridge, MA 02139, USA.") + + ("chinese-ctlau" "$AAuTA(B" + "CTLau.html" cn-gb-2312 "CTLau.el" + ctlau-gb-converter + "\ +;; \"CTLau.html\" is available at: +;; +;; http://umunhum.stanford.edu/~lee/chicomp/CTLau.html +;; +;; It contains the following copyright notice: +;; +;; # Copyright (C) 1988-2001 Fung Fung Lee (lee@umunhum.stanford.edu) +;; # +;; # This program is free software; you can redistribute it and/or +;; # modify it under the terms of the GNU General Public License +;; # as published by the Free Software Foundation; either version 2 +;; # of the License, or any later version. +;; # +;; # This program is distributed in the hope that it will be useful, +;; # but WITHOUT ANY WARRANTY; without even the implied warranty of +;; # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; # GNU General Public License for more details. +;; # +;; # You should have received a copy of the GNU General Public License +;; # along with this program; if not, write to the Free Software Foundation, +;; # Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.") + + ("chinese-ctlaub" "$(0N,Gn(B" + "CTLau-b5.html" big5 "CTLau-b5.el" + ctlau-b5-converter + "\ +;; \"CTLau-b5.html\" is available at: +;; +;; http://umunhum.stanford.edu/~lee/chicomp/CTLau-b5.html +;; +;; It contains the following copyright notice: +;; +;; # Copyright (C) 1988-2001 Fung Fung Lee (lee@umunhum.stanford.edu) +;; # +;; # This program is free software; you can redistribute it and/or +;; # modify it under the terms of the GNU General Public License +;; # as published by the Free Software Foundation; either version 2 +;; # of the License, or any later version. +;; # +;; # This program is distributed in the hope that it will be useful, +;; # but WITHOUT ANY WARRANTY; without even the implied warranty of +;; # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; # GNU General Public License for more details. +;; # +;; # You should have received a copy of the GNU General Public License +;; # along with this program; if not, write to the Free Software Foundation, +;; # Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.") )) ;; Generate a code of a Quail package in the current buffer from Tsang @@ -693,7 +745,7 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"." [Q $(0'D(B] [W $(0(q(B] [E $(0'V(B] [R $(0&H(B] [T $(0'>(B] [Y $(0&4(B] [U $(0&U(B] [I $(0'B(B] [O $(0&*(B] [P $(0'A(B] [A $(0'K(B] [S $(0&T(B] [D $(0'N(B] [F $(0'W(B] [G $(0&I(B] [H $(0*M(B] [J $(0&3(B] [L $(0&d(B] - + [Z ] [X $(0[E(B] [C $(01[(B] [V $(0&M(B] [B $(0'M(B] [N $(0&_(B] [M $(0&"(B] \\\\\"\n" @@ -705,8 +757,8 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"." [Q $(GEC(B] [W $(GFp(B] [E $(GEU(B] [R $(GDG(B] [T $(GE=(B] [Y $(GD3(B] [U $(GDT(B] [I $(GEA(B] [O $(GD)(B] [P $(GE@(B] [A $(GEJ(B] [S $(GDS(B] [D $(GEM(B] [F $(GEV(B] [G $(GDH(B] [H $(GHL(B] [J $(GD2(B] [L $(GDc(B] - - [Z ] [X $(GyE(B] [C $(GOZ(B] [V $(GDL(B] [B $(GEL(B] [N $(GD^(B] [M $(GD!(B] + + [Z ] [X $(GyE(B] [C $(GOZ(B] [V $(GDL(B] [B $(GEL(B] [N $(GD^(B] [M $(GD!(B] \\\\\"\n" fulltitle fulltitle))) @@ -716,6 +768,11 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"." (insert "(quail-define-rules\n") (save-excursion (set-buffer dicbuf) + ;; Handle double CR line ends, which result when checking out of + ;; CVS on MS-Windows. + (goto-char (point-min)) + (while (re-search-forward "\r\r$" nil t) + (replace-match "")) (goto-char (point-min)) (search-forward "A440") (beginning-of-line) @@ -801,7 +858,7 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"." Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312'). -Pinyin is the standared roman transliteration method for Chinese. +Pinyin is the standard roman transliteration method for Chinese. Pinyin uses a sequence of Latin alphabetic characters for each Chinese character. The sequence is made by the combination of the initials \(the beginning sounds) and finals (the ending sounds). @@ -963,8 +1020,88 @@ To input symbols and punctuations, type `/' followed by one of `a' to (insert (format "(%S %S)\n" (car elt) (cdr elt)))) (insert ")\n"))) +;; Generate the code for a Quail package in the current buffer from a +;; CTLau or CTLau-b5 dictionary in the buffer DICBUF. The input +;; method name of the Quail package is NAME, and the title string is +;; TITLE. DESCRIPTION is the string shown by describe-input-method. + +(defun ctlau-converter (dicbuf name title description) + (goto-char (point-max)) + (insert (format "%S\n" description)) + (insert " '((\"\C-?\" . quail-delete-last-char) + (\".\" . quail-next-translation) + (\">\" . quail-next-translation) + (\",\" . quail-prev-translation) + (\"<\" . quail-prev-translation)) + nil nil nil nil)\n\n") + (insert "(quail-define-rules\n") + (let (dicbuf-start dicbuf-end key-start key (pos (point))) + ;; Find the dictionary, which starts below a horizontal rule and + ;; ends at the second to last line in the HTML file. + (save-excursion + (set-buffer dicbuf) + (goto-char (point-min)) + (search-forward "#\n#
\n") + (setq dicbuf-start (point)) + (goto-char (point-max)) + (forward-line -1) + (setq dicbuf-end (point))) + (insert-buffer-substring dicbuf dicbuf-start dicbuf-end) + ;; CTLau-b5.html contains characters (0xa1 0xbc) which show up as + ;; hollow boxes when the original characters in CTLau.html from + ;; which the file is converted have no Big5 equivalent. Go + ;; through and delete them. + (goto-char pos) + (while (search-forward "$(0!{(B" nil t) + (delete-char -1)) + ;; Uppercase keys in dictionary need to be downcased. Backslashes + ;; at the beginning of keys need to be turned into double + ;; backslashes. + (goto-char pos) + (while (not (eobp)) + (insert "(\"") + (if (char-equal (following-char) ?\\) + (insert "\\")) + (setq key-start (point)) + (skip-chars-forward "\\\\A-Z") + (downcase-region key-start (point)) + (insert "\" \"") + (delete-char 1) + (end-of-line) + (insert "\")") + (forward-line 1))) + (insert ")\n")) + +(defun ctlau-gb-converter (dicbuf name title) + (ctlau-converter dicbuf name title +"$A::WVJdHk!KAuN}OiJ=TARt!K(B + + $AAuN}OiJ=TASoW"Rt7=08(B + Sidney Lau's Cantonese transcription scheme as described in his book + \"Elementary Cantonese\", The Government Printer, Hong Kong, 1972. + This file was prepared by Fung Fung Lee ($A@n7c7e(B). + Originally converted from CTCPS3.tit + Last modified: June 2, 1993. + + Some infrequent GB characters are accessed by typing \\, followed by + the Cantonese romanization of the respective radical ($A2?JW(B).")) + +(defun ctlau-b5-converter (dicbuf name title) + (ctlau-converter dicbuf name title +"$(0KH)tTT&,!(N,Tg>A*#Gn5x!((B + + $(0N,Tg>A*#GnM$0D5x'J7{(B + Sidney Lau's Cantonese transcription scheme as described in his book + \"Elementary Cantonese\", The Government Printer, Hong Kong, 1972. + This file was prepared by Fung Fung Lee ($(0,XFS76(B). + Originally converted from CTCPS3.tit + Last modified: June 2, 1993. + + Some infrequent characters are accessed by typing \\, followed by + the Cantonese romanization of the respective radical ($(0?f5}(B).")) + (defun miscdic-convert (filename &optional dirname) - "Convert a dictionary file FILENAME into a Quail package. + "Convert a dictionary file FILENAME into a Quail package. Optional argument DIRNAME if specified is the directory name under which the generated Quail package is saved." (interactive "FInput method dictionary file: ") @@ -1032,7 +1169,8 @@ to store generated Quail packages." (if (string= (car command-line-args-left) "-dir") (progn (setq command-line-args-left (cdr command-line-args-left)) - (setq dir (car command-line-args-left)))) + (setq dir (car command-line-args-left)) + (setq command-line-args-left (cdr command-line-args-left)))) (setq filename (car command-line-args-left) command-line-args-left (cdr command-line-args-left)) (if (file-directory-p filename) @@ -1041,8 +1179,9 @@ to store generated Quail packages." (miscdic-convert filename dir)))) (kill-emacs 0)) -;;; titdic-cnv.el ends here - ;; Local Variables: ;; coding: iso-2022-7bit ;; End: + +;;; arch-tag: 8ad478b2-a985-4da2-b47f-d8ee5d7c24a3 +;;; titdic-cnv.el ends here