;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package -*- coding:iso-2022-7bit; -*-
;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004,
-;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+;; 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-;; 2005, 2006, 2007, 2008
+;; 2005, 2006, 2007, 2008, 2009, 2010
;; National Institute of Advanced Industrial Science and Technology (AIST)
;; Registration Number H14PRO021
+;; Copyright (C) 2003
+;; National Institute of Advanced Industrial Science and Technology (AIST)
+;; Registration Number H13PRO009
;; Keywords: Quail, TIT, cxterm
;; This file is part of GNU Emacs.
-;; GNU Emacs is free software; you can redistribute it and/or modify
+;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 3, or (at your option)
-;; any later version.
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-;; Boston, MA 02110-1301, USA.
+;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
(princ ";; Quail package `")
(princ package)
- (princ "' -*- coding:iso-2022-7bit; ")
+ (princ (format "' -*- coding:%s; " coding-system-for-write))
(princ "byte-compile-disable-print-circle:t; -*-\n")
(princ ";; Generated by the command `titdic-convert'\n;;\tDate: ")
(princ (current-time-string))
(cond ((looking-at "COMMENT")
(let ((pos (match-end 0)))
(end-of-line)
- (setq tit-comments (cons (buffer-substring pos (point))
- tit-comments))))))
+ (setq tit-comments
+ (cons (buffer-substring-no-properties pos (point))
+ tit-comments))))))
((= ch ?M) ; MULTICHOICE, MOVERIGHT, MOVELEFT
(cond ((looking-at "MULTICHOICE:[ \t]*")
(goto-char (match-end 0))
tit-keyprompt)))))))
(end-of-line)
(princ ";; ")
- (princ (buffer-substring pos (point)))
+ (princ (buffer-substring-no-properties pos (point)))
(princ "\n")
(forward-line 1)))
(forward-line 1)
(setq pos (point))
(skip-chars-forward "^ \t\n")
- (setq key (buffer-substring pos (point)))
+ (setq key (buffer-substring-no-properties pos (point)))
(skip-chars-forward " \t")
(setq ch (following-char))
(if (or (= ch ?#) (= ch ?\n))
(setq translations
(if translations
(concat translations
- (buffer-substring pos (point)))
- (buffer-substring pos (point)))))
+ (buffer-substring-no-properties pos (point)))
+ (buffer-substring-no-properties pos (point)))))
(while (not (eolp))
(setq pos (point))
(skip-chars-forward "^ \t\n")
- (setq translations (cons (buffer-substring pos (point))
+ (setq translations (cons (buffer-substring-no-properties
+ pos (point))
translations))
(skip-chars-forward " \t")
(setq ch (following-char))
Optional argument DIRNAME if specified is the directory name under which
the generated Quail package is saved."
(interactive "FTIT dictionary file: ")
- (let ((coding-system-for-write 'iso-2022-7bit-unix))
+ (let ((coding-system-for-write nil))
(with-temp-file (tit-make-quail-package-file-name filename dirname)
- ;; Explicitly speficy eol format to `unix'.
- (set-buffer-file-coding-system 'iso-2022-7bit-unix)
(let ((standard-output (current-buffer)))
(with-temp-buffer
(set-buffer-multibyte nil)
(setq coding-system (nth 1 slot))
(message "Decoding with coding system %s..." coding-system)
(goto-char (point-min))
- (decode-coding-region (point-min) (point-max) coding-system))
+ (decode-coding-region (point-min) (point-max) coding-system)
+ ;; Explicitly set eol format to `unix'.
+ (setq coding-system-for-write
+ (coding-system-change-eol-conversion coding-system 'unix))
+ (remove-text-properties (point-min) (point-max) '(charset nil)))
+ (set-buffer-multibyte t)
;; Set point the starting position of the body part.
(goto-char (point-min))
(if (not (search-forward "\nBEGIN" nil t))
(error "TIT dictionary can't be decoded correctly"))
- ;; Process the header part in multibyte mode.
- (with-current-buffer standard-output
- (set-buffer-multibyte t))
- (set-buffer-multibyte t)
+ ;; Process the header part.
(forward-line 1)
(narrow-to-region (point-min) (point))
(tit-process-header filename)
(widen)
- ;; Process the body part. For speed, we turn off multibyte facility.
- (with-current-buffer standard-output
- (set-buffer-multibyte nil))
- (set-buffer-multibyte nil)
+ ;; Process the body part
(tit-process-body))))))
;;;###autoload
(\",\" . quail-prev-translation-block))
nil nil)\n\n")
(insert "(quail-define-rules\n")
- (save-excursion
- (set-buffer dicbuf)
+ (with-current-buffer dicbuf
;; Handle double CR line ends, which result when checking out of
;; CVS on MS-Windows.
(goto-char (point-min))
- (while (re-search-forward "\r\r$" nil t)
- (replace-match ""))
- (goto-char (point-min))
(search-forward "A440")
(beginning-of-line)
(let ((table (make-hash-table :test 'equal))
(forward-char 5)
(let ((trans (char-to-string (following-char)))
key slot)
- (re-search-forward "[A-Z]+$" nil t)
+ (re-search-forward "\\([A-Z]+\\)\r*$" nil t)
(setq key (downcase
(if (or tsang-p
- (<= (- (match-end 0) (match-beginning 0)) 1))
- (match-string 0)
- (string (char-after (match-beginning 0))
- (char-after (1- (match-end 0)))))))
+ (<= (- (match-end 1) (match-beginning 1)) 1))
+ (match-string 1)
+ (string (char-after (match-beginning 1))
+ (char-after (1- (match-end 1)))))))
(setq val (gethash key table))
(if val (setq trans (concat val trans)))
(puthash key trans table)
nil nil nil nil)\n\n")
(insert "(quail-define-rules\n")
(let ((pos (point)))
- (insert-buffer-substring dicbuf)
+ (insert-buffer-substring-no-properties dicbuf)
(goto-char pos)
(re-search-forward "^[a-z]")
(beginning-of-line)
(insert "\" \"")
(delete-char 1)
(end-of-line)
+ (while (= (preceding-char) ?\r)
+ (delete-char -1))
(insert "\")")
(forward-line 1)))
(insert ")\n"))
(defun ziranma-converter (dicbuf name title)
(let (dic)
- (save-excursion
- (set-buffer dicbuf)
+ (with-current-buffer dicbuf
(goto-char (point-min))
- (search-forward "%keyname end\n")
+ (search-forward "\n%keyname end")
+ (forward-line 1)
(let ((table (make-hash-table :test 'equal))
elt pos key trans val)
(while (not (eobp))
(setq pos (point))
(skip-chars-forward "^ \t")
- (setq key (buffer-substring pos (point)))
+ (setq key (buffer-substring-no-properties pos (point)))
(skip-chars-forward " \t")
- (setq trans (vector (buffer-substring (point) (line-end-position))))
+ (setq pos (point))
+ (skip-chars-forward "^\r\n")
+ (setq trans (vector (buffer-substring-no-properties pos (point))))
(setq val (gethash key table))
(if val (setq trans (vconcat val trans)))
(puthash key trans table)
(let (dicbuf-start dicbuf-end key-start key (pos (point)))
;; Find the dictionary, which starts below a horizontal rule and
;; ends at the second to last line in the HTML file.
- (save-excursion
- (set-buffer dicbuf)
+ (with-current-buffer dicbuf
(goto-char (point-min))
- (search-forward "#\n#<hr>\n")
+ (re-search-forward "^#<hr>")
+ (forward-line 1)
(setq dicbuf-start (point))
(goto-char (point-max))
- (forward-line -1)
+ (re-search-backward "^<hr>")
(setq dicbuf-end (point)))
- (insert-buffer-substring dicbuf dicbuf-start dicbuf-end)
+ (insert-buffer-substring-no-properties dicbuf dicbuf-start dicbuf-end)
;; CTLau-b5.html contains characters (0xa1 0xbc) which show up as
;; hollow boxes when the original characters in CTLau.html from
;; which the file is converted have no Big5 equivalent. Go
(insert "\" \"")
(delete-char 1)
(end-of-line)
+ (while (= (preceding-char) ?\r)
+ (delete-char -1))
(insert "\")")
(forward-line 1)))
(insert ")\n"))
Some infrequent characters are accessed by typing \\, followed by
the Cantonese romanization of the respective radical (\e$(0?f5}\e(B)."))
+(declare-function dos-8+3-filename "dos-fns.el" (filename))
+
(defun miscdic-convert (filename &optional dirname)
"Convert a dictionary file FILENAME into a Quail package.
Optional argument DIRNAME if specified is the directory name under which
(or (file-readable-p filename)
(error "%s does not exist" filename))
(let ((tail quail-misc-package-ext-info)
- (default-buffer-file-coding-system 'iso-2022-7bit)
+ coding-system-for-write
slot
name title dicfile coding quailfile converter copyright
dicbuf)
converter (nth 5 slot)
copyright (nth 6 slot))
(message "Converting %s to %s..." dicfile quailfile)
+ ;; Explicitly set eol format to `unix'.
+ (setq coding-system-for-write
+ (coding-system-change-eol-conversion coding 'unix))
(with-temp-file (expand-file-name quailfile dirname)
- ;; Explicitly speficy eol format to `unix'.
- (set-buffer-file-coding-system 'iso-2022-7bit-unix)
- (insert ";; Quail package `" name "' -*- coding:iso-2022-7bit; ")
- (insert "byte-compile-disable-print-circle:t; -*-\n");
+ (insert (format ";; Quail package `%s' -*- coding:%s; " name coding))
+ (insert "byte-compile-disable-print-circle:t; -*-\n")
(insert ";; Generated by the command `miscdic-convert'\n")
(insert ";; Date: " (current-time-string) "\n")
(insert ";; Source dictionary file: " dicfile "\n")
(insert ";;; Code:\n\n")
(insert "(require 'quail)\n")
(insert "(quail-define-package \"" name "\" \""
- (if (eq coding 'big5) "Chinese-BIG5" "Chinese-CNS")
+ (if (eq coding 'big5) "Chinese-BIG5"
+ (if (eq coding 'iso-2022-cn-ext) "Chinese-CNS"
+ "Chinese-GB"))
"\" \"" title "\" t\n")
- (let* ((coding-system-for-read coding)
+ (let* ((coding-system-for-read
+ (coding-system-change-eol-conversion coding 'unix))
(dicbuf (find-file-noselect filename)))
(funcall converter dicbuf name title)
(kill-buffer dicbuf)))
command-line-args-left (cdr command-line-args-left))
(if (file-directory-p filename)
(dolist (file (directory-files filename t nil t))
- (miscdic-convert file dir))
+ (or (file-directory-p file)
+ (miscdic-convert file dir)))
(miscdic-convert filename dir))))
(kill-emacs 0))
;; coding: iso-2022-7bit
;; End:
-;;; arch-tag: 8ad478b2-a985-4da2-b47f-d8ee5d7c24a3
+;; arch-tag: 8ad478b2-a985-4da2-b47f-d8ee5d7c24a3
;;; titdic-cnv.el ends here