1 ;;; japan-util.el --- utilities for Japanese
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
6 ;; Keywords: mule, multilingual, Japanese
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
(defvar sentence-end-save nil
  "Value of `sentence-end' saved by `setup-japanese-environment'.
`exit-japanese-environment' copies it back into `sentence-end'.")
(defun setup-japanese-environment ()
  "Setup multilingual environment (MULE) for Japanese.
After `setup-english-environment' has run, select the coding systems
of the \"Japanese\" language environment, set
`default-file-name-coding-system' and `default-input-method', and
extend `sentence-end' with an extra alternative for Japanese
characters.  The previous value of `sentence-end' is kept in
`sentence-end-save' so that `exit-japanese-environment' can restore it.

Calling this function again while that extension is already in effect
leaves both `sentence-end' and `sentence-end-save' unchanged."
  (setup-english-environment)
  (set-language-environment-coding-systems "Japanese")
  (if (eq system-type 'ms-dos)
      (prefer-coding-system 'japanese-shift-jis))
  (setq default-file-name-coding-system 'japanese-iso-8bit)
  (setq default-input-method "japanese")
  ;; The string below is reproduced exactly as it appears in this file,
  ;; line breaks included; its continuation lines must stay at column 0
  ;; because any leading whitespace would become part of the regexp.
  (let ((japanese-sentence-end "\\|[
\e$B!#!)!*
\e(B]"))
    ;; Save and extend only once.  Without this check a second call
    ;; would overwrite the saved value with the already-extended regexp
    ;; and append the alternative again, so the original `sentence-end'
    ;; could no longer be restored.
    (unless (equal sentence-end
                   (concat sentence-end-save japanese-sentence-end))
      (setq sentence-end-save sentence-end)
      (setq sentence-end (concat sentence-end japanese-sentence-end)))))
(defun exit-japanese-environment ()
  "Restore `sentence-end' to the value saved by `setup-japanese-environment'.
The saved value is read from `sentence-end-save'.  When that variable
is nil, nothing has been saved and `sentence-end' is left untouched
instead of being overwritten with nil."
  (when sentence-end-save
    (setq sentence-end sentence-end-save)))
45 (defconst japanese-kana-table
46 '((?
\e$B$"
\e(B ?
\e$B%"
\e(B ?
\e(I1
\e(B) (?
\e$B$$
\e(B ?
\e$B%$
\e(B ?
\e(I2
\e(B) (?
\e$B$&
\e(B ?
\e$B%&
\e(B ?
\e(I3
\e(B) (?
\e$B$(
\e(B ?
\e$B%(
\e(B ?
\e(I4
\e(B) (?
\e$B$*
\e(B ?
\e$B%*
\e(B ?
\e(I5
\e(B)
47 (?
\e$B$+
\e(B ?
\e$B%+
\e(B ?
\e(I6
\e(B) (?
\e$B$-
\e(B ?
\e$B%-
\e(B ?
\e(I7
\e(B) (?
\e$B$/
\e(B ?
\e$B%/
\e(B ?
\e(I8
\e(B) (?
\e$B$1
\e(B ?
\e$B%1
\e(B ?
\e(I9
\e(B) (?
\e$B$3
\e(B ?
\e$B%3
\e(B ?
\e(I:
\e(B)
48 (?
\e$B$5
\e(B ?
\e$B%5
\e(B ?
\e(I;
\e(B) (?
\e$B$7
\e(B ?
\e$B%7
\e(B ?
\e(I<
\e(B) (?
\e$B$9
\e(B ?
\e$B%9
\e(B ?
\e(I=
\e(B) (?
\e$B$;
\e(B ?
\e$B%;
\e(B ?
\e(I>
\e(B) (?
\e$B$=
\e(B ?
\e$B%=
\e(B ?
\e(I?
\e(B)
49 (?
\e$B$?
\e(B ?
\e$B%?
\e(B ?
\e(I@
\e(B) (?
\e$B$A
\e(B ?
\e$B%A
\e(B ?
\e(IA
\e(B) (?
\e$B$D
\e(B ?
\e$B%D
\e(B ?
\e(IB
\e(B) (?
\e$B$F
\e(B ?
\e$B%F
\e(B ?
\e(IC
\e(B) (?
\e$B$H
\e(B ?
\e$B%H
\e(B ?
\e(ID
\e(B)
50 (?
\e$B$J
\e(B ?
\e$B%J
\e(B ?
\e(IE
\e(B) (?
\e$B$K
\e(B ?
\e$B%K
\e(B ?
\e(IF
\e(B) (?
\e$B$L
\e(B ?
\e$B%L
\e(B ?
\e(IG
\e(B) (?
\e$B$M
\e(B ?
\e$B%M
\e(B ?
\e(IH
\e(B) (?
\e$B$N
\e(B ?
\e$B%N
\e(B ?
\e(II
\e(B)
51 (?
\e$B$O
\e(B ?
\e$B%O
\e(B ?
\e(IJ
\e(B) (?
\e$B$R
\e(B ?
\e$B%R
\e(B ?
\e(IK
\e(B) (?
\e$B$U
\e(B ?
\e$B%U
\e(B ?
\e(IL
\e(B) (?
\e$B$X
\e(B ?
\e$B%X
\e(B ?
\e(IM
\e(B) (?
\e$B$[
\e(B ?
\e$B%[
\e(B ?
\e(IN
\e(B)
52 (?
\e$B$^
\e(B ?
\e$B%^
\e(B ?
\e(IO
\e(B) (?
\e$B$_
\e(B ?
\e$B%_
\e(B ?
\e(IP
\e(B) (?
\e$B$`
\e(B ?
\e$B%`
\e(B ?
\e(IQ
\e(B) (?
\e$B$a
\e(B ?
\e$B%a
\e(B ?
\e(IR
\e(B) (?
\e$B$b
\e(B ?
\e$B%b
\e(B ?
\e(IS
\e(B)
53 (?
\e$B$d
\e(B ?
\e$B%d
\e(B ?
\e(IT
\e(B) (?
\e$B$f
\e(B ?
\e$B%f
\e(B ?
\e(IU
\e(B) (?
\e$B$h
\e(B ?
\e$B%h
\e(B ?
\e(IV
\e(B)
54 (?
\e$B$i
\e(B ?
\e$B%i
\e(B ?
\e(IW
\e(B) (?
\e$B$j
\e(B ?
\e$B%j
\e(B ?
\e(IX
\e(B) (?
\e$B$k
\e(B ?
\e$B%k
\e(B ?
\e(IY
\e(B) (?
\e$B$l
\e(B ?
\e$B%l
\e(B ?
\e(IZ
\e(B) (?
\e$B$m
\e(B ?
\e$B%m
\e(B ?
\e(I[
\e(B)
55 (?
\e$B$o
\e(B ?
\e$B%o
\e(B ?
\e(I\
\e(B) (?
\e$B$p
\e(B ?
\e$B%p
\e(B "
\e(I2
\e(B") (?
\e$B$q
\e(B ?
\e$B%q
\e(B "
\e(I4
\e(B") (?
\e$B$r
\e(B ?
\e$B%r
\e(B ?
\e(I&
\e(B)
56 (?
\e$B$s
\e(B ?
\e$B%s
\e(B ?
\e(I]
\e(B)
57 (?
\e$B$,
\e(B ?
\e$B%,
\e(B "
\e(I6^
\e(B") (?
\e$B$.
\e(B ?
\e$B%.
\e(B "
\e(I7^
\e(B") (?
\e$B$0
\e(B ?
\e$B%0
\e(B "
\e(I8^
\e(B") (?
\e$B$2
\e(B ?
\e$B%2
\e(B "
\e(I9^
\e(B") (?
\e$B$4
\e(B ?
\e$B%4
\e(B "
\e(I:^
\e(B")
58 (?
\e$B$6
\e(B ?
\e$B%6
\e(B "
\e(I;^
\e(B") (?
\e$B$8
\e(B ?
\e$B%8
\e(B "
\e(I<^
\e(B") (?
\e$B$:
\e(B ?
\e$B%:
\e(B "
\e(I=^
\e(B") (?
\e$B$<
\e(B ?
\e$B%<
\e(B "
\e(I>^
\e(B") (?
\e$B$>
\e(B ?
\e$B%>
\e(B "
\e(I?^
\e(B")
59 (?
\e$B$@
\e(B ?
\e$B%@
\e(B "
\e(I@^
\e(B") (?
\e$B$B
\e(B ?
\e$B%B
\e(B "
\e(IA^
\e(B") (?
\e$B$E
\e(B ?
\e$B%E
\e(B "
\e(IB^
\e(B") (?
\e$B$G
\e(B ?
\e$B%G
\e(B "
\e(IC^
\e(B") (?
\e$B$I
\e(B ?
\e$B%I
\e(B "
\e(ID^
\e(B")
60 (?
\e$B$P
\e(B ?
\e$B%P
\e(B "
\e(IJ^
\e(B") (?
\e$B$S
\e(B ?
\e$B%S
\e(B "
\e(IK^
\e(B") (?
\e$B$V
\e(B ?
\e$B%V
\e(B "
\e(IL^
\e(B") (?
\e$B$Y
\e(B ?
\e$B%Y
\e(B "
\e(IM^
\e(B") (?
\e$B$\
\e(B ?
\e$B%\
\e(B "
\e(IN^
\e(B")
61 (?
\e$B$Q
\e(B ?
\e$B%Q
\e(B "
\e(IJ_
\e(B") (?
\e$B$T
\e(B ?
\e$B%T
\e(B "
\e(IK_
\e(B") (?
\e$B$W
\e(B ?
\e$B%W
\e(B "
\e(IL_
\e(B") (?
\e$B$Z
\e(B ?
\e$B%Z
\e(B "
\e(IM_
\e(B") (?
\e$B$]
\e(B ?
\e$B%]
\e(B "
\e(IN_
\e(B")
62 (?
\e$B$!
\e(B ?
\e$B%!
\e(B ?
\e(I'
\e(B) (?
\e$B$#
\e(B ?
\e$B%#
\e(B ?
\e(I(
\e(B) (?
\e$B$%
\e(B ?
\e$B%%
\e(B ?
\e(I)
\e(B) (?
\e$B$'
\e(B ?
\e$B%'
\e(B ?
\e(I*
\e(B) (?
\e$B$)
\e(B ?
\e$B%)
\e(B ?
\e(I+
\e(B)
63 (?
\e$B$C
\e(B ?
\e$B%C
\e(B ?
\e(I/
\e(B)
64 (?
\e$B$c
\e(B ?
\e$B%c
\e(B ?
\e(I,
\e(B) (?
\e$B$e
\e(B ?
\e$B%e
\e(B ?
\e(I-
\e(B) (?
\e$B$g
\e(B ?
\e$B%g
\e(B ?
\e(I.
\e(B)
65 (?
\e$B$n
\e(B ?
\e$B%n
\e(B "
\e(I\
\e(B")
66 ("
\e$B$&!+
\e(B" ?
\e$B%t
\e(B "
\e(I3^
\e(B") (nil ?
\e$B%u
\e(B "
\e(I6
\e(B") (nil ?
\e$B%v
\e(B "
\e(I9
\e(B"))
"Japanese JISX0208 Kana character table.
Each element is of the form (HIRAGANA KATAKANA HANKAKU-KATAKANA), where
HIRAGANA and KATAKANA belong to `japanese-jisx0208',
HANKAKU-KATAKANA belongs to `japanese-jisx0201-kana'.
HIRAGANA may also be nil or a string of two characters, and
HANKAKU-KATAKANA may also be a string.")
;; Put properties 'katakana, 'hiragana, and 'jisx0201 on each Japanese
;; kana character for conversion among them.
74 (let ((l japanese-kana-table)
75 slot hiragana katakana jisx0201)
78 hiragana (car slot) katakana (nth 1 slot) jisx0201 (nth 2 slot)
81 (if (stringp hiragana)
82 (if (> (length hiragana) 1)
83 (let ((hira (aref hiragana 0)))
84 (put-char-code-property
86 (cons (cons (aref hiragana 1) katakana)
87 (get-char-code-property hira 'composition)))))
88 (put-char-code-property hiragana 'katakana katakana)
89 (put-char-code-property hiragana 'jisx0201 jisx0201)))
90 (when (integerp katakana)
91 (put-char-code-property katakana 'hiragana hiragana)
92 (put-char-code-property katakana 'jisx0201 jisx0201))
94 (if (stringp jisx0201)
95 (if (> (length jisx0201) 1)
96 (let ((kana (aref jisx0201 0)))
97 (put-char-code-property
99 (cons (cons (aref jisx0201 1) katakana)
100 (get-char-code-property kana 'composition)))))
101 (put-char-code-property jisx0201 'hiragana hiragana)
102 (put-char-code-property jisx0201 'katakana katakana)
103 (put-char-code-property jisx0201 'jisx0208 katakana)))))
105 (defconst japanese-symbol-table
106 '((?\
\e$B!!
\e(B ?\ ) (?
\e$B!$
\e(B ?, ?
\e(I$
\e(B) (?
\e$B!%
\e(B ?. ?
\e(I!
\e(B) (?
\e$B!"
\e(B ?, ?
\e(I$
\e(B) (?
\e$B!#
\e(B ?. ?
\e(I!
\e(B) (?
\e$B!&
\e(B nil ?
\e(I%
\e(B)
107 (?
\e$B!'
\e(B ?:) (?
\e$B!(
\e(B ?\;) (?
\e$B!)
\e(B ??) (?
\e$B!*
\e(B ?!) (?
\e$B!+
\e(B nil ?
\e(I^
\e(B) (?
\e$B!,
\e(B nil ?
\e(I_
\e(B)
108 (?
\e$B!-
\e(B ?') (?
\e$B!.
\e(B ?`) (?
\e$B!0
\e(B ?^) (?
\e$B!2
\e(B ?_) (?
\e$B!<
\e(B ?-) (?
\e$B!=
\e(B ?-) (?
\e$B!>
\e(B ?-)
109 (?
\e$B!?
\e(B ?/) (?
\e$B!@
\e(B ?\\) (?
\e$B!A
\e(B ?~) (?
\e$B!C
\e(B ?|) (?
\e$B!F
\e(B ?`) (?
\e$B!G
\e(B ?') (?
\e$B!H
\e(B ?\") (?
\e$B!I
\e(B ?\")
110 (?\
\e$B!J
\e(B ?\() (?\
\e$B!K
\e(B ?\)) (?\
\e$B!N
\e(B ?[) (?\
\e$B!O
\e(B ?]) (?\
\e$B!P
\e(B ?{) (?\
\e$B!Q
\e(B ?})
111 (?
\e$B!R
\e(B ?<) (?
\e$B!S
\e(B ?>) (?
\e$B!\
\e(B ?+) (?
\e$B!]
\e(B ?-) (?
\e$B!a
\e(B ?=) (?
\e$B!c
\e(B ?<) (?
\e$B!d
\e(B ?>)
112 (?
\e$B!l
\e(B ?') (?
\e$B!m
\e(B ?\") (?
\e$B!o
\e(B ?\\) (?
\e$B!p
\e(B ?$) (?
\e$B!s
\e(B ?%) (?
\e$B!t
\e(B ?#) (?
\e$B!u
\e(B ?&) (?
\e$B!v
\e(B ?*)
114 "Japanese JISX0208 symbol character table.
115 Each element is of the form (SYMBOL ASCII HANKAKU), where SYMBOL
116 belongs to `japanese-jisx0208', ASCII belongs to `ascii', and HANKAKU
117 belongs to `japanese-jisx0201-kana'.")
;; Put properties 'jisx0208, 'jisx0201, and 'ascii on each Japanese
;; symbol and ASCII character for conversion among them.
121 (let ((l japanese-symbol-table)
122 slot jisx0208 ascii jisx0201)
125 jisx0208 (car slot) ascii (nth 1 slot) jisx0201 (nth 2 slot)
129 (put-char-code-property jisx0208 'ascii ascii)
130 (put-char-code-property ascii 'jisx0208 jisx0208)))
133 (put-char-code-property jisx0208 'jisx0201 jisx0201)
134 (put-char-code-property jisx0201 'jisx0208 jisx0208)))))
136 (defconst japanese-alpha-numeric-table
137 '((?
\e$B#0
\e(B . ?0) (?
\e$B#1
\e(B . ?1) (?
\e$B#2
\e(B . ?2) (?
\e$B#3
\e(B . ?3) (?
\e$B#4
\e(B . ?4)
138 (?
\e$B#5
\e(B . ?5) (?
\e$B#6
\e(B . ?6) (?
\e$B#7
\e(B . ?7) (?
\e$B#8
\e(B . ?8) (?
\e$B#9
\e(B . ?9)
139 (?
\e$B#A
\e(B . ?A) (?
\e$B#B
\e(B . ?B) (?
\e$B#C
\e(B . ?C) (?
\e$B#D
\e(B . ?D) (?
\e$B#E
\e(B . ?E)
140 (?
\e$B#F
\e(B . ?F) (?
\e$B#G
\e(B . ?G) (?
\e$B#H
\e(B . ?H) (?
\e$B#I
\e(B . ?I) (?
\e$B#J
\e(B . ?J)
141 (?
\e$B#K
\e(B . ?K) (?
\e$B#L
\e(B . ?L) (?
\e$B#M
\e(B . ?M) (?
\e$B#N
\e(B . ?N) (?
\e$B#O
\e(B . ?O)
142 (?
\e$B#P
\e(B . ?P) (?
\e$B#Q
\e(B . ?Q) (?
\e$B#R
\e(B . ?R) (?
\e$B#S
\e(B . ?S) (?
\e$B#T
\e(B . ?T)
143 (?
\e$B#U
\e(B . ?U) (?
\e$B#V
\e(B . ?V) (?
\e$B#W
\e(B . ?W) (?
\e$B#X
\e(B . ?X) (?
\e$B#Y
\e(B . ?Y) (?
\e$B#Z
\e(B . ?Z)
144 (?
\e$B#a
\e(B . ?a) (?
\e$B#b
\e(B . ?b) (?
\e$B#c
\e(B . ?c) (?
\e$B#d
\e(B . ?d) (?
\e$B#e
\e(B . ?e)
145 (?
\e$B#f
\e(B . ?f) (?
\e$B#g
\e(B . ?g) (?
\e$B#h
\e(B . ?h) (?
\e$B#i
\e(B . ?i) (?
\e$B#j
\e(B . ?j)
146 (?
\e$B#k
\e(B . ?k) (?
\e$B#l
\e(B . ?l) (?
\e$B#m
\e(B . ?m) (?
\e$B#n
\e(B . ?n) (?
\e$B#o
\e(B . ?o)
147 (?
\e$B#p
\e(B . ?p) (?
\e$B#q
\e(B . ?q) (?
\e$B#r
\e(B . ?r) (?
\e$B#s
\e(B . ?s) (?
\e$B#t
\e(B . ?t)
148 (?
\e$B#u
\e(B . ?u) (?
\e$B#v
\e(B . ?v) (?
\e$B#w
\e(B . ?w) (?
\e$B#x
\e(B . ?x) (?
\e$B#y
\e(B . ?y) (?
\e$B#z
\e(B . ?z))
"Japanese JISX0208 alpha numeric character table.
Each element is of the form (ALPHA-NUMERIC . ASCII), where ALPHA-NUMERIC
belongs to `japanese-jisx0208', ASCII belongs to `ascii'.")
;; Put properties 'jisx0208 and 'ascii on each Japanese alphanumeric
;; and ASCII character for conversion between them.
155 (let ((l japanese-alpha-numeric-table)
159 jisx0208 (car slot) ascii (cdr slot)
161 (put-char-code-property jisx0208 'ascii ascii)
162 (put-char-code-property ascii 'jisx0208 jisx0208)))
;; Convert string STR by applying FUNC and return the resulting string.
165 (defun japanese-string-conversion (str func &rest args)
166 (let ((buf (get-buffer-create " *Japanese work*")))
171 (apply func 1 (point) args)
175 (defun japanese-katakana (obj &optional hankaku)
176 "Convert argument to Katakana and return that.
177 The argument may be a character or string. The result has the same type.
178 The argument object is not altered--the value is a copy.
179 Optional argument HANKAKU t means to convert to `hankaku' Katakana
180 \(`japanese-jisx0201-kana'), in which case return value
181 may be a string even if OBJ is a character if two Katakanas are
182 necessary to represent OBJ."
184 (japanese-string-conversion obj 'japanese-katakana-region hankaku)
185 (or (get-char-code-property obj (if hankaku 'jisx0201 'katakana))
189 (defun japanese-hiragana (obj)
190 "Convert argument to Hiragana and return that.
191 The argument may be a character or string. The result has the same type.
192 The argument object is not altered--the value is a copy."
194 (japanese-string-conversion obj 'japanese-hiragana-region)
195 (or (get-char-code-property obj 'hiragana)
199 (defun japanese-hankaku (obj &optional ascii-only)
200 "Convert argument to `hankaku' and return that.
201 The argument may be a character or string. The result has the same type.
202 The argument object is not altered--the value is a copy.
203 Optional argument ASCII-ONLY non-nil means to return only ASCII character."
205 (japanese-string-conversion obj 'japanese-hankaku-region ascii-only)
206 (or (get-char-code-property obj 'ascii)
207 (and (not ascii-only)
208 (get-char-code-property obj 'jisx0201))
212 (defun japanese-zenkaku (obj)
213 "Convert argument to `zenkaku' and return that.
214 The argument may be a character or string. The result has the same type.
215 The argument object is not altered--the value is a copy."
217 (japanese-string-conversion obj 'japanese-zenkaku-region)
218 (or (get-char-code-property obj 'jisx0208)
222 (defun japanese-katakana-region (from to &optional hankaku)
223 "Convert Japanese `hiragana' chars in the region to `katakana' chars.
224 Optional argument HANKAKU t means to convert to `hankaku katakana' character
225 of which charset is `japanese-jisx0201-kana'."
228 (narrow-to-region from to)
229 (goto-char (point-min))
230 (while (re-search-forward "\\cH\\|\\cK" nil t)
231 (let* ((kana (preceding-char))
232 (composition (get-char-code-property kana 'composition))
234 (if (and composition (setq slot (assq (following-char) composition)))
236 (delete-region (match-beginning 0) (1+ (point)))
238 (let ((kata (get-char-code-property
239 kana (if hankaku 'jisx0201 'katakana))))
242 (delete-region (match-beginning 0) (match-end 0))
243 (insert kata)))))))))
246 (defun japanese-hiragana-region (from to)
247 "Convert Japanese `katakana' chars in the region to `hiragana' chars."
250 (narrow-to-region from to)
251 (goto-char (point-min))
252 (while (re-search-forward "\\cK\\|\\ck" nil t)
253 (let* ((kata (preceding-char))
254 (composition (get-char-code-property kata 'composition))
256 (if (and composition (setq slot (assq (following-char) composition)))
258 (delete-region (match-beginning 0) (1+ (point)))
259 (insert (get-char-code-property (cdr slot) 'hiragana)))
260 (let ((hira (get-char-code-property kata 'hiragana)))
263 (delete-region (match-beginning 0) (match-end 0))
264 (insert hira)))))))))
267 (defun japanese-hankaku-region (from to &optional ascii-only)
268 "Convert Japanese `zenkaku' chars in the region to `hankaku' chars.
269 `Zenkaku' chars belong to `japanese-jisx0208'
270 `Hankaku' chars belong to `ascii' or `japanese-jisx0201-kana'.
271 Optional argument ASCII-ONLY non-nil means to convert only to ASCII char."
274 (narrow-to-region from to)
275 (goto-char (point-min))
276 (while (re-search-forward "\\cj" nil t)
277 (let* ((zenkaku (preceding-char))
278 (hankaku (or (get-char-code-property zenkaku 'ascii)
279 (and (not ascii-only)
280 (get-char-code-property zenkaku 'jisx0201)))))
283 (delete-region (match-beginning 0) (match-end 0))
284 (insert hankaku)))))))
287 (defun japanese-zenkaku-region (from to)
"Convert `hankaku' chars in the region to Japanese `zenkaku' chars.
289 `Zenkaku' chars belong to `japanese-jisx0208'
290 `Hankaku' chars belong to `ascii' or `japanese-jisx0201-kana'."
293 (narrow-to-region from to)
294 (goto-char (point-min))
295 (while (re-search-forward "\\ca\\|\\ck" nil t)
296 (let* ((hankaku (preceding-char))
297 (composition (get-char-code-property hankaku 'composition))
299 (if (and composition (setq slot (assq (following-char) composition)))
301 (delete-region (match-beginning 0) (1+ (point)))
303 (let ((zenkaku (japanese-zenkaku hankaku)))
306 (delete-region (match-beginning 0) (match-end 0))
307 (insert zenkaku)))))))))
(defun read-hiragana-string (prompt &optional initial-input)
  "Prompt with string PROMPT and read a Hiragana string from the minibuffer.
Optional second arg INITIAL-INPUT, if non-nil, is a string that is
inserted before reading starts.
The work is delegated to `read-multilingual-string', which receives
\"japanese-hiragana\" as its third argument."
  (let ((hiragana-input-method-name "japanese-hiragana"))
    (read-multilingual-string prompt
                              initial-input
                              hiragana-input-method-name)))
316 (provide 'japan-util)
318 ;;; japan-util.el ends here