]> code.delx.au - gnu-emacs/blob - lisp/language/cyrillic.el
*** empty log message ***
[gnu-emacs] / lisp / language / cyrillic.el
1 ;;; cyrillic.el --- support for Cyrillic -*- coding: iso-2022-7bit; -*-
2
3 ;; Copyright (C) 1997, 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 ;; 2005, 2006, 2007
7 ;; National Institute of Advanced Industrial Science and Technology (AIST)
8 ;; Registration Number H14PRO021
9 ;; Copyright (C) 2003
10 ;; National Institute of Advanced Industrial Science and Technology (AIST)
11 ;; Registration Number H13PRO009
12
13 ;; Author: Kenichi Handa <handa@etl.go.jp>
14 ;; Keywords: multilingual, Cyrillic, i18n
15
16 ;; This file is part of GNU Emacs.
17
18 ;; GNU Emacs is free software; you can redistribute it and/or modify
19 ;; it under the terms of the GNU General Public License as published by
20 ;; the Free Software Foundation; either version 3, or (at your option)
21 ;; any later version.
22
23 ;; GNU Emacs is distributed in the hope that it will be useful,
24 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
25 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 ;; GNU General Public License for more details.
27
28 ;; You should have received a copy of the GNU General Public License
29 ;; along with GNU Emacs; see the file COPYING. If not, write to the
30 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
31 ;; Boston, MA 02110-1301, USA.
32
33 ;;; Commentary:
34
35 ;; The character set ISO8859-5 is supported. KOI-8 and ALTERNATIVNYJ
36 ;; are converted to Unicode internally. See
37 ;; <URL:http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM>. For more info
38 ;; on Cyrillic charsets, see
39 ;; <URL:http://czyborra.com/charsets/cyrillic.html>. The KOI and
40 ;; Alternativnyj coding systems should live in code-pages.el, but
41 ;; they've always been preloaded and the coding system autoload
42 ;; mechanism didn't get accepted, so they have to stay here and
43 ;; duplicate code-pages stuff.
44
45 ;; Note that 8859-5 maps directly onto the Unicode Cyrillic block,
46 ;; apart from codepoints 160 (NBSP, cf. U+0400), 173 (soft hyphen,
47 ;; cf. U+040D) and 253 (section sign, cf. U+045D). The KOI-8 and
48 ;; Alternativnyj coding systems encode both 8859-5 and Unicode.
49 ;; ucs-tables.el provides unification for cyrillic-iso-8bit.
50
51 ;; Customizing `utf-fragment-on-decoding' allows decoding characters
52 ;; from KOI and Alternativnyj into 8859-5 where that's possible.
53 ;; cyrillic-iso8859-5 characters take half as much space in the buffer
54 ;; as the mule-unicode-0100-24ff equivalents, though that's probably
55 ;; not normally a big deal.
56
57 ;;; Code:
58
59 ;; Cyrillic (general)
60
61 ;; ISO-8859-5 stuff
62
63 (define-coding-system 'cyrillic-iso-8bit ; Coding system for ISO-8859-5 text, built on the iso-8859-5 charset.
64 "ISO 2022 based 8-bit encoding for Cyrillic script (MIME:ISO-8859-5)."
65 :coding-type 'charset
66 :mnemonic ?5 ; character used to identify this coding system
67 :charset-list '(iso-8859-5)
68 :mime-charset 'iso-8859-5)
69
70 (define-coding-system-alias 'iso-8859-5 'cyrillic-iso-8bit) ; let the MIME name be used as a coding-system name
71
72 (set-language-info-alist ; Register the Cyrillic-ISO language environment.
73 "Cyrillic-ISO" '((charset iso-8859-5)
74 (coding-system cyrillic-iso-8bit)
75 (coding-priority cyrillic-iso-8bit)
76 (input-method . "cyrillic-yawerty") ; fixme
77 (nonascii-translation . iso-8859-5)
78 (unibyte-display . cyrillic-iso-8bit)
79 (features cyril-util)
80 (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!") ; presumably Cyrillic written as ISO-2022 escapes, per the file's coding cookie
81 (documentation . "Support for Cyrillic ISO-8859-5."))
82 '("Cyrillic")) ; PARENTS argument: parent menu name for this environment
83
84 ;; KOI-8R stuff
85
86 (define-coding-system 'cyrillic-koi8 ; Coding system for KOI8-R text, built on the koi8 charset.
87 "KOI8 8-bit encoding for Cyrillic (MIME: KOI8-R)."
88 :coding-type 'charset
89 ;; We used to use ?K. It is true that ?K is more strictly correct,
90 ;; but it is also used for Korean. So people who use koi8 for
91 ;; languages other than Russian will have to forgive us.
92 :mnemonic ?R
93 :charset-list '(koi8)
94 :mime-charset 'koi8-r)
95
96 (define-coding-system-alias 'koi8-r 'cyrillic-koi8) ; same name as the :mime-charset above
97 (define-coding-system-alias 'koi8 'cyrillic-koi8) ; same name as the charset above
98 (define-coding-system-alias 'cp878 'cyrillic-koi8) ; presumably the code-page name for KOI8-R -- TODO confirm
99
100 (set-language-info-alist ; Register the Cyrillic-KOI8 language environment.
101 "Cyrillic-KOI8" `((charset koi8)
102 (coding-system cyrillic-koi8)
103 (coding-priority cyrillic-koi8 cyrillic-iso-8bit) ; koi8 first, then ISO-8859-5
104 (ctext-non-standard-encodings "koi8-r")
105 (nonascii-translation . koi8)
106 (input-method . "russian-typewriter")
107 (features cyril-util)
108 (unibyte-display . cyrillic-koi8)
109 (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
110 (documentation . "Support for Cyrillic KOI8-R."))
111 '("Cyrillic")) ; PARENTS argument: parent menu name for this environment
112
113 ;; Russian uses KOI8-R by default; like "Cyrillic-KOI8", name the koi8
114 ;; charset directly for both `charset' and `nonascii-translation'.
115 (set-language-info-alist
116 "Russian" `((charset koi8)
117 (coding-system cyrillic-koi8)
118 (coding-priority cyrillic-koi8 cyrillic-iso-8bit) ; koi8 first, then ISO-8859-5
119 (nonascii-translation . koi8)
120 (input-method . "russian-computer")
121 (features cyril-util)
122 (unibyte-display . cyrillic-koi8)
123 (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
124 (documentation . "\
125 Support for Russian using koi8-r and the russian-computer input method.")
126 (tutorial . "TUTORIAL.ru"))
127 '("Cyrillic")) ; PARENTS argument: parent menu name for this environment
128
129 (define-coding-system 'koi8-u ; Coding system for KOI8-U text, built on the koi8-u charset.
130 "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)"
131 :coding-type 'charset
132 :mnemonic ?U
133 :charset-list '(koi8-u)
134 :mime-charset 'koi8-u) ; NOTE(review): koi8-u is defined a second time later in this file with the same arguments
135
136 (set-language-info-alist ; Register the Ukrainian language environment (koi8-u based).
137 "Ukrainian" `((charset koi8-u)
138 (coding-system koi8-u)
139 (coding-priority koi8-u)
140 (nonascii-translation . koi8-u)
141 (input-method . "ukrainian-computer")
142 (documentation
143 . "Support for Ukrainian with KOI8-U character set."))
144 '("Cyrillic")) ; NOTE(review): Ukrainian is registered again near the end of this file -- confirm which definition is meant to take effect
145
146 ;;; ALTERNATIVNYJ stuff
147
148 (define-coding-system 'cyrillic-alternativnyj ; Coding system built on the alternativnyj charset; no :mime-charset is given.
149 "ALTERNATIVNYJ 8-bit encoding for Cyrillic."
150 :coding-type 'charset
151 :mnemonic ?A
152 :charset-list '(alternativnyj))
153
154 (define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj) ; shorter alias, same name as the charset
155
156 (set-language-info-alist ; Register the Cyrillic-ALT language environment.
157 "Cyrillic-ALT" `((charset alternativnyj)
158 (coding-system cyrillic-alternativnyj)
159 (coding-priority cyrillic-alternativnyj)
160 (nonascii-translation . alternativnyj)
161 (input-method . "russian-typewriter")
162 (features cyril-util)
163 (unibyte-display . cyrillic-alternativnyj)
164 (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
165 (documentation . "Support for Cyrillic ALTERNATIVNYJ."))
166 '("Cyrillic")) ; PARENTS argument: parent menu name for this environment
167
168 (define-coding-system 'cp866 ; Coding system named cp866, built on the ibm866 charset.
169 "CP866 encoding for Cyrillic."
170 :coding-type 'charset
171 :mnemonic ?* ; shared with koi8-t and cp1125 in this file
172 :charset-list '(ibm866)
173 :mime-charset 'cp866)
174
175 (define-coding-system 'koi8-u ; NOTE(review): repeats, argument for argument, the koi8-u definition made earlier in this file; looks redundant
176 "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)"
177 :coding-type 'charset
178 :mnemonic ?U
179 :charset-list '(koi8-u)
180 :mime-charset 'koi8-u)
181
182 (define-coding-system 'koi8-t ; Coding system built on the koi8-t charset; used by the Tajik environment in this file.
183 "KOI8-T 8-bit encoding for Cyrillic"
184 :coding-type 'charset
185 :mnemonic ?*
186 :charset-list '(koi8-t)
187 :mime-charset 'koi8-t)
188
189 (define-coding-system 'windows-1251 ; Coding system built on the windows-1251 charset; used by Bulgarian and Belarusian in this file.
190 "windows-1251 8-bit encoding for Cyrillic (MIME: WINDOWS-1251)"
191 :coding-type 'charset
192 :mnemonic ?b
193 :charset-list '(windows-1251)
194 :mime-charset 'windows-1251)
195 (define-coding-system-alias 'cp1251 'windows-1251) ; code-page style alias
196
197 (define-coding-system 'cp1125 ; Coding system built on the cp1125 charset; no :mime-charset is given.
198 "cp1125 8-bit encoding for Cyrillic"
199 :coding-type 'charset
200 :mnemonic ?*
201 :charset-list '(cp1125))
202 (define-coding-system-alias 'ruscii 'cp1125) ; alternative name for cp1125
203 ;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
204 (define-coding-system-alias 'cp866u 'cp1125) ; alternative name for cp1125
205
206 (define-coding-system 'cp855 ; Coding system built on the cp855 charset.
207 "DOS codepage 855 (Russian)"
208 :coding-type 'charset
209 :mnemonic ?D ; shared with mik and pt154 in this file
210 :charset-list '(cp855)
211 :mime-charset 'cp855)
212 (define-coding-system-alias 'ibm855 'cp855) ; IBM-style alias
213
214 (define-coding-system 'mik ; Coding system built on the mik charset; no :mime-charset is given.
215 "Bulgarian DOS codepage"
216 :coding-type 'charset
217 :mnemonic ?D ; shared with cp855 and pt154 in this file
218 :charset-list '(mik))
219
220 (define-coding-system 'pt154 ; Coding system built on the pt154 charset; no :mime-charset is given.
221 "ParaType Asian Cyrillic codepage"
222 :coding-type 'charset
223 :mnemonic ?D ; shared with cp855 and mik in this file
224 :charset-list '(pt154))
225
226 ;; (set-language-info-alist
227 ;; "Windows-1251" `((coding-system windows-1251)
228 ;; (coding-priority windows-1251)
229 ;; (input-method . "russian-typewriter") ; fixme?
230 ;; (features code-pages)
231 ;; (documentation . "Support for windows-1251 character set."))
232 ;; '("Cyrillic"))
233
234 (set-language-info-alist ; Register the Tajik language environment (koi8-t based).
235 "Tajik" `((coding-system koi8-t)
236 (coding-priority koi8-t)
237 (nonascii-translation . cyrillic-koi8-t) ; NOTE(review): other environments in this file name a charset here (the charset below is koi8-t) -- confirm cyrillic-koi8-t is defined
238 (charset koi8-t)
239 (input-method . "russian-typewriter") ; fixme?
240 (features code-pages)
241 (documentation . "Support for Tajik using KOI8-T."))
242 '("Cyrillic")) ; PARENTS argument: parent menu name for this environment
243
244 (let* ((name "microsoft-cp1251") ; key under which windows-1251 is listed in ctext-non-standard-encodings-alist
245 (entry (list name 'windows-1251 1 (get 'encode-windows-1251 'translation-table)))
246 (known (assoc name ctext-non-standard-encodings-alist)))
247 (if (null known)
248 (push entry ctext-non-standard-encodings-alist) ; not listed yet: add the whole entry
249 (setcdr known (cdr entry)))) ; already listed: overwrite its definition in place
250
251 (set-language-info-alist ; Register the Bulgarian language environment (windows-1251 based).
252 "Bulgarian" `((coding-system windows-1251)
253 (coding-priority windows-1251)
254 (nonascii-translation . windows-1251)
255 (charset windows-1251)
256 (ctext-non-standard-encodings "microsoft-cp1251") ; name this file also lists in ctext-non-standard-encodings-alist
257 (input-method . "bulgarian-bds")
258 (documentation
259 . "Support for Bulgarian with windows-1251 character set."))
260 '("Cyrillic")) ; PARENTS argument: parent menu name for this environment
261
262 (set-language-info-alist ; Register the Belarusian language environment (windows-1251 based).
263 "Belarusian" `((coding-system windows-1251)
264 (coding-priority windows-1251)
265 (nonascii-translation . windows-1251)
266 (charset windows-1251)
267 (ctext-non-standard-encodings "microsoft-cp1251") ; name this file also lists in ctext-non-standard-encodings-alist
268 (input-method . "belarusian")
269 (documentation
270 . "Support for Belarusian with windows-1251 character set.
271 \(The name Belarusian replaced Byelorussian in the early 1990s.)"))
272 '("Cyrillic")) ; PARENTS argument: parent menu name for this environment
273
274 (set-language-info-alist ; Register Ukrainian (koi8-u based), with the same charset/translation entries as the earlier registration in this file.
275 "Ukrainian" `((charset koi8-u) (coding-system koi8-u)
276 (coding-priority koi8-u) (nonascii-translation . koi8-u)
277 (input-method . "ukrainian-computer")
278 (documentation
279 . "Support for Ukrainian with koi8-u character set."))
280 '("Cyrillic")) ; PARENTS argument: parent menu name for this environment
281
282 (provide 'cyrillic) ; announce the `cyrillic' feature so `require' callers see it as loaded
283
284 ;;; arch-tag: bda71ae0-ba41-4cb6-a6e0-1dff542313d3
285 ;;; cyrillic.el ends here