]> code.delx.au - gnu-emacs/blob - lisp/international/mule-conf.el
11207b0b78dc1b9647b384fb6cbc0798a14140cf
[gnu-emacs] / lisp / international / mule-conf.el
1 ;;; mule-conf.el --- configure multilingual environment
2
3 ;; Copyright (C) 1997-2012 Free Software Foundation, Inc.
4 ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 ;; National Institute of Advanced Industrial Science and Technology (AIST)
6 ;; Registration Number H14PRO021
7 ;; Copyright (C) 2003
8 ;; National Institute of Advanced Industrial Science and Technology (AIST)
9 ;; Registration Number H13PRO009
10
11 ;; Keywords: i18n, mule, multilingual, character set, coding system
12
13 ;; This file is part of GNU Emacs.
14
15 ;; GNU Emacs is free software: you can redistribute it and/or modify
16 ;; it under the terms of the GNU General Public License as published by
17 ;; the Free Software Foundation, either version 3 of the License, or
18 ;; (at your option) any later version.
19
20 ;; GNU Emacs is distributed in the hope that it will be useful,
21 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ;; GNU General Public License for more details.
24
25 ;; You should have received a copy of the GNU General Public License
26 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
27
28 ;;; Commentary:
29
30 ;; This file defines the Emacs charsets and some basic coding systems.
31 ;; Other coding systems are defined in the files in directory
32 ;; lisp/language.
33
34 ;;; Code:
35
36 ;;; Remarks
37
38 ;; The ISO-IR registry is at http://www.itscj.ipsj.or.jp/ISO-IR/.
39 ;; Standards docs equivalent to iso-2022 and iso-8859 are at
40 ;; http://www.ecma.ch/.
41
42 ;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
43 ;; MS Windows, which are presumably the only charsets we really need
44 ;; to worry about on such systems:
45 ;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
46 ;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
47 ;; 1258, 874, 932, 936, 949, 950
48
49 ;;; Definitions of character sets.
50
51 ;; The charsets `ascii', `unicode', `emacs' and `eight-bit' are
52 ;; already defined in charset.c as below:
53 ;;
54 ;; (define-charset 'ascii
55 ;; ""
56 ;; :dimension 1
57 ;; :code-space [0 127]
58 ;; :iso-final-char ?B
59 ;; :ascii-compatible-p t
60 ;; :emacs-mule-id 0
61 ;; :code-offset 0)
62 ;;
63 ;; (define-charset 'unicode
64 ;; ""
65 ;; :dimension 3
66 ;; :code-space [0 255 0 255 0 16]
67 ;; :ascii-compatible-p t
68 ;; :code-offset 0)
69 ;;
70 ;; (define-charset 'emacs
71 ;; ""
72 ;; :dimension 3
73 ;; :code-space [0 255 0 255 0 63]
74 ;; :ascii-compatible-p t
75 ;; :supplementary-p t
76 ;; :code-offset 0)
77 ;;
78 ;; (define-charset 'eight-bit
79 ;; ""
80 ;; :dimension 1
81 ;; :code-space [128 255]
82 ;; :code-offset #x3FFF80)
83 ;;
84 ;; We now set :docstring, :short-name, and :long-name properties.
85
;; Attach descriptive properties to charsets that are not defined by a
;; `define-charset' form in this file and are expected to exist already.
;; Each entry is (CHARSET DOCSTRING SHORT-NAME LONG-NAME); a nil
;; LONG-NAME means the :long-name property is left untouched.
(dolist (entry '((ascii "ASCII (ISO646 IRV)" "ASCII" "ASCII (ISO646 IRV)")
                 (iso-8859-1 "Latin-1 (ISO/IEC 8859-1)" "Latin-1" "Latin-1")
                 (unicode "Unicode (ISO10646)" "Unicode" "Unicode (ISO10646)")
                 (emacs "Full Emacs charset (excluding eight bit chars)"
                        "Emacs" "Emacs")
                 (eight-bit "Raw bytes 128-255" "Raw bytes" nil)))
  (let ((cs (nth 0 entry))
        (long (nth 3 entry)))
    (put-charset-property cs :docstring (nth 1 entry))
    (put-charset-property cs :short-name (nth 2 entry))
    (when long
      (put-charset-property cs :long-name long))))
113
114 (define-charset-alias 'ucs 'unicode)
115
116 (define-charset 'latin-iso8859-1
117 "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
118 :short-name "RHP of Latin-1"
119 :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
120 :iso-final-char ?A
121 :emacs-mule-id 129
122 :code-space [32 127]
123 :code-offset 160)
124
125 ;; Name perhaps not ideal, but is XEmacs-compatible.
126 (define-charset 'control-1
127 "8-bit control code (0x80..0x9F)"
128 :short-name "8-bit control code"
129 :code-space [128 159]
130 :code-offset 128)
131
132 (define-charset 'eight-bit-control
133 "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
134 :short-name "Raw bytes 0x80..0x9F"
135 :supplementary-p t
136 :code-space [128 159]
137 :code-offset #x3FFF80) ; see character.h
138
139 (define-charset 'eight-bit-graphic
140 "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
141 :short-name "Raw bytes 0xA0..0xFF"
142 :supplementary-p t
143 :code-space [160 255]
144 :code-offset #x3FFFA0) ; see character.h
145
(defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
                                                 iso-ir iso-final
                                                 emacs-mule-id map)
  "Define the single-byte charset SYMBOL and, optionally, its right-hand part.
SYMBOL is defined as an ASCII-compatible charset with code space 0..255
that uses the map MAP.  NAME serves as its docstring and long name,
NICKNAME as its short name.

If ISO-SYMBOL is non-nil, ISO-SYMBOL is defined as well, with code space
32..127 and the `:subset' specification (SYMBOL 160 255 -128).
ISO-FINAL and EMACS-MULE-ID are passed on as its `:iso-final-char' and
`:emacs-mule-id'.  A non-nil ISO-IR is appended to its docstring as the
ISO-IR number.

All arguments are forms that are evaluated when the expansion runs;
several of them appear more than once in the expansion."
  `(progn
     ;; The full 8-bit charset.
     (define-charset ,symbol
       ,name
       :short-name ,nickname
       :long-name ,name
       :ascii-compatible-p t
       :code-space [0 255]
       :map ,map)
     ;; The right-hand part, only when a name was supplied for it.
     (when ,iso-symbol
       (define-charset ,iso-symbol
         (if ,iso-ir
             (format "Right-Hand Part of %s (%s): ISO-IR-%d"
                     ,name ,nickname ,iso-ir)
           (format "Right-Hand Part of %s (%s)" ,name ,nickname))
         :short-name (format "RHP of %s" ,name)
         :long-name (format "RHP of %s (%s)" ,name ,nickname)
         :iso-final-char ,iso-final
         :emacs-mule-id ,emacs-mule-id
         :code-space [32 127]
         :subset (list ,symbol 160 255 -128)))))
169
170 (define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
171 "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")
172
173 (define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
174 "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
175
176 (define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
177 "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
178
179 (define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
180 "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
181
182 (define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
183 "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
184
185 (define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
186 "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
187
188 (define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
189 "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
190
191 (define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
192 "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
193
194 (define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
195 "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")
196
197 ;; http://www.nectec.or.th/it-standards/iso8859-11/
198 ;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
199 ;; plus nbsp
200 (define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
201 "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")
202
203 ;; 8859-12 doesn't (yet?) exist.
204
205 (define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
206 "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")
207
208 (define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
209 "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
210
211 (define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
212 "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
213
214 (define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
215 "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")
216
217 ;; No point in keeping it around.
218 (fmakunbound 'define-iso-single-byte-charset)
219
220 ;; Can this be shared with 8859-11?
221 ;; N.b. not all of these are defined in Unicode.
222 (define-charset 'thai-tis620
223 "TIS620.2533"
224 :short-name "TIS620.2533"
225 :iso-final-char ?T
226 :emacs-mule-id 133
227 :code-space [32 127]
228 :code-offset #x0E00)
229
230 ;; Fixme: doc for this, c.f. above
231 (define-charset 'tis620-2533
232 "TIS620.2533"
233 :short-name "TIS620.2533"
234 :ascii-compatible-p t
235 :code-space [0 255]
236 :superset '(ascii eight-bit-control (thai-tis620 . 128)))
237
238 (define-charset 'jisx0201
239 "JISX0201"
240 :short-name "JISX0201"
241 :code-space [0 #xDF]
242 :map "JISX0201")
243
244 (define-charset 'latin-jisx0201
245 "Roman Part of JISX0201.1976"
246 :short-name "JISX0201 Roman"
247 :long-name "Japanese Roman (JISX0201.1976)"
248 :iso-final-char ?J
249 :emacs-mule-id 138
250 :supplementary-p t
251 :code-space [33 126]
252 :subset '(jisx0201 33 126 0))
253
254 (define-charset 'katakana-jisx0201
255 "Katakana Part of JISX0201.1976"
256 :short-name "JISX0201 Katakana"
257 :long-name "Japanese Katakana (JISX0201.1976)"
258 :iso-final-char ?I
259 :emacs-mule-id 137
260 :supplementary-p t
261 :code-space [33 126]
262 :subset '(jisx0201 161 254 -128))
263
264 (define-charset 'chinese-gb2312
265 "GB2312 Chinese simplified: ISO-IR-58"
266 :short-name "GB2312"
267 :long-name "GB2312: ISO-IR-58"
268 :iso-final-char ?A
269 :emacs-mule-id 145
270 :code-space [33 126 33 126]
271 :code-offset #x110000
272 :unify-map "GB2312")
273
274 (define-charset 'chinese-gbk
275 "GBK Chinese simplified."
276 :short-name "GBK"
277 :code-space [#x40 #xFE #x81 #xFE]
278 :code-offset #x160000
279 :unify-map "GBK")
280 (define-charset-alias 'cp936 'chinese-gbk)
281 (define-charset-alias 'windows-936 'chinese-gbk)
282
283 (define-charset 'chinese-cns11643-1
284 "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
285 :short-name "CNS11643-1"
286 :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
287 :iso-final-char ?G
288 :emacs-mule-id 149
289 :code-space [33 126 33 126]
290 :code-offset #x114000
291 :unify-map "CNS-1")
292
293 (define-charset 'chinese-cns11643-2
294 "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
295 :short-name "CNS11643-2"
296 :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
297 :iso-final-char ?H
298 :emacs-mule-id 150
299 :code-space [33 126 33 126]
300 :code-offset #x118000
301 :unify-map "CNS-2")
302
303 (define-charset 'chinese-cns11643-3
304 "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
305 :short-name "CNS11643-3"
306 :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
307 :iso-final-char ?I
308 :code-space [33 126 33 126]
309 :emacs-mule-id 246
310 :code-offset #x11C000
311 :unify-map "CNS-3")
312
313 (define-charset 'chinese-cns11643-4
314 "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
315 :short-name "CNS11643-4"
316 :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
317 :iso-final-char ?J
318 :emacs-mule-id 247
319 :code-space [33 126 33 126]
320 :code-offset #x120000
321 :unify-map "CNS-4")
322
323 (define-charset 'chinese-cns11643-5
324 "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
325 :short-name "CNS11643-5"
326 :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
327 :iso-final-char ?K
328 :emacs-mule-id 248
329 :code-space [33 126 33 126]
330 :code-offset #x124000
331 :unify-map "CNS-5")
332
333 (define-charset 'chinese-cns11643-6
334 "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
335 :short-name "CNS11643-6"
336 :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
337 :iso-final-char ?L
338 :emacs-mule-id 249
339 :code-space [33 126 33 126]
340 :code-offset #x128000
341 :unify-map "CNS-6")
342
343 (define-charset 'chinese-cns11643-7
344 "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
345 :short-name "CNS11643-7"
346 :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
347 :iso-final-char ?M
348 :emacs-mule-id 250
349 :code-space [33 126 33 126]
350 :code-offset #x12C000
351 :unify-map "CNS-7")
352
353 (define-charset 'big5
354 "Big5 (Chinese traditional)"
355 :short-name "Big5"
356 :code-space [#x40 #xFE #xA1 #xFE]
357 :code-offset #x130000
358 :unify-map "BIG5")
359 ;; Fixme: AKA cp950 according to
360 ;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>. Is
361 ;; that correct?
362
;; First of the two charsets that split Big5 for ISO 2022 style
;; designation (see `chinese-big5-2' for the other).
(define-charset 'chinese-big5-1
  "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-1)"
  ;; Upper bound is C67E, as in the docstring: 0x7F is not a valid Big5
  ;; trail byte, so C67F cannot be the last code of the range.
  :long-name "Big5 (Level-1) A141-C67E"
  :iso-final-char ?0
  :emacs-mule-id 152
  :supplementary-p t
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x135000
  :unify-map "BIG5-1")
373
374 (define-charset 'chinese-big5-2
375 "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
376 :short-name "Big5 (Level-2)"
377 :long-name "Big5 (Level-2) C940-FEFE"
378 :iso-final-char ?1
379 :emacs-mule-id 153
380 :supplementary-p t
381 :code-space [#x21 #x7E #x21 #x7E]
382 :code-offset #x137800
383 :unify-map "BIG5-2")
384
385 (define-charset 'japanese-jisx0208
386 "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
387 :short-name "JISX0208"
388 :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
389 :iso-final-char ?B
390 :emacs-mule-id 146
391 :code-space [33 126 33 126]
392 :code-offset #x140000
393 :unify-map "JISX0208")
394
395 (define-charset 'japanese-jisx0208-1978
396 "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
397 :short-name "JISX0208.1978"
398 :long-name "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
399 :iso-final-char ?@
400 :emacs-mule-id 144
401 :code-space [33 126 33 126]
402 :code-offset #x144000
403 :unify-map "JISC6226")
404
405 (define-charset 'japanese-jisx0212
406 "JISX0212 Japanese supplement: ISO-IR-159"
407 :short-name "JISX0212"
408 :long-name "JISX0212 (Japanese): ISO-IR-159"
409 :iso-final-char ?D
410 :emacs-mule-id 148
411 :code-space [33 126 33 126]
412 :code-offset #x148000
413 :unify-map "JISX0212")
414
415 ;; Note that jisx0213 contains characters not in Unicode (3.2?). It's
416 ;; arguable whether it should have a unify-map.
417 (define-charset 'japanese-jisx0213-1
418 "JISX0213.2000 Plane 1 (Japanese)"
419 :short-name "JISX0213-1"
420 :iso-final-char ?O
421 :emacs-mule-id 151
422 :unify-map "JISX2131"
423 :code-space [33 126 33 126]
424 :code-offset #x14C000)
425
426 (define-charset 'japanese-jisx0213-2
427 "JISX0213.2000 Plane 2 (Japanese)"
428 :short-name "JISX0213-2"
429 :iso-final-char ?P
430 :emacs-mule-id 254
431 :unify-map "JISX2132"
432 :code-space [33 126 33 126]
433 :code-offset #x150000)
434
435 (define-charset 'japanese-jisx0213-a
436 "JISX0213.2004 adds these characters to JISX0213.2000."
437 :short-name "JISX0213A"
438 :dimension 2
439 :code-space [33 126 33 126]
440 :supplementary-p t
441 :map "JISX213A")
442
443 (define-charset 'japanese-jisx0213.2004-1
444 "JISX0213.2004 Plane1 (Japanese)"
445 :short-name "JISX0213.2004-1"
446 :dimension 2
447 :code-space [33 126 33 126]
448 :iso-final-char ?Q
449 :superset '(japanese-jisx0213-a japanese-jisx0213-1))
450
451 (define-charset 'katakana-sjis
452 "Katakana part of Shift-JIS"
453 :dimension 1
454 :code-space [#xA1 #xDF]
455 :subset '(jisx0201 #xA1 #xDF 0)
456 :supplementary-p t)
457
458 (define-charset 'cp932-2-byte
459 "2-byte part of CP932"
460 :dimension 2
461 :map "CP932-2BYTE"
462 :code-space [#x40 #xFC #x81 #xFC]
463 :supplementary-p t)
464
465 (define-charset 'cp932
466 "CP932 (Microsoft shift-jis)"
467 :code-space [#x00 #xFF #x00 #xFE]
468 :short-name "CP932"
469 :superset '(ascii katakana-sjis cp932-2-byte))
470
471 (define-charset 'korean-ksc5601
472 "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
473 :short-name "KSC5601"
474 :long-name "KSC5601 (Korean): ISO-IR-149"
475 :iso-final-char ?C
476 :emacs-mule-id 147
477 :code-space [33 126 33 126]
478 :code-offset #x279f94 ; ... #x27c217
479 :unify-map "KSC5601")
480
481 (define-charset 'big5-hkscs
482 "Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
483 :short-name "Big5"
484 :code-space [#x40 #xFE #xA1 #xFE]
485 :code-offset #x27c218 ; ... #x280839
486 :unify-map "BIG5-HKSCS")
487
488 (define-charset 'cp949-2-byte
489 "2-byte part of CP949"
490 :dimension 2
491 :map "CP949-2BYTE"
492 :code-space [#x41 #xFE #x81 #xFD]
493 :supplementary-p t)
494
495 (define-charset 'cp949
496 "CP949 (Korean)"
497 :short-name "CP949"
498 :long-name "CP949 (Korean)"
499 :code-space [#x00 #xFE #x00 #xFD]
500 :superset '(ascii cp949-2-byte))
501
502 (define-charset 'chinese-sisheng
503 "SiSheng characters for PinYin/ZhuYin"
504 :short-name "SiSheng"
505 :long-name "SiSheng (PinYin/ZhuYin)"
506 :iso-final-char ?0
507 :emacs-mule-id 160
508 :code-space [33 126]
509 :unify-map "MULE-sisheng"
510 :supplementary-p t
511 :code-offset #x200000)
512
513 ;; A subset of the 1989 version of IPA. It consists of the consonant
514 ;; signs used in English, French, German and Italian, and all vowels
515 ;; signs in the table. [says old MULE doc]
516 (define-charset 'ipa
517 "IPA (International Phonetic Association)"
518 :short-name "IPA"
519 :iso-final-char ?0
520 :emacs-mule-id 161
521 :unify-map "MULE-ipa"
522 :code-space [32 127]
523 :supplementary-p t
524 :code-offset #x200080)
525
526 (define-charset 'viscii
527 "VISCII1.1"
528 :short-name "VISCII"
529 :long-name "VISCII 1.1"
530 :code-space [0 255]
531 :map "VISCII")
532
533 (define-charset 'vietnamese-viscii-lower
534 "VISCII1.1 lower-case"
535 :short-name "VISCII lower"
536 :long-name "VISCII lower-case"
537 :iso-final-char ?1
538 :emacs-mule-id 162
539 :code-space [32 127]
540 :code-offset #x200200
541 :supplementary-p t
542 :unify-map "MULE-lviscii")
543
544 (define-charset 'vietnamese-viscii-upper
545 "VISCII1.1 upper-case"
546 :short-name "VISCII upper"
547 :long-name "VISCII upper-case"
548 :iso-final-char ?2
549 :emacs-mule-id 163
550 :code-space [32 127]
551 :code-offset #x200280
552 :supplementary-p t
553 :unify-map "MULE-uviscii")
554
555 (define-charset 'vscii
556 "VSCII1.1 (TCVN-5712 VN1)"
557 :short-name "VSCII"
558 :code-space [0 255]
559 :map "VSCII")
560
561 (define-charset-alias 'tcvn-5712 'vscii)
562
563 ;; Fixme: see note in tcvn.map about combining characters
564 (define-charset 'vscii-2
565 "VSCII-2 (TCVN-5712 VN2)"
566 :code-space [0 255]
567 :map "VSCII-2")
568
569 (define-charset 'koi8-r
570 "KOI8-R"
571 :short-name "KOI8-R"
572 :ascii-compatible-p t
573 :code-space [0 255]
574 :map "KOI8-R")
575
576 (define-charset-alias 'koi8 'koi8-r)
577
578 (define-charset 'alternativnyj
579 "ALTERNATIVNYJ"
580 :short-name "alternativnyj"
581 :ascii-compatible-p t
582 :code-space [0 255]
583 :map "ALTERNATIVNYJ")
584
585 (define-charset 'cp866
586 "CP866"
587 :short-name "cp866"
588 :ascii-compatible-p t
589 :code-space [0 255]
590 :map "IBM866")
591 (define-charset-alias 'ibm866 'cp866)
592
593 (define-charset 'koi8-u
594 "KOI8-U"
595 :short-name "KOI8-U"
596 :ascii-compatible-p t
597 :code-space [0 255]
598 :map "KOI8-U")
599
600 (define-charset 'koi8-t
601 "KOI8-T"
602 :short-name "KOI8-T"
603 :ascii-compatible-p t
604 :code-space [0 255]
605 :map "KOI8-T")
606
607 (define-charset 'georgian-ps
608 "GEORGIAN-PS"
609 :short-name "GEORGIAN-PS"
610 :ascii-compatible-p t
611 :code-space [0 255]
612 :map "KA-PS")
613
614 (define-charset 'georgian-academy
615 "GEORGIAN-ACADEMY"
616 :short-name "GEORGIAN-ACADEMY"
617 :ascii-compatible-p t
618 :code-space [0 255]
619 :map "KA-ACADEMY")
620
621 (define-charset 'windows-1250
622 "WINDOWS-1250 (Central Europe)"
623 :short-name "WINDOWS-1250"
624 :ascii-compatible-p t
625 :code-space [0 255]
626 :map "CP1250")
627 (define-charset-alias 'cp1250 'windows-1250)
628
629 (define-charset 'windows-1251
630 "WINDOWS-1251 (Cyrillic)"
631 :short-name "WINDOWS-1251"
632 :ascii-compatible-p t
633 :code-space [0 255]
634 :map "CP1251")
635 (define-charset-alias 'cp1251 'windows-1251)
636
637 (define-charset 'windows-1252
638 "WINDOWS-1252 (Latin I)"
639 :short-name "WINDOWS-1252"
640 :ascii-compatible-p t
641 :code-space [0 255]
642 :map "CP1252")
643 (define-charset-alias 'cp1252 'windows-1252)
644
645 (define-charset 'windows-1253
646 "WINDOWS-1253 (Greek)"
647 :short-name "WINDOWS-1253"
648 :ascii-compatible-p t
649 :code-space [0 255]
650 :map "CP1253")
651 (define-charset-alias 'cp1253 'windows-1253)
652
653 (define-charset 'windows-1254
654 "WINDOWS-1254 (Turkish)"
655 :short-name "WINDOWS-1254"
656 :ascii-compatible-p t
657 :code-space [0 255]
658 :map "CP1254")
659 (define-charset-alias 'cp1254 'windows-1254)
660
661 (define-charset 'windows-1255
662 "WINDOWS-1255 (Hebrew)"
663 :short-name "WINDOWS-1255"
664 :ascii-compatible-p t
665 :code-space [0 255]
666 :map "CP1255")
667 (define-charset-alias 'cp1255 'windows-1255)
668
669 (define-charset 'windows-1256
670 "WINDOWS-1256 (Arabic)"
671 :short-name "WINDOWS-1256"
672 :ascii-compatible-p t
673 :code-space [0 255]
674 :map "CP1256")
675 (define-charset-alias 'cp1256 'windows-1256)
676
677 (define-charset 'windows-1257
678 "WINDOWS-1257 (Baltic)"
679 :short-name "WINDOWS-1257"
680 :ascii-compatible-p t
681 :code-space [0 255]
682 :map "CP1257")
683 (define-charset-alias 'cp1257 'windows-1257)
684
685 (define-charset 'windows-1258
686 "WINDOWS-1258 (Viet Nam)"
687 :short-name "WINDOWS-1258"
688 :ascii-compatible-p t
689 :code-space [0 255]
690 :map "CP1258")
691 (define-charset-alias 'cp1258 'windows-1258)
692
693 (define-charset 'next
694 "NEXT"
695 :short-name "NEXT"
696 :ascii-compatible-p t
697 :code-space [0 255]
698 :map "NEXTSTEP")
699
700 (define-charset 'cp1125
701 "CP1125"
702 :short-name "CP1125"
703 :code-space [0 255]
704 :ascii-compatible-p t
705 :map "CP1125")
706 (define-charset-alias 'ruscii 'cp1125)
707 ;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
708 (define-charset-alias 'cp866u 'cp1125)
709
710 ;; Fixme: C.f. iconv, http://czyborra.com/charsets/codepages.html
711 ;; shows this as not ASCII compatible, with various graphics in
712 ;; 0x01-0x1F.
713 (define-charset 'cp437
714 "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
715 :short-name "CP437"
716 :code-space [0 255]
717 :ascii-compatible-p t
718 :map "IBM437")
719
720 (define-charset 'cp720
721 "CP720 (Arabic)"
722 :short-name "CP720"
723 :code-space [0 255]
724 :ascii-compatible-p t
725 :map "CP720")
726
727 (define-charset 'cp737
728 "CP737 (PC Greek)"
729 :short-name "CP737"
730 :code-space [0 255]
731 :ascii-compatible-p t
732 :map "CP737")
733
734 (define-charset 'cp775
735 "CP775 (PC Baltic)"
736 :short-name "CP775"
737 :code-space [0 255]
738 :ascii-compatible-p t
739 :map "CP775")
740
741 (define-charset 'cp851
742 "CP851 (Greek)"
743 :short-name "CP851"
744 :code-space [0 255]
745 :ascii-compatible-p t
746 :map "IBM851")
747
748 (define-charset 'cp852
749 "CP852 (MS-DOS Latin-2)"
750 :short-name "CP852"
751 :code-space [0 255]
752 :ascii-compatible-p t
753 :map "IBM852")
754
755 (define-charset 'cp855
756 "CP855 (IBM Cyrillic)"
757 :short-name "CP855"
758 :code-space [0 255]
759 :ascii-compatible-p t
760 :map "IBM855")
761
762 (define-charset 'cp857
763 "CP857 (IBM Turkish)"
764 :short-name "CP857"
765 :code-space [0 255]
766 :ascii-compatible-p t
767 :map "IBM857")
768
769 (define-charset 'cp858
770 "CP858 (Multilingual Latin I + Euro)"
771 :short-name "CP858"
772 :code-space [0 255]
773 :ascii-compatible-p t
774 :map "CP858")
775 (define-charset-alias 'cp00858 'cp858) ; IANA has IBM00858/CP00858
776
777 (define-charset 'cp860
778 "CP860 (MS-DOS Portuguese)"
779 :short-name "CP860"
780 :code-space [0 255]
781 :ascii-compatible-p t
782 :map "IBM860")
783
784 (define-charset 'cp861
785 "CP861 (MS-DOS Icelandic)"
786 :short-name "CP861"
787 :code-space [0 255]
788 :ascii-compatible-p t
789 :map "IBM861")
790
791 (define-charset 'cp862
792 "CP862 (PC Hebrew)"
793 :short-name "CP862"
794 :code-space [0 255]
795 :ascii-compatible-p t
796 :map "IBM862")
797
798 (define-charset 'cp863
799 "CP863 (MS-DOS Canadian French)"
800 :short-name "CP863"
801 :code-space [0 255]
802 :ascii-compatible-p t
803 :map "IBM863")
804
805 (define-charset 'cp864
806 "CP864 (PC Arabic)"
807 :short-name "CP864"
808 :code-space [0 255]
809 :ascii-compatible-p t
810 :map "IBM864")
811
812 (define-charset 'cp865
813 "CP865 (MS-DOS Nordic)"
814 :short-name "CP865"
815 :code-space [0 255]
816 :ascii-compatible-p t
817 :map "IBM865")
818
819 (define-charset 'cp869
820 "CP869 (IBM Modern Greek)"
821 :short-name "CP869"
822 :code-space [0 255]
823 :ascii-compatible-p t
824 :map "IBM869")
825
826 (define-charset 'cp874
827 "CP874 (IBM Thai)"
828 :short-name "CP874"
829 :code-space [0 255]
830 :ascii-compatible-p t
831 :map "IBM874")
832
833 ;; For Arabic, we need three different types of character sets.
834 ;; Digits are of direction left-to-right and of width 1-column.
835 ;; Others are of direction right-to-left and of width 1-column or
836 ;; 2-column.
837 (define-charset 'arabic-digit
838 "Arabic digit"
839 :short-name "Arabic digit"
840 :iso-final-char ?2
841 :emacs-mule-id 164
842 :supplementary-p t
843 :code-space [34 42]
844 :code-offset #x0600)
845
846 (define-charset 'arabic-1-column
847 "Arabic 1-column"
848 :short-name "Arabic 1-col"
849 :long-name "Arabic 1-column"
850 :iso-final-char ?3
851 :emacs-mule-id 165
852 :supplementary-p t
853 :code-space [33 126]
854 :code-offset #x200100)
855
856 (define-charset 'arabic-2-column
857 "Arabic 2-column"
858 :short-name "Arabic 2-col"
859 :long-name "Arabic 2-column"
860 :iso-final-char ?4
861 :emacs-mule-id 224
862 :supplementary-p t
863 :code-space [33 126]
864 :code-offset #x200180)
865
866 ;; Lao script.
867 ;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
868 ;; Not all of them are defined in Unicode.
869 (define-charset 'lao
870 "Lao characters (ISO10646 0E81..0EDF)"
871 :short-name "Lao"
872 :iso-final-char ?1
873 :emacs-mule-id 167
874 :supplementary-p t
875 :code-space [33 126]
876 :code-offset #x0E81)
877
878 (define-charset 'mule-lao
879 "Lao characters (ISO10646 0E81..0EDF)"
880 :short-name "Lao"
881 :code-space [0 255]
882 :supplementary-p t
883 :superset '(ascii eight-bit-control (lao . 128)))
884
885
886 ;; Indian scripts. Symbolic charset for data exchange. Glyphs are
887 ;; not assigned. They are automatically converted to each Indian
888 ;; script which IS-13194 supports.
889
890 (define-charset 'indian-is13194
891 "Generic Indian charset for data exchange with IS 13194"
892 :short-name "IS 13194"
893 :long-name "Indian IS 13194"
894 :iso-final-char ?5
895 :emacs-mule-id 225
896 :supplementary-p t
897 :code-space [33 126]
898 :code-offset #x180000)
899
;; Define one charset with code space 0..255 per (font family, script)
;; pair, named SCRIPT-cdac or SCRIPT-akruti.  The charsets are laid out
;; consecutively: the first one starts at code offset #x180100 and each
;; following one starts #x100 later, all CDAC charsets first, then the
;; AKRUTI ones.
(let ((code-offset #x180100))
  (dolist (family '(("cdac" "CDAC"
                     (devanagari sanskrit bengali tamil telugu assamese
                      oriya kannada malayalam gujarati punjabi))
                    ("akruti" "AKRUTI"
                     (devanagari bengali punjabi gujarati
                      oriya tamil telugu kannada malayalam))))
    (let ((suffix (nth 0 family))       ; used in the charset symbol
          (label (nth 1 family)))       ; used in the descriptive strings
      (dolist (script (nth 2 family))
        (let ((script-name (capitalize (symbol-name script))))
          (define-charset (intern (format "%s-%s" script suffix))
            (format "Glyphs of %s script for %s font. Subset of `indian-glyph'."
                    script-name label)
            :short-name (format "%s %s glyphs" label script-name)
            :supplementary-p t
            :code-space [0 255]
            :code-offset code-offset))
        ;; Advance to the next 256-code slot.
        (setq code-offset (+ code-offset #x100))))))
922
923 (define-charset 'indian-glyph
924 "Glyphs for Indian characters."
925 :short-name "Indian glyph"
926 :iso-final-char ?4
927 :emacs-mule-id 240
928 :supplementary-p t
929 :code-space [32 127 32 127]
930 :code-offset #x180100)
931
932 ;; Actual Glyph for 1-column width.
(define-charset 'indian-1-column
  "Indian charset for 1-column width glyphs."
  :short-name "Indian 1-col"
  :long-name "Indian 1 Column"
  :iso-final-char ?6
  ;; This id must differ from the one of `indian-2-column' (251);
  ;; otherwise the two charsets share a single emacs-mule id.
  ;; 240 is the free slot next to 241 (`tibetan-1-column').
  ;; NOTE(review): `indian-glyph' also declares 240 -- confirm that
  ;; sharing the id with it is intended.
  :emacs-mule-id 240
  :supplementary-p t
  :code-space [33 126 33 126]
  :code-offset #x184000)
942
943 ;; Actual Glyph for 2-column width.
944 (define-charset 'indian-2-column
945 "Indian charset for 2-column width glyphs."
946 :short-name "Indian 2-col"
947 :long-name "Indian 2 Column"
948 :iso-final-char ?5
949 :emacs-mule-id 251
950 :supplementary-p t
951 :code-space [33 126 33 126]
952 :code-offset #x184000)
953
;; The 2-column Tibetan charset; `tibetan-1-column' is its 1-column
;; counterpart.  `:iso-final-char' is given once only (it used to be
;; listed twice with the same value).
(define-charset 'tibetan
  "Tibetan characters"
  :short-name "Tibetan 2-col"
  :long-name "Tibetan 2 column"
  :iso-final-char ?7
  :emacs-mule-id 252
  :unify-map "MULE-tibetan"
  :supplementary-p t
  :code-space [33 126 33 37]
  :code-offset #x190000)
965
966 (define-charset 'tibetan-1-column
967 "Tibetan 1 column glyph"
968 :short-name "Tibetan 1-col"
969 :long-name "Tibetan 1 column"
970 :iso-final-char ?8
971 :emacs-mule-id 241
972 :supplementary-p t
973 :code-space [33 126 33 37]
974 :code-offset #x190000)
975
976 ;; Subsets of Unicode.
977 (define-charset 'mule-unicode-2500-33ff
978 "Unicode characters of the range U+2500..U+33FF."
979 :short-name "Unicode subset 2"
980 :long-name "Unicode subset (U+2500..U+33FF)"
981 :iso-final-char ?2
982 :emacs-mule-id 242
983 :supplementary-p t
984 :code-space [#x20 #x7f #x20 #x47]
985 :code-offset #x2500)
986
(define-charset 'mule-unicode-e000-ffff
  "Unicode characters of the range U+E000..U+FFFF."
  :short-name "Unicode subset 3"
  ;; Range written as in the docstring and in the other
  ;; `mule-unicode-*' charsets.
  :long-name "Unicode subset (U+E000..U+FFFF)"
  :iso-final-char ?3
  :emacs-mule-id 243
  :supplementary-p t
  :code-space [#x20 #x7F #x20 #x75]
  :code-offset #xE000
  ;; 30015 = #x753F: row #x75 - #x20 = 85, column #x3F - #x20 = 31,
  ;; and 85 * 96 + 31 = #x1FFF, i.e. U+FFFF after adding the offset.
  :max-code 30015)
997
998 (define-charset 'mule-unicode-0100-24ff
999 "Unicode characters of the range U+0100..U+24FF."
1000 :short-name "Unicode subset"
1001 :long-name "Unicode subset (U+0100..U+24FF)"
1002 :iso-final-char ?1
1003 :emacs-mule-id 244
1004 :supplementary-p t
1005 :code-space [#x20 #x7F #x20 #x7F]
1006 :code-offset #x100)
1007
1008 (define-charset 'unicode-bmp
1009 "Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
1010 :short-name "Unicode BMP"
1011 :code-space [0 255 0 255]
1012 :code-offset 0)
1013
(define-charset 'unicode-smp
  "Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
  ;; No trailing blank, like the short names of `unicode-bmp',
  ;; `unicode-sip' and `unicode-ssp'.
  :short-name "Unicode SMP"
  :code-space [0 255 0 255]
  :code-offset #x10000)
1019
1020 (define-charset 'unicode-sip
1021 "Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
1022 :short-name "Unicode SIP"
1023 :code-space [0 255 0 255]
1024 :code-offset #x20000)
1025
1026 (define-charset 'unicode-ssp
1027 "Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
1028 :short-name "Unicode SSP"
1029 :code-space [0 255 0 255]
1030 :code-offset #xE0000)
1031
1032 (define-charset 'ethiopic
1033 "Ethiopic characters for Amharic and Tigrigna."
1034 :short-name "Ethiopic"
1035 :long-name "Ethiopic characters"
1036 :iso-final-char ?3
1037 :emacs-mule-id 245
1038 :supplementary-p t
1039 :unify-map "MULE-ethiopic"
1040 :code-space [33 126 33 126]
1041 :code-offset #x1A0000)
1042
1043 (define-charset 'mac-roman
1044 "Mac Roman charset"
1045 :short-name "Mac Roman"
1046 :ascii-compatible-p t
1047 :code-space [0 255]
1048 :map "MACINTOSH")
1049
1050 ;; Fixme: modern EBCDIC variants, e.g. IBM00924?
1051 (define-charset 'ebcdic-us
1052 "US version of EBCDIC"
1053 :short-name "EBCDIC-US"
1054 :code-space [0 255]
1055 :mime-charset 'ebcdic-us
1056 :map "EBCDICUS")
1057
1058 (define-charset 'ebcdic-uk
1059 "UK version of EBCDIC"
1060 :short-name "EBCDIC-UK"
1061 :code-space [0 255]
1062 :mime-charset 'ebcdic-uk
1063 :map "EBCDICUK")
1064
1065 (define-charset 'ibm1047
1066 ;; Says groff:
1067 "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
1068 :short-name "IBM1047"
1069 :code-space [0 255]
1070 :mime-charset 'ibm1047
1071 :map "IBM1047")
1072 (define-charset-alias 'cp1047 'ibm1047)
1073
;; 8-bit charset loaded from the "HP-ROMAN8" map.
(define-charset 'hp-roman8
  "Encoding used by Hewlett-Packard printer software"
  :short-name "HP-ROMAN8"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "HP-ROMAN8")
1080
1081 ;; To build a coding system on top of this charset, a
1082 ;; pre-write-conversion would have to account for the multi-valued
1083 ;; code points that are commented out in stdenc.map.
1084 (define-charset 'adobe-standard-encoding
1085 "Adobe `standard encoding' used in PostScript"
1086 :short-name "ADOBE-STANDARD-ENCODING"
1087 :code-space [#x20 255] ; one dimension, #x20..255: codes below #x20 are not part of the charset
1088 :map "stdenc") ; name of the mapping table (the comment above calls it stdenc.map)
1089
1090 (define-charset 'symbol
1091 "Adobe symbol encoding used in PostScript"
1092 :short-name "ADOBE-SYMBOL"
1093 :code-space [#x20 255] ; same code range as `adobe-standard-encoding'
1094 :map "symbol") ; name of the mapping table
1095
1096 (define-charset 'ibm850 ; three single-byte code pages follow, each defined through a mapping table
1097 "DOS codepage 850 (Latin-1)"
1098 :short-name "IBM850"
1099 :ascii-compatible-p t ; declared ASCII-compatible (see `define-charset' for the exact meaning)
1100 :code-space [0 255] ; one dimension covering every byte value 0..255
1101 :map "IBM850") ; name of the mapping table
1102 (define-charset-alias 'cp850 'ibm850) ; `cp850' is a second name for `ibm850'
1103
1104 (define-charset 'mik
1105 "Bulgarian DOS codepage"
1106 :short-name "MIK"
1107 :ascii-compatible-p t
1108 :code-space [0 255]
1109 :map "MIK")
1110
1111 (define-charset 'ptcp154
1112 "`Paratype' codepage (Asian Cyrillic)"
1113 :short-name "PT154"
1114 :ascii-compatible-p t
1115 :code-space [0 255]
1116 :mime-charset 'pt154 ; the MIME name differs from the charset name; it is also added as an alias below
1117 :map "PTCP154")
1118 (define-charset-alias 'pt154 'ptcp154) ; alias matching the :mime-charset value
1119 (define-charset-alias 'cp154 'ptcp154) ; additional alias
1120
1121 (define-charset 'gb18030-2-byte ; first of five sub-charsets that `gb18030' below combines with `ascii'
1122 "GB18030 2-byte (0x814E..0xFEFE)"
1123 :code-space [#x40 #xFE #x81 #xFE] ; ranges are listed last byte first: trail byte #x40..#xFE, lead byte #x81..#xFE
1124 :supplementary-p t
1125 :map "GB180302") ; name of the mapping table
1126
1127 (define-charset 'gb18030-4-byte-bmp
1128 "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
1129 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84] ; four byte ranges, last byte first; lead byte is #x81..#x84
1130 :supplementary-p t
1131 :map "GB180304") ; name of the mapping table
1132
1133 (define-charset 'gb18030-4-byte-smp ; from here on the sub-charsets use :code-offset instead of :map
1134 "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
1135 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3] ; lead byte is #x90..#xE3
1136 :min-code '(#x9030 . #x8130) ; (HIGH16 . LOW16) spelling of 0x90308130 from the docstring
1137 :max-code '(#xE332 . #x9A35) ; likewise 0xE3329A35
1138 :supplementary-p t
1139 :code-offset #x10000) ; the min..max range holds #x100000 codes, so this presumably covers U+10000..U+10FFFF
1140
1141 (define-charset 'gb18030-4-byte-ext-1
1142 "GB18030 4-byte (0x8431A530-0x8F39FE39)"
1143 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F] ; lead byte is #x84..#x8F
1144 :min-code '(#x8431 . #xA530) ; 0x8431A530, one past the end of `gb18030-4-byte-bmp'
1145 :max-code '(#x8F39 . #xFE39) ; 0x8F39FE39
1146 :supplementary-p t
1147 :code-offset #x200000 ; maps to #x200000 ... #x22484B, above the Unicode range
1148 )
1149
1150 (define-charset 'gb18030-4-byte-ext-2
1151 "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
1152 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE] ; lead byte is #xE3..#xFE
1153 :min-code '(#xE332 . #x9A36) ; 0xE3329A36, one past the end of `gb18030-4-byte-smp'
1154 :max-code '(#xFE39 . #xFE39) ; 0xFE39FE39
1155 :supplementary-p t
1156 :code-offset #x22484C ; maps to #x22484C ... #x279f93, continuing right after ext-1
1157 )
1158
1159 (define-charset 'gb18030 ; the charset that ties the pieces above together
1160 "GB18030"
1161 :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE] ; four byte ranges wide enough for all the parts
1162 :min-code 0
1163 :max-code '(#xFE39 . #xFE39) ; 0xFE39FE39, the same upper bound as `gb18030-4-byte-ext-2'
1164 :superset '(ascii gb18030-2-byte ; defined as the combination of these six charsets
1165 gb18030-4-byte-bmp gb18030-4-byte-smp
1166 gb18030-4-byte-ext-1 gb18030-4-byte-ext-2))
1167
1168 (define-charset 'chinese-cns11643-15 ; defined by offset only; no :map or :unify-map is given
1169 "CNS11643 Plane 15 Chinese Traditional"
1170 :short-name "CNS11643-15"
1171 :long-name "CNS11643-15 (Chinese traditional)"
1172 :code-space [33 126 33 126] ; two dimensions of 33..126 each: 94 * 94 code points
1173 :code-offset #x27A000) ; starts after #x279f93, where the `gb18030-4-byte-ext-2' comment ends its range
1174
1175 ;; Call `unify-charset' once on each charset listed below, with no
1176 ;; optional arguments, in the order in which they are listed.
1177 (mapc #'unify-charset
1178 '(chinese-gb2312
1179 chinese-gbk
1180 chinese-cns11643-1
1181 chinese-cns11643-2
1182 chinese-cns11643-3
1183 chinese-cns11643-4
1184 chinese-cns11643-5
1185 chinese-cns11643-6
1186 chinese-cns11643-7
1187 big5
1188 chinese-big5-1 chinese-big5-2
1189 big5-hkscs
1190 korean-ksc5601
1191 vietnamese-viscii-lower vietnamese-viscii-upper
1192 chinese-sisheng
1193 ipa
1194 tibetan
1195 ethiopic
1196 japanese-jisx0208-1978
1197 japanese-jisx0208
1198 japanese-jisx0212
1199 japanese-jisx0213-1 japanese-jisx0213-2))
1200
1201 \f
1202 ;; Translation tables for translating characters while decoding
1203 ;; and encoding.  Both start out as nil.
1204 ;; FIXME: nothing uses these at present -- should something?
1205 (setq standard-translation-table-for-decode nil
1206 standard-translation-table-for-encode nil)
1207
1208 \f
1209 ;;; Make fundamental coding systems.
1210
1211 ;; The coding systems `no-conversion' and `undecided' are already
1212 ;; defined in coding.c, as if by:
1213 ;;
1214 ;; (define-coding-system 'no-conversion
1215 ;; "..."
1216 ;; :coding-type 'raw-text
1217 ;; ...)
1218 ;; (define-coding-system 'undecided
1219 ;; "..."
1220 ;; :coding-type 'undecided
1221 ;; ...)
1222
1223 (define-coding-system-alias 'binary 'no-conversion) ; `binary' is a second name for `no-conversion'
1224 (define-coding-system-alias 'unix 'undecided-unix) ; short names for the three `undecided-*' variants
1225 (define-coding-system-alias 'dos 'undecided-dos)
1226 (define-coding-system-alias 'mac 'undecided-mac)
1227
1228 (define-coding-system 'raw-text ; same :coding-type as the `no-conversion' sketch shown in the comment above
1229 "Raw text, which means text contains random 8-bit codes.
1230 Encoding text with this coding system produces the actual byte
1231 sequence of the text in buffers and strings. An exception is made for
1232 characters from the `eight-bit' character set. Each of them is encoded
1233 into a single byte.
1234
1235 When you visit a file with this coding, the file is read into a
1236 unibyte buffer as is (except for EOL format), thus each byte of a file
1237 is treated as a character."
1238 :coding-type 'raw-text
1239 :for-unibyte t ; goes with the docstring: files are read into a unibyte buffer
1240 :mnemonic ?t) ; one-character indicator for this coding system (see `define-coding-system')
1241
1242 (define-coding-system 'no-conversion-multibyte
1243 "Like `no-conversion' but don't read a file into a unibyte buffer."
1244 :coding-type 'raw-text ; same coding type as `raw-text', but without :for-unibyte
1245 :eol-type 'unix ; end-of-line format fixed to `unix'
1246 :mnemonic ?=)
1247
1248 (define-coding-system 'iso-latin-1
1249 "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
1250 :coding-type 'charset ; coding type `charset', driven by the :charset-list below
1251 :mnemonic ?1 ; one-character indicator for this coding system
1252 :charset-list '(iso-8859-1) ; the only charset this coding system handles
1253 :mime-charset 'iso-8859-1) ; MIME name, as also stated in the docstring
1254
1255 (define-coding-system-alias 'iso-8859-1 'iso-latin-1) ; make the MIME name usable as a coding-system name
1256 (define-coding-system-alias 'latin-1 'iso-latin-1) ; shorter alias
1257
1258 ;; Coding systems that are not specific to any language environment.
1259
1260 (define-coding-system 'emacs-mule
1261 "Emacs 21 internal format used in buffer and string."
1262 :coding-type 'emacs-mule
1263 :charset-list 'emacs-mule ; a symbol rather than an explicit list; see `define-coding-system' for its meaning
1264 :mnemonic ?M) ; one-character indicator for this coding system
1265
1266 (define-coding-system 'utf-8 ; the three UTF-8 variants below differ only in their :bom setting
1267 "UTF-8 (no signature (BOM))"
1268 :coding-type 'utf-8
1269 :mnemonic ?U ; one-character indicator, shared by all the utf-8 based systems here
1270 :charset-list '(unicode)
1271 :mime-charset 'utf-8) ; only this variant records a MIME name
1272
1273 (define-coding-system 'utf-8-with-signature
1274 "UTF-8 (with signature (BOM))"
1275 :coding-type 'utf-8
1276 :mnemonic ?U
1277 :charset-list '(unicode)
1278 :bom t) ; signature (BOM) is used, as the docstring says
1279
1280 (define-coding-system 'utf-8-auto
1281 "UTF-8 (auto-detect signature (BOM))"
1282 :coding-type 'utf-8
1283 :mnemonic ?U
1284 :charset-list '(unicode)
1285 :bom '(utf-8-with-signature . utf-8)) ; pair of the with-BOM and without-BOM systems chosen between on detection
1286
1287 (define-coding-system-alias 'mule-utf-8 'utf-8) ; `mule-utf-8' is a second name for `utf-8'
1288
1289 (define-coding-system 'utf-8-emacs
1290 "Support for all Emacs characters (including non-Unicode characters)."
1291 :coding-type 'utf-8
1292 :mnemonic ?U
1293 :charset-list '(emacs)) ; `emacs' instead of `unicode', matching the docstring
1294
1295 ;; The encoding used internally.  It is meant to be able to save any
1296 ;; multibyte buffer without losing information.  It can change between
1297 ;; Emacs releases, though, so it should only be used for internal files.
1298 (define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix)
1299
1300 (define-coding-system 'utf-16le ; the five UTF-16 systems below differ in :bom, :endian and :mime-charset
1301 "UTF-16LE (little endian, no signature (BOM))."
1302 :coding-type 'utf-16
1303 :mnemonic ?U ; one-character indicator, shared by all the UTF systems here
1304 :charset-list '(unicode)
1305 :endian 'little
1306 :mime-text-unsuitable t ; presumably marks the encoding as unsuitable for MIME text parts -- confirm in `define-coding-system'
1307 :mime-charset 'utf-16le)
1308
1309 (define-coding-system 'utf-16be
1310 "UTF-16BE (big endian, no signature (BOM))."
1311 :coding-type 'utf-16
1312 :mnemonic ?U
1313 :charset-list '(unicode)
1314 :endian 'big
1315 :mime-text-unsuitable t
1316 :mime-charset 'utf-16be)
1317
1318 (define-coding-system 'utf-16le-with-signature
1319 "UTF-16 (little endian, with signature (BOM))."
1320 :coding-type 'utf-16
1321 :mnemonic ?U
1322 :charset-list '(unicode)
1323 :bom t ; signature (BOM) is used, as the docstring says
1324 :endian 'little
1325 :mime-text-unsuitable t
1326 :mime-charset 'utf-16) ; both with-signature variants and `utf-16' record the same MIME name
1327
1328 (define-coding-system 'utf-16be-with-signature
1329 "UTF-16 (big endian, with signature (BOM))."
1330 :coding-type 'utf-16
1331 :mnemonic ?U
1332 :charset-list '(unicode)
1333 :bom t
1334 :endian 'big
1335 :mime-text-unsuitable t
1336 :mime-charset 'utf-16)
1337
1338 (define-coding-system 'utf-16
1339 "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
1340 :coding-type 'utf-16
1341 :mnemonic ?U
1342 :charset-list '(unicode)
1343 :bom '(utf-16le-with-signature . utf-16be-with-signature) ; the little- and big-endian systems chosen between when decoding
1344 :endian 'big ; byte order used on encoding, per the docstring
1345 :mime-text-unsuitable t
1346 :mime-charset 'utf-16)
1347
1348 ;; Backwards compatibility (old names, also used by Mule-UCS).  We
1349 ;; prefer the MIME names.  Note that both old names refer to the
1350 (define-coding-system-alias 'utf-16-le 'utf-16le-with-signature) ; with-signature variants, not to `utf-16le'/`utf-16be'
1351 (define-coding-system-alias 'utf-16-be 'utf-16be-with-signature)
1352
1353
1354 (define-coding-system 'iso-2022-7bit ; first of five generic ISO 2022 coding systems
1355 "ISO 2022 based 7-bit encoding using only G0."
1356 :coding-type 'iso-2022
1357 :mnemonic ?J ; one-character indicator for this coding system
1358 :charset-list 'iso-2022 ; a symbol rather than an explicit list; see `define-coding-system' for its meaning
1359 :designation [(ascii t) nil nil nil] ; four slots, presumably G0..G3; only the first is filled, matching "using only G0"
1360 :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))
1361
1362 (define-coding-system 'iso-2022-7bit-ss2
1363 "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
1364 :coding-type 'iso-2022
1365 :mnemonic ?$
1366 :charset-list 'iso-2022
1367 :designation [(ascii 94) nil (nil 96) nil] ; third slot takes the 96-charsets mentioned in the docstring
1368 :flags '(short ascii-at-eol ascii-at-cntl 7-bit
1369 designation single-shift composition)) ; adds `single-shift' to the flags of `iso-2022-7bit'
1370
1371 (define-coding-system 'iso-2022-7bit-lock
1372 "ISO-2022 coding system using Locking-Shift for 96-charset."
1373 :coding-type 'iso-2022
1374 :mnemonic ?&
1375 :charset-list 'iso-2022
1376 :designation [(ascii 94) (nil 96) nil nil] ; here the 96-charsets go in the second slot
1377 :flags '(ascii-at-eol ascii-at-cntl 7-bit
1378 designation locking-shift composition)) ; `locking-shift' instead of `single-shift'; no `short'
1379
1380 (define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
1381
1382 (define-coding-system 'iso-2022-7bit-lock-ss2
1383 "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
1384 :coding-type 'iso-2022
1385 :mnemonic ?i
1386 :charset-list '(ascii ; unlike its siblings, this system lists its charsets explicitly
1387 japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
1388 korean-ksc5601
1389 chinese-gb2312
1390 chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
1391 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
1392 chinese-cns11643-7)
1393 :designation [(ascii 94) ; each of the four slots names the charsets assigned to it
1394 (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
1395 (nil chinese-cns11643-2)
1396 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
1397 chinese-cns11643-6 chinese-cns11643-7)]
1398 :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
1399 single-shift init-bol)) ; both shift kinds plus `init-bol'; no `designation' or `composition'
1400
1401 (define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
1402
1403 (define-coding-system 'iso-2022-8bit-ss2
1404 "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
1405 :coding-type 'iso-2022
1406 :mnemonic ?@
1407 :charset-list 'iso-2022
1408 :designation [(ascii 94) nil (nil 96) nil] ; same designation as `iso-2022-7bit-ss2'
1409 :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition)) ; no `7-bit' and no `short' here
1410
1411 (define-coding-system 'compound-text ; first of three related systems sharing one :designation
1412 "Compound text based generic encoding.
1413 This coding system is an extension of X's \"Compound Text Encoding\".
1414 It encodes many characters using the normal ISO-2022 designation sequences,
1415 but it doesn't support extended segments of CTEXT."
1416 :coding-type 'iso-2022
1417 :mnemonic ?x ; one-character indicator, shared by the three compound-text systems
1418 :charset-list 'iso-2022
1419 :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1420 :flags '(ascii-at-eol ascii-at-cntl long-form
1421 designation locking-shift single-shift composition)
1422 ;; FIXME: this isn't a valid MIME charset and has to be
1423 ;; special-cased elsewhere -- fx
1424 :mime-charset 'x-ctext)
1425
1426 (define-coding-system-alias 'x-ctext 'compound-text) ; make the :mime-charset value usable as a name
1427 (define-coding-system-alias 'ctext 'compound-text)
1428
1429 ;; Same as compound-text, but doesn't produce composition escape
1430 ;; sequences.  Used in the post-read and pre-write conversions of
1431 ;; compound-text-with-extensions; see mule.el.  Note that this should
1432 ;; not have a mime-charset property, to prevent it from showing up
1433 ;; close to the beginning of coding systems ordered by priority.
1434 (define-coding-system 'ctext-no-compositions
1435 "Compound text based generic encoding.
1436
1437 Like `compound-text', but does not produce escape sequences for compositions."
1438 :coding-type 'iso-2022
1439 :mnemonic ?x
1440 :charset-list 'iso-2022
1441 :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1442 :flags '(ascii-at-eol ascii-at-cntl
1443 designation locking-shift single-shift)) ; the flags of `compound-text' minus `long-form' and `composition'
1444
1445 (define-coding-system 'compound-text-with-extensions
1446 "Compound text encoding with ICCCM Extended Segment extensions.
1447
1448 See the variables `ctext-standard-encodings' and
1449 `ctext-non-standard-encodings-alist' for the detail about how
1450 extended segments are handled.
1451
1452 This coding system should be used only for X selections. It is inappropriate
1453 for decoding and encoding files, process I/O, etc."
1454 :coding-type 'iso-2022
1455 :mnemonic ?x
1456 :charset-list 'iso-2022
1457 :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1458 :flags '(ascii-at-eol ascii-at-cntl long-form
1459 designation locking-shift single-shift) ; the flags of `compound-text' minus `composition'
1460 :post-read-conversion 'ctext-post-read-conversion ; conversion functions, defined outside this chunk
1461 :pre-write-conversion 'ctext-pre-write-conversion
1462 :mime-charset 'x-ctext) ; same value as `compound-text' records
1463
1464 (define-coding-system-alias
1465 'x-ctext-with-extensions 'compound-text-with-extensions)
1466 (define-coding-system-alias
1467 'ctext-with-extensions 'compound-text-with-extensions)
1468
1469 (define-coding-system 'us-ascii
1470 "Encode ASCII as-is and encode non-ASCII characters to `?'."
1471 :coding-type 'charset
1472 :mnemonic ?- ; one-character indicator for this coding system
1473 :charset-list '(ascii) ; only ASCII is handled
1474 :default-char ?? ; the question-mark character: the replacement the docstring describes
1475 :mime-charset 'us-ascii)
1476
1477 (define-coding-system-alias 'iso-safe 'us-ascii) ; `iso-safe' is a second name for `us-ascii'
1478
1479 (define-coding-system 'utf-7
1480 "UTF-7 encoding of Unicode (RFC 2152)."
1481 :coding-type 'utf-8 ; declared with coding type `utf-8'; the UTF-7 part is presumably done by the two conversion functions
1482 :mnemonic ?U
1483 :mime-charset 'utf-7
1484 :charset-list '(unicode)
1485 :pre-write-conversion 'utf-7-pre-write-conversion ; conversion functions, defined outside this chunk
1486 :post-read-conversion 'utf-7-post-read-conversion)
1487
1488 (define-coding-system 'utf-7-imap ; same shape as `utf-7', but no :mime-charset and a lower-case mnemonic
1489 "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
1490 :coding-type 'utf-8
1491 :mnemonic ?u
1492 :charset-list '(unicode)
1493 :pre-write-conversion 'utf-7-imap-pre-write-conversion
1494 :post-read-conversion 'utf-7-imap-post-read-conversion)
1495
1496 ;; Use `us-ascii' for terminal output unless some other coding system
1497 ;; is specified explicitly.
1498 (set-safe-terminal-coding-system-internal 'us-ascii)
1499
1500 ;; The other coding-systems are defined in each language specific
1501 ;; files under lisp/language.
1502
1503 ;; Normally, set coding system to `undecided' before reading a file.
1504 ;; Compiled Emacs Lisp files (*.elc) are read as `utf-8-emacs', and
1505 ;; tar files as `no-conversion', i.e. as raw bytes.  Each regexp key is
1506 ;; copied with `purecopy'; the coding-system values are used as-is.
1507
1508 (setq file-coding-system-alist
1509 (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
1510 '(("\\.elc\\'" . utf-8-emacs)
1511 ("\\.utf\\(-8\\)?\\'" . utf-8)
1512 ("\\.xml\\'" . xml-find-file-coding-system)
1513 ;; We use raw-text for reading loaddefs.el so that if it
1514 ;; happens to have DOS or Mac EOLs, they are converted to
1515 ;; newlines. This is required to make the special treatment
1516 ;; of the "\ newline" combination in loaddefs.el, which marks
1517 ;; the beginning of a doc string, work.
1518 ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix)) ; dot escaped so only a literal "loaddefs.el" matches
1519 ("\\.tar\\'" . (no-conversion . no-conversion))
1520 ("\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
1521 ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
1522 ("" . (undecided . nil))))) ; the empty regexp matches every file name, so this entry is the fallback
1523
1524 \f
1525 ;;; Setting coding categories and their priorities.
1526
1527 ;; This setting exists only so that Emacs Lisp source files which
1528 ;; contain multilingual text can be read while dumping Emacs.  More
1529 ;; appropriate values are set by the command `set-language-environment'
1530 ;; for each language environment.
1531
1532 (set-coding-system-priority ; the three arguments are given in this order: Latin-1, UTF-8, 7-bit ISO 2022
1533 'iso-latin-1
1534 'utf-8
1535 'iso-2022-7bit
1536 )
1537
1538 \f
1539 ;;; Miscellaneous settings.
1540
1541 ;; Make all multibyte characters self-insert.
1542 (set-char-table-range (nth 1 global-map) ; assumes the second element of `global-map' is its char-table -- confirm
1543 (cons 128 (max-char)) ; every character code from 128 up to the largest character
1544 'self-insert-command) ; the value stored for that whole range
1545
1546 ;; Set the `latin-extra-code-table' entries for the six consecutive
1547 ;; codes ?\221 through ?\226 (octal escapes) to t.
1548 (let ((code ?\221))
1549 (while (<= code ?\226)
1550 (aset latin-extra-code-table code t)
1551 (setq code (1+ code))))
1552
1553 ;; The old code-pages library is obsoleted by coding systems based on
1554 ;; the charsets defined in this file, but user code might still
1555 ;; `require' it, so provide the feature here.
1556 (provide 'code-pages)
1557
1558 ;;; mule-conf.el ends here