;;; code-pages.el --- coding systems for assorted codepages -*-coding: utf-8;-*-
-;; Copyright (C) 2001 Free Software Foundation, Inc.
+;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
;; Author: Dave Love <fx@gnu.org>
;; Keywords: i18n
;; This file is part of GNU Emacs.
-;; This file is free software; you can redistribute it and/or modify
+;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
-;; This file is distributed in the hope that it will be useful,
+;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING. If not, write to
-;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; along with GNU Emacs; see the file COPYING. If not, write to the
+;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.
;;; Commentary:
;; Those covered are: cp437, cp737, cp720, cp775, cp850, cp851, cp852,
;; cp855, cp857, cp860, cp861, cp862, cp863, cp864, cp865, cp866,
-;; cp869, cp874, cp1125, windows-1250, windows-1251, windows-1252,
-;; windows-1253, windows-1254, windows-1255, windows-1256,
-;; windows-1257, windows-1258, next, koi8-r, koi8-u, iso-8859-6,
-;; iso-8859-10, iso-8859-12, iso-8859-16, koi8-t, georgian-ps. This
-;; is meant to include all the single-byte ones relevant to GNU (used
-;; in glibc-defined locales); we don't yet get all the multibyte ones
-;; in base Emacs.
+;; cp869, cp874, cp1125, windows-1250, windows-1253, windows-1254,
+;; windows-1255, windows-1256, windows-1257, windows-1258, next,
+;; iso-8859-6, iso-8859-10, iso-8859-11, iso-8859-16, koi8-t,
+;; georgian-ps. This is meant to include all the single-byte ones
+;; relevant to GNU (used in glibc-defined locales); we don't yet get
+;; all the multibyte ones in base Emacs.
-;; Note that koi8-r and cp866 (alternativnyj) clash with the
-;; iso8859-5-based versions in cyrillic.el (which should be changed),
-;; and others can clash with definitions in codepage.el; we try to
-;; avoid damage from that. A few CPs from codepage.el (770, 773, 774)
-;; aren't covered (in the absence of translation tables to Unicode).
+;; Note that various of these can clash with definitions in
+;; codepage.el; we try to avoid damage from that. A few CPs from
+;; codepage.el (770, 773, 774) aren't covered (in the absence of
+;; translation tables to Unicode).
;; Compile this, to avoid loading `ccl' at runtime.
;;; Code:
-(defun cp-make-translation-table (v)
+;; The defsubsts here are just so that language files can use
+;; `cp-make-coding-system' and not require functions from this file
+;; at runtime.
+
+(defsubst cp-make-translation-table (v)
"Return a translation table made from 128-long vector V.
V comprises characters encodable by mule-utf-8."
(let ((encoding-vector (make-vector 256 0)))
(aset encoding-vector i i))
(dotimes (i 128)
(aset encoding-vector (+ i 128) (aref v i)))
- (make-translation-table-from-vector encoding-vector)))
+ ;; Add equivalent characters to the encoder so that we can unify
+ ;; on encoding.
+ (let* ((tab (make-translation-table-from-vector encoding-vector))
+ ;; Translation table used for encoding:
+ (encode-table (char-table-extra-slot tab 0)))
+ (map-char-table (lambda (c v)
+ (if v
+ (let ((c1 (aref encode-table v)))
+ (if c1 ; we encode that unicode
+ (aset encode-table c c1)))))
+ ucs-mule-to-mule-unicode)
+ tab)))
-(defun cp-valid-codes (v)
+(defsubst cp-valid-codes (v)
"Derive a valid-codes list for translation vector V.
See `make-coding-system'."
(let (pairs
(if start (push (cons start end) pairs))
(nreverse pairs)))
-(defun cp-fix-safe-chars (cs)
- "Remove `char-coding-system-table' entries from previous definition of CS.
-CS is a base coding system or alias."
- (when (coding-system-p cs)
- (let ((chars (coding-system-get cs 'safe-chars)))
- (map-char-table
- (lambda (k v)
- (if (and v (not (eq v t)))
- (aset char-coding-system-table
- k
- (remq cs (aref char-coding-system-table v)))))
- chars))))
-
-;; Fix things that have been, or might be done by codepage.el.
+;; Fix things that have been, or might be, done by codepage.el.
(eval-after-load "codepage"
'(progn
- (dolist (cs '(cp857 cp861 cp1253 cp852 cp866 cp437 cp855 cp869 cp775
- cp862 cp864 cp1250 cp863 cp865 cp1251 cp737 cp1257 cp850
- cp860 cp851 720))
- (cp-fix-safe-chars cs))
-
;; Semi-dummy version for the stuff in codepage.el which we don't
;; define here. (Used by mule-diag.)
(defun cp-supported-codepages ()
cp (cp-charset-for-codepage cp) (cp-offset-for-codepage cp))))))
) ; eval-after-load
-;; Somewhat amended from the version in mule-diag.el, needed below.
-(defvar non-iso-charset-alist
- `((mac-roman
- nil
- mac-roman-decoder
- ((0 255)))
- (viscii
- (ascii vietnamese-viscii-lower vietnamese-viscii-upper)
- viet-viscii-nonascii-translation-table
- ((0 255)))
- (big5
- (ascii chinese-big5-1 chinese-big5-2)
- decode-big5-char
- ((32 127)
- ((?\xA1 ?\xFE) . (?\x40 ?\x7E ?\xA1 ?\xFE))))
- (sjis
- (ascii katakana-jisx0201 japanese-jisx0208)
- decode-sjis-char
- ((32 127 ?\xA1 ?\xDF)
- ((?\x81 ?\x9F ?\xE0 ?\xEF) . (?\x40 ?\x7E ?\x80 ?\xFC))))))
-
;; Macro to allow ccl compilation at byte-compile time, avoiding
;; loading ccl.
;;;###autoload
(defmacro cp-make-coding-system (name v &optional doc-string mnemonic)
"Make coding system NAME for and 8-bit, extended-ASCII character set.
V is a 128-long vector of characters to translate the upper half of
-the charactert set. DOC-STRING and MNEMONIC are used as the
+the character set. DOC-STRING and MNEMONIC are used as the
corresponding args of `make-coding-system'. If MNEMONIC isn't given,
-?* is used."
+?* is used.
+Return an updated `non-iso-charset-alist'."
(let* ((encoder (intern (format "encode-%s" name)))
(decoder (intern (format "decode-%s" name)))
(ccl-decoder
(r0 = ,(charset-id 'eight-bit-control))
(r0 = ,(charset-id 'eight-bit-graphic))))
(translate-character ,decoder r0 r1)
+ ;; Allow fragmentation on decoding -- relevant for
+ ;; Cyrillic, Greek and, possibly Arabic and Hebrew.
+ (translate-character utf-translation-table-for-decode r0 r1)
(write-multibyte-character r0 r1)
(repeat))))))
(ccl-encoder
((loop
(read-multibyte-character r0 r1)
(translate-character ,encoder r0 r1)
+ (if (r0 != ,(charset-id 'ascii))
+ (if (r0 != ,(charset-id 'eight-bit-graphic))
+ (if (r0 != ,(charset-id 'eight-bit-control))
+ (r1 = ??))))
(write-repeat r1)))))))
`(let ((translation-table (cp-make-translation-table ,v))
(codes (cp-valid-codes ,v)))
(define-translation-table ',decoder translation-table)
(define-translation-table ',encoder
(char-table-extra-slot translation-table 0))
- (cp-fix-safe-chars ',name)
(make-coding-system
',name 4 ,(or mnemonic ?*)
(or ,doc-string (format "%s encoding" ',name))
(cons ,ccl-decoder ,ccl-encoder)
(list (cons 'safe-chars (get ',encoder 'translation-table))
(cons 'valid-codes codes)
- (cons 'mime-charset ',name)))
+ (cons 'mime-charset ',name)
+ ;; For Quail translation. Fixme: this should really be
+ ;; a separate table that only translates the coding
+ ;; system's safe-chars.
+ (cons 'translation-table-for-input ,ucs-mule-to-mule-unicode)))
(push (list ',name
nil ; charset list
',decoder
(list l)))
non-iso-charset-alist))))
+(eval-when-compile (defvar non-iso-charset-alist))
;; These tables were mostly derived by running somthing like
;; `recode -f cpxxx/..utf-8' on a binary file filled by
?\■
?\ ])
-;; ;; This should be the same as cyrillic-alternativnyj,
-;; ;; (<URL:http://czyborra.com/charsets/cyrillic.html>), but code point
-;; ;; 255 in the cyrillic.el alternativnyj table is `№', i.e. point 240
-;; ;; in 8859-5, not no-break space as below; `№' should be at point 252.
-;; (cp-make-coding-system
-;; cp866
-;; [?\А
-;; ?\Б
-;; ?\В
-;; ?\Г
-;; ?\Д
-;; ?\Е
-;; ?\Ж
-;; ?\З
-;; ?\И
-;; ?\Й
-;; ?\К
-;; ?\Л
-;; ?\М
-;; ?\Н
-;; ?\О
-;; ?\П
-;; ?\Р
-;; ?\С
-;; ?\Т
-;; ?\У
-;; ?\Ф
-;; ?\Х
-;; ?\Ц
-;; ?\Ч
-;; ?\Ш
-;; ?\Щ
-;; ?\Ъ
-;; ?\Ы
-;; ?\Ь
-;; ?\Э
-;; ?\Ю
-;; ?\Я
-;; ?\а
-;; ?\б
-;; ?\в
-;; ?\г
-;; ?\д
-;; ?\е
-;; ?\ж
-;; ?\з
-;; ?\и
-;; ?\й
-;; ?\к
-;; ?\л
-;; ?\м
-;; ?\н
-;; ?\о
-;; ?\п
-;; ?\░
-;; ?\▒
-;; ?\▓
-;; ?\│
-;; ?\┤
-;; ?\╡
-;; ?\╢
-;; ?\╖
-;; ?\╕
-;; ?\╣
-;; ?\║
-;; ?\╗
-;; ?\╝
-;; ?\╜
-;; ?\╛
-;; ?\┐
-;; ?\└
-;; ?\┴
-;; ?\┬
-;; ?\├
-;; ?\─
-;; ?\┼
-;; ?\╞
-;; ?\╟
-;; ?\╚
-;; ?\╔
-;; ?\╩
-;; ?\╦
-;; ?\╠
-;; ?\═
-;; ?\╬
-;; ?\╧
-;; ?\╨
-;; ?\╤
-;; ?\╥
-;; ?\╙
-;; ?\╘
-;; ?\╒
-;; ?\╓
-;; ?\╫
-;; ?\╪
-;; ?\┘
-;; ?\┌
-;; ?\█
-;; ?\▄
-;; ?\▌
-;; ?\▐
-;; ?\▀
-;; ?\р
-;; ?\с
-;; ?\т
-;; ?\у
-;; ?\ф
-;; ?\х
-;; ?\ц
-;; ?\ч
-;; ?\ш
-;; ?\щ
-;; ?\ъ
-;; ?\ы
-;; ?\ь
-;; ?\э
-;; ?\ю
-;; ?\я
-;; ?\Ё
-;; ?\ё
-;; ?\Є
-;; ?\є
-;; ?\Ї
-;; ?\ї
-;; ?\Ў
-;; ?\ў
-;; ?\°
-;; ?\∙
-;; ?\·
-;; ?\√
-;; ?\№
-;; ?\¤
-;; ?\■
-;; ?\ ]
-;; "CP866 (Cyrillic Alternativnyj) encoding using Unicode."
-;; ?A)
-;; (define-coding-system-alias 'alternativnyj 'cp866)
-;; (cp-fix-safe-chars 'cyrillic-alternativnyj)
-;; (define-coding-system-alias 'cyrillic-alternativnyj 'cp866)
+(cp-make-coding-system
+ cp866
+ [?\А
+ ?\Б
+ ?\В
+ ?\Г
+ ?\Д
+ ?\Е
+ ?\Ж
+ ?\З
+ ?\И
+ ?\Й
+ ?\К
+ ?\Л
+ ?\М
+ ?\Н
+ ?\О
+ ?\П
+ ?\Р
+ ?\С
+ ?\Т
+ ?\У
+ ?\Ф
+ ?\Х
+ ?\Ц
+ ?\Ч
+ ?\Ш
+ ?\Щ
+ ?\Ъ
+ ?\Ы
+ ?\Ь
+ ?\Э
+ ?\Ю
+ ?\Я
+ ?\а
+ ?\б
+ ?\в
+ ?\г
+ ?\д
+ ?\е
+ ?\ж
+ ?\з
+ ?\и
+ ?\й
+ ?\к
+ ?\л
+ ?\м
+ ?\н
+ ?\о
+ ?\п
+ ?\░
+ ?\▒
+ ?\▓
+ ?\│
+ ?\┤
+ ?\╡
+ ?\╢
+ ?\╖
+ ?\╕
+ ?\╣
+ ?\║
+ ?\╗
+ ?\╝
+ ?\╜
+ ?\╛
+ ?\┐
+ ?\└
+ ?\┴
+ ?\┬
+ ?\├
+ ?\─
+ ?\┼
+ ?\╞
+ ?\╟
+ ?\╚
+ ?\╔
+ ?\╩
+ ?\╦
+ ?\╠
+ ?\═
+ ?\╬
+ ?\╧
+ ?\╨
+ ?\╤
+ ?\╥
+ ?\╙
+ ?\╘
+ ?\╒
+ ?\╓
+ ?\╫
+ ?\╪
+ ?\┘
+ ?\┌
+ ?\█
+ ?\▄
+ ?\▌
+ ?\▐
+ ?\▀
+ ?\р
+ ?\с
+ ?\т
+ ?\у
+ ?\ф
+ ?\х
+ ?\ц
+ ?\ч
+ ?\ш
+ ?\щ
+ ?\ъ
+ ?\ы
+ ?\ь
+ ?\э
+ ?\ю
+ ?\я
+ ?\Ё
+ ?\ё
+ ?\Є
+ ?\є
+ ?\Ї
+ ?\ї
+ ?\Ў
+ ?\ў
+ ?\°
+ ?\∙
+ ?\·
+ ?\√
+ ?\№
+ ?\¤
+ ?\■
+ ?\ ]
+ "CP866 (Cyrillic)."
+ ?A)
(cp-make-coding-system
cp869
?\ţ
?\˙])
-;; be_BY, bg_BG
(cp-make-coding-system
- windows-1251
- [?\Ђ
- ?\Ѓ
+ windows-1253
+ [?\€
+ nil
?\‚
- ?\ѓ
+ ?\ƒ
?\„
?\…
?\†
?\‡
- ?\€
+ nil
?\‰
- ?\Љ
+ nil
?\‹
- ?\Њ
- ?\Ќ
- ?\Ћ
- ?\Џ
- ?\ђ
+ nil
+ nil
+ nil
+ nil
+ nil
?\‘
?\’
?\“
?\—
nil
?\™
- ?\љ
+ nil
?\›
- ?\њ
- ?\ќ
- ?\ћ
- ?\џ
+ nil
+ nil
+ nil
+ nil
?\
- ?\Ў
- ?\ў
- ?\Ј
+ ?\΅
+ ?\Ά
+ ?\£
?\¤
- ?\Ґ
+ ?\¥
?\¦
?\§
- ?\Ё
+ ?\¨
?\©
- ?\Є
+ nil
?\«
?\¬
?\
?\®
- ?\Ї
+ ?\―
?\°
?\±
- ?\І
- ?\і
- ?\ґ
+ ?\²
+ ?\³
+ ?\΄
?\µ
?\¶
?\·
- ?\ё
- ?\№
- ?\є
- ?\»
- ?\ј
- ?\Ѕ
- ?\ѕ
- ?\ї
- ?\А
- ?\Б
- ?\В
- ?\Г
- ?\Д
- ?\Е
- ?\Ж
- ?\З
- ?\И
- ?\Й
- ?\К
- ?\Л
- ?\М
- ?\Н
- ?\О
- ?\П
- ?\Р
- ?\С
- ?\Т
- ?\У
- ?\Ф
- ?\Х
- ?\Ц
- ?\Ч
- ?\Ш
- ?\Щ
- ?\Ъ
- ?\Ы
- ?\Ь
- ?\Э
- ?\Ю
- ?\Я
- ?\а
- ?\б
- ?\в
- ?\г
- ?\д
- ?\е
- ?\ж
- ?\з
- ?\и
- ?\й
- ?\к
- ?\л
- ?\м
- ?\н
- ?\о
- ?\п
- ?\р
- ?\с
- ?\т
- ?\у
- ?\ф
- ?\х
- ?\ц
- ?\ч
- ?\ш
- ?\щ
- ?\ъ
- ?\ы
- ?\ь
- ?\э
- ?\ю
- ?\я]
- nil ?b)
-
-(cp-make-coding-system
- windows-1252
- [?\€
- nil
- ?\‚
- ?\ƒ
- ?\„
- ?\…
- ?\†
- ?\‡
- ?\ˆ
- ?\‰
- ?\Š
- ?\‹
- ?\Œ
- nil
- ?\Ž
- ?\ž
- nil
- ?\‘
- ?\’
- ?\“
- ?\”
- ?\•
- ?\–
- ?\—
- ?\˜
- ?\™
- ?\š
- ?\›
- ?\œ
- nil
- nil
- ?\Ÿ
- ?\
- ?\¡
- ?\¢
- ?\£
- ?\¤
- ?\¥
- ?\¦
- ?\§
- ?\¨
- ?\©
- ?\ª
- ?\«
- ?\¬
- ?\
- ?\®
- ?\¯
- ?\°
- ?\±
- ?\²
- ?\³
- ?\´
- ?\µ
- ?\¶
- ?\·
- ?\¸
- ?\¹
- ?\º
- ?\»
- ?\¼
- ?\½
- ?\¾
- ?\¿
- ?\À
- ?\Á
- ?\Â
- ?\Ã
- ?\Ä
- ?\Å
- ?\Æ
- ?\Ç
- ?\È
- ?\É
- ?\Ê
- ?\Ë
- ?\Ì
- ?\Í
- ?\Î
- ?\Ï
- ?\Ð
- ?\Ñ
- ?\Ò
- ?\Ó
- ?\Ô
- ?\Õ
- ?\Ö
- ?\×
- ?\Ø
- ?\Ù
- ?\Ú
- ?\Û
- ?\Ü
- ?\Ý
- ?\Þ
- ?\ß
- ?\à
- ?\á
- ?\â
- ?\ã
- ?\ä
- ?\å
- ?\æ
- ?\ç
- ?\è
- ?\é
- ?\ê
- ?\ë
- ?\ì
- ?\í
- ?\î
- ?\ï
- ?\ð
- ?\ñ
- ?\ò
- ?\ó
- ?\ô
- ?\õ
- ?\ö
- ?\÷
- ?\ø
- ?\ù
- ?\ú
- ?\û
- ?\ü
- ?\ý
- ?\þ
- ?\ÿ])
-
-(cp-make-coding-system
- windows-1253
- [?\€
- nil
- ?\‚
- ?\ƒ
- ?\„
- ?\…
- ?\†
- ?\‡
- nil
- ?\‰
- nil
- ?\‹
- nil
- nil
- nil
- nil
- nil
- ?\‘
- ?\’
- ?\“
- ?\”
- ?\•
- ?\–
- ?\—
- nil
- ?\™
- nil
- ?\›
- nil
- nil
- nil
- nil
- ?\
- ?\΅
- ?\Ά
- ?\£
- ?\¤
- ?\¥
- ?\¦
- ?\§
- ?\¨
- ?\©
- nil
- ?\«
- ?\¬
- ?\
- ?\®
- ?\―
- ?\°
- ?\±
- ?\²
- ?\³
- ?\΄
- ?\µ
- ?\¶
- ?\·
- ?\Έ
- ?\Ή
- ?\Ί
+ ?\Έ
+ ?\Ή
+ ?\Ί
?\»
?\Ό
?\½
(cp-make-coding-system
windows-1256
[?\€
- ?\Ù
+ ?\Ù¾
?\‚
- ?\١
+ ?\ƒ
?\„
?\…
?\†
?\‡
- ?\٢
- ?\٣
- ?\Ù¤
+ ?\ˆ
+ ?\‰
+ ?\Ù¹
?\‹
- ?\٥
- ?\٦
- ?\٧
- ?\٨
- ?\٩
+ ?\Œ
+ ?\چ
+ ?\ژ
+ ?\ڈ
+ ?\گ
?\‘
?\’
?\“
?\•
?\–
?\—
- ?\؛
+ ?\ک
?\™
- ?\؟
+ ?\ڑ
?\›
- ?\ء
- ?\آ
- ?\أ
- ?\Ÿ
+ ?\œ
+ ?\
+ ?\
+ ?\ں
?\
- ?\ؤ
- ?\إ
+ ?\Ø\8c
+ ?\¢
?\£
?\¤
- ?\ئ
+ ?\¥
?\¦
?\§
- ?\ا
+ ?\¨
?\©
- ?\ب
+ ?\ھ
?\«
?\¬
?\
?\®
- ?\پ
+ ?\¯
?\°
?\±
- ?\ة
- ?\ت
- ?\ث
+ ?\²
+ ?\³
+ ?\´
?\µ
?\¶
?\·
+ ?\¸
+ ?\¹
+ ?\؛
+ ?\»
+ ?\¼
+ ?\½
+ ?\¾
+ ?\؟
+ ?\ہ
+ ?\ء
+ ?\آ
+ ?\أ
+ ?\ؤ
+ ?\إ
+ ?\ئ
+ ?\ا
+ ?\ب
+ ?\ة
+ ?\ت
+ ?\ث
?\ج
- ?\چ
?\ح
- ?\»
?\خ
?\د
?\ذ
?\ر
- ?\À
?\ز
- ?\Â
- ?\ژ
?\س
?\ش
?\ص
- ?\Ç
- ?\È
- ?\É
- ?\Ê
- ?\Ë
?\ض
+ ?\×
?\ط
- ?\Î
- ?\Ï
- ?\ㄓ
+ ?\ظ
?\ع
?\غ
?\ـ
- ?\Ô
?\ف
?\ق
- ?\×
?\ك
- ?\Ù
- ?\گ
- ?\Û
- ?\Ü
+ ?\à
?\ل
+ ?\â
?\م
?\ن
- ?\à
?\ه
- ?\â
- ?\ځ
?\و
- ?\ى
- ?\ي
?\ç
?\è
?\é
?\ê
?\ë
- ?\Ù\8b
- ?\Ù\8c
+ ?\Ù\89
+ ?\Ù\8a
?\î
?\ï
+ ?\ً
+ ?\ٌ
?\ٍ
?\َ
+ ?\ô
?\ُ
?\ِ
- ?\ô
- ?\ّ
- ?\ْ
?\÷
- nil
+ ?\ّ
?\ù
- nil
+ ?\ْ
?\û
?\ü
?\
?\
- ?\ÿ]
+ ?\ے]
nil ?a) ;; Arabic
(cp-make-coding-system
nil]
"NeXTstep encoding." ?N)
-(cp-make-coding-system
- koi8-u
- [?\─
- ?\│
- ?\┌
- ?\┐
- ?\└
- ?\┘
- ?\├
- ?\┤
- ?\┬
- ?\┴
- ?\┼
- ?\▀
- ?\▄
- ?\█
- ?\▌
- ?\▐
- ?\░
- ?\▒
- ?\▓
- ?\⌠
- ?\■
- ?\∙
- ?\√
- ?\≈
- ?\≤
- ?\≥
- ?\
- ?\⌡
- ?\°
- ?\²
- ?\·
- ?\÷
- ?\═
- ?\║
- ?\╒
- ?\ё
- ?\є
- ?\╔
- ?\і
- ?\ї
- ?\╗
- ?\╘
- ?\╙
- ?\╚
- ?\╛
- ?\ґ
- ?\╝
- ?\╞
- ?\╟
- ?\╠
- ?\╡
- ?\Ё
- ?\Є
- ?\╣
- ?\І
- ?\Ї
- ?\╦
- ?\╧
- ?\╨
- ?\╩
- ?\╪
- ?\Ґ
- ?\╬
- ?\©
- ?\ю
- ?\а
- ?\б
- ?\ц
- ?\д
- ?\е
- ?\ф
- ?\г
- ?\х
- ?\и
- ?\й
- ?\к
- ?\л
- ?\м
- ?\н
- ?\о
- ?\п
- ?\я
- ?\р
- ?\с
- ?\т
- ?\у
- ?\ж
- ?\в
- ?\ь
- ?\ы
- ?\з
- ?\ш
- ?\э
- ?\щ
- ?\ч
- ?\ъ
- ?\Ю
- ?\А
- ?\Б
- ?\Ц
- ?\Д
- ?\Е
- ?\Ф
- ?\Г
- ?\Х
- ?\И
- ?\Й
- ?\К
- ?\Л
- ?\М
- ?\Н
- ?\О
- ?\П
- ?\Я
- ?\Р
- ?\С
- ?\Т
- ?\У
- ?\Ж
- ?\В
- ?\Ь
- ?\Ы
- ?\З
- ?\Ш
- ?\Э
- ?\Щ
- ?\Ч
- ?\Ъ]
- "Cyrillic KOI8-U (Ukranian) encoding."
- ?U)
-
-;; ;; Unicode-based, not cyrillic-iso8859-5 based (and thus incomplete)
-;; ;; like the standard version.
-;; (cp-make-coding-system
-;; ;; The base system uses cyrillic-koi8 as the canonical name, but
-;; ;; that's not a MIME name.
-;; koi8-r
-;; [?\─
-;; ?\│
-;; ?\┌
-;; ?\┐
-;; ?\└
-;; ?\┘
-;; ?\├
-;; ?\┤
-;; ?\┬
-;; ?\┴
-;; ?\┼
-;; ?\▀
-;; ?\▄
-;; ?\█
-;; ?\▌
-;; ?\▐
-;; ?\░
-;; ?\▒
-;; ?\▓
-;; ?\⌠
-;; ?\■
-;; ?\•
-;; ?\√
-;; ?\≈
-;; ?\≤
-;; ?\≥
-;; ?\
-;; ?\⌡
-;; ?\°
-;; ?\²
-;; ?\·
-;; ?\÷
-;; ?\═
-;; ?\║
-;; ?\╒
-;; ?\ё
-;; ?\╓
-;; ?\╔
-;; ?\╕
-;; ?\╖
-;; ?\╗
-;; ?\╘
-;; ?\╙
-;; ?\╚
-;; ?\╛
-;; ?\╜
-;; ?\╝
-;; ?\╞
-;; ?\╟
-;; ?\╠
-;; ?\╡
-;; ?\Ё
-;; ?\╢
-;; ?\╣
-;; ?\╤
-;; ?\╥
-;; ?\╦
-;; ?\╧
-;; ?\╨
-;; ?\╩
-;; ?\╪
-;; ?\╫
-;; ?\╬
-;; ?\©
-;; ?\ю
-;; ?\а
-;; ?\б
-;; ?\ц
-;; ?\д
-;; ?\е
-;; ?\ф
-;; ?\г
-;; ?\х
-;; ?\и
-;; ?\й
-;; ?\к
-;; ?\л
-;; ?\м
-;; ?\н
-;; ?\о
-;; ?\п
-;; ?\я
-;; ?\р
-;; ?\с
-;; ?\т
-;; ?\у
-;; ?\ж
-;; ?\в
-;; ?\ь
-;; ?\ы
-;; ?\з
-;; ?\ш
-;; ?\э
-;; ?\щ
-;; ?\ч
-;; ?\ъ
-;; ?\Ю
-;; ?\А
-;; ?\Б
-;; ?\Ц
-;; ?\Д
-;; ?\Е
-;; ?\Ф
-;; ?\Г
-;; ?\Х
-;; ?\И
-;; ?\Й
-;; ?\К
-;; ?\Л
-;; ?\М
-;; ?\Н
-;; ?\О
-;; ?\П
-;; ?\Я
-;; ?\Р
-;; ?\С
-;; ?\Т
-;; ?\У
-;; ?\Ж
-;; ?\В
-;; ?\Ь
-;; ?\Ы
-;; ?\З
-;; ?\Ш
-;; ?\Э
-;; ?\Щ
-;; ?\Ч
-;; ?\Ъ]
-;; "Unicode-based KOI8 encoding for Cyrillic (MIME: KOI8-R)"
-;; ?R)
-;; (cp-fix-safe-chars 'cyrillic-koi8)
-;; (define-coding-system-alias 'cyrillic-koi8 'koi8-r)
-;; (define-coding-system-alias 'koi8 'koi8-r)
-;; (define-coding-system-alias 'cp878 'koi8-r)
-
(cp-make-coding-system
koi8-t ; used by glibc for tg_TJ
[?\қ
?\Ъ]
"Unicode-based KOI8-T encoding for Cyrillic")
(coding-system-put 'koi8-t 'mime-charset nil) ; not in the IANA list
+(define-coding-system-alias 'cyrillic-koi8-t 'koi8-t)
;; Online final ISO draft:
-;; http://www.egt.ie/standards/iso8859/fdis8859-16-en.pdf
+;; http://www.evertype.com/standards/iso8859/fdis8859-16-en.pdf
;; Equivalent National Standard:
;; Romanian Standard SR 14111:1998, Romanian Standards Institution
(define-coding-system-alias 'iso-8859-13 'iso-latin-7)
(define-coding-system-alias 'latin-7 'iso-latin-7)
+;; Fixme: check on the C1 characters which libiconv includes. They
+;; are reproduced below, but are probably wrong. I can't find an
+;; official definition of georgian-ps.
(cp-make-coding-system
georgian-ps ; used by glibc for ka_GE
[?\\80
?\і
?\Ї
?\ї
- ?\÷
- ?\±
+ ?\·
+ ?\√
?\№
?\¤
- ?\■
+ ?\■
?\ ])
-;; Original ;name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
+(define-coding-system-alias 'ruscii 'cp1125)
+;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
(define-coding-system-alias 'cp866u 'cp1125)
+(coding-system-put 'cp1125 'mime-charset nil)
-(dotimes (i 8)
+;; Suggested by Anton Zinoviev <anton@lml.bas.bg>: Bulgarian DOS
+;; codepage. Table at
+;; <URL:http://czyborra.com/charsets/bulgarian-mik.txt.gz>.
+(cp-make-coding-system
+ mik
+ [?А ?Б ?В ?Г ?Д ?Е ?Ж ?З ?И ?Й ?К ?Л ?М ?Н ?О ?П ?Р ?С ?Т ?У ?Ф ?Х ?Ц
+ ?Ч ?Ш ?Щ ?Ъ ?Ы ?Ь ?Э ?Ю ?Я ?а ?б ?в ?г ?д ?е ?ж ?з ?и ?й ?к ?л ?м ?н
+ ?о ?п ?р ?с ?т ?у ?ф ?х ?ц ?ч ?ш ?щ ?ъ ?ы ?ь ?э ?ю ?я ?└ ?┴ ?┬ ?├ ?─
+ ?┼ ?╣ ?║ ?╚ ?╔ ?╩ ?╦ ?╠ ?═ ?╬ ?┐ ?░ ?▒ ?▓ ?│ ?┤ ?№ ?§ ?╗ ?╝ ?┘ ?┌ ?█
+ ?▄ ?▌ ?▐ ?▀ ?α ?β ?Γ ?π ?Σ ?σ ?μ ?τ ?Φ ?Θ ?Ω ?δ ?∞ ?∅ ?∈ ?∩ ?≡ ?± ?≥
+ ?≤ ?⌠ ?⌡ ?÷ ?≈ ?° ?∙ ?· ?√ ?ⁿ ?² ?■ ? ])
+(coding-system-put 'mik 'mime-charset nil)
+
+;; Suggested by Anton Zinoviev <anton@lml.bas.bg>: similar to CP1251
+;; and used for some non-Slavic Cyrillic languages. Table found at
+;; <URL:ftp://ftp.logic.ru/pub/logic/linux/cyr-asian/PT154>. See also
+;; <URL:http://lists.w3.org/Archives/Public/ietf-charsets/2002AprJun/0092.html,
+;; which suggests it's used in an Asian Cyrillic context.
+;;;###autoload(autoload-coding-system 'pt154 '(require 'code-pages))
+(cp-make-coding-system
+ pt154
+ [?Җ ?Ғ ?Ӯ ?ғ ?„ ?… ?Ҷ ?Ү ?Ҳ ?ү ?Ҡ ?Ӣ ?Ң ?Қ ?Һ ?Ҹ ?җ ?‘ ?’ ?“ ?” ?• ?–
+ ?— ?ҳ ?ҷ ?ҡ ?ӣ ?ң ?қ ?һ ?ҹ ? ?Ў ?ў ?Ј ?Ө ?Ҙ ?Ұ ?§ ?Ё ?© ?Ә ?\« ?¬ ?ӯ
+ ?® ?Ҝ ?° ?ұ ?І ?і ?ҙ ?ө ?¶ ?· ?ё ?№ ?ә ?» ?ј ?Ҫ ?ҫ ?ҝ ?А ?Б ?В ?Г ?Д
+ ?Е ?Ж ?З ?И ?Й ?К ?Л ?М ?Н ?О ?П ?Р ?С ?Т ?У ?Ф ?Х ?Ц ?Ч ?Ш ?Щ ?Ъ ?Ы
+ ?Ь ?Э ?Ю ?Я ?а ?б ?в ?г ?д ?е ?ж ?з ?и ?й ?к ?л ?м ?н ?о ?п ?р ?с ?т
+ ?у ?ф ?х ?ц ?ч ?ш ?щ ?ъ ?ы ?ь ?э ?ю ?я])
+
+;;;###autoload(autoload-coding-system 'iso-8859-11 '(require 'code-pages))
+(cp-make-coding-system
+ iso-8859-11
+ [nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
+ nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
+ ? ?ก ?ข ?ฃ ?ค ?ฅ ?ฆ ?ง ?จ ?ฉ ?ช ?ซ ?ฌ ?ญ ?ฎ ?ฏ
+ ?ฐ ?ฑ ?ฒ ?ณ ?ด ?ต ?ถ ?ท ?ธ ?น ?บ ?ป ?ผ ?ฝ ?พ ?ฟ
+ ?ภ ?ม ?ย ?ร ?ฤ ?ล ?ฦ ?ว ?ศ ?ษ ?ส ?ห ?ฬ ?อ ?ฮ ?ฯ
+ ?ะ ?ั ?า ?ำ ?ิ ?ี ?ึ ?ื ?ุ ?ู ?ฺ nil nil nil nil ?฿
+ ?เ ?แ ?โ ?ใ ?ไ ?ๅ ?ๆ ?็ ?่ ?้ ?๊ ?๋ ?์ ?ํ ?๎ ?๏
+ ?๐ ?๑ ?๒ ?๓ ?๔ ?๕ ?๖ ?๗ ?๘ ?๙ ?๚ ?๛ nil nil nil nil]
+ "ISO-8859-11. This is `thai-tis620' with the addition of no-break-space.")
+
+(dotimes (i 9)
(let ((w (intern (format "windows-125%d" i)))
(c (intern (format "cp125%d" i))))
- (define-coding-system-alias c w)
+ ;; Define cp125* as aliases for all windows-125*, so on Windows
+ ;; we can just concat "cp" to the ANSI codepage we get from the system
+ ;; and not have to worry about whether it should be "cp" or "windows-".
+ (if (coding-system-p w)
+ (define-coding-system-alias c w))
;; Compatibility with codepage.el, though cp... are not the
;; canonical names.
(push (assoc w non-iso-charset-alist) non-iso-charset-alist)))
-;; Use Unicode font under Windows. Jason Rumney fecit.
-(if (and (fboundp 'w32-add-charset-info)
- (not (boundp 'w32-unicode-charset-defined)))
- (w32-add-charset-info "iso10646-1" 'w32-charset-ansi t))
-
(provide 'code-pages)
+;;; arch-tag: 8b6e3c73-b271-4198-866d-ea6d0ceff1b2
;;; code-pages.el ends here