;; Those covered are: cp437, cp737, cp720, cp775, cp850, cp851, cp852,
;; cp855, cp857, cp860, cp861, cp862, cp863, cp864, cp865, cp866,
-;; cp869, cp874, cp1125, windows-1250, windows-1251, windows-1252,
-;; windows-1253, windows-1254, windows-1255, windows-1256,
-;; windows-1257, windows-1258, next, koi8-u, iso-8859-6,
-;; iso-8859-10, iso-8859-11, iso-8859-16, koi8-t, georgian-ps. This
-;; is meant to include all the single-byte ones relevant to GNU (used
-;; in glibc-defined locales); we don't yet get all the multibyte ones
-;; in base Emacs.
+;; cp869, cp874, cp1125, windows-1250, windows-1253, windows-1254,
+;; windows-1255, windows-1256, windows-1257, windows-1258, next,
+;; iso-8859-6, iso-8859-10, iso-8859-11, iso-8859-16, koi8-t,
+;; georgian-ps. This is meant to include all the single-byte ones
+;; relevant to GNU (used in glibc-defined locales); we don't yet get
+;; all the multibyte ones in base Emacs.
;; Note that various of these can clash with definitions in
;; codepage.el; we try to avoid damage from that. A few CPs from
;;; Code:
-(defun cp-make-translation-table (v)
+;; The defsubsts here are just so that language files can use
+;; `cp-make-coding-system' and not require functions from this file
+;; at runtime.
+
+(defsubst cp-make-translation-table (v)
"Return a translation table made from 128-long vector V.
V comprises characters encodable by mule-utf-8."
(let ((encoding-vector (make-vector 256 0)))
ucs-mule-to-mule-unicode)
tab)))
-(defun cp-valid-codes (v)
+(defsubst cp-valid-codes (v)
"Derive a valid-codes list for translation vector V.
See `make-coding-system'."
(let (pairs
(if start (push (cons start end) pairs))
(nreverse pairs)))
-(defun cp-fix-safe-chars (cs)
- "Remove `char-coding-system-table' entries from previous definition of CS.
-CS is a base coding system or alias."
- (when (coding-system-p cs)
- (let ((chars (coding-system-get cs 'safe-chars)))
- (map-char-table
- (lambda (k v)
- (if (and v (not (eq v t)))
- (aset char-coding-system-table
- k
- (remq cs (aref char-coding-system-table k)))))
- chars))))
-
;; Fix things that have been, or might be, done by codepage.el.
(eval-after-load "codepage"
'(progn
- (dolist (cs '(cp857 cp861 cp1253 cp852 cp866 cp437 cp855 cp869 cp775
- cp862 cp864 cp1250 cp863 cp865 cp1251 cp737 cp1257 cp850
- cp860 cp851 720))
- (cp-fix-safe-chars cs))
-
;; Semi-dummy version for the stuff in codepage.el which we don't
;; define here. (Used by mule-diag.)
(defun cp-supported-codepages ()
cp (cp-charset-for-codepage cp) (cp-offset-for-codepage cp))))))
) ; eval-after-load
-;; For `non-iso-charset-alist'. Do this after redefining
-;; `cp-supported-codepages', which is called through loading
-;; mule-diag.
-(require 'mule-diag)
-
;; Macro to allow ccl compilation at byte-compile time, avoiding
;; loading ccl.
;;;###autoload
(defmacro cp-make-coding-system (name v &optional doc-string mnemonic)
"Make coding system NAME for an 8-bit, extended-ASCII character set.
V is a 128-long vector of characters to translate the upper half of
-the charactert set. DOC-STRING and MNEMONIC are used as the
+the character set. DOC-STRING and MNEMONIC are used as the
corresponding args of `make-coding-system'. If MNEMONIC isn't given,
-?* is used."
+?* is used.
+Return an updated `non-iso-charset-alist'."
(let* ((encoder (intern (format "encode-%s" name)))
(decoder (intern (format "decode-%s" name)))
(ccl-decoder
(define-translation-table ',decoder translation-table)
(define-translation-table ',encoder
(char-table-extra-slot translation-table 0))
- (cp-fix-safe-chars ',name)
(make-coding-system
',name 4 ,(or mnemonic ?*)
(or ,doc-string (format "%s encoding" ',name))
(list l)))
non-iso-charset-alist))))
+(eval-when-compile (defvar non-iso-charset-alist))
;; These tables were mostly derived by running something like
;; `recode -f cpxxx/..utf-8' on a binary file filled by
nil
nil
nil
- ?\
+ ?\
?\ก
?\ข
?\ฃ
?\ť
?\ž
?\ź
- ?\
+ ?\
?\ˇ
?\˘
?\Ł
?\ţ
?\˙])
-;; be_BY, bg_BG
-(cp-make-coding-system
- windows-1251
- [?\Ђ
- ?\Ѓ
- ?\‚
- ?\ѓ
- ?\„
- ?\…
- ?\†
- ?\‡
- ?\€
- ?\‰
- ?\Љ
- ?\‹
- ?\Њ
- ?\Ќ
- ?\Ћ
- ?\Џ
- ?\ђ
- ?\‘
- ?\’
- ?\“
- ?\”
- ?\•
- ?\–
- ?\—
- nil
- ?\™
- ?\љ
- ?\›
- ?\њ
- ?\ќ
- ?\ћ
- ?\џ
- ?\
- ?\Ў
- ?\ў
- ?\Ј
- ?\¤
- ?\Ґ
- ?\¦
- ?\§
- ?\Ё
- ?\©
- ?\Є
- ?\«
- ?\¬
- ?\
- ?\®
- ?\Ї
- ?\°
- ?\±
- ?\І
- ?\і
- ?\ґ
- ?\µ
- ?\¶
- ?\·
- ?\ё
- ?\№
- ?\є
- ?\»
- ?\ј
- ?\Ѕ
- ?\ѕ
- ?\ї
- ?\А
- ?\Б
- ?\В
- ?\Г
- ?\Д
- ?\Е
- ?\Ж
- ?\З
- ?\И
- ?\Й
- ?\К
- ?\Л
- ?\М
- ?\Н
- ?\О
- ?\П
- ?\Р
- ?\С
- ?\Т
- ?\У
- ?\Ф
- ?\Х
- ?\Ц
- ?\Ч
- ?\Ш
- ?\Щ
- ?\Ъ
- ?\Ы
- ?\Ь
- ?\Э
- ?\Ю
- ?\Я
- ?\а
- ?\б
- ?\в
- ?\г
- ?\д
- ?\е
- ?\ж
- ?\з
- ?\и
- ?\й
- ?\к
- ?\л
- ?\м
- ?\н
- ?\о
- ?\п
- ?\р
- ?\с
- ?\т
- ?\у
- ?\ф
- ?\х
- ?\ц
- ?\ч
- ?\ш
- ?\щ
- ?\ъ
- ?\ы
- ?\ь
- ?\э
- ?\ю
- ?\я]
- nil ?b)
-
-(cp-make-coding-system
- windows-1252
- [?\€
- nil
- ?\‚
- ?\ƒ
- ?\„
- ?\…
- ?\†
- ?\‡
- ?\ˆ
- ?\‰
- ?\Š
- ?\‹
- ?\Œ
- nil
- ?\Ž
- ?\ž
- nil
- ?\‘
- ?\’
- ?\“
- ?\”
- ?\•
- ?\–
- ?\—
- ?\˜
- ?\™
- ?\š
- ?\›
- ?\œ
- nil
- nil
- ?\Ÿ
- ?\
- ?\¡
- ?\¢
- ?\£
- ?\¤
- ?\¥
- ?\¦
- ?\§
- ?\¨
- ?\©
- ?\ª
- ?\«
- ?\¬
- ?\
- ?\®
- ?\¯
- ?\°
- ?\±
- ?\²
- ?\³
- ?\´
- ?\µ
- ?\¶
- ?\·
- ?\¸
- ?\¹
- ?\º
- ?\»
- ?\¼
- ?\½
- ?\¾
- ?\¿
- ?\À
- ?\Á
- ?\Â
- ?\Ã
- ?\Ä
- ?\Å
- ?\Æ
- ?\Ç
- ?\È
- ?\É
- ?\Ê
- ?\Ë
- ?\Ì
- ?\Í
- ?\Î
- ?\Ï
- ?\Ð
- ?\Ñ
- ?\Ò
- ?\Ó
- ?\Ô
- ?\Õ
- ?\Ö
- ?\×
- ?\Ø
- ?\Ù
- ?\Ú
- ?\Û
- ?\Ü
- ?\Ý
- ?\Þ
- ?\ß
- ?\à
- ?\á
- ?\â
- ?\ã
- ?\ä
- ?\å
- ?\æ
- ?\ç
- ?\è
- ?\é
- ?\ê
- ?\ë
- ?\ì
- ?\í
- ?\î
- ?\ï
- ?\ð
- ?\ñ
- ?\ò
- ?\ó
- ?\ô
- ?\õ
- ?\ö
- ?\÷
- ?\ø
- ?\ù
- ?\ú
- ?\û
- ?\ü
- ?\ý
- ?\þ
- ?\ÿ])
-
(cp-make-coding-system
windows-1253
[?\€
nil
nil
nil
- ?\
+ ?\
?\΅
?\Ά
?\£
nil
nil
?\Ÿ
- ?\
+ ?\
?\¡
?\¢
?\£
nil
nil
nil
- ?\
+ ?\
?\¡
?\¢
?\£
(cp-make-coding-system
windows-1256
[?\€
- ?\٠
+ ?\پ
?\‚
- ?\١
+ ?\ƒ
?\„
?\…
?\†
?\‡
- ?\٢
- ?\٣
- ?\٤
+ ?\ˆ
+ ?\‰
+ ?\ٹ
?\‹
- ?\٥
- ?\٦
- ?\٧
- ?\٨
- ?\٩
+ ?\Œ
+ ?\چ
+ ?\ژ
+ ?\ڈ
+ ?\گ
?\‘
?\’
?\“
?\•
?\–
?\—
- ?\؛
+ ?\ک
?\™
- ?\؟
+ ?\ڑ
?\›
- ?\ء
- ?\آ
- ?\أ
- ?\Ÿ
- ?\
- ?\ؤ
- ?\إ
+ ?\œ
+ ?\
+ ?\
+ ?\ں
+ ?\
+ ?\،
+ ?\¢
?\£
?\¤
- ?\ئ
+ ?\¥
?\¦
?\§
- ?\ا
+ ?\¨
?\©
- ?\ب
+ ?\ھ
?\«
?\¬
?\
?\®
- ?\پ
+ ?\¯
?\°
?\±
- ?\ة
- ?\ت
- ?\ث
+ ?\²
+ ?\³
+ ?\´
?\µ
?\¶
?\·
+ ?\¸
+ ?\¹
+ ?\؛
+ ?\»
+ ?\¼
+ ?\½
+ ?\¾
+ ?\؟
+ ?\ہ
+ ?\ء
+ ?\آ
+ ?\أ
+ ?\ؤ
+ ?\إ
+ ?\ئ
+ ?\ا
+ ?\ب
+ ?\ة
+ ?\ت
+ ?\ث
?\ج
- ?\چ
?\ح
- ?\»
?\خ
?\د
?\ذ
?\ر
- ?\À
?\ز
- ?\Â
- ?\ژ
?\س
?\ش
?\ص
- ?\Ç
- ?\È
- ?\É
- ?\Ê
- ?\Ë
?\ض
+ ?\×
?\ط
- ?\Î
- ?\Ï
- ?\ㄓ
+ ?\ظ
?\ع
?\غ
?\ـ
- ?\Ô
?\ف
?\ق
- ?\×
?\ك
- ?\Ù
- ?\گ
- ?\Û
- ?\Ü
+ ?\à
?\ل
+ ?\â
?\م
?\ن
- ?\à
?\ه
- ?\â
- ?\ځ
?\و
- ?\ى
- ?\ي
?\ç
?\è
?\é
?\ê
?\ë
- ?\ً
- ?\ٌ
+ ?\ى
+ ?\ي
?\î
?\ï
+ ?\ً
+ ?\ٌ
?\ٍ
?\َ
+ ?\ô
?\ُ
?\ِ
- ?\ô
- ?\ّ
- ?\ْ
?\÷
- nil
+ ?\ّ
?\ù
- nil
+ ?\ْ
?\û
?\ü
?\
?\
- ?\ÿ]
+ ?\ے]
nil ?a) ;; Arabic
(cp-make-coding-system
nil
nil
nil
- ?\
+ ?\
nil
?\¢
?\£
nil
nil
?\Ÿ
- ?\
+ ?\
?\¡
?\¢
?\£
(cp-make-coding-system
next
- [?\
+ [?\
?\À
?\Á
?\Â
?\Ъ]
"Unicode-based KOI8-T encoding for Cyrillic")
(coding-system-put 'koi8-t 'mime-charset nil) ; not in the IANA list
+(define-coding-system-alias 'cyrillic-koi8-t 'koi8-t)
;; Online final ISO draft:
;; Equivalent National Standard:
;; Romanian Standard SR 14111:1998, Romanian Standards Institution
;; (ASRO).
-
+
;; Intended usage:
;; "This set of coded graphic characters is intended for use in data and
;; Slovenian. This set of coded graphic characters may be regarded as a
;; version of an 8-bit code according to ISO/IEC 2022 or ISO/IEC 4873 at
;; level 1." [ISO 8859-16:2001(E), p. 1]
-
+
;; This charset is suitable for use in MIME text body parts.
-
+
;; ISO 8859-16 was primarily designed for single-byte encoding the Romanian
;; language. The UTF-8 charset is the preferred and in today's MIME software
;; more widely implemented encoding suitable for Romanian.
iso-latin-10 ; consistent with, e.g. Latin-1
[nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
- ?\
+ ?\
?\Ą
?\ą
?\Ł
iso-8859-6
[nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
- ?\
+ ?\
?\¤
?\،
?\
iso-latin-6
[nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
- ?\
+ ?\
?\Ą
?\Ē
?\Ģ
iso-latin-7
[nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
- ?\
+ ?\
?\¡
?\¢
?\£
(define-coding-system-alias 'iso-8859-13 'iso-latin-7)
(define-coding-system-alias 'latin-7 'iso-latin-7)
+;; Fixme: check on the C1 characters which libiconv includes. They
+;; are reproduced below, but are probably wrong. I can't find an
+;; official definition of georgian-ps.
(cp-make-coding-system
georgian-ps ; used by glibc for ka_GE
- [?\
- ?\
+ [?\\80
+ ?\\81
?\‚
?\ƒ
?\„
?\Š
?\‹
?\Œ
- ?\
- ?\
- ?\
- ?\
+ ?\\8d
+ ?\\8e
+ ?\\8f
+ ?\\90
?\‘
?\’
?\“
?\š
?\›
?\œ
- ?\
- ?\
+ ?\\9d
+ ?\\9e
?\Ÿ
- ?\
+ ?\
?\¡
?\¢
?\£
?\і
?\Ї
?\ї
- ?\÷
- ?\±
+ ?\·
+ ?\√
?\№
?\¤
- ?\■
+ ?\■
?\ ])
(define-coding-system-alias 'ruscii 'cp1125)
;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
(define-coding-system-alias 'cp866u 'cp1125)
+(coding-system-put 'cp1125 'mime-charset nil)
;; Suggested by Anton Zinoviev <anton@lml.bas.bg>: Bulgarian DOS
;; codepage. Table at
?┼ ?╣ ?║ ?╚ ?╔ ?╩ ?╦ ?╠ ?═ ?╬ ?┐ ?░ ?▒ ?▓ ?│ ?┤ ?№ ?§ ?╗ ?╝ ?┘ ?┌ ?█
?▄ ?▌ ?▐ ?▀ ?α ?β ?Γ ?π ?Σ ?σ ?μ ?τ ?Φ ?Θ ?Ω ?δ ?∞ ?∅ ?∈ ?∩ ?≡ ?± ?≥
?≤ ?⌠ ?⌡ ?÷ ?≈ ?° ?∙ ?· ?√ ?ⁿ ?² ?■ ? ])
+(coding-system-put 'mik 'mime-charset nil)
;; Suggested by Anton Zinoviev <anton@lml.bas.bg>: similar to CP1251
;; and used for some non-Slavic Cyrillic languages. Table found at
;; <URL:ftp://ftp.logic.ru/pub/logic/linux/cyr-asian/PT154>. See also
;; <URL:http://lists.w3.org/Archives/Public/ietf-charsets/2002AprJun/0092.html>,
;; which suggests it's used in an Asian Cyrillic context.
+;;;###autoload(autoload-coding-system 'pt154 '(require 'code-pages))
(cp-make-coding-system
pt154
[?Җ ?Ғ ?Ӯ ?ғ ?„ ?… ?Ҷ ?Ү ?Ҳ ?ү ?Ҡ ?Ӣ ?Ң ?Қ ?Һ ?Ҹ ?җ ?‘ ?’ ?“ ?” ?• ?–
?Е ?Ж ?З ?И ?Й ?К ?Л ?М ?Н ?О ?П ?Р ?С ?Т ?У ?Ф ?Х ?Ц ?Ч ?Ш ?Щ ?Ъ ?Ы
?Ь ?Э ?Ю ?Я ?а ?б ?в ?г ?д ?е ?ж ?з ?и ?й ?к ?л ?м ?н ?о ?п ?р ?с ?т
?у ?ф ?х ?ц ?ч ?ш ?щ ?ъ ?ы ?ь ?э ?ю ?я])
-
+
+;;;###autoload(autoload-coding-system 'iso-8859-11 '(require 'code-pages))
(cp-make-coding-system
iso-8859-11
[nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
?๐ ?๑ ?๒ ?๓ ?๔ ?๕ ?๖ ?๗ ?๘ ?๙ ?๚ ?๛ nil nil nil nil]
"ISO-8859-11. This is `thai-tis620' with the addition of no-break-space.")
-(dotimes (i 8)
+(dotimes (i 9)
(let ((w (intern (format "windows-125%d" i)))
(c (intern (format "cp125%d" i))))
- (define-coding-system-alias c w)
+ ;; Define cp125* as aliases for all windows-125*, so on Windows
+ ;; we can just concat "cp" to the ANSI codepage we get from the system
+ ;; and not have to worry about whether it should be "cp" or "windows-".
+ (if (coding-system-p w)
+ (define-coding-system-alias c w))
;; Compatibility with codepage.el, though cp... are not the
;; canonical names.
(push (assoc w non-iso-charset-alist) non-iso-charset-alist)))
-;; Use Unicode font under Windows. Jason Rumney fecit.
-(if (and (fboundp 'w32-add-charset-info)
- (not (boundp 'w32-unicode-charset-defined)))
- (w32-add-charset-info "iso10646-1" 'w32-charset-ansi t))
-
(provide 'code-pages)
+;;; arch-tag: 8b6e3c73-b271-4198-866d-ea6d0ceff1b2
;;; code-pages.el ends here