;; Those covered are: cp437, cp737, cp720, cp775, cp850, cp851, cp852,
;; cp855, cp857, cp860, cp861, cp862, cp863, cp864, cp865, cp866,
-;; cp869, cp874, cp1125, windows-1250, windows-1251, windows-1252,
-;; windows-1253, windows-1254, windows-1255, windows-1256,
-;; windows-1257, windows-1258, next, koi8-u, iso-8859-6,
-;; iso-8859-10, iso-8859-11, iso-8859-16, koi8-t, georgian-ps. This
-;; is meant to include all the single-byte ones relevant to GNU (used
-;; in glibc-defined locales); we don't yet get all the multibyte ones
-;; in base Emacs.
+;; cp869, cp874, cp1125, windows-1250, windows-1253, windows-1254,
+;; windows-1255, windows-1256, windows-1257, windows-1258, next,
+;; iso-8859-6, iso-8859-10, iso-8859-11, iso-8859-16, koi8-t,
+;; georgian-ps, mik, pt154. This is meant to include all the
+;; single-byte ones relevant to GNU (used in glibc-defined locales);
+;; we don't yet get all the multibyte ones in base Emacs.
;; Note that various of these can clash with definitions in
;; codepage.el; we try to avoid damage from that. A few CPs from
;;; Code:
-(defun cp-make-translation-table (v)
+;; The defsubsts here are just so that language files can use
+;; `cp-make-coding-system' and not require functions from this file
+;; at runtime.
+
+(defsubst cp-make-translation-table (v)
"Return a translation table made from 128-long vector V.
V comprises characters encodable by mule-utf-8."
(let ((encoding-vector (make-vector 256 0)))
ucs-mule-to-mule-unicode)
tab)))
-(defun cp-valid-codes (v)
+(defsubst cp-valid-codes (v)
"Derive a valid-codes list for translation vector V.
See `make-coding-system'."
(let (pairs
(if start (push (cons start end) pairs))
(nreverse pairs)))
-(defun cp-fix-safe-chars (cs)
- "Remove `char-coding-system-table' entries from previous definition of CS.
-CS is a base coding system or alias."
- (when (coding-system-p cs)
- (let ((chars (coding-system-get cs 'safe-chars)))
- (map-char-table
- (lambda (k v)
- (if (and v (not (eq v t)))
- (aset char-coding-system-table
- k
- (remq cs (aref char-coding-system-table k)))))
- chars))))
-
-;; Fix things that have been, or might be done by codepage.el.
+;; Fix things that have been, or might be, done by codepage.el.
(eval-after-load "codepage"
'(progn
- (dolist (cs '(cp857 cp861 cp1253 cp852 cp866 cp437 cp855 cp869 cp775
- cp862 cp864 cp1250 cp863 cp865 cp1251 cp737 cp1257 cp850
- cp860 cp851 720))
- (cp-fix-safe-chars cs))
-
;; Semi-dummy version for the stuff in codepage.el which we don't
;; define here. (Used by mule-diag.)
(defun cp-supported-codepages ()
cp (cp-charset-for-codepage cp) (cp-offset-for-codepage cp))))))
) ; eval-after-load
-;; For `non-iso-charset-alist'. Do this after redefining
-;; `cp-supported-codepages', which is called through loading
-;; mule-diag.
-(require 'mule-diag)
-
;; Macro to allow ccl compilation at byte-compile time, avoiding
;; loading ccl.
;;;###autoload
(defmacro cp-make-coding-system (name v &optional doc-string mnemonic)
"Make coding system NAME for and 8-bit, extended-ASCII character set.
V is a 128-long vector of characters to translate the upper half of
-the charactert set. DOC-STRING and MNEMONIC are used as the
+the character set. DOC-STRING and MNEMONIC are used as the
corresponding args of `make-coding-system'. If MNEMONIC isn't given,
-?* is used."
+?* is used.
+Return an updated `non-iso-charset-alist'."
(let* ((encoder (intern (format "encode-%s" name)))
(decoder (intern (format "decode-%s" name)))
(ccl-decoder
((loop
(read-multibyte-character r0 r1)
(translate-character ,encoder r0 r1)
- (if (r0 != ,(charset-id 'eight-bit-graphic))
- (if (r0 != ,(charset-id 'eight-bit-control))
- (r1 = ??)))
+ (if (r0 != ,(charset-id 'ascii))
+ (if (r0 != ,(charset-id 'eight-bit-graphic))
+ (if (r0 != ,(charset-id 'eight-bit-control))
+ (r1 = ??))))
(write-repeat r1)))))))
`(let ((translation-table (cp-make-translation-table ,v))
(codes (cp-valid-codes ,v)))
(define-translation-table ',decoder translation-table)
(define-translation-table ',encoder
(char-table-extra-slot translation-table 0))
- (cp-fix-safe-chars ',name)
(make-coding-system
',name 4 ,(or mnemonic ?*)
(or ,doc-string (format "%s encoding" ',name))
(list l)))
non-iso-charset-alist))))
+(eval-when-compile (defvar non-iso-charset-alist))
;; These tables were mostly derived by running somthing like
;; `recode -f cpxxx/..utf-8' on a binary file filled by
?\ţ
?\˙])
-;; be_BY, bg_BG
-(cp-make-coding-system
- windows-1251
- [?\Ђ
- ?\Ѓ
- ?\‚
- ?\ѓ
- ?\„
- ?\…
- ?\†
- ?\‡
- ?\€
- ?\‰
- ?\Љ
- ?\‹
- ?\Њ
- ?\Ќ
- ?\Ћ
- ?\Џ
- ?\ђ
- ?\‘
- ?\’
- ?\“
- ?\”
- ?\•
- ?\–
- ?\—
- nil
- ?\™
- ?\љ
- ?\›
- ?\њ
- ?\ќ
- ?\ћ
- ?\џ
- ?\
- ?\Ў
- ?\ў
- ?\Ј
- ?\¤
- ?\Ґ
- ?\¦
- ?\§
- ?\Ё
- ?\©
- ?\Є
- ?\«
- ?\¬
- ?\
- ?\®
- ?\Ї
- ?\°
- ?\±
- ?\І
- ?\і
- ?\ґ
- ?\µ
- ?\¶
- ?\·
- ?\ё
- ?\№
- ?\є
- ?\»
- ?\ј
- ?\Ѕ
- ?\ѕ
- ?\ї
- ?\А
- ?\Б
- ?\В
- ?\Г
- ?\Д
- ?\Е
- ?\Ж
- ?\З
- ?\И
- ?\Й
- ?\К
- ?\Л
- ?\М
- ?\Н
- ?\О
- ?\П
- ?\Р
- ?\С
- ?\Т
- ?\У
- ?\Ф
- ?\Х
- ?\Ц
- ?\Ч
- ?\Ш
- ?\Щ
- ?\Ъ
- ?\Ы
- ?\Ь
- ?\Э
- ?\Ю
- ?\Я
- ?\а
- ?\б
- ?\в
- ?\г
- ?\д
- ?\е
- ?\ж
- ?\з
- ?\и
- ?\й
- ?\к
- ?\л
- ?\м
- ?\н
- ?\о
- ?\п
- ?\р
- ?\с
- ?\т
- ?\у
- ?\ф
- ?\х
- ?\ц
- ?\ч
- ?\ш
- ?\щ
- ?\ъ
- ?\ы
- ?\ь
- ?\э
- ?\ю
- ?\я]
- nil ?b)
-
-(cp-make-coding-system
- windows-1252
- [?\€
- nil
- ?\‚
- ?\ƒ
- ?\„
- ?\…
- ?\†
- ?\‡
- ?\ˆ
- ?\‰
- ?\Š
- ?\‹
- ?\Œ
- nil
- ?\Ž
- ?\ž
- nil
- ?\‘
- ?\’
- ?\“
- ?\”
- ?\•
- ?\–
- ?\—
- ?\˜
- ?\™
- ?\š
- ?\›
- ?\œ
- nil
- nil
- ?\Ÿ
- ?\
- ?\¡
- ?\¢
- ?\£
- ?\¤
- ?\¥
- ?\¦
- ?\§
- ?\¨
- ?\©
- ?\ª
- ?\«
- ?\¬
- ?\
- ?\®
- ?\¯
- ?\°
- ?\±
- ?\²
- ?\³
- ?\´
- ?\µ
- ?\¶
- ?\·
- ?\¸
- ?\¹
- ?\º
- ?\»
- ?\¼
- ?\½
- ?\¾
- ?\¿
- ?\À
- ?\Á
- ?\Â
- ?\Ã
- ?\Ä
- ?\Å
- ?\Æ
- ?\Ç
- ?\È
- ?\É
- ?\Ê
- ?\Ë
- ?\Ì
- ?\Í
- ?\Î
- ?\Ï
- ?\Ð
- ?\Ñ
- ?\Ò
- ?\Ó
- ?\Ô
- ?\Õ
- ?\Ö
- ?\×
- ?\Ø
- ?\Ù
- ?\Ú
- ?\Û
- ?\Ü
- ?\Ý
- ?\Þ
- ?\ß
- ?\à
- ?\á
- ?\â
- ?\ã
- ?\ä
- ?\å
- ?\æ
- ?\ç
- ?\è
- ?\é
- ?\ê
- ?\ë
- ?\ì
- ?\í
- ?\î
- ?\ï
- ?\ð
- ?\ñ
- ?\ò
- ?\ó
- ?\ô
- ?\õ
- ?\ö
- ?\÷
- ?\ø
- ?\ù
- ?\ú
- ?\û
- ?\ü
- ?\ý
- ?\þ
- ?\ÿ])
-
(cp-make-coding-system
windows-1253
[?\€
(cp-make-coding-system
windows-1256
[?\€
- ?\Ù
+ ?\Ù¾
?\‚
- ?\١
+ ?\ƒ
?\„
?\…
?\†
?\‡
- ?\٢
- ?\٣
- ?\Ù¤
+ ?\ˆ
+ ?\‰
+ ?\Ù¹
?\‹
- ?\٥
- ?\٦
- ?\٧
- ?\٨
- ?\٩
+ ?\Œ
+ ?\چ
+ ?\ژ
+ ?\ڈ
+ ?\گ
?\‘
?\’
?\“
?\•
?\–
?\—
- ?\؛
+ ?\ک
?\™
- ?\؟
+ ?\ڑ
?\›
- ?\ء
- ?\آ
- ?\أ
- ?\Ÿ
+ ?\œ
+ ?\
+ ?\
+ ?\ں
?\
- ?\ؤ
- ?\إ
+ ?\Ø\8c
+ ?\¢
?\£
?\¤
- ?\ئ
+ ?\¥
?\¦
?\§
- ?\ا
+ ?\¨
?\©
- ?\ب
+ ?\ھ
?\«
?\¬
?\
?\®
- ?\پ
+ ?\¯
?\°
?\±
- ?\ة
- ?\ت
- ?\ث
+ ?\²
+ ?\³
+ ?\´
?\µ
?\¶
?\·
+ ?\¸
+ ?\¹
+ ?\؛
+ ?\»
+ ?\¼
+ ?\½
+ ?\¾
+ ?\؟
+ ?\ہ
+ ?\ء
+ ?\آ
+ ?\أ
+ ?\ؤ
+ ?\إ
+ ?\ئ
+ ?\ا
+ ?\ب
+ ?\ة
+ ?\ت
+ ?\ث
?\ج
- ?\چ
?\ح
- ?\»
?\خ
?\د
?\ذ
?\ر
- ?\À
?\ز
- ?\Â
- ?\ژ
?\س
?\ش
?\ص
- ?\Ç
- ?\È
- ?\É
- ?\Ê
- ?\Ë
?\ض
+ ?\×
?\ط
- ?\Î
- ?\Ï
- ?\ㄓ
+ ?\ظ
?\ع
?\غ
?\ـ
- ?\Ô
?\ف
?\ق
- ?\×
?\ك
- ?\Ù
- ?\گ
- ?\Û
- ?\Ü
+ ?\à
?\ل
+ ?\â
?\م
?\ن
- ?\à
?\ه
- ?\â
- ?\ځ
?\و
- ?\ى
- ?\ي
?\ç
?\è
?\é
?\ê
?\ë
- ?\Ù\8b
- ?\Ù\8c
+ ?\Ù\89
+ ?\Ù\8a
?\î
?\ï
+ ?\ً
+ ?\ٌ
?\ٍ
?\َ
+ ?\ô
?\ُ
?\ِ
- ?\ô
- ?\ّ
- ?\ْ
?\÷
- nil
+ ?\ّ
?\ù
- nil
+ ?\ْ
?\û
?\ü
?\
?\
- ?\ÿ]
+ ?\ے]
nil ?a) ;; Arabic
(cp-make-coding-system
?\Ъ]
"Unicode-based KOI8-T encoding for Cyrillic")
(coding-system-put 'koi8-t 'mime-charset nil) ; not in the IANA list
+(define-coding-system-alias 'cyrillic-koi8-t 'koi8-t)
;; Online final ISO draft:
(define-coding-system-alias 'iso-8859-13 'iso-latin-7)
(define-coding-system-alias 'latin-7 'iso-latin-7)
+;; Fixme: check on the C1 characters which libiconv includes. They
+;; are reproduced below, but are probably wrong. I can't find an
+;; official definition of georgian-ps.
(cp-make-coding-system
georgian-ps ; used by glibc for ka_GE
[?\\80
?\і
?\Ї
?\ї
- ?\÷
- ?\±
+ ?\·
+ ?\√
?\№
?\¤
- ?\■
+ ?\■
?\ ])
(define-coding-system-alias 'ruscii 'cp1125)
;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
(define-coding-system-alias 'cp866u 'cp1125)
+(coding-system-put 'cp1125 'mime-charset nil)
+;; Suggested by Anton Zinoviev <anton@lml.bas.bg>: Bulgarian DOS
+;; codepage. Table at
+;; <URL:http://czyborra.com/charsets/bulgarian-mik.txt.gz>.
+(cp-make-coding-system
+ mik
+ [?А ?Б ?В ?Г ?Д ?Е ?Ж ?З ?И ?Й ?К ?Л ?М ?Н ?О ?П ?Р ?С ?Т ?У ?Ф ?Х ?Ц
+ ?Ч ?Ш ?Щ ?Ъ ?Ы ?Ь ?Э ?Ю ?Я ?а ?б ?в ?г ?д ?е ?ж ?з ?и ?й ?к ?л ?м ?н
+ ?о ?п ?р ?с ?т ?у ?ф ?х ?ц ?ч ?ш ?щ ?ъ ?ы ?ь ?э ?ю ?я ?└ ?┴ ?┬ ?├ ?─
+ ?┼ ?╣ ?║ ?╚ ?╔ ?╩ ?╦ ?╠ ?═ ?╬ ?┐ ?░ ?▒ ?▓ ?│ ?┤ ?№ ?§ ?╗ ?╝ ?┘ ?┌ ?█
+ ?▄ ?▌ ?▐ ?▀ ?α ?β ?Γ ?π ?Σ ?σ ?μ ?τ ?Φ ?Θ ?Ω ?δ ?∞ ?∅ ?∈ ?∩ ?≡ ?± ?≥
+ ?≤ ?⌠ ?⌡ ?÷ ?≈ ?° ?∙ ?· ?√ ?ⁿ ?² ?■ ? ])
+(coding-system-put 'mik 'mime-charset nil)
+
+;; Suggested by Anton Zinoviev <anton@lml.bas.bg>: similar to CP1251
+;; and used for some non-Slavic Cyrillic languages. Table found at
+;; <URL:ftp://ftp.logic.ru/pub/logic/linux/cyr-asian/PT154>. See also
+;; <URL:http://lists.w3.org/Archives/Public/ietf-charsets/2002AprJun/0092.html>,
+;; which suggests it's used in an Asian Cyrillic context.
+;;;###autoload(autoload-coding-system 'pt154 '(require 'code-pages))
+(cp-make-coding-system
+ pt154
+ [?Җ ?Ғ ?Ӯ ?ғ ?„ ?… ?Ҷ ?Ү ?Ҳ ?ү ?Ҡ ?Ӣ ?Ң ?Қ ?Һ ?Ҹ ?җ ?‘ ?’ ?“ ?” ?• ?–
+ ?— ?ҳ ?ҷ ?ҡ ?ӣ ?ң ?қ ?һ ?ҹ ? ?Ў ?ў ?Ј ?Ө ?Ҙ ?Ұ ?§ ?Ё ?© ?Ә ?\« ?¬ ?ӯ
+ ?® ?Ҝ ?° ?ұ ?І ?і ?ҙ ?ө ?¶ ?· ?ё ?№ ?ә ?» ?ј ?Ҫ ?ҫ ?ҝ ?А ?Б ?В ?Г ?Д
+ ?Е ?Ж ?З ?И ?Й ?К ?Л ?М ?Н ?О ?П ?Р ?С ?Т ?У ?Ф ?Х ?Ц ?Ч ?Ш ?Щ ?Ъ ?Ы
+ ?Ь ?Э ?Ю ?Я ?а ?б ?в ?г ?д ?е ?ж ?з ?и ?й ?к ?л ?м ?н ?о ?п ?р ?с ?т
+ ?у ?ф ?х ?ц ?ч ?ш ?щ ?ъ ?ы ?ь ?э ?ю ?я])
+
+;;;###autoload(autoload-coding-system 'iso-8859-11 '(require 'code-pages))
(cp-make-coding-system
iso-8859-11
- [
-nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
-nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
-? ?ก ?ข ?ฃ ?ค ?ฅ ?ฆ ?ง ?จ ?ฉ ?ช ?ซ ?ฌ ?ญ ?ฎ ?ฏ
-?ฐ ?ฑ ?ฒ ?ณ ?ด ?ต ?ถ ?ท ?ธ ?น ?บ ?ป ?ผ ?ฝ ?พ ?ฟ
-?ภ ?ม ?ย ?ร ?ฤ ?ล ?ฦ ?ว ?ศ ?ษ ?ส ?ห ?ฬ ?อ ?ฮ ?ฯ
-?ะ ?ั ?า ?ำ ?ิ ?ี ?ึ ?ื ?ุ ?ู ?ฺ nil nil nil nil ?฿
-?เ ?แ ?โ ?ใ ?ไ ?ๅ ?ๆ ?็ ?่ ?้ ?๊ ?๋ ?์ ?ํ ?๎ ?๏
-?๐ ?๑ ?๒ ?๓ ?๔ ?๕ ?๖ ?๗ ?๘ ?๙ ?๚ ?๛ nil nil nil nil
-]
+ [nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
+ nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
+ ? ?ก ?ข ?ฃ ?ค ?ฅ ?ฆ ?ง ?จ ?ฉ ?ช ?ซ ?ฌ ?ญ ?ฎ ?ฏ
+ ?ฐ ?ฑ ?ฒ ?ณ ?ด ?ต ?ถ ?ท ?ธ ?น ?บ ?ป ?ผ ?ฝ ?พ ?ฟ
+ ?ภ ?ม ?ย ?ร ?ฤ ?ล ?ฦ ?ว ?ศ ?ษ ?ส ?ห ?ฬ ?อ ?ฮ ?ฯ
+ ?ะ ?ั ?า ?ำ ?ิ ?ี ?ึ ?ื ?ุ ?ู ?ฺ nil nil nil nil ?฿
+ ?เ ?แ ?โ ?ใ ?ไ ?ๅ ?ๆ ?็ ?่ ?้ ?๊ ?๋ ?์ ?ํ ?๎ ?๏
+ ?๐ ?๑ ?๒ ?๓ ?๔ ?๕ ?๖ ?๗ ?๘ ?๙ ?๚ ?๛ nil nil nil nil]
"ISO-8859-11. This is `thai-tis620' with the addition of no-break-space.")
-(dotimes (i 8)
+(dotimes (i 9)
(let ((w (intern (format "windows-125%d" i)))
(c (intern (format "cp125%d" i))))
- (define-coding-system-alias c w)
+ ;; Define cp125* as aliases for all windows-125*, so on Windows
+ ;; we can just concat "cp" to the ANSI codepage we get from the system
+ ;; and not have to worry about whether it should be "cp" or "windows-".
+ (if (coding-system-p w)
+ (define-coding-system-alias c w))
;; Compatibility with codepage.el, though cp... are not the
;; canonical names.
(push (assoc w non-iso-charset-alist) non-iso-charset-alist)))
-;; Use Unicode font under Windows. Jason Rumney fecit.
-(if (and (fboundp 'w32-add-charset-info)
- (not (boundp 'w32-unicode-charset-defined)))
- (w32-add-charset-info "iso10646-1" 'w32-charset-ansi t))
-
(provide 'code-pages)
+;;; arch-tag: 8b6e3c73-b271-4198-866d-ea6d0ceff1b2
;;; code-pages.el ends here