X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/ed7f1a6c5caaf4159125c08db5d18c5471fdd032..73d213f2816876fe9c6c429e75a3be5454a42b34:/lisp/international/mule-conf.el diff --git a/lisp/international/mule-conf.el b/lisp/international/mule-conf.el index 9ba95e4d11..cacfc6b6e9 100644 --- a/lisp/international/mule-conf.el +++ b/lisp/international/mule-conf.el @@ -1,6 +1,6 @@ ;;; mule-conf.el --- configure multilingual environment -;; Copyright (C) 1997-2011 Free Software Foundation, Inc. +;; Copyright (C) 1997-2016 Free Software Foundation, Inc. ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 ;; National Institute of Advanced Industrial Science and Technology (AIST) ;; Registration Number H14PRO021 @@ -35,7 +35,9 @@ ;;; Remarks -;; The ISO-IR registry is at http://www.itscj.ipsj.or.jp/ISO-IR/. +;; The ISO-IR registry is maintained by the Information Processing +;; Society of Japan/Information Technology Standards Commission of +;; Japan (IPSJ/ITSCJ) at https://www.itscj.ipsj.or.jp/itscj_english/. ;; Standards docs equivalent to iso-2022 and iso-8859 are at ;; http://www.ecma.ch/. @@ -218,7 +220,7 @@ (fmakunbound 'define-iso-single-byte-charset) ;; Can this be shared with 8859-11? -;; N.b. not all of these are defined unicodes. +;; N.b. not all of these are defined in Unicode. (define-charset 'thai-tis620 "TIS620.2533" :short-name "TIS620.2533" @@ -708,7 +710,7 @@ (define-charset-alias 'cp866u 'cp1125) ;; Fixme: C.f. iconv, http://czyborra.com/charsets/codepages.html -;; shows this as not ASCII comptaible, with various graphics in +;; shows this as not ASCII compatible, with various graphics in ;; 0x01-0x1F. (define-charset 'cp437 "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)" @@ -865,7 +867,7 @@ ;; Lao script. ;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF. -;; Not all of them are defined unicodes. +;; Not all of them are defined in Unicode. (define-charset 'lao "Lao characters (ISO10646 0E81..0EDF)" :short-name "Lao" @@ -888,21 +890,23 @@ ;; script which IS-13194 supports. (define-charset 'indian-is13194 - "Generic Indian charset for data exchange with IS 13194" - :short-name "IS 13194" - :long-name "Indian IS 13194" + "7-bit representation of IS 13194 (ISCII) for Devanagari" + :short-name "IS 13194 (DEV)" + :long-name "Indian IS 13194 (DEV)" :iso-final-char ?5 :emacs-mule-id 225 :supplementary-p t :code-space [33 126] - :code-offset #x180000) + :code-offset #x180000 + :unify-map "MULE-is13194") (let ((code-offset #x180100)) (dolist (script '(devanagari sanskrit bengali tamil telugu assamese oriya kannada malayalam gujarati punjabi)) (define-charset (intern (format "%s-cdac" script)) - (format "Glyphs of %s script for CDAC font. Subset of `indian-glyph'." - (capitalize (symbol-name script))) + (format + "Glyphs of %s script for CDAC font. Subset of `indian-glyph'." + (capitalize (symbol-name script))) :short-name (format "CDAC %s glyphs" (capitalize (symbol-name script))) :supplementary-p t :code-space [0 255] @@ -912,8 +916,9 @@ (dolist (script '(devanagari bengali punjabi gujarati oriya tamil telugu kannada malayalam)) (define-charset (intern (format "%s-akruti" script)) - (format "Glyphs of %s script for AKRUTI font. Subset of `indian-glyph'." - (capitalize (symbol-name script))) + (format + "Glyphs of %s script for AKRUTI font. Subset of `indian-glyph'." + (capitalize (symbol-name script))) :short-name (format "AKRUTI %s glyphs" (capitalize (symbol-name script))) :supplementary-p t :code-space [0 255] @@ -1109,7 +1114,7 @@ :map "MIK") (define-charset 'ptcp154 - "`Paratype' codepage (Asian Cyrillic)" + "ParaType codepage (Asian Cyrillic)" :short-name "PT154" :ascii-compatible-p t :code-space [0 255] @@ -1192,6 +1197,7 @@ (unify-charset 'ipa) (unify-charset 'tibetan) (unify-charset 'ethiopic) +(unify-charset 'indian-is13194) (unify-charset 'japanese-jisx0208-1978) (unify-charset 'japanese-jisx0208) (unify-charset 'japanese-jisx0212) @@ -1225,6 +1231,18 @@ (define-coding-system-alias 'dos 'undecided-dos) (define-coding-system-alias 'mac 'undecided-mac) +(define-coding-system 'prefer-utf-8 + "Like `undecided' but prefer UTF-8 when appropriate. +On decoding, if the source contains 8-bit codes and they all +are valid UTF-8 sequences, detect the source as UTF-8 encoding +regardless of the coding priority. +On encoding, if the source contains non-ASCII characters, encode them +by UTF-8." + :coding-type 'undecided + :mnemonic ?- + :charset-list '(emacs) + :prefer-utf-8 t) + (define-coding-system 'raw-text "Raw text, which means text contains random 8-bit codes. Encoding text with this coding system produces the actual byte @@ -1458,7 +1476,8 @@ for decoding and encoding files, process I/O, etc." :flags '(ascii-at-eol ascii-at-cntl long-form designation locking-shift single-shift) :post-read-conversion 'ctext-post-read-conversion - :pre-write-conversion 'ctext-pre-write-conversion) + :pre-write-conversion 'ctext-pre-write-conversion + :mime-charset 'x-ctext) (define-coding-system-alias 'x-ctext-with-extensions 'compound-text-with-extensions) @@ -1507,6 +1526,7 @@ for decoding and encoding files, process I/O, etc." (setq file-coding-system-alist (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg))) '(("\\.elc\\'" . utf-8-emacs) + ("\\.el\\'" . prefer-utf-8) ("\\.utf\\(-8\\)?\\'" . utf-8) ("\\.xml\\'" . xml-find-file-coding-system) ;; We use raw-text for reading loaddefs.el so that if it