X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/7c82f3e23e37cc848a38b1f8be7149fd672a6393..b7fd432d56e352e25e6db4eaf5b081ddc5a39367:/lisp/international/mule-conf.el diff --git a/lisp/international/mule-conf.el b/lisp/international/mule-conf.el index fd444a098e..41ee466aae 100644 --- a/lisp/international/mule-conf.el +++ b/lisp/international/mule-conf.el @@ -1,7 +1,6 @@ ;;; mule-conf.el --- configure multilingual environment -;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, -;; 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. +;; Copyright (C) 1997-2014 Free Software Foundation, Inc. ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 ;; National Institute of Advanced Industrial Science and Technology (AIST) ;; Registration Number H14PRO021 @@ -219,7 +218,7 @@ (fmakunbound 'define-iso-single-byte-charset) ;; Can this be shared with 8859-11? -;; N.b. not all of these are defined unicodes. +;; N.b. not all of these are defined in Unicode. (define-charset 'thai-tis620 "TIS620.2533" :short-name "TIS620.2533" @@ -709,7 +708,7 @@ (define-charset-alias 'cp866u 'cp1125) ;; Fixme: C.f. iconv, http://czyborra.com/charsets/codepages.html -;; shows this as not ASCII comptaible, with various graphics in +;; shows this as not ASCII compatible, with various graphics in ;; 0x01-0x1F. (define-charset 'cp437 "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)" @@ -866,7 +865,7 @@ ;; Lao script. ;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF. -;; Not all of them are defined unicodes. +;; Not all of them are defined in Unicode. (define-charset 'lao "Lao characters (ISO10646 0E81..0EDF)" :short-name "Lao" @@ -889,14 +888,15 @@ ;; script which IS-13194 supports. (define-charset 'indian-is13194 - "Generic Indian charset for data exchange with IS 13194" - :short-name "IS 13194" - :long-name "Indian IS 13194" + "7-bit representation of IS 13194 (ISCII) for Devanagari" + :short-name "IS 13194 (DEV)" + :long-name "Indian IS 13194 (DEV)" :iso-final-char ?5 :emacs-mule-id 225 :supplementary-p t :code-space [33 126] - :code-offset #x180000) + :code-offset #x180000 + :unify-map "MULE-is13194") (let ((code-offset #x180100)) (dolist (script '(devanagari sanskrit bengali tamil telugu assamese @@ -1110,7 +1110,7 @@ :map "MIK") (define-charset 'ptcp154 - "`Paratype' codepage (Asian Cyrillic)" + "ParaType codepage (Asian Cyrillic)" :short-name "PT154" :ascii-compatible-p t :code-space [0 255] @@ -1193,6 +1193,7 @@ (unify-charset 'ipa) (unify-charset 'tibetan) (unify-charset 'ethiopic) +(unify-charset 'indian-is13194) (unify-charset 'japanese-jisx0208-1978) (unify-charset 'japanese-jisx0208) (unify-charset 'japanese-jisx0212) @@ -1226,6 +1227,18 @@ (define-coding-system-alias 'dos 'undecided-dos) (define-coding-system-alias 'mac 'undecided-mac) +(define-coding-system 'prefer-utf-8 + "Like `undecided' but prefer UTF-8 when appropriate. +On decoding, if the source contains 8-bit codes and they all +are valid UTF-8 sequences, detect the source as UTF-8 encoding +regardless of the coding priority. +On encoding, if the source contains non-ASCII characters, encode them +by UTF-8." + :coding-type 'undecided + :mnemonic ?- + :charset-list '(emacs) + :prefer-utf-8 t) + (define-coding-system 'raw-text "Raw text, which means text contains random 8-bit codes. Encoding text with this coding system produces the actual byte @@ -1459,7 +1472,8 @@ for decoding and encoding files, process I/O, etc." :flags '(ascii-at-eol ascii-at-cntl long-form designation locking-shift single-shift) :post-read-conversion 'ctext-post-read-conversion - :pre-write-conversion 'ctext-pre-write-conversion) + :pre-write-conversion 'ctext-pre-write-conversion + :mime-charset 'x-ctext) (define-coding-system-alias 'x-ctext-with-extensions 'compound-text-with-extensions) @@ -1508,6 +1522,7 @@ for decoding and encoding files, process I/O, etc." (setq file-coding-system-alist (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg))) '(("\\.elc\\'" . utf-8-emacs) + ("\\.el\\'" . prefer-utf-8) ("\\.utf\\(-8\\)?\\'" . utf-8) ("\\.xml\\'" . xml-find-file-coding-system) ;; We use raw-text for reading loaddefs.el so that if it