-;;; mule-cmds.el --- commands for mulitilingual environment -*-coding: utf-8 -*-
+;;; mule-cmds.el --- commands for multilingual environment -*-coding: iso-2022-7bit -*-
-;; Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
-;; Copyright (C) 1995, 2003 Electrotechnical Laboratory, JAPAN.
-;; Licensed to the Free Software Foundation.
+;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
+;; Free Software Foundation, Inc.
+;; Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
+;; National Institute of Advanced Industrial Science and Technology (AIST)
+;; Registration Number H14PRO021
;; Keywords: mule, multilingual
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
;;; Commentary:
;;; Code:
-(eval-when-compile (defvar dos-codepage))
+(eval-when-compile
+ (defvar dos-codepage)
+ (autoload 'widget-value "wid-edit"))
+
+(defvar mac-system-coding-system)
+(defvar mac-system-locale)
;;; MULE related key bindings and menus.
buffer-file-coding-system)))
(list (read-coding-system
(if default
- (format "Coding system for following command (default, %s): " default)
+ (format "Coding system for following command (default %s): " default)
"Coding system for following command: ")
default))))
(let* ((keyseq (read-key-sequence
o coding system of a newly created buffer
o default coding system for subprocess I/O
This also sets the following values:
- o default value used as `file-name-coding-system' for converting file names.
+ o default value used as `file-name-coding-system' for converting file names
+ if CODING-SYSTEM is ASCII-compatible.
o default value for the command `set-terminal-coding-system' (not on MSDOS)
- o default value for the command `set-keyboard-coding-system'."
+ o default value for the command `set-keyboard-coding-system'
+ if CODING-SYSTEM is ASCII-compatible."
(check-coding-system coding-system)
(setq-default buffer-file-coding-system coding-system)
(if (fboundp 'ucs-set-table-for-input)
(or (local-variable-p 'buffer-file-coding-system buffer)
(ucs-set-table-for-input buffer))))
- (if default-enable-multibyte-characters
+ (if (and default-enable-multibyte-characters (not (eq system-type 'darwin))
+ (or (not coding-system)
+ (not (coding-system-get coding-system 'ascii-incompatible))))
+ ;; The file-name coding system on Darwin systems is always utf-8.
(setq default-file-name-coding-system coding-system))
;; If coding-system is nil, honor that on MS-DOS as well, so
;; that they could reset the terminal coding system.
(unless (and (eq window-system 'pc) coding-system)
(setq default-terminal-coding-system coding-system))
- (setq default-keyboard-coding-system coding-system)
+ (if (or (not coding-system)
+ (not (coding-system-get coding-system 'ascii-incompatible)))
+ (setq default-keyboard-coding-system coding-system))
;; Preserve eol-type from existing default-process-coding-systems.
;; On non-unix-like systems in particular, these may have been set
;; carefully by the user, or by the startup code, to deal with the
(setq default-process-coding-system
(cons output-coding input-coding))))
-(defalias 'update-iso-coding-systems 'update-coding-systems-internal)
-(make-obsolete 'update-iso-coding-systems 'update-coding-systems-internal "20.3")
-
(defun prefer-coding-system (coding-system)
"Add CODING-SYSTEM at the front of the priority list for automatic detection.
This also sets the following coding systems:
;; CODING-SYSTEM is no-conversion or undecided.
(error "Can't prefer the coding system `%s'" coding-system))
(set coding-category (or base coding-system))
+ ;; Changing the binding of a coding category requires this call.
(update-coding-systems-internal)
(or (eq coding-category (car coding-category-list))
;; We must change the order.
(interactive
(list (let ((default (or buffer-file-coding-system 'us-ascii)))
(read-coding-system
- (format "Coding-system (default, %s): " default)
+ (format "Coding-system (default %s): " default)
default))))
(let ((pos (unencodable-char-position (point) (point-max) coding-system)))
(if pos
only if the user was explicitly asked and specified a coding system.")
(defvar select-safe-coding-system-accept-default-p nil
- "If non-nil, a function to control the behaviour of coding system selection.
+ "If non-nil, a function to control the behavior of coding system selection.
The meaning is the same as the argument ACCEPT-DEFAULT-P of the
function `select-safe-coding-system' (which see). This variable
overrides that argument.")
`prefer-coding-system'.
However, the user is queried if the chosen coding system is
-inconsistent with what would be selected by `set-auto-coding' from
+inconsistent with what would be selected by `find-auto-coding' from
coding cookies &c. if the contents of the region were read from a
file. (That could lead to data corruption in a file subsequently
re-visited and edited.)
(not (listp default-coding-system)))
(setq default-coding-system (list default-coding-system)))
- (let ((no-other-defaults nil))
+ (let ((no-other-defaults nil)
+ auto-cs)
+ (unless (or (stringp from) find-file-literally)
+ ;; Find an auto-coding that is specified for the current
+ ;; buffer and file from the region FROM and TO.
+ (save-excursion
+ (save-restriction
+ (widen)
+ (goto-char from)
+ (setq auto-cs (find-auto-coding (or file buffer-file-name "")
+ (- to from)))
+ (if auto-cs
+ (if (coding-system-p (car auto-cs))
+ (setq auto-cs (car auto-cs))
+ (display-warning
+ :warning
+ (format "\
+Invalid coding system `%s' is specified
+for the current buffer/file by the %s.
+It is highly recommended to fix it before writing to a file."
+ (car auto-cs)
+ (if (eq (cdr auto-cs) :coding) ":coding tag"
+ (format "variable `%s'" (cdr auto-cs)))))
+ (or (yes-or-no-p "Really proceed with writing? ")
+ (error "Save aborted"))
+ (setq auto-cs nil))))))
+
(if (eq (car default-coding-system) t)
(setq no-other-defaults t
default-coding-system (cdr default-coding-system)))
(mapcar (function (lambda (x) (cons x (coding-system-base x))))
default-coding-system))
+ (if (and auto-cs (not no-other-defaults))
+ ;; If the file has a coding cookie, try to use it before anything
+ ;; else (i.e. before default-coding-system which will typically come
+ ;; from file-coding-system-alist).
+ (let ((base (coding-system-base auto-cs)))
+ (or (memq base '(nil undecided))
+ (rassq base default-coding-system)
+ (push (cons auto-cs base) default-coding-system))))
+
;; From now on, the list of defaults is reversed.
(setq default-coding-system (nreverse default-coding-system))
(coding-system-get preferred 'mime-charset)
(not (rassq base default-coding-system))
(push (cons preferred base)
- default-coding-system)))))
-
- (if select-safe-coding-system-accept-default-p
- (setq accept-default-p select-safe-coding-system-accept-default-p))
-
- (let ((codings (find-coding-systems-region from to))
- (coding-system nil)
- safe rejected unsafe)
- (if (eq (car codings) 'undecided)
- ;; Any coding system is ok.
- (setq coding-system t)
- ;; Classify the defaults into safe, rejected, and unsafe.
- (dolist (elt default-coding-system)
- (if (memq (cdr elt) codings)
- (if (and (functionp accept-default-p)
- (not (funcall accept-default-p (cdr elt))))
- (push (car elt) rejected)
- (push (car elt) safe))
- (push (car elt) unsafe)))
- (if safe
- (setq coding-system (car safe))))
-
- ;; If all the defaults failed, ask a user.
- (when (not coding-system)
- (setq coding-system (select-safe-coding-system-interactively
- from to codings unsafe rejected (car codings))))
-
- (if (vectorp (coding-system-eol-type coding-system))
- (let ((eol (coding-system-eol-type buffer-file-coding-system)))
- (if (numberp eol)
- (setq coding-system
- (coding-system-change-eol-conversion coding-system eol)))))
-
- (if (eq coding-system t)
- (setq coding-system buffer-file-coding-system))
- ;; Check we're not inconsistent with what `coding:' spec &c would
- ;; give when file is re-read.
- ;; But don't do this if we explicitly ignored the cookie
- ;; by using `find-file-literally'.
- (unless (or (stringp from) find-file-literally)
- (let ((auto-cs (save-excursion
- (save-restriction
- (widen)
- (narrow-to-region from to)
- (goto-char (point-min))
- (set-auto-coding (or file buffer-file-name "")
- (buffer-size))))))
+ default-coding-system))))
+
+ (if select-safe-coding-system-accept-default-p
+ (setq accept-default-p select-safe-coding-system-accept-default-p))
+
+ (let ((codings (find-coding-systems-region from to))
+ (coding-system nil)
+ safe rejected unsafe)
+ (if (eq (car codings) 'undecided)
+ ;; Any coding system is ok.
+ (setq coding-system t)
+ ;; Classify the defaults into safe, rejected, and unsafe.
+ (dolist (elt default-coding-system)
+ (if (memq (cdr elt) codings)
+ (if (and (functionp accept-default-p)
+ (not (funcall accept-default-p (cdr elt))))
+ (push (car elt) rejected)
+ (push (car elt) safe))
+ (push (car elt) unsafe)))
+ (if safe
+ (setq coding-system (car safe))))
+
+ ;; If all the defaults failed, ask a user.
+ (when (not coding-system)
+ (setq coding-system (select-safe-coding-system-interactively
+ from to codings unsafe rejected (car codings))))
+
+ (if (vectorp (coding-system-eol-type coding-system))
+ (let ((eol (coding-system-eol-type buffer-file-coding-system)))
+ (if (numberp eol)
+ (setq coding-system
+ (coding-system-change-eol-conversion coding-system eol)))))
+
+ (if (eq coding-system t)
+ (setq coding-system buffer-file-coding-system))
+ ;; Check we're not inconsistent with what `coding:' spec &c would
+ ;; give when file is re-read.
+ ;; But don't do this if we explicitly ignored the cookie
+ ;; by using `find-file-literally'.
+ (when (and auto-cs
+ (not (and
+ coding-system
+ (memq (coding-system-type coding-system) '(0 5)))))
;; Merge coding-system and auto-cs as far as possible.
(if (not coding-system)
(setq coding-system auto-cs)
(format "Selected encoding %s disagrees with \
%s specified by file contents. Really save (else edit coding cookies \
and try again)? " coding-system auto-cs))
- (error "Save aborted")))))
- coding-system))
+ (error "Save aborted"))))
+ coding-system)))
(setq select-safe-coding-system-function 'select-safe-coding-system)
;; We should never use no-conversion for outgoing mail.
(setq coding nil))
(if (fboundp select-safe-coding-system-function)
- (funcall select-safe-coding-system-function
- (point-min) (point-max) coding
- (function (lambda (x) (coding-system-get x 'mime-charset))))
- coding)))
+ (setq coding
+ (funcall select-safe-coding-system-function
+ (point-min) (point-max) coding
+ (function (lambda (x)
+ (coding-system-get x 'mime-charset))))))
+ (if coding
+ ;; Be sure to use LF for end-of-line.
+ (setq coding (coding-system-change-eol-conversion coding 'unix))
+ ;; No coding system is decided. Usually this is the case that
+ ;; the current buffer contains only ASCII. So, we hope
+ ;; iso-8859-1 works.
+ (setq coding 'iso-8859-1-unix))
+ coding))
\f
;;; Language support stuff.
see `language-info-alist'."
(if (symbolp lang-env)
(setq lang-env (symbol-name lang-env)))
+ (set-language-info-internal lang-env key info)
+ (if (equal lang-env current-language-environment)
+ (set-language-environment lang-env)))
+
+(defun set-language-info-internal (lang-env key info)
+ "Internal use only.
+Arguments are the same as `set-language-info'."
(let (lang-slot key-slot)
(setq lang-slot (assoc lang-env language-info-alist))
(if (null lang-slot) ; If no slot for the language, add it.
(define-key-after setup-map (vector (intern lang-env))
(cons lang-env 'setup-specified-language-environment) t)
- (while alist
- (set-language-info lang-env (car (car alist)) (cdr (car alist)))
- (setq alist (cdr alist)))))
+ (dolist (elt alist)
+ (set-language-info-internal lang-env (car elt) (cdr elt)))
+
+ (if (equal lang-env current-language-environment)
+ (set-language-environment lang-env))))
(defun read-language-name (key prompt &optional default)
"Read a language environment name which has information for KEY.
"Describe input method INPUT-METHOD."
(interactive
(list (read-input-method-name
- "Describe input method (default, current choice): ")))
+ "Describe input method (default current choice): ")))
(if (and input-method (symbolp input-method))
(setq input-method (symbol-name input-method)))
(help-setup-xref (list #'describe-input-method
But, if this flag is non-nil, it displays them in echo area instead.")
(defvar input-method-exit-on-invalid-key nil
- "This flag controls the behaviour of an input method on invalid key input.
+ "This flag controls the behavior of an input method on invalid key input.
Usually, when a user types a key which doesn't start any character
handled by the input method, the key is handled by turning off the
input method temporarily. After that key, the input method is re-enabled.
coding-category-ccl
coding-category-binary))
+ ;; Changing the binding of a coding category requires this call.
(update-coding-systems-internal)
(set-default-coding-systems nil)
(setq default-sendmail-coding-system 'iso-latin-1)
+ ;; On Darwin systems, this should be utf-8, but when this file is loaded
+ ;; utf-8 is not yet defined, so we set it in set-locale-environment instead.
(setq default-file-name-coding-system 'iso-latin-1)
;; Preserve eol-type from existing default-process-coding-systems.
;; On non-unix-like systems in particular, these may have been set
(reset-language-environment)
-(defun set-display-table-and-terminal-coding-system (language-name)
+(defun set-display-table-and-terminal-coding-system (language-name &optional coding-system)
"Set up the display table and terminal coding system for LANGUAGE-NAME."
(let ((coding (get-language-info language-name 'unibyte-display)))
- (if coding
+ (if (and coding
+ (or (not coding-system)
+ (coding-system-equal coding coding-system)))
(standard-display-european-internal)
;; The following 2 lines undo the 8-bit display that we set up
;; in standard-display-european-internal, which see. This is in
(dotimes (i 128)
(aset standard-display-table (+ i 128) nil))))
(or (eq window-system 'pc)
- (set-terminal-coding-system coding))))
+ (set-terminal-coding-system (or coding-system coding)))))
(defun set-language-environment (language-name)
"Set up multi-lingual environment for using LANGUAGE-NAME.
specifies the character set for the major languages of Western Europe."
(interactive (list (read-language-name
nil
- "Set language environment (default, English): ")))
+ "Set language environment (default English): ")))
(if language-name
(if (symbolp language-name)
(setq language-name (symbol-name language-name)))
(load syntax nil t))
;; No information for syntax and case. Reset to the defaults.
(let ((syntax-table (standard-syntax-table))
- (case-table (standard-case-table))
+ (standard-table (standard-case-table))
+ (case-table (make-char-table 'case-table))
(ch (if (eq window-system 'pc) 128 160)))
(while (< ch 256)
(modify-syntax-entry ch " " syntax-table)
- (aset case-table ch ch)
(setq ch (1+ ch)))
+ (dotimes (i 128)
+ (aset case-table i (aref standard-table i)))
(set-char-table-extra-slot case-table 0 nil)
(set-char-table-extra-slot case-table 1 nil)
- (set-char-table-extra-slot case-table 2 nil))
- (set-standard-case-table (standard-case-table))
+ (set-char-table-extra-slot case-table 2 nil)
+ (set-standard-case-table case-table))
(let ((list (buffer-list)))
(while list
(with-current-buffer (car list)
;; Don't invoke fontset-related functions if fontsets aren't
;; supported in this build of Emacs.
(when (fboundp 'fontset-list)
- (let ((overriding-fontspec (get-language-info language-name
+ (let ((overriding-fontspec (get-language-info language-name
'overriding-fontspec)))
(if overriding-fontspec
(set-overriding-fontspec-internal overriding-fontspec))))
(if (functionp func)
(funcall func)))
(if (and utf-translate-cjk-mode
- utf-translate-cjk-lang-env
(not (eq utf-translate-cjk-lang-env language-name))
(catch 'tag
(dolist (charset (get-language-info language-name 'charset))
;; different there.
(or (and (eq window-system 'pc) (not default-enable-multibyte-characters))
(progn
- ;; Make non-line-break space display as a plain space.
- ;; Most X fonts do the wrong thing for code 160.
- (aset standard-display-table 160 [32])
- ;; With luck, non-Latin-1 fonts are more recent and so don't
- ;; have this bug.
- (aset standard-display-table (make-char 'latin-iso8859-1 160) [32])
+ ;; Most X fonts used to do the wrong thing for latin-1 code 160.
+ (unless (and (eq window-system 'x)
+ ;; XFree86 4 has fixed the fonts.
+ (string= "The XFree86 Project, Inc" (x-server-vendor))
+ (> (aref (number-to-string (nth 2 (x-server-version))) 0)
+ ?3))
+ ;; Make non-line-break space display as a plain space.
+ (aset standard-display-table 160 [32]))
;; Most Windows programs send out apostrophes as \222. Most X fonts
;; don't contain a character at that position. Map it to the ASCII
;; apostrophe. [This is actually RIGHT SINGLE QUOTATION MARK,
;; fonts probably have the appropriate glyph at this position,
;; so they could use standard-display-8bit. It's better to use a
;; proper windows-1252 coding system. --fx]
- (aset standard-display-table 146 [39])
- ;; XFree86 4 has changed most of the fonts from their designed
- ;; versions such that `' no longer appears as balanced quotes.
- ;; Assume it has iso10646 fonts installed, so we can display
- ;; balanced quotes.
- (when (and (eq window-system 'x)
- (string= "The XFree86 Project, Inc" (x-server-vendor))
- (> (aref (number-to-string (nth 2 (x-server-version))) 0)
- ?3))
- ;; We suppress these setting for the moment because the
- ;; above assumption is wrong.
- ;; (aset standard-display-table ?' [?’])
- ;; (aset standard-display-table ?` [?‘])
- ;; The fonts don't have the relevant bug.
- (aset standard-display-table 160 nil)
- (aset standard-display-table (make-char 'latin-iso8859-1 160)
- nil)))))
+ (aset standard-display-table 146 [39]))))
(defun set-language-environment-coding-systems (language-name
&optional eol-type)
(while priority
(set (car categories) (car priority))
(setq priority (cdr priority) categories (cdr categories)))
+ ;; Changing the binding of a coding category requires this call.
(update-coding-systems-internal)))))
(defsubst princ-list (&rest args)
(interactive
(list (read-language-name
'documentation
- "Describe language environment (default, current choice): ")))
+ "Describe language environment (default current choice): ")))
(if (null language-name)
(setq language-name current-language-environment))
(if (or (null language-name)
(setq language-name (symbol-name language-name)))
(dolist (feature (get-language-info language-name 'features))
(require feature))
- (let ((doc (get-language-info language-name 'documentation))
- pos)
+ (let ((doc (get-language-info language-name 'documentation)))
(help-setup-xref (list #'describe-language-environment language-name)
(interactive-p))
(with-output-to-temp-buffer (help-buffer)
(l (copy-sequence input-method-alist)))
(insert "Input methods")
(when input-method
- (insert " (default, " input-method ")")
+ (insert " (default " input-method ")")
(setq input-method (assoc input-method input-method-alist))
(setq l (cons input-method (delete input-method l))))
(insert ":\n")
;; and Chinese are exceptions, which are listed in the
;; non-standard section at the bottom of locale-language-names.
- ; aa Afar
- ; ab Abkhazian
+ ("aa_DJ" . "Latin-1") ; Afar
+ ("aa" . "UTF-8")
+ ;; ab Abkhazian
("af" . "Latin-1") ; Afrikaans
- ("am" . "Ethiopic") ; Amharic
+ ("am" "Ethiopic" utf-8) ; Amharic
+ ("an" . "Latin-9") ; Aragonese
; ar Arabic glibc uses 8859-6
; as Assamese
; ay Aymara
- ; az Azerbaijani
+ ("az" . "UTF-8") ; Azerbaijani
; ba Bashkir
- ("be" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]
- ("bg" . "Bulgarian") ; Bulgarian
+ ("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
+ ("bg" "Bulgarian" cp1251) ; Bulgarian
; bh Bihari
; bi Bislama
- ; bn Bengali, Bangla
+ ("bn" . "UTF-8") ; Bengali, Bangla
("bo" . "Tibetan")
("br" . "Latin-1") ; Breton
("bs" . "Latin-2") ; Bosnian
+ ("byn" . "UTF-8") ; Bilin; Blin
("ca" . "Latin-1") ; Catalan
; co Corsican
- ("cs" . "Czech")
- ("cy" . "Welsh") ; Welsh [glibc uses Latin-8. Did this change?]
+ ("cs" "Czech" iso-8859-2)
+ ("cy" "Welsh" iso-8859-14)
("da" . "Latin-1") ; Danish
- ("de" . "German")
+ ("de" "German" iso-8859-1)
; dz Bhutani
- ("el" . "Greek")
+ ("el" "Greek" iso-8859-7)
;; Users who specify "en" explicitly typically want Latin-1, not ASCII.
;; That's actually what the GNU locales define, modulo things like
;; en_IN -- fx.
- ("en" . "Latin-1") ; English
+ ("en_IN" "English" utf-8) ; glibc uses utf-8 for English in India
+ ("en" "English" iso-8859-1) ; English
("eo" . "Latin-3") ; Esperanto
- ("es" . "Spanish")
- ("et" . "Latin-4") ; Estonian
+ ("es" "Spanish" iso-8859-1)
+ ("et" . "Latin-1") ; Estonian
("eu" . "Latin-1") ; Basque
- ; fa Persian glibc uses utf-8
+ ("fa" . "UTF-8") ; Persian
("fi" . "Latin-1") ; Finnish
- ; fj Fiji
+ ("fj" . "Latin-1") ; Fiji
("fo" . "Latin-1") ; Faroese
- ("fr" . "French") ; French
+ ("fr" "French" iso-8859-1) ; French
("fy" . "Latin-1") ; Frisian
("ga" . "Latin-1") ; Irish Gaelic (new orthography)
- ("gd" . "Latin-1") ; Scots Gaelic
- ("gl" . "Latin-1") ; Galician
+ ("gd" . "Latin-9") ; Scots Gaelic
+ ("gez" "Ethiopic" utf-8) ; Geez
+ ("gl" . "Latin-1") ; Gallegan; Galician
; gn Guarani
- ; gu Gujarati
- ("gv" . "Latin-8") ; Manx Gaelic glibc uses 8859-1
+ ("gu" . "UTF-8") ; Gujarati
+ ("gv" . "Latin-1") ; Manx Gaelic
; ha Hausa
- ("he" . "Hebrew")
- ("hi" . "Devanagari") ; Hindi glibc uses utf-8
- ("hr" . "Croatian") ; Croatian
+ ("he" "Hebrew" iso-8859-8)
+ ("hi" "Devanagari" utf-8) ; Hindi
+ ("hr" "Croatian" iso-8859-2) ; Croatian
("hu" . "Latin-2") ; Hungarian
; hy Armenian
; ia Interlingua
; ie Interlingue
; ik Inupiak
("is" . "Latin-1") ; Icelandic
- ("it" . "Italian") ; Italian
+ ("it" "Italian" iso-8859-1) ; Italian
; iu Inuktitut
- ("ja" . "Japanese")
+ ("iw" "Hebrew" iso-8859-8)
+ ("ja" "Japanese" euc-jp)
; jw Javanese
- ("ka" . "Georgian") ; Georgian
+ ("ka" "Georgian" georgian-ps) ; Georgian
; kk Kazakh
("kl" . "Latin-1") ; Greenlandic
; km Cambodian
- ; kn Kannada
- ("ko" . "Korean")
+ ("kn" "Kannada" utf-8)
+ ("ko" "Korean" euc-kr)
; ks Kashmiri
; ku Kurdish
("kw" . "Latin-1") ; Cornish
; ky Kirghiz
("la" . "Latin-1") ; Latin
("lb" . "Latin-1") ; Luxemburgish
+ ("lg" . "Laint-6") ; Ganda
; ln Lingala
- ("lo" . "Lao") ; Laothian
- ("lt" . "Lithuanian")
+ ("lo" "Lao" utf-8) ; Laothian
+ ("lt" "Lithuanian" iso-8859-13)
("lv" . "Latvian") ; Latvian, Lettish
; mg Malagasy
("mi" . "Latin-7") ; Maori
- ("mk" . "Cyrillic-ISO") ; Macedonian
- ; ml Malayalam
- ; mn Mongolian
+ ("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian
+ ("ml" "Malayalam" utf-8)
+ ("mn" . "UTF-8") ; Mongolian
; mo Moldavian
- ("mr" . "Devanagari") ; Marathi glibc uses utf-8
+ ("mr" "Devanagari" utf-8) ; Marathi
("ms" . "Latin-1") ; Malay
("mt" . "Latin-3") ; Maltese
; my Burmese
; na Nauru
- ("ne" . "Devanagari") ; Nepali
- ("nl" . "Dutch")
+ ("nb" . "Latin-1") ; Norwegian
+ ("ne" "Devanagari" utf-8) ; Nepali
+ ("nl" "Dutch" iso-8859-1)
("no" . "Latin-1") ; Norwegian
("oc" . "Latin-1") ; Occitan
- ; om (Afan) Oromo
+ ("om_ET" . "UTF-8") ; (Afan) Oromo
+ ("om" . "Latin-1") ; (Afan) Oromo
; or Oriya
- ; pa Punjabi
+ ("pa" . "UTF-8") ; Punjabi
("pl" . "Latin-2") ; Polish
; ps Pashto, Pushto
("pt" . "Latin-1") ; Portuguese
; qu Quechua
("rm" . "Latin-1") ; Rhaeto-Romanic
; rn Kirundi
- ("ro" . "Romanian")
- ("ru.*[_.]koi8" . "Russian")
- ("ru" . "Cyrillic-ISO") ; Russian
+ ("ro" "Romanian" iso-8859-2)
+ ("ru_RU" "Russian" iso-8859-5)
+ ("ru_UA" "Russian" koi8-u)
; rw Kinyarwanda
("sa" . "Devanagari") ; Sanskrit
; sd Sindhi
- ; se Northern Sami
+ ("se" . "UTF-8") ; Northern Sami
; sg Sangho
("sh" . "Latin-2") ; Serbo-Croatian
; si Sinhalese
- ("sk" . "Slovak")
- ("sl" . "Slovenian")
+ ("sid" . "UTF-8") ; Sidamo
+ ("sk" "Slovak" iso-8859-2)
+ ("sl" "Slovenian" iso-8859-2)
; sm Samoan
; sn Shona
- ; so Somali
+ ("so_ET" "UTF-8") ; Somali
+ ("so" "Latin-1") ; Somali
("sq" . "Latin-1") ; Albanian
+ ("sr_YU@cyrillic" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet)
("sr" . "Latin-2") ; Serbian (Latin alphabet)
- ("sr_YU@cyrillic" . "Cyrillic-ISO") ; per glibc
; ss Siswati
- ; st Sesotho
+ ("st" . "Latin-1") ; Sesotho
; su Sundanese
- ("sv" . "Swedish") ; Swedish
+ ("sv" "Swedish" iso-8859-1) ; Swedish
("sw" . "Latin-1") ; Swahili
- ; ta Tamil glibc uses utf-8
- ; te Telugu glibc uses utf-8
- ("tg" . "Tajik")
- ("th" . "Thai")
- ; ti Tigrinya
+ ("ta" "Tamil" utf-8)
+ ("te" . "UTF-8") ; Telugu
+ ("tg" "Tajik" koi8-t)
+ ("th" "Thai" tis-620)
+ ("ti" "Ethiopic" utf-8) ; Tigrinya
+ ("tig_ER" . "UTF-8") ; Tigre
; tk Turkmen
("tl" . "Latin-1") ; Tagalog
; tn Setswana
; to Tonga
- ("tr" . "Turkish")
+ ("tr" "Turkish" iso-8859-9)
; ts Tsonga
- ; tt Tatar
+ ("tt" . "UTF-8") ; Tatar
; tw Twi
; ug Uighur
- ("uk" . "Ukrainian") ; Ukrainian
- ; ur Urdu glibc uses utf-8
+ ("uk" "Ukrainian" koi8-u)
+ ("ur" . "UTF-8") ; Urdu
+ ("uz_UZ@cyrillic" . "UTF-8"); Uzbek
("uz" . "Latin-1") ; Uzbek
- ("vi" . "Vietnamese") ; glibc uses utf-8
+ ("vi" "Vietnamese" utf-8)
; vo Volapuk
("wa" . "Latin-1") ; Walloon
; wo Wolof
- ; xh Xhosa
+ ("xh" . "Latin-1") ; Xhosa
("yi" . "Windows-1255") ; Yiddish
; yo Yoruba
; za Zhuang
-
- ; glibc:
+ ("zh_HK" . "Chinese-Big5")
+ ("zh_TW" . "Chinese-Big5")
+ ("zh_CN" . "Chinese-GB")
+ ("zh" . "Chinese-GB")
; zh_CN.GB18030/GB18030 \
; zh_CN.GBK/GBK \
; zh_HK/BIG5-HKSCS \
-
- ("zh.*[._]big5" . "Chinese-BIG5")
- ("zh.*[._]gbk" . nil) ; Solaris 2.7; has gbk-0 as well as GB 2312.1980-0
- ("zh_tw" . "Chinese-CNS") ; glibc uses big5
- ("zh_tw[._]euc-tw" . "Chinese-EUC-TW")
- ("zh" . "Chinese-GB")
- ; zu Zulu
+ ("zu" . "Latin-1") ; Zulu
;; ISO standard locales
("c$" . "ASCII")
("chs" . "Chinese-GB") ; MS Windows Chinese Simplified
("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
))
- "List of pairs of locale regexps and language names.
-The first element whose locale regexp matches the start of a downcased locale
-specifies the language name corresponding to that locale.
-If the language name is nil, there is no corresponding language environment.")
+ "Alist of locale regexps vs the corresponding languages and coding systems.
+Each element has this form:
+ \(LOCALE-REGEXP LANG-ENV CODING-SYSTEM)
+The first element whose LOCALE-REGEXP matches the start of a
+downcased locale specifies the LANG-ENV \(language environment)
+and CODING-SYSTEM corresponding to that locale. If there is no
+appropriate language environment, the element may have this form:
+ \(LOCALE-REGEXP . LANG-ENV)
+In this case, LANG-ENV is one of the generic language environments for a
+specific encoding such as \"Latin-1\" and \"UTF-8\".")
(defconst locale-charset-language-names
(purecopy
"List of pairs of locale regexps and charset language names.
The first element whose locale regexp matches the start of a downcased locale
specifies the language name whose charset corresponds to that locale.
-This language name is used if its charsets disagree with the charsets of
-the language name that would otherwise be used for this locale.")
+This language name is used if the locale is not listed in
+`locale-language-names'.")
(defconst locale-preferred-coding-systems
(purecopy
- '(("ja.*[._]euc" . japanese-iso-8bit)
+ '((".*8859[-_]?1\\>" . iso-8859-1)
+ (".*8859[-_]?2\\>" . iso-8859-2)
+ (".*8859[-_]?3\\>" . iso-8859-3)
+ (".*8859[-_]?4\\>" . iso-8859-4)
+ (".*8859[-_]?9\\>" . iso-8859-9)
+ (".*8859[-_]?14\\>" . iso-8859-14)
+ (".*8859[-_]?15\\>" . iso-8859-15)
+ (".*utf\\(?:-?8\\)?" . utf-8)
+ ;; utf-8@euro exists, so put this after utf-8. (@euro really
+ ;; specifies the currency, rather than the charset.)
+ (".*@euro" . iso-8859-15)
+ ("koi8-?r" . koi8-r)
+ ("koi8-?u" . koi8-u)
+ ("tcvn" . tcvn)
+ ("big5" . big5)
+ ("euc-?tw" . euc-tw)
+ ;; We don't support GBK, but as it is upward compatible with
+ ;; GB-2312, we set up the default coding system as gb2312.
+ ("gbk" . gb2312)
+ ;; We don't support BIG5-HKSCS, but as it is upward compatible with
+ ;; BIG5, we set up the default coding system as big5.
+ ("big5hkscs" . big5)
+ ("ja.*[._]euc" . japanese-iso-8bit)
("ja.*[._]jis7" . iso-2022-jp)
("ja.*[._]pck" . japanese-shift-jis)
("ja.*[._]sjis" . japanese-shift-jis)
("jpn" . japanese-shift-jis) ; MS-Windows uses this.
- (".*[._]utf" . utf-8)))
+ ))
"List of pairs of locale regexps and preferred coding systems.
The first element whose locale regexp matches the start of a downcased locale
-specifies the coding system to prefer when using that locale.")
+specifies the coding system to prefer when using that locale.
+This coding system is used if the locale specifies a specific charset.")
(defun locale-name-match (key alist)
"Search for KEY in ALIST, which should be a list of regexp-value pairs.
;; to a system without X.
(setq locale-translation-file-name
(let ((files
- '("/usr/lib/X11/locale/locale.alias" ; e.g. X11R6.4
+ '("/usr/share/X11/locale/locale.alias" ; e.g. X11R7
+ "/usr/lib/X11/locale/locale.alias" ; e.g. X11R6.4
"/usr/X11R6/lib/X11/locale/locale.alias" ; XFree86, e.g. RedHat 4.2
"/usr/openwin/lib/locale/locale.alias" ; e.g. Solaris 2.6
;;
(= 0 (length locale))) ; nil or empty string
(setq locale (getenv (pop vars))))))
+ (unless locale
+ ;; The two tests are kept separate so the byte-compiler sees
+ ;; that mac-get-preference is only called after checking its existence.
+ (when (fboundp 'mac-get-preference)
+ (setq locale (mac-get-preference "AppleLocale"))
+ (unless locale
+ (let ((languages (mac-get-preference "AppleLanguages")))
+ (unless (= (length languages) 0) ; nil or empty vector
+ (setq locale (aref languages 0)))))))
+ (unless (or locale (not (boundp 'mac-system-locale)))
+ (setq locale mac-system-locale))
+
(when locale
;; Translate "swedish" into "sv_SE.ISO8859-1", and so on,
(when locale
(if (string-match "\\.\\([^@]+\\)" locale)
(locale-charset-to-coding-system
- (match-string 1 locale)))))))
-
- ;; Give preference to charset-language-name over language-name.
- (if (and charset-language-name
- (not
- (equal (get-language-info language-name 'charset)
- (get-language-info charset-language-name 'charset))))
- (setq language-name charset-language-name))
+ (match-string 1 locale))))
+ (and (eq system-type 'macos) mac-system-coding-system))))
+
+ (if (consp language-name)
+ ;; locale-language-names specifies both lang-env and coding.
+ ;; But what is specified in locale-preferred-coding-systems
+ ;; has higher priority.
+ (setq coding-system (or coding-system
+ (nth 1 language-name))
+ language-name (car language-name))
+ ;; Otherwise, if locale is not listed in locale-language-names,
+ ;; use what is listed in locale-charset-language-names.
+ (if (not language-name)
+ (setq language-name charset-language-name)))
(when language-name
;; we are using single-byte characters,
;; so the display table and terminal coding system are irrelevant.
(when default-enable-multibyte-characters
- (set-display-table-and-terminal-coding-system language-name))
+ (set-display-table-and-terminal-coding-system
+ language-name coding-system))
;; Set the `keyboard-coding-system' if appropriate (tty
;; only). At least X and MS Windows can generate
(setq locale-coding-system
(car (get-language-info language-name 'coding-priority))))
- (when coding-system
+ (when (and coding-system
+ (not (coding-system-equal coding-system
+ locale-coding-system)))
(prefer-coding-system coding-system)
(setq locale-coding-system coding-system))))
- ;; On Windows, override locale-coding-system, keyboard-coding-system,
- ;; selection-coding-system with system codepage.
+ ;; On Windows, override locale-coding-system,
+ ;; keyboard-coding-system with system codepage. Note:
+ ;; selection-coding-system is already set in w32select.c.
(when (boundp 'w32-ansi-code-page)
(let ((code-page-coding (intern (format "cp%d" w32-ansi-code-page))))
(when (coding-system-p code-page-coding)
(setq locale-coding-system code-page-coding)
- (set-selection-coding-system code-page-coding)
(set-keyboard-coding-system code-page-coding)
(set-terminal-coding-system code-page-coding))))
+ (when (eq system-type 'darwin)
+ ;; On Darwin, file names are always encoded in utf-8, no matter
+ ;; the locale.
+ (setq default-file-name-coding-system 'utf-8)
+ ;; Mac OS X's Terminal.app by default uses utf-8 regardless of
+ ;; the locale.
+ (when (and (null window-system)
+ (equal (getenv "TERM_PROGRAM") "Apple_Terminal"))
+ (set-terminal-coding-system 'utf-8)
+ (set-keyboard-coding-system 'utf-8)))
+
;; Default to A4 paper if we're not in a C, POSIX or US locale.
;; (See comments in Flocale_info.)
(let ((locale locale)
(if (and coding-system (eq (coding-system-type coding-system) 2))
;; Try to get a pretty description for ISO 2022 escape sequences.
(function (lambda (x) (or (cdr (assq x iso-2022-control-alist))
- (format "0x%02X" x))))
- (function (lambda (x) (format "0x%02X" x))))
+ (format "#x%02X" x))))
+ (function (lambda (x) (format "#x%02X" x))))
str " "))
(defun encode-coding-char (char coding-system)