-;;; mule-cmds.el --- commands for mulitilingual environment
-;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
-;; Licensed to the Free Software Foundation.
-;; Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+;;; mule-cmds.el --- commands for mulitilingual environment -*-coding: iso-2022-7bit -*-
+
+;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
+;; Free Software Foundation, Inc.
+;; Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
+;; National Institute of Advanced Industrial Science and Technology (AIST)
+;; Registration Number H14PRO021
;; Keywords: mule, multilingual
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
;;; Commentary:
;;; Code:
-(eval-when-compile (defvar dos-codepage))
+(eval-when-compile
+ (defvar dos-codepage)
+ (autoload 'widget-value "wid-edit"))
+
+(defvar mac-system-coding-system)
+(defvar mac-system-locale)
;;; MULE related key bindings and menus.
(define-key-after mule-menu-keymap [set-language-environment]
(list 'menu-item "Set Language Environment" setup-language-environment-map
:help "Multilingual environment suitable for a specific language"))
-(define-key-after mule-menu-keymap [mouse-set-font]
- '(menu-item "Set Font/Fontset" mouse-set-font
- :visible (fboundp 'generate-fontset-menu)
- :help "Select a font from list of known fonts/fontsets"))
(define-key-after mule-menu-keymap [separator-mule]
'("--")
t)
t)
(define-key-after mule-menu-keymap [set-various-coding-system]
(list 'menu-item "Set Coding Systems" set-coding-system-map
- :enable 'enable-multibyte-characters))
+ :enable 'default-enable-multibyte-characters))
(define-key-after mule-menu-keymap [view-hello-file]
'(menu-item "Show Multi-lingual Text" view-hello-file
:enable (file-readable-p
buffer-file-coding-system)))
(list (read-coding-system
(if default
- (format "Coding system for following command (default, %s): " default)
+ (format "Coding system for following command (default %s): " default)
"Coding system for following command: ")
default))))
(let* ((keyseq (read-key-sequence
(not (eq cmd 'universal-argument-other-key)))
(let ((current-prefix-arg prefix-arg)
;; Have to bind `last-command-char' here so that
- ;; `digit-argument', for isntance, can compute the
+ ;; `digit-argument', for instance, can compute the
;; prefix arg.
(last-command-char (aref keyseq 0)))
(call-interactively cmd)))
- ;; This is the final call to `univeral-argument-other-key', which
+ ;; This is the final call to `universal-argument-other-key', which
;; set's the final `prefix-arg.
(let ((current-prefix-arg prefix-arg))
(call-interactively cmd))
o coding system of a newly created buffer
o default coding system for subprocess I/O
This also sets the following values:
- o default value used as `file-name-coding-system' for converting file names.
+ o default value used as `file-name-coding-system' for converting file names
+ if CODING-SYSTEM is ASCII-compatible.
o default value for the command `set-terminal-coding-system' (not on MSDOS)
- o default value for the command `set-keyboard-coding-system'."
+ o default value for the command `set-keyboard-coding-system'
+ if CODING-SYSTEM is ASCII-compatible.."
(check-coding-system coding-system)
(setq-default buffer-file-coding-system coding-system)
- (if default-enable-multibyte-characters
+ (if (fboundp 'ucs-set-table-for-input)
+ (dolist (buffer (buffer-list))
+ (or (local-variable-p 'buffer-file-coding-system buffer)
+ (ucs-set-table-for-input buffer))))
+
+ (if (and default-enable-multibyte-characters (not (eq system-type 'darwin))
+ (or (not coding-system)
+ (not (coding-system-get coding-system 'ascii-incompatible))))
+ ;; The file-name coding system on Darwin systems is always utf-8.
(setq default-file-name-coding-system coding-system))
;; If coding-system is nil, honor that on MS-DOS as well, so
;; that they could reset the terminal coding system.
(unless (and (eq window-system 'pc) coding-system)
(setq default-terminal-coding-system coding-system))
- (setq default-keyboard-coding-system coding-system)
+ (if (or (not coding-system)
+ (not (coding-system-get coding-system 'ascii-incompatible)))
+ (setq default-keyboard-coding-system coding-system))
;; Preserve eol-type from existing default-process-coding-systems.
;; On non-unix-like systems in particular, these may have been set
;; carefully by the user, or by the startup code, to deal with the
(setq default-process-coding-system
(cons output-coding input-coding))))
-(defalias 'update-iso-coding-systems 'update-coding-systems-internal)
-(make-obsolete 'update-iso-coding-systems 'update-coding-systems-internal "20.3")
-
(defun prefer-coding-system (coding-system)
"Add CODING-SYSTEM at the front of the priority list for automatic detection.
This also sets the following coding systems:
;; CODING-SYSTEM is no-conversion or undecided.
(error "Can't prefer the coding system `%s'" coding-system))
(set coding-category (or base coding-system))
+ ;; Changing the binding of a coding category requires this call.
(update-coding-systems-internal)
(or (eq coding-category (car coding-category-list))
;; We must change the order.
where
CHARSET is a character set,
COUNT is a number of characters,
- CHARs are found characters of the character set.
+ CHARs are the characters found from the character set.
Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list.
Optional 4th arg EXCLUDE is a list of character sets to be ignored.
(interactive
(list (let ((default (or buffer-file-coding-system 'us-ascii)))
(read-coding-system
- (format "Coding-system (default, %s): " default)
+ (format "Coding-system (default %s): " default)
default))))
(let ((pos (unencodable-char-position (point) (point-max) coding-system)))
(if pos
only if the user was explicitly asked and specified a coding system.")
(defvar select-safe-coding-system-accept-default-p nil
- "If non-nil, a function to control the behaviour of coding system selection.
+ "If non-nil, a function to control the behavior of coding system selection.
The meaning is the same as the argument ACCEPT-DEFAULT-P of the
function `select-safe-coding-system' (which see). This variable
overrides that argument.")
+(defun select-safe-coding-system-interactively (from to codings unsafe
+ &optional rejected default)
+ "Select interactively a coding system for the region FROM ... TO.
+FROM can be a string, as in `write-region'.
+CODINGS is the list of base coding systems known to be safe for this region,
+ typically obtained with `find-coding-systems-region'.
+UNSAFE is a list of coding systems known to be unsafe for this region.
+REJECTED is a list of coding systems which were safe but for some reason
+ were not recommended in the particular context.
+DEFAULT is the coding system to use by default in the query."
+ ;; At first, if some defaults are unsafe, record at most 11
+ ;; problematic characters and their positions for them by turning
+ ;; (CODING ...)
+ ;; into
+ ;; ((CODING (POS . CHAR) (POS . CHAR) ...) ...)
+ (if unsafe
+ (setq unsafe
+ (mapcar #'(lambda (coding)
+ (cons coding
+ (if (stringp from)
+ (mapcar #'(lambda (pos)
+ (cons pos (aref from pos)))
+ (unencodable-char-position
+ 0 (length from) coding
+ 11 from))
+ (mapcar #'(lambda (pos)
+ (cons pos (char-after pos)))
+ (unencodable-char-position
+ from to coding 11)))))
+ unsafe)))
+
+ ;; Change each safe coding system to the corresponding
+ ;; mime-charset name if it is also a coding system. Such a name
+ ;; is more friendly to users.
+ (let ((l codings)
+ mime-charset)
+ (while l
+ (setq mime-charset (coding-system-get (car l) 'mime-charset))
+ (if (and mime-charset (coding-system-p mime-charset))
+ (setcar l mime-charset))
+ (setq l (cdr l))))
+
+ ;; Don't offer variations with locking shift, which you
+ ;; basically never want.
+ (let (l)
+ (dolist (elt codings (setq codings (nreverse l)))
+ (unless (or (eq 'coding-category-iso-7-else
+ (coding-system-category elt))
+ (eq 'coding-category-iso-8-else
+ (coding-system-category elt)))
+ (push elt l))))
+
+ ;; Remove raw-text, emacs-mule and no-conversion unless nothing
+ ;; else is available.
+ (setq codings
+ (or (delq 'raw-text
+ (delq 'emacs-mule
+ (delq 'no-conversion codings)))
+ '(raw-text emacs-mule no-conversion)))
+
+ (let ((window-configuration (current-window-configuration))
+ (bufname (buffer-name))
+ coding-system)
+ (save-excursion
+ ;; If some defaults are unsafe, make sure the offending
+ ;; buffer is displayed.
+ (when (and unsafe (not (stringp from)))
+ (pop-to-buffer bufname)
+ (goto-char (apply 'min (mapcar #'(lambda (x) (car (cadr x)))
+ unsafe))))
+ ;; Then ask users to select one from CODINGS while showing
+ ;; the reason why none of the defaults are not used.
+ (with-output-to-temp-buffer "*Warning*"
+ (with-current-buffer standard-output
+ (if (and (null rejected) (null unsafe))
+ (insert "No default coding systems to try for "
+ (if (stringp from)
+ (format "string \"%s\"." from)
+ (format "buffer `%s'." bufname)))
+ (insert
+ "These default coding systems were tried to encode"
+ (if (stringp from)
+ (concat " \"" (if (> (length from) 10)
+ (concat (substring from 0 10) "...\"")
+ (concat from "\"")))
+ (format " text\nin the buffer `%s'" bufname))
+ ":\n")
+ (let ((pos (point))
+ (fill-prefix " "))
+ (dolist (x (append rejected unsafe))
+ (princ " ") (princ (car x)))
+ (insert "\n")
+ (fill-region-as-paragraph pos (point)))
+ (when rejected
+ (insert "These safely encodes the target text,
+but it is not recommended for encoding text in this context,
+e.g., for sending an email message.\n ")
+ (dolist (x rejected)
+ (princ " ") (princ x))
+ (insert "\n"))
+ (when unsafe
+ (insert (if rejected "And the others"
+ "However, each of them")
+ " encountered these problematic characters:\n")
+ (dolist (coding unsafe)
+ (insert (format " %s:" (car coding)))
+ (let ((i 0)
+ (func1
+ #'(lambda (bufname pos)
+ (when (buffer-live-p (get-buffer bufname))
+ (pop-to-buffer bufname)
+ (goto-char pos))))
+ (func2
+ #'(lambda (bufname pos coding)
+ (when (buffer-live-p (get-buffer bufname))
+ (pop-to-buffer bufname)
+ (if (< (point) pos)
+ (goto-char pos)
+ (forward-char 1)
+ (search-unencodable-char coding)
+ (forward-char -1))))))
+ (dolist (elt (cdr coding))
+ (insert " ")
+ (if (stringp from)
+ (insert (if (< i 10) (cdr elt) "..."))
+ (if (< i 10)
+ (insert-text-button
+ (cdr elt)
+ :type 'help-xref
+ 'help-echo
+ "mouse-2, RET: jump to this character"
+ 'help-function func1
+ 'help-args (list bufname (car elt)))
+ (insert-text-button
+ "..."
+ :type 'help-xref
+ 'help-echo
+ "mouse-2, RET: next unencodable character"
+ 'help-function func2
+ 'help-args (list bufname (car elt)
+ (car coding)))))
+ (setq i (1+ i))))
+ (insert "\n"))
+ (insert "\
+The first problematic character is at point in the displayed buffer,\n"
+ (substitute-command-keys "\
+and \\[universal-argument] \\[what-cursor-position] will give information about it.\n"))))
+ (insert "\nSelect \
+one of the following safe coding systems, or edit the buffer:\n")
+ (let ((pos (point))
+ (fill-prefix " "))
+ (dolist (x codings)
+ (princ " ") (princ x))
+ (insert "\n")
+ (fill-region-as-paragraph pos (point)))
+ (insert "Or specify any other coding system
+at the risk of losing the problematic characters.\n")))
+
+ ;; Read a coding system.
+ (setq coding-system
+ (read-coding-system
+ (format "Select coding system (default %s): " default)
+ default))
+ (setq last-coding-system-specified coding-system))
+
+ (kill-buffer "*Warning*")
+ (set-window-configuration window-configuration)
+ coding-system))
+
(defun select-safe-coding-system (from to &optional default-coding-system
accept-default-p file)
"Ask a user to select a safe coding system from candidates.
between FROM and TO are shown in a popup window. Among them, the most
proper one is suggested as the default.
-The list of `buffer-file-coding-system' of the current buffer and the
+The list of `buffer-file-coding-system' of the current buffer,
+the `default-buffer-file-coding-system', and the
most preferred coding system (if it corresponds to a MIME charset) is
treated as the default coding system list. Among them, the first one
that safely encodes the text is normally selected silently and
`prefer-coding-system'.
However, the user is queried if the chosen coding system is
-inconsistent with what would be selected by `set-auto-coding' from
+inconsistent with what would be selected by `find-auto-coding' from
coding cookies &c. if the contents of the region were read from a
file. (That could lead to data corruption in a file subsequently
re-visited and edited.)
list of coding systems to be prepended to the default coding system
list. However, if DEFAULT-CODING-SYSTEM is a list and the first
element is t, the cdr part is used as the defualt coding system list,
-i.e. `buffer-file-coding-system' and the most prepended coding system
-is not used.
+i.e. `buffer-file-coding-system', `default-buffer-file-coding-system',
+and the most preferred coding system are not used.
Optional 4th arg ACCEPT-DEFAULT-P, if non-nil, is a function to
determine the acceptability of the silently selected coding system.
(not (listp default-coding-system)))
(setq default-coding-system (list default-coding-system)))
- (let ((no-other-defaults nil))
+ (let ((no-other-defaults nil)
+ auto-cs)
+ (unless (or (stringp from) find-file-literally)
+ ;; Find an auto-coding that is specified for the the current
+ ;; buffer and file from the region FROM and TO.
+ (save-excursion
+ (save-restriction
+ (widen)
+ (goto-char from)
+ (setq auto-cs (find-auto-coding (or file buffer-file-name "")
+ (- to from)))
+ (if auto-cs
+ (if (coding-system-p (car auto-cs))
+ (setq auto-cs (car auto-cs))
+ (display-warning
+ :warning
+ (format "\
+Invalid coding system `%s' is specified
+for the current buffer/file by the %s.
+It is highly recommended to fix it before writing to a file."
+ (car auto-cs)
+ (if (eq (cdr auto-cs) :coding) ":coding tag"
+ (format "variable `%s'" (cdr auto-cs)))))
+ (or (yes-or-no-p "Really proceed with writing? ")
+ (error "Save aborted"))
+ (setq auto-cs nil))))))
+
(if (eq (car default-coding-system) t)
(setq no-other-defaults t
default-coding-system (cdr default-coding-system)))
(mapcar (function (lambda (x) (cons x (coding-system-base x))))
default-coding-system))
+ (if (and auto-cs (not no-other-defaults))
+ ;; If the file has a coding cookie, try to use it before anything
+ ;; else (i.e. before default-coding-system which will typically come
+ ;; from file-coding-system-alist).
+ (let ((base (coding-system-base auto-cs)))
+ (or (memq base '(nil undecided))
+ (rassq base default-coding-system)
+ (push (cons auto-cs base) default-coding-system))))
+
+ ;; From now on, the list of defaults is reversed.
+ (setq default-coding-system (nreverse default-coding-system))
+
(unless no-other-defaults
;; If buffer-file-coding-system is not nil nor undecided, append it
;; to the defaults.
(let ((base (coding-system-base buffer-file-coding-system)))
(or (eq base 'undecided)
(rassq base default-coding-system)
- (setq default-coding-system
- (append default-coding-system
- (list (cons buffer-file-coding-system base)))))))
+ (push (cons buffer-file-coding-system base)
+ default-coding-system))))
+
+ ;; If default-buffer-file-coding-system is not nil nor undecided,
+ ;; append it to the defaults.
+ (if default-buffer-file-coding-system
+ (let ((base (coding-system-base default-buffer-file-coding-system)))
+ (or (eq base 'undecided)
+ (rassq base default-coding-system)
+ (push (cons default-buffer-file-coding-system base)
+ default-coding-system))))
;; If the most preferred coding system has the property mime-charset,
;; append it to the defaults.
(let ((tail coding-category-list)
preferred base)
- (while (and tail
- (not (setq preferred (symbol-value (car tail)))))
+ (while (and tail (not (setq preferred (symbol-value (car tail)))))
(setq tail (cdr tail)))
(and (coding-system-p preferred)
(setq base (coding-system-base preferred))
(coding-system-get preferred 'mime-charset)
(not (rassq base default-coding-system))
- (setq default-coding-system
- (append default-coding-system
- (list (cons preferred base))))))))
-
- (if select-safe-coding-system-accept-default-p
- (setq accept-default-p select-safe-coding-system-accept-default-p))
-
- (let ((codings (find-coding-systems-region from to))
- (coding-system nil)
- (bufname (buffer-name))
- safe rejected unsafe)
- (if (eq (car codings) 'undecided)
- ;; Any coding system is ok.
- (setq coding-system t)
- ;; Classify the defaults into safe, rejected, and unsafe.
- (dolist (elt default-coding-system)
- (if (memq (cdr elt) codings)
- (if (and (functionp accept-default-p)
- (not (funcall accept-default-p (cdr elt))))
- (push (car elt) rejected)
- (push (car elt) safe))
- (push (car elt) unsafe)))
- (if safe
- (setq coding-system (car (last safe)))))
-
- ;; If all the defaults failed, ask a user.
- (when (not coding-system)
- ;; At first, if some defaults are unsafe, record at most 11
- ;; problematic characters and their positions for them by turning
- ;; (CODING ...)
- ;; into
- ;; ((CODING (POS . CHAR) (POS . CHAR) ...) ...)
- (if unsafe
- (if (stringp from)
- (setq unsafe
- (mapcar #'(lambda (coding)
- (cons coding
- (mapcar #'(lambda (pos)
- (cons pos (aref from pos)))
- (unencodable-char-position
- 0 (length from) coding
- 11 from))))
- unsafe))
- (setq unsafe
- (mapcar #'(lambda (coding)
- (cons coding
- (mapcar #'(lambda (pos)
- (cons pos (char-after pos)))
- (unencodable-char-position
- from to coding 11))))
- unsafe))))
-
- ;; Change each safe coding system to the corresponding
- ;; mime-charset name if it is also a coding system. Such a name
- ;; is more friendly to users.
- (let ((l codings)
- mime-charset)
- (while l
- (setq mime-charset (coding-system-get (car l) 'mime-charset))
- (if (and mime-charset (coding-system-p mime-charset))
- (setcar l mime-charset))
- (setq l (cdr l))))
-
- ;; Don't offer variations with locking shift, which you
- ;; basically never want.
- (let (l)
- (dolist (elt codings (setq codings (nreverse l)))
- (unless (or (eq 'coding-category-iso-7-else
- (coding-system-category elt))
- (eq 'coding-category-iso-8-else
- (coding-system-category elt)))
- (push elt l))))
-
- ;; Remove raw-text, emacs-mule and no-conversion unless nothing
- ;; else is available.
- (setq codings
- (or (delq 'raw-text
- (delq 'emacs-mule
- (delq 'no-conversion codings)))
- '(raw-text emacs-mule no-conversion)))
-
- (let ((window-configuration (current-window-configuration)))
- (save-excursion
- ;; If some defaults are unsafe, make sure the offending
- ;; buffer is displayed.
- (when (and unsafe (not (stringp from)))
- (pop-to-buffer bufname)
- (goto-char (apply 'min (mapcar #'(lambda (x) (car (cadr x)))
- unsafe))))
- ;; Then ask users to select one from CODINGS while showing
- ;; the reason why none of the defaults are not used.
- (with-output-to-temp-buffer "*Warning*"
- (save-excursion
- (set-buffer standard-output)
- (if (not default-coding-system)
- (insert "No default coding systems to try for "
- (if (stringp from)
- (format "string \"%s\"." from)
- (format "buffer `%s'." bufname)))
- (insert
- "These default coding systems were tried to encode"
- (if (stringp from)
- (concat " \"" (if (> (length from) 10)
- (concat (substring from 0 10) "...\"")
- (concat from "\"")))
- (format " text\nin the buffer `%s'" bufname))
- ":\n")
- (let ((pos (point))
- (fill-prefix " "))
- (mapc #'(lambda (x) (princ " ") (princ (car x)))
- default-coding-system)
- (insert "\n")
- (fill-region-as-paragraph pos (point)))
- (when rejected
- (insert "These safely encodes the target text,
-but it is not recommended for encoding text in this context,
-e.g., for sending an email message.\n ")
- (mapc #'(lambda (x) (princ " ") (princ x)) rejected)
- (insert "\n"))
- (when unsafe
- (insert (if rejected "And the others"
- "However, each of them")
- " encountered these problematic characters:\n")
- (mapc
- #'(lambda (coding)
- (insert (format " %s:" (car coding)))
- (let ((i 0)
- (func1
- #'(lambda (bufname pos)
- (when (buffer-live-p (get-buffer bufname))
- (pop-to-buffer bufname)
- (goto-char pos))))
- (func2
- #'(lambda (bufname pos coding)
- (when (buffer-live-p (get-buffer bufname))
- (pop-to-buffer bufname)
- (if (< (point) pos)
- (goto-char pos)
- (forward-char 1)
- (search-unencodable-char coding)
- (forward-char -1))))))
- (dolist (elt (cdr coding))
- (insert " ")
- (if (stringp from)
- (insert (if (< i 10) (cdr elt) "..."))
- (if (< i 10)
- (insert-text-button
- (cdr elt)
- :type 'help-xref
- 'help-echo
- "mouse-2, RET: jump to this character"
- 'help-function func1
- 'help-args (list bufname (car elt)))
- (insert-text-button
- "..."
- :type 'help-xref
- 'help-echo
- "mouse-2, RET: next unencodable character"
- 'help-function func2
- 'help-args (list bufname (car elt)
- (car coding)))))
- (setq i (1+ i))))
- (insert "\n"))
- unsafe)
- (insert "\
-The first problematic character is at point in the displayed buffer,\n"
- (substitute-command-keys "\
-and \\[universal-argument] \\[what-cursor-position] will give information about it.\n"))))
- (insert (if safe
- "\nSelect the above, or "
- "\nSelect ")
- "\
-one of the following safe coding systems, or edit the buffer:\n")
- (let ((pos (point))
- (fill-prefix " "))
- (mapcar (function (lambda (x) (princ " ") (princ x)))
- codings)
- (insert "\n")
- (fill-region-as-paragraph pos (point)))
- (insert "Or specify any other coding system
-on your risk of losing the problematic characters.\n")))
-
- ;; Read a coding system.
- (setq default-coding-system (or (car safe) (car codings)))
- (setq coding-system
- (read-coding-system
- (format "Select coding system (default %s): "
- default-coding-system)
- default-coding-system))
- (setq last-coding-system-specified coding-system))
-
- (kill-buffer "*Warning*")
- (set-window-configuration window-configuration)))
-
- (if (vectorp (coding-system-eol-type coding-system))
- (let ((eol (coding-system-eol-type buffer-file-coding-system)))
- (if (numberp eol)
- (setq coding-system
- (coding-system-change-eol-conversion coding-system eol)))))
-
- (if (eq coding-system t)
- (setq coding-system buffer-file-coding-system))
- ;; Check we're not inconsistent with what `coding:' spec &c would
- ;; give when file is re-read.
- ;; But don't do this if we explicitly ignored the cookie
- ;; by using `find-file-literally'.
- (unless (or (stringp from) find-file-literally)
- (let ((auto-cs (save-excursion
- (save-restriction
- (widen)
- (narrow-to-region from to)
- (goto-char (point-min))
- (set-auto-coding (or file buffer-file-name "")
- (buffer-size))))))
- (if (and auto-cs coding-system
+ (push (cons preferred base)
+ default-coding-system))))
+
+ (if select-safe-coding-system-accept-default-p
+ (setq accept-default-p select-safe-coding-system-accept-default-p))
+
+ (let ((codings (find-coding-systems-region from to))
+ (coding-system nil)
+ safe rejected unsafe)
+ (if (eq (car codings) 'undecided)
+ ;; Any coding system is ok.
+ (setq coding-system t)
+ ;; Classify the defaults into safe, rejected, and unsafe.
+ (dolist (elt default-coding-system)
+ (if (memq (cdr elt) codings)
+ (if (and (functionp accept-default-p)
+ (not (funcall accept-default-p (cdr elt))))
+ (push (car elt) rejected)
+ (push (car elt) safe))
+ (push (car elt) unsafe)))
+ (if safe
+ (setq coding-system (car safe))))
+
+ ;; If all the defaults failed, ask a user.
+ (when (not coding-system)
+ (setq coding-system (select-safe-coding-system-interactively
+ from to codings unsafe rejected (car codings))))
+
+ (if (vectorp (coding-system-eol-type coding-system))
+ (let ((eol (coding-system-eol-type buffer-file-coding-system)))
+ (if (numberp eol)
+ (setq coding-system
+ (coding-system-change-eol-conversion coding-system eol)))))
+
+ (if (eq coding-system t)
+ (setq coding-system buffer-file-coding-system))
+ ;; Check we're not inconsistent with what `coding:' spec &c would
+ ;; give when file is re-read.
+ ;; But don't do this if we explicitly ignored the cookie
+ ;; by using `find-file-literally'.
+ (when (and auto-cs
+ (not (and
+ coding-system
+ (memq (coding-system-type coding-system) '(0 5)))))
+ ;; Merge coding-system and auto-cs as far as possible.
+ (if (not coding-system)
+ (setq coding-system auto-cs)
+ (if (not auto-cs)
+ (setq auto-cs coding-system)
+ (let ((eol-type-1 (coding-system-eol-type coding-system))
+ (eol-type-2 (coding-system-eol-type auto-cs)))
+ (if (eq (coding-system-base coding-system) 'undecided)
+ (setq coding-system (coding-system-change-text-conversion
+ coding-system auto-cs))
+ (if (eq (coding-system-base auto-cs) 'undecided)
+ (setq auto-cs (coding-system-change-text-conversion
+ auto-cs coding-system))))
+ (if (vectorp eol-type-1)
+ (or (vectorp eol-type-2)
+ (setq coding-system (coding-system-change-eol-conversion
+ coding-system eol-type-2)))
+ (if (vectorp eol-type-2)
+ (setq auto-cs (coding-system-change-eol-conversion
+ auto-cs eol-type-1)))))))
+
+ (if (and auto-cs
;; Don't barf if writing a compressed file, say.
;; This check perhaps isn't ideal, but is probably
;; the best thing to do.
(not (auto-coding-alist-lookup (or file buffer-file-name "")))
- (not (coding-system-equal (coding-system-base coding-system)
- (coding-system-base auto-cs))))
+ (not (coding-system-equal coding-system auto-cs)))
(unless (yes-or-no-p
(format "Selected encoding %s disagrees with \
%s specified by file contents. Really save (else edit coding cookies \
and try again)? " coding-system auto-cs))
- (error "Save aborted")))))
- coding-system))
+ (error "Save aborted"))))
+ coding-system)))
(setq select-safe-coding-system-function 'select-safe-coding-system)
;; We should never use no-conversion for outgoing mail.
(setq coding nil))
(if (fboundp select-safe-coding-system-function)
- (funcall select-safe-coding-system-function
- (point-min) (point-max) coding
- (function (lambda (x) (coding-system-get x 'mime-charset))))
- coding)))
+ (setq coding
+ (funcall select-safe-coding-system-function
+ (point-min) (point-max) coding
+ (function (lambda (x)
+ (coding-system-get x 'mime-charset))))))
+ (if coding
+ ;; Be sure to use LF for end-of-line.
+ (setq coding (coding-system-change-eol-conversion coding 'unix))
+ ;; No coding system is decided. Usually this is the case that
+ ;; the current buffer contains only ASCII. So, we hope
+ ;; iso-8859-1 works.
+ (setq coding 'iso-8859-1-unix))
+ coding))
\f
;;; Language support stuff.
environment.
features value is a list of features requested in this
language environment.
+ ctext-non-standard-encodings
+ value is a list of non-standard encoding
+ names used in extended segments of CTEXT.
+ See the variable
+ `ctext-non-standard-encodings' for more
+ detail.
The following keys take effect only when multibyte characters are
globally disabled, i.e. the value of `default-enable-multibyte-characters'
see `language-info-alist'."
(if (symbolp lang-env)
(setq lang-env (symbol-name lang-env)))
- (let ((lang-slot (assoc-ignore-case lang-env language-info-alist)))
+ (let ((lang-slot (assoc-string lang-env language-info-alist t)))
(if lang-slot
(cdr (assq key (cdr lang-slot))))))
see `language-info-alist'."
(if (symbolp lang-env)
(setq lang-env (symbol-name lang-env)))
+ (set-language-info-internal lang-env key info)
+ (if (equal lang-env current-language-environment)
+ (set-language-environment lang-env)))
+
+(defun set-language-info-internal (lang-env key info)
+ "Internal use only.
+Arguments are the same as `set-language-info'."
(let (lang-slot key-slot)
(setq lang-slot (assoc lang-env language-info-alist))
(if (null lang-slot) ; If no slot for the language, add it.
(define-key-after setup-map (vector (intern lang-env))
(cons lang-env 'setup-specified-language-environment) t)
- (while alist
- (set-language-info lang-env (car (car alist)) (cdr (car alist)))
- (setq alist (cdr alist)))))
+ (dolist (elt alist)
+ (set-language-info-internal lang-env (car elt) (cdr elt)))
+
+ (if (equal lang-env current-language-environment)
+ (set-language-environment lang-env))))
(defun read-language-name (key prompt &optional default)
"Read a language environment name which has information for KEY.
"*Default input method for multilingual text (a string).
This is the input method activated automatically by the command
`toggle-input-method' (\\[toggle-input-method])."
+ :link '(custom-manual "(emacs)Input Methods")
:group 'mule
- :type '(choice (const nil) string)
+ :type '(choice (const nil) (string
+ :completion-ignore-case t
+ :complete-function widget-string-complete
+ :completion-alist input-method-alist
+ :prompt-history input-method-history))
:set-after '(current-language-environment))
(put 'input-method-function 'permanent-local t)
current-input-method-title nil)
(force-mode-line-update)))))
-(defun set-input-method (input-method)
+(defun set-input-method (input-method &optional interactive)
"Select and activate input method INPUT-METHOD for the current buffer.
This also sets the default input method to the one you specify.
If INPUT-METHOD is nil, this function turns off the input method, and
also causes you to be prompted for a name of an input method the next
time you invoke \\[toggle-input-method].
+When called interactively, the optional arg INTERACTIVE is non-nil,
+which marks the variable `default-input-method' as set for Custom buffers.
To deactivate the input method interactively, use \\[toggle-input-method].
To deactivate it programmatically, use \\[inactivate-input-method]."
(let* ((default (or (car input-method-history) default-input-method)))
(list (read-input-method-name
(if default "Select input method (default %s): " "Select input method: ")
- default t))))
+ default t)
+ t)))
(activate-input-method input-method)
(setq default-input-method input-method)
- (when (interactive-p)
+ (when interactive
(customize-mark-as-set 'default-input-method))
default-input-method)
-(defun toggle-input-method (&optional arg)
+(defun toggle-input-method (&optional arg interactive)
"Enable or disable multilingual text input method for the current buffer.
Only one input method can be enabled at any time in a given buffer.
With a prefix argument, read an input method name with the minibuffer
and enable that one. The default is the most recent input method specified
-\(not including the currently active input method, if any)."
+\(not including the currently active input method, if any).
- (interactive "P")
+When called interactively, the optional arg INTERACTIVE is non-nil,
+which marks the variable `default-input-method' as set for Custom buffers."
+
+ (interactive "P\np")
(if (and current-input-method (not arg))
(inactivate-input-method)
(let ((default (or (car input-method-history) default-input-method)))
(unless default-input-method
(prog1
(setq default-input-method current-input-method)
- (when (interactive-p)
+ (when interactive
(customize-mark-as-set 'default-input-method)))))))
(defun describe-input-method (input-method)
"Describe input method INPUT-METHOD."
(interactive
(list (read-input-method-name
- "Describe input method (default, current choice): ")))
+ "Describe input method (default current choice): ")))
(if (and input-method (symbolp input-method))
(setq input-method (symbol-name input-method)))
(help-setup-xref (list #'describe-input-method
But, if this flag is non-nil, it displays them in echo area instead.")
(defvar input-method-exit-on-invalid-key nil
- "This flag controls the behaviour of an input method on invalid key input.
+ "This flag controls the behavior of an input method on invalid key input.
Usually, when a user types a key which doesn't start any character
handled by the input method, the key is handled by turning off the
input method temporarily. After that key, the input method is re-enabled.
:link '(custom-manual "(emacs)Language Environments")
:set (lambda (symbol value) (set-language-environment value))
:get (lambda (x)
- (or (car-safe (assoc-ignore-case
+ (or (car-safe (assoc-string
(if (symbolp current-language-environment)
(symbol-name current-language-environment)
current-language-environment)
- language-info-alist))
+ language-info-alist t))
"English"))
;; custom type will be updated with `set-language-info'.
:type (if language-info-alist
coding-category-iso-8-1 iso-latin-1
coding-category-iso-8-2 iso-latin-1
coding-category-utf-8 mule-utf-8
- coding-category-utf-16-be mule-utf-16-be-with-signature
- coding-category-utf-16-le mule-utf-16-le-with-signature
+ coding-category-utf-16-be mule-utf-16be-with-signature
+ coding-category-utf-16-le mule-utf-16le-with-signature
coding-category-iso-7-tight iso-2022-jp
coding-category-iso-7 iso-2022-7bit
coding-category-iso-7-else iso-2022-7bit-lock
coding-category-raw-text 'raw-text
coding-category-sjis 'japanese-shift-jis
coding-category-big5 'chinese-big5
- coding-category-utf-16-be 'mule-utf-16-be-with-signature
- coding-category-utf-16-le 'mule-utf-16-le-with-signature
+ coding-category-utf-16-be 'mule-utf-16be-with-signature
+ coding-category-utf-16-le 'mule-utf-16le-with-signature
coding-category-utf-8 'mule-utf-8
coding-category-ccl nil
coding-category-binary 'no-conversion)
coding-category-ccl
coding-category-binary))
+ ;; Changing the binding of a coding category requires this call.
(update-coding-systems-internal)
(set-default-coding-systems nil)
(setq default-sendmail-coding-system 'iso-latin-1)
+ ;; On Darwin systems, this should be utf-8, but when this file is loaded
+ ;; utf-8 is not yet defined, so we set it in set-locale-environment instead.
(setq default-file-name-coding-system 'iso-latin-1)
;; Preserve eol-type from existing default-process-coding-systems.
;; On non-unix-like systems in particular, these may have been set
;; (set-keyboard-coding-system-internal nil)
(setq nonascii-translation-table nil
- nonascii-insert-offset 0))
+ nonascii-insert-offset 0)
+
+ ;; Don't invoke fontset-related functions if fontsets aren't
+ ;; supported in this build of Emacs.
+ (and (fboundp 'fontset-list)
+ (set-overriding-fontspec-internal nil)))
(reset-language-environment)
-(defun set-display-table-and-terminal-coding-system (language-name)
+(defun set-display-table-and-terminal-coding-system (language-name &optional coding-system)
"Set up the display table and terminal coding system for LANGUAGE-NAME."
(let ((coding (get-language-info language-name 'unibyte-display)))
- (if coding
+ (if (and coding
+ (or (not coding-system)
+ (coding-system-equal coding coding-system)))
(standard-display-european-internal)
- (standard-display-default (if (eq window-system 'pc) 128 160) 255)
- (aset standard-display-table 146 nil))
+ ;; The following 2 lines undo the 8-bit display that we set up
+ ;; in standard-display-european-internal, which see. This is in
+ ;; case the user has used standard-display-european earlier in
+ ;; this session. (The MS-DOS port doesn't use that setup, so it
+ ;; doesn't need to undo it.)
+ (when standard-display-table
+ (dotimes (i 128)
+ (aset standard-display-table (+ i 128) nil))))
(or (eq window-system 'pc)
- (set-terminal-coding-system coding))))
+ (set-terminal-coding-system (or coding-system coding)))))
(defun set-language-environment (language-name)
"Set up multi-lingual environment for using LANGUAGE-NAME.
specifies the character set for the major languages of Western Europe."
(interactive (list (read-language-name
nil
- "Set language environment (default, English): ")))
+ "Set language environment (default English): ")))
(if language-name
(if (symbolp language-name)
(setq language-name (symbol-name language-name)))
(setq language-name "English"))
- (or (assoc-ignore-case language-name language-info-alist)
+ (let ((slot (assoc-string language-name language-info-alist t)))
+ (unless slot
(error "Language environment not defined: %S" language-name))
+ (setq language-name (car slot)))
(if current-language-environment
(let ((func (get-language-info current-language-environment
'exit-function)))
(load syntax nil t))
;; No information for syntax and case. Reset to the defaults.
(let ((syntax-table (standard-syntax-table))
- (case-table (standard-case-table))
+ (standard-table (standard-case-table))
+ (case-table (make-char-table 'case-table))
(ch (if (eq window-system 'pc) 128 160)))
(while (< ch 256)
(modify-syntax-entry ch " " syntax-table)
- (aset case-table ch ch)
(setq ch (1+ ch)))
+ (dotimes (i 128)
+ (aset case-table i (aref standard-table i)))
(set-char-table-extra-slot case-table 0 nil)
(set-char-table-extra-slot case-table 1 nil)
- (set-char-table-extra-slot case-table 2 nil))
- (set-standard-case-table (standard-case-table))
+ (set-char-table-extra-slot case-table 2 nil)
+ (set-standard-case-table case-table))
(let ((list (buffer-list)))
(while list
(with-current-buffer (car list)
(while required-features
(require (car required-features))
(setq required-features (cdr required-features))))
+
+ ;; Don't invoke fontset-related functions if fontsets aren't
+ ;; supported in this build of Emacs.
+ (when (fboundp 'fontset-list)
+ (let ((overriding-fontspec (get-language-info language-name
+ 'overriding-fontspec)))
+ (if overriding-fontspec
+ (set-overriding-fontspec-internal overriding-fontspec))))
+
(let ((func (get-language-info language-name 'setup-function)))
(if (functionp func)
(funcall func)))
+ (if (and utf-translate-cjk-mode
+ (not (eq utf-translate-cjk-lang-env language-name))
+ (catch 'tag
+ (dolist (charset (get-language-info language-name 'charset))
+ (if (memq charset utf-translate-cjk-charsets)
+ (throw 'tag t)))
+ nil))
+ (utf-translate-cjk-load-tables))
(run-hooks 'set-language-environment-hook)
(force-mode-line-update t))
;; different there.
(or (and (eq window-system 'pc) (not default-enable-multibyte-characters))
(progn
- ;; Make non-line-break space display as a plain space.
- ;; Most X fonts do the wrong thing for code 160.
- (aset standard-display-table 160 [32])
- ;; With luck, non-Latin-1 fonts are more recent and so don't
- ;; have this bug.
- (aset standard-display-table 2208 [32]) ; Latin-1 NBSP
+ ;; Most X fonts used to do the wrong thing for latin-1 code 160.
+ (unless (and (eq window-system 'x)
+ ;; XFree86 4 has fixed the fonts.
+ (string= "The XFree86 Project, Inc" (x-server-vendor))
+ (> (aref (number-to-string (nth 2 (x-server-version))) 0)
+ ?3))
+ ;; Make non-line-break space display as a plain space.
+ (aset standard-display-table 160 [32]))
;; Most Windows programs send out apostrophes as \222. Most X fonts
;; don't contain a character at that position. Map it to the ASCII
;; apostrophe. [This is actually RIGHT SINGLE QUOTATION MARK,
(while priority
(set (car categories) (car priority))
(setq priority (cdr priority) categories (cdr categories)))
+ ;; Changing the binding of a coding category requires this call.
(update-coding-systems-internal)))))
(defsubst princ-list (&rest args)
(put 'describe-specified-language-support 'apropos-inhibit t)
-;; Print a language specific information such as input methods,
+;; Print language-specific information such as input methods,
;; charsets, and coding systems. This function is intended to be
;; called from the menu:
;; [menu-bar mule describe-language-environment LANGUAGE]
(interactive
(list (read-language-name
'documentation
- "Describe language environment (default, current choice): ")))
+ "Describe language environment (default current choice): ")))
(if (null language-name)
(setq language-name current-language-environment))
(if (or (null language-name)
(setq language-name (symbol-name language-name)))
(dolist (feature (get-language-info language-name 'features))
(require feature))
- (let ((doc (get-language-info language-name 'documentation))
- pos)
+ (let ((doc (get-language-info language-name 'documentation)))
(help-setup-xref (list #'describe-language-environment language-name)
(interactive-p))
(with-output-to-temp-buffer (help-buffer)
(l (copy-sequence input-method-alist)))
(insert "Input methods")
(when input-method
- (insert " (default, " input-method ")")
+ (insert " (default " input-method ")")
(setq input-method (assoc input-method input-method-alist))
(setq l (cons input-method (delete input-method l))))
(insert ":\n")
;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html.
;; CODESET and MODIFIER are implementation-dependent.
- ; aa Afar
- ; ab Abkhazian
+ ;; jasonr comments: MS Windows uses three letter codes for
+ ;; languages instead of the two letter ISO codes that POSIX
+ ;; uses. In most cases the first two letters are the same, so
+ ;; most of the regexps in locale-language-names work. Japanese
+ ;; and Chinese are exceptions, which are listed in the
+ ;; non-standard section at the bottom of locale-language-names.
+
+ ("aa_DJ" . "Latin-1") ; Afar
+ ("aa" . "UTF-8")
+ ;; ab Abkhazian
("af" . "Latin-1") ; Afrikaans
- ("am" . "Ethiopic") ; Amharic
+ ("am" "Ethiopic" utf-8) ; Amharic
+ ("an" . "Latin-9") ; Aragonese
; ar Arabic glibc uses 8859-6
; as Assamese
; ay Aymara
- ; az Azerbaijani
+ ("az" . "UTF-8") ; Azerbaijani
; ba Bashkir
- ("be" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]
- ("bg" . "Bulgarian") ; Bulgarian
+ ("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
+ ("bg" "Bulgarian" cp1251) ; Bulgarian
; bh Bihari
; bi Bislama
- ; bn Bengali, Bangla
+ ("bn" . "UTF-8") ; Bengali, Bangla
("bo" . "Tibetan")
("br" . "Latin-1") ; Breton
("bs" . "Latin-2") ; Bosnian
+ ("byn" . "UTF-8") ; Bilin; Blin
("ca" . "Latin-1") ; Catalan
; co Corsican
- ("cs" . "Czech")
- ("cy" . "Welsh") ; Welsh [glibc uses Latin-8. Did this change?]
+ ("cs" "Czech" iso-8859-2)
+ ("cy" "Welsh" iso-8859-14)
("da" . "Latin-1") ; Danish
- ("de" . "German")
+ ("de" "German" iso-8859-1)
; dz Bhutani
- ("el" . "Greek")
+ ("el" "Greek" iso-8859-7)
;; Users who specify "en" explicitly typically want Latin-1, not ASCII.
;; That's actually what the GNU locales define, modulo things like
;; en_IN -- fx.
- ("en" . "Latin-1") ; English
+ ("en_IN" "English" utf-8) ; glibc uses utf-8 for English in India
+ ("en" "English" iso-8859-1) ; English
("eo" . "Latin-3") ; Esperanto
- ("es" . "Spanish")
- ("et" . "Latin-4") ; Estonian
+ ("es" "Spanish" iso-8859-1)
+ ("et" . "Latin-1") ; Estonian
("eu" . "Latin-1") ; Basque
- ; fa Persian glibc uses utf-8
+ ("fa" . "UTF-8") ; Persian
("fi" . "Latin-1") ; Finnish
- ; fj Fiji
+ ("fj" . "Latin-1") ; Fiji
("fo" . "Latin-1") ; Faroese
- ("fr" . "French") ; French
+ ("fr" "French" iso-8859-1) ; French
("fy" . "Latin-1") ; Frisian
("ga" . "Latin-1") ; Irish Gaelic (new orthography)
- ("gd" . "Latin-1") ; Scots Gaelic
- ("gl" . "Latin-1") ; Galician
+ ("gd" . "Latin-9") ; Scots Gaelic
+ ("gez" "Ethiopic" utf-8) ; Geez
+ ("gl" . "Latin-1") ; Gallegan; Galician
; gn Guarani
- ; gu Gujarati
- ("gv" . "Latin-8") ; Manx Gaelic glibc uses 8859-1
+ ("gu" . "UTF-8") ; Gujarati
+ ("gv" . "Latin-1") ; Manx Gaelic
; ha Hausa
- ("he" . "Hebrew")
- ("hi" . "Devanagari") ; Hindi glibc uses utf-8
- ("hr" . "Croatian") ; Croatian
+ ("he" "Hebrew" iso-8859-8)
+ ("hi" "Devanagari" utf-8) ; Hindi
+ ("hr" "Croatian" iso-8859-2) ; Croatian
("hu" . "Latin-2") ; Hungarian
; hy Armenian
; ia Interlingua
; ie Interlingue
; ik Inupiak
("is" . "Latin-1") ; Icelandic
- ("it" . "Italian") ; Italian
+ ("it" "Italian" iso-8859-1) ; Italian
; iu Inuktitut
- ("ja" . "Japanese")
+ ("iw" "Hebrew" iso-8859-8)
+ ("ja" "Japanese" euc-jp)
; jw Javanese
- ("ka" . "Georgian") ; Georgian
+ ("ka" "Georgian" georgian-ps) ; Georgian
; kk Kazakh
("kl" . "Latin-1") ; Greenlandic
; km Cambodian
- ; kn Kannada
- ("ko" . "Korean")
+ ("kn" "Kannada" utf-8)
+ ("ko" "Korean" euc-kr)
; ks Kashmiri
; ku Kurdish
("kw" . "Latin-1") ; Cornish
; ky Kirghiz
("la" . "Latin-1") ; Latin
("lb" . "Latin-1") ; Luxemburgish
+ ("lg" . "Laint-6") ; Ganda
; ln Lingala
- ("lo" . "Lao") ; Laothian
- ("lt" . "Lithuanian")
+ ("lo" "Lao" utf-8) ; Laothian
+ ("lt" "Lithuanian" iso-8859-13)
("lv" . "Latvian") ; Latvian, Lettish
; mg Malagasy
("mi" . "Latin-7") ; Maori
- ("mk" . "Cyrillic-ISO") ; Macedonian
- ; ml Malayalam
- ; mn Mongolian
+ ("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian
+ ("ml" "Malayalam" utf-8)
+ ("mn" . "UTF-8") ; Mongolian
; mo Moldavian
- ("mr" . "Devanagari") ; Marathi glibc uses utf-8
+ ("mr" "Devanagari" utf-8) ; Marathi
("ms" . "Latin-1") ; Malay
("mt" . "Latin-3") ; Maltese
; my Burmese
; na Nauru
- ("ne" . "Devanagari") ; Nepali
- ("nl" . "Dutch")
+ ("nb" . "Latin-1") ; Norwegian
+ ("ne" "Devanagari" utf-8) ; Nepali
+ ("nl" "Dutch" iso-8859-1)
("no" . "Latin-1") ; Norwegian
("oc" . "Latin-1") ; Occitan
- ; om (Afan) Oromo
+ ("om_ET" . "UTF-8") ; (Afan) Oromo
+ ("om" . "Latin-1") ; (Afan) Oromo
; or Oriya
- ; pa Punjabi
+ ("pa" . "UTF-8") ; Punjabi
("pl" . "Latin-2") ; Polish
; ps Pashto, Pushto
("pt" . "Latin-1") ; Portuguese
; qu Quechua
("rm" . "Latin-1") ; Rhaeto-Romanic
; rn Kirundi
- ("ro" . "Romanian")
- ("ru.*[_.]koi8" . "Russian")
- ("ru" . "Cyrillic-ISO") ; Russian
+ ("ro" "Romanian" iso-8859-2)
+ ("ru_RU" "Russian" iso-8859-5)
+ ("ru_UA" "Russian" koi8-u)
; rw Kinyarwanda
("sa" . "Devanagari") ; Sanskrit
; sd Sindhi
- ; se Northern Sami
+ ("se" . "UTF-8") ; Northern Sami
; sg Sangho
("sh" . "Latin-2") ; Serbo-Croatian
; si Sinhalese
- ("sk" . "Slovak")
- ("sl" . "Slovenian")
+ ("sid" . "UTF-8") ; Sidamo
+ ("sk" "Slovak" iso-8859-2)
+ ("sl" "Slovenian" iso-8859-2)
; sm Samoan
; sn Shona
- ; so Somali
+ ("so_ET" "UTF-8") ; Somali
+ ("so" "Latin-1") ; Somali
("sq" . "Latin-1") ; Albanian
+ ("sr_YU@cyrillic" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet)
("sr" . "Latin-2") ; Serbian (Latin alphabet)
- ("sr_YU@cyrillic" . "Cyrillic-ISO") ; per glibc
; ss Siswati
- ; st Sesotho
+ ("st" . "Latin-1") ; Sesotho
; su Sundanese
- ("sv" . "Swedish") ; Swedish
+ ("sv" "Swedish" iso-8859-1) ; Swedish
("sw" . "Latin-1") ; Swahili
- ; ta Tamil glibc uses utf-8
- ; te Telugu glibc uses utf-8
- ("tg" . "Tajik")
- ("th" . "Thai")
- ; ti Tigrinya
+ ("ta" "Tamil" utf-8)
+ ("te" . "UTF-8") ; Telugu
+ ("tg" "Tajik" koi8-t)
+ ("th" "Thai" tis-620)
+ ("ti" "Ethiopic" utf-8) ; Tigrinya
+ ("tig_ER" . "UTF-8") ; Tigre
; tk Turkmen
("tl" . "Latin-1") ; Tagalog
; tn Setswana
; to Tonga
- ("tr" . "Turkish")
+ ("tr" "Turkish" iso-8859-9)
; ts Tsonga
- ; tt Tatar
+ ("tt" . "UTF-8") ; Tatar
; tw Twi
; ug Uighur
- ("uk" . "Ukrainian") ; Ukrainian
- ; ur Urdu glibc uses utf-8
+ ("uk" "Ukrainian" koi8-u)
+ ("ur" . "UTF-8") ; Urdu
+ ("uz_UZ@cyrillic" . "UTF-8"); Uzbek
("uz" . "Latin-1") ; Uzbek
- ("vi" . "Vietnamese") ; glibc uses utf-8
+ ("vi" "Vietnamese" utf-8)
; vo Volapuk
("wa" . "Latin-1") ; Walloon
; wo Wolof
- ; xh Xhosa
+ ("xh" . "Latin-1") ; Xhosa
("yi" . "Windows-1255") ; Yiddish
; yo Yoruba
; za Zhuang
-
- ; glibc:
+ ("zh_HK" . "Chinese-Big5")
+ ("zh_TW" . "Chinese-Big5")
+ ("zh_CN" . "Chinese-GB")
+ ("zh" . "Chinese-GB")
; zh_CN.GB18030/GB18030 \
; zh_CN.GBK/GBK \
; zh_HK/BIG5-HKSCS \
-
- ("zh.*[._]big5" . "Chinese-BIG5")
- ("zh.*[._]gbk" . nil) ; Solaris 2.7; has gbk-0 as well as GB 2312.1980-0
- ("zh_tw" . "Chinese-CNS") ; glibc uses big5
- ("zh_tw[._]euc-tw" . "Chinese-EUC-TW")
- ("zh" . "Chinese-GB")
- ; zu Zulu
+ ("zu" . "Latin-1") ; Zulu
;; ISO standard locales
("c$" . "ASCII")
("chs" . "Chinese-GB") ; MS Windows Chinese Simplified
("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
))
- "List of pairs of locale regexps and language names.
-The first element whose locale regexp matches the start of a downcased locale
-specifies the language name corresponding to that locale.
-If the language name is nil, there is no corresponding language environment.")
+ "Alist of locale regexps vs the corresponding languages and coding systems.
+Each element has these form:
+ \(LOCALE-REGEXP LANG-ENV CODING-SYSTEM)
+The first element whose LOCALE-REGEXP matches the start of a
+downcased locale specifies the LANG-ENV \(language environtment)
+and CODING-SYSTEM corresponding to that locale. If there is no
+appropriate language environment, the element may have this form:
+ \(LOCALE-REGEXP . LANG-ENV)
+In this case, LANG-ENV is one of generic language environments for an
+specific encoding such as \"Latin-1\" and \"UTF-8\".")
(defconst locale-charset-language-names
(purecopy
(".*8859[-_]?9\\>" . "Latin-5")
(".*8859[-_]?14\\>" . "Latin-8")
(".*8859[-_]?15\\>" . "Latin-9")
- (".*utf\\(-?8\\)\\>" . "UTF-8")
+ (".*utf\\(?:-?8\\)?\\>" . "UTF-8")
;; utf-8@euro exists, so put this last. (@euro really specifies
;; the currency, rather than the charset.)
(".*@euro\\>" . "Latin-9")))
"List of pairs of locale regexps and charset language names.
The first element whose locale regexp matches the start of a downcased locale
-specifies the language name whose charsets corresponds to that locale.
-This language name is used if its charsets disagree with the charsets of
-the language name that would otherwise be used for this locale.")
+specifies the language name whose charset corresponds to that locale.
+This language name is used if the locale is not listed in
+`locale-language-names'")
(defconst locale-preferred-coding-systems
(purecopy
- '(("ja.*[._]euc" . japanese-iso-8bit)
+ '((".*8859[-_]?1\\>" . iso-8859-1)
+ (".*8859[-_]?2\\>" . iso-8859-2)
+ (".*8859[-_]?3\\>" . iso-8859-3)
+ (".*8859[-_]?4\\>" . iso-8859-4)
+ (".*8859[-_]?9\\>" . iso-8859-9)
+ (".*8859[-_]?14\\>" . iso-8859-14)
+ (".*8859[-_]?15\\>" . iso-8859-15)
+ (".*utf\\(?:-?8\\)?" . utf-8)
+ ;; utf-8@euro exists, so put this after utf-8. (@euro really
+ ;; specifies the currency, rather than the charset.)
+ (".*@euro" . iso-8859-15)
+ ("koi8-?r" . koi8-r)
+ ("koi8-?u" . koi8-u)
+ ("tcvn" . tcvn)
+ ("big5" . big5)
+ ("euc-?tw" . euc-tw)
+ ;; We don't support GBK, but as it is upper compatible with
+ ;; GB-2312, we setup the default coding system to gb2312.
+ ("gbk" . gb2312)
+ ;; We don't support BIG5-HKSCS, but as it is upper compatible with
+ ;; BIG5, we setup the default coding system to big5.
+ ("big5hkscs" . big5)
+ ("ja.*[._]euc" . japanese-iso-8bit)
("ja.*[._]jis7" . iso-2022-jp)
("ja.*[._]pck" . japanese-shift-jis)
("ja.*[._]sjis" . japanese-shift-jis)
("jpn" . japanese-shift-jis) ; MS-Windows uses this.
- (".*[._]utf" . utf-8)))
+ ))
"List of pairs of locale regexps and preferred coding systems.
The first element whose locale regexp matches the start of a downcased locale
-specifies the coding system to prefer when using that locale.")
+specifies the coding system to prefer when using that locale.
+This coding system is used if the locale specifies a specific charset.")
(defun locale-name-match (key alist)
"Search for KEY in ALIST, which should be a list of regexp-value pairs.
(setq alist (cdr alist)))
(cdr element)))
+(defun locale-charset-match-p (charset1 charset2)
+ "Whether charset names (strings) CHARSET1 and CHARSET2 are equivalent.
+Matching is done ignoring case and any hyphens and underscores in the
+names. E.g. `ISO_8859-1' and `iso88591' both match `iso-8859-1'."
+ (setq charset1 (replace-regexp-in-string "[-_]" "" charset1))
+ (setq charset2 (replace-regexp-in-string "[-_]" "" charset2))
+ (eq t (compare-strings charset1 nil nil charset2 nil nil t)))
+
+(defvar locale-charset-alist nil
+ "Coding system alist keyed on locale-style charset name.
+Used by `locale-charset-to-coding-system'.")
+
+(defun locale-charset-to-coding-system (charset)
+ "Find coding system corresponding to CHARSET.
+CHARSET is any sort of non-Emacs charset name, such as might be used
+in a locale codeset, or elsewhere. It is matched to a coding system
+first by case-insensitive lookup in `locale-charset-alist'. Then
+matches are looked for in the coding system list, treating case and
+the characters `-' and `_' as insignificant. The coding system base
+is returned. Thus, for instance, if charset \"ISO8859-2\",
+`iso-latin-2' is returned."
+ (or (car (assoc-string charset locale-charset-alist t))
+ (let ((cs coding-system-alist)
+ c)
+ (while (and (not c) cs)
+ (if (locale-charset-match-p charset (caar cs))
+ (setq c (intern (caar cs)))
+ (pop cs)))
+ (if c (coding-system-base c)))))
+
+;; Fixme: This ought to deal with the territory part of the locale
+;; too, for setting things such as calendar holidays, ps-print paper
+;; size, spelling dictionary.
+
(defun set-locale-environment (&optional locale-name)
"Set up multi-lingual environment for using LOCALE-NAME.
This sets the language environment, the coding system priority,
the default input method and sometimes other things.
-LOCALE-NAME should be a string
-which is the name of a locale supported by the system;
-often it is of the form xx_XX.CODE, where xx is a language,
-XX is a country, and CODE specifies a character set and coding system.
-For example, the locale name \"ja_JP.EUC\" might name a locale
-for Japanese in Japan using the `japanese-iso-8bit' coding-system.
+LOCALE-NAME should be a string which is the name of a locale supported
+by the system. Often it is of the form xx_XX.CODE, where xx is a
+language, XX is a country, and CODE specifies a character set and
+coding system. For example, the locale name \"ja_JP.EUC\" might name
+a locale for Japanese in Japan using the `japanese-iso-8bit'
+coding-system. The name may also have a modifier suffix, e.g. `@euro'
+or `@cyrillic'.
If LOCALE-NAME is nil, its value is taken from the environment
variables LC_ALL, LC_CTYPE and LANG (the first one that is set).
;; to a system without X.
(setq locale-translation-file-name
(let ((files
- '("/usr/lib/X11/locale/locale.alias" ; e.g. X11R6.4
- "/usr/X11R6/lib/X11/locale/locale.alias" ; e.g. RedHat 4.2
+ '("/usr/share/X11/locale/locale.alias" ; e.g. X11R7
+ "/usr/lib/X11/locale/locale.alias" ; e.g. X11R6.4
+ "/usr/X11R6/lib/X11/locale/locale.alias" ; XFree86, e.g. RedHat 4.2
"/usr/openwin/lib/locale/locale.alias" ; e.g. Solaris 2.6
;;
;; The following name appears after the X-related names above,
(= 0 (length locale))) ; nil or empty string
(setq locale (getenv (pop vars))))))
+ (unless locale
+ ;; The two tests are kept separate so the byte-compiler sees
+ ;; that mac-get-preference is only called after checking its existence.
+ (when (fboundp 'mac-get-preference)
+ (setq locale (mac-get-preference "AppleLocale"))
+ (unless locale
+ (let ((languages (mac-get-preference "AppleLanguages")))
+ (unless (= (length languages) 0) ; nil or empty vector
+ (setq locale (aref languages 0)))))))
+ (unless (or locale (not (boundp 'mac-system-locale)))
+ (setq locale mac-system-locale))
+
(when locale
;; Translate "swedish" into "sv_SE.ISO8859-1", and so on,
(charset-language-name
(locale-name-match locale locale-charset-language-names))
(coding-system
- (locale-name-match locale locale-preferred-coding-systems)))
-
- ;; Give preference to charset-language-name over language-name.
- (if (and charset-language-name
- (not
- (equal (get-language-info language-name 'charset)
- (get-language-info charset-language-name 'charset))))
- (setq language-name charset-language-name))
+ (or (locale-name-match locale locale-preferred-coding-systems)
+ (when locale
+ (if (string-match "\\.\\([^@]+\\)" locale)
+ (locale-charset-to-coding-system
+ (match-string 1 locale))))
+ (and (eq system-type 'macos) mac-system-coding-system))))
+
+ (if (consp language-name)
+ ;; locale-language-names specify both lang-env and coding.
+ ;; But, what specified in locale-preferred-coding-systems
+ ;; has higher priority.
+ (setq coding-system (or coding-system
+ (nth 1 language-name))
+ language-name (car language-name))
+ ;; Otherwise, if locale is not listed in locale-language-names,
+ ;; use what listed in locale-charset-language-names.
+ (if (not language-name)
+ (setq language-name charset-language-name)))
(when language-name
;; we are using single-byte characters,
;; so the display table and terminal coding system are irrelevant.
(when default-enable-multibyte-characters
- (set-display-table-and-terminal-coding-system language-name))
+ (set-display-table-and-terminal-coding-system
+ language-name coding-system))
;; Set the `keyboard-coding-system' if appropriate (tty
;; only). At least X and MS Windows can generate
(setq locale-coding-system
(car (get-language-info language-name 'coding-priority))))
- (when coding-system
+ (when (and coding-system
+ (not (coding-system-equal coding-system
+ locale-coding-system)))
(prefer-coding-system coding-system)
(setq locale-coding-system coding-system))))
+ ;; On Windows, override locale-coding-system,
+ ;; keyboard-coding-system with system codepage. Note:
+ ;; selection-coding-system is already set in w32select.c.
+ (when (boundp 'w32-ansi-code-page)
+ (let ((code-page-coding (intern (format "cp%d" w32-ansi-code-page))))
+ (when (coding-system-p code-page-coding)
+ (setq locale-coding-system code-page-coding)
+ (set-keyboard-coding-system code-page-coding)
+ (set-terminal-coding-system code-page-coding))))
+
+ (when (eq system-type 'darwin)
+ ;; On Darwin, file names are always encoded in utf-8, no matter
+ ;; the locale.
+ (setq default-file-name-coding-system 'utf-8)
+ ;; Mac OS X's Terminal.app by default uses utf-8 regardless of
+ ;; the locale.
+ (when (and (null window-system)
+ (equal (getenv "TERM_PROGRAM") "Apple_Terminal"))
+ (set-terminal-coding-system 'utf-8)
+ (set-keyboard-coding-system 'utf-8)))
+
;; Default to A4 paper if we're not in a C, POSIX or US locale.
- ;; (See comments in Flanginfo.)
+ ;; (See comments in Flocale_info.)
(let ((locale locale)
- (paper (langinfo 'paper)))
+ (paper (locale-info 'paper)))
(if paper
;; This will always be null at the time of writing.
(cond
("posix$" . letter)
(".._us" . letter)
(".._pr" . letter)
- (".._ca" . letter)))
+ (".._ca" . letter)
+ ("enu$" . letter) ; Windows
+ ("esu$" . letter)
+ ("enc$" . letter)
+ ("frc$" . letter)))
'a4))))))
nil)
\f
(if (and coding-system (eq (coding-system-type coding-system) 2))
;; Try to get a pretty description for ISO 2022 escape sequences.
(function (lambda (x) (or (cdr (assq x iso-2022-control-alist))
- (format "%02X" x))))
- (function (lambda (x) (format "0x%02X" x))))
+ (format "#x%02X" x))))
+ (function (lambda (x) (format "#x%02X" x))))
str " "))
(defun encode-coding-char (char coding-system)
(and safe-chars (aref safe-chars char)))
;; We must find the encoded string of CHAR. But, just encoding
;; CHAR will put extra control sequences (usually to designate
- ;; ASCII charaset) at the tail if type of CODING is ISO 2022.
+ ;; ASCII charset) at the tail if type of CODING is ISO 2022.
;; To exclude such tailing bytes, we at first encode one-char
;; string and two-char string, then check how many bytes at the
;; tail of both encoded strings are the same.
(substring enc2 0 i2))))
+;; arch-tag: b382c432-4b36-460e-bf4c-05efd0bb18dc
;;; mule-cmds.el ends here