;;; mule.el --- basic commands for multilingual environment
-;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
+;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
;; Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-;; 2005, 2006, 2007
+;; 2005, 2006, 2007, 2008
;; National Institute of Advanced Industrial Science and Technology (AIST)
;; Registration Number H14PRO021
;; Copyright (C) 2003
`:supplementary-p'
VALUE must be nil or t. If the VALUE is t, the charset is
-supplementary, which means it is used only as a parent of some other
-charset.
+supplementary, which means it is used only as a parent or a
+subset of some other charset, or it is provided just for backward
+compatibility.
`:invalid-code'
(let* ((buffer
;; To avoid any autoloading, set default-major-mode to
;; fundamental-mode.
- ;; So that we don't get completely screwed if the
- ;; file is encoded in some complicated character set,
- ;; read it with real decoding, as a multibyte buffer,
- ;; even if this is a --unibyte Emacs session.
- (let ((default-major-mode 'fundamental-mode)
- (default-enable-multibyte-characters t))
+ (let ((default-major-mode 'fundamental-mode))
;; We can't use `generate-new-buffer' because files.el
;; is not yet loaded.
(get-buffer-create (generate-new-buffer-name " *load*"))))
(set-auto-coding-for-load t)
(inhibit-file-name-operation nil))
(with-current-buffer buffer
+ ;; So that we don't get completely screwed if the
+ ;; file is encoded in some complicated character set,
+ ;; read it with real decoding, as a multibyte buffer,
+ ;; even if this is a --unibyte Emacs session.
+ (set-buffer-multibyte t)
;; Don't let deactivate-mark remain set.
(let (deactivate-mark)
(insert-file-contents fullname))
This is normally set according to the selected language environment.
See also the command `set-terminal-coding-system'.")
-(defun set-terminal-coding-system (coding-system)
- "Set coding system of your terminal to CODING-SYSTEM.
-All text output to the terminal will be encoded
+(defun set-terminal-coding-system (coding-system &optional display)
+ "Set coding system of terminal output to CODING-SYSTEM.
+All text output to DISPLAY will be encoded
with the specified coding system.
+
For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
The default is determined by the selected language environment
-or by the previous use of this command."
+or by the previous use of this command.
+
+DISPLAY may be a display id, a frame, or nil for the selected frame's display.
+The setting has no effect on graphical displays."
(interactive
(list (let ((default (if (and (not (terminal-coding-system))
default-terminal-coding-system)
(setq coding-system default-terminal-coding-system))
(if coding-system
(setq default-terminal-coding-system coding-system))
- (set-terminal-coding-system-internal coding-system)
+ (set-terminal-coding-system-internal coding-system display)
(redraw-frame (selected-frame)))
(defvar default-keyboard-coding-system nil
This is normally set according to the selected language environment.
See also the command `set-keyboard-coding-system'.")
-(defun set-keyboard-coding-system (coding-system)
- "Set coding system for keyboard input to CODING-SYSTEM.
-In addition, this command enables Encoded-kbd minor mode.
-\(If CODING-SYSTEM is nil, Encoded-kbd mode is turned off -- see
-`encoded-kbd-mode'.)
+(defun set-keyboard-coding-system (coding-system &optional display)
+ "Set coding system for keyboard input on DISPLAY to CODING-SYSTEM.
+In addition, this command calls `encoded-kbd-setup-display' to set up the
+translation of keyboard input events to the specified coding system.
+
For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
The default is determined by the selected language environment
-or by the previous use of this command."
+or by the previous use of this command.
+
+DISPLAY may be a display id, a frame, or nil for the selected frame's display.
+The setting has no effect on graphical displays."
(interactive
(list (let ((default (if (and (not (keyboard-coding-system))
default-keyboard-coding-system)
(not (coding-system-get coding-system :ascii-compatible-p))
(not (coding-system-get coding-system :suitable-for-keyboard)))
(error "%s is not suitable for keyboard" coding-system))
- (set-keyboard-coding-system-internal coding-system)
+ (set-keyboard-coding-system-internal coding-system display)
(setq keyboard-coding-system coding-system)
- (encoded-kbd-mode (if coding-system 1 0)))
+ (encoded-kbd-setup-display display))
(defcustom keyboard-coding-system nil
"Specify coding system for keyboard input.
:link '(info-link "(emacs)Terminal Coding")
:link '(info-link "(emacs)Unibyte Mode")
:set (lambda (symbol value)
- ;; Don't load encoded-kbd-mode unnecessarily.
- (if (or value (boundp 'encoded-kbd-mode))
+ ;; Don't load encoded-kb unnecessarily.
+ ;; `encoded-kbd-setup-display' is a function, so `boundp' on it does
+ ;; not tell us whether the library is loaded. Test the feature
+ ;; instead, so that a nil VALUE still reaches
+ ;; `set-keyboard-coding-system' once the library is in use.
+ ;; NOTE(review): assumes the library provides the feature
+ ;; `encoded-kb' -- confirm against encoded-kb.el.
+ (if (or value (featurep 'encoded-kb))
(set-keyboard-coding-system value)
(set-default 'keyboard-coding-system nil))) ; must initialize
:version "22.1"
(defcustom auto-coding-alist
;; .exe and .EXE are added to support archive-mode looking at DOS
;; self-extracting exe archives.
- '(("\\.\\(arc\\|zip\\|lzh\\|lha\\|zoo\\|[jew]ar\\|xpi\\|exe\\)\\'" . no-conversion)
- ("\\.\\(ARC\\|ZIP\\|LZH\\|LHA\\|ZOO\\|[JEW]AR\\|XPI\\|EXE\\)\\'" . no-conversion)
+ '(("\\.\\(\
+arc\\|zip\\|lzh\\|lha\\|zoo\\|[jew]ar\\|xpi\\|rar\\|\
+ARC\\|ZIP\\|LZH\\|LHA\\|ZOO\\|[JEW]AR\\|XPI\\|RAR\\)\\'"
+ . no-conversion-multibyte)
+ ("\\.\\(exe\\|EXE\\)\\'" . no-conversion)
("\\.\\(sx[dmicw]\\|odt\\|tar\\|tgz\\)\\'" . no-conversion)
("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion)
("\\.\\(jpe?g\\|png\\|gif\\|tiff?\\|p[bpgn]m\\)\\'" . no-conversion)
sym
(message "Warning: unknown coding system \"%s\"" match)
nil))
- 'utf-8)))))
+ ;; Files without an encoding tag should be UTF-8. But users
+ ;; may be naive about encodings, and have saved the file from
+ ;; another editor that does not help them get the encoding right.
+ ;; Detect the encoding and warn the user if it is detected as
+ ;; something other than UTF-8.
+ (let ((detected
+ (with-coding-priority '(utf-8)
+ (coding-system-base
+ (detect-coding-region (point-min) size t)))))
+ ;; Pure ASCII always comes back as undecided.
+ (if (memq detected '(utf-8 undecided))
+ 'utf-8
+ (warn "File contents detected as %s.
+ Consider adding an encoding attribute to the xml declaration,
+ or saving as utf-8, as mandated by the xml specification." detected)
+ detected)))))))
(defun sgml-html-meta-auto-coding-function (size)
"If the buffer has an HTML meta tag, use it to determine encoding.
This function is intended to be added to `auto-coding-functions'."
- (setq size (min (+ (point) size)
- (save-excursion
- ;; Limit the search by the end of the HTML header.
- (or (search-forward "</head>" size t)
- ;; In case of no header, search only 10 lines.
- (forward-line 10))
- (point))))
- ;; Make sure that the buffer really contains an HTML document, by
- ;; checking that it starts with a doctype or a <HTML> start tag
- ;; (allowing for whitespace at bob). Note: 'DOCTYPE NETSCAPE' is
- ;; useful for Mozilla bookmark files.
- (when (and (re-search-forward "\\`[[:space:]\n]*\\(<!doctype[[:space:]\n]+\\(html\\|netscape\\)\\|<html\\)" size t)
- (re-search-forward "<meta\\s-+http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']text/\\sw+;\\s-*charset=\\(.+?\\)[\"']" size t))
- (let* ((match (match-string 1))
- (sym (intern (downcase match))))
- (if (coding-system-p sym)
- sym
- (message "Warning: unknown coding system \"%s\"" match)
- nil))))
+ ;; Bind `case-fold-search' to t so that every search below ignores
+ ;; letter case; upper-case tags such as </HEAD> or <HTML> match too.
+ (let ((case-fold-search t))
+ ;; From here on SIZE is no longer a character count but an absolute
+ ;; buffer position: the smaller of point+SIZE and the position just
+ ;; after the header (or 10 lines down). Both regexp searches below
+ ;; use it as their bound.
+ (setq size (min (+ (point) size)
+ (save-excursion
+ ;; Limit the search by the end of the HTML header.
+ ;; The bound of `search-forward' is a buffer position, hence
+ ;; point+SIZE rather than the raw SIZE.
+ (or (search-forward "</head>" (+ (point) size) t)
+ ;; In case of no header, search only 10 lines.
+ (forward-line 10))
+ (point))))
+ ;; Make sure that the buffer really contains an HTML document, by
+ ;; checking that it starts with a doctype or a <HTML> start tag
+ ;; (allowing for whitespace at bob). Note: 'DOCTYPE NETSCAPE' is
+ ;; useful for Mozilla bookmark files.
+ (when (and (re-search-forward "\\`[[:space:]\n]*\\(<!doctype[[:space:]\n]+\\(html\\|netscape\\)\\|<html\\)" size t)
+ (re-search-forward "<meta\\s-+http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']text/\\sw+;\\s-*charset=\\(.+?\\)[\"']" size t))
+ ;; Group 1 of the second regexp is the charset name. It is
+ ;; downcased and interned, then returned if it names a coding
+ ;; system; otherwise a warning is shown and nil is returned.
+ (let* ((match (match-string 1))
+ (sym (intern (downcase match))))
+ (if (coding-system-p sym)
+ sym
+ (message "Warning: unknown coding system \"%s\"" match)
+ nil)))))
+
+(defun xml-find-file-coding-system (args)
+  "Choose a coding system for an XML file that has no declaration.
+ARGS is a list whose first element names the file operation.
+
+For `insert-file-contents', the accessible portion of the buffer is
+examined with utf-8 given detection priority.  A result of utf-8 or
+undecided (pure ASCII) yields `utf-8'; UTF-16 with a signature is
+returned as detected; anything else is returned after a warning.
+Such a file ought to be utf-8, but mistakes are made, and XML
+fragments may legitimately have their encoding specified in a
+master document or added by processing software.
+
+For any other operation, `undecided' is returned."
+  (if (not (eq (car args) 'insert-file-contents))
+      ;; Not reading a file: stay out of the way of the user's wishes
+      ;; for saving the buffer.  Whatever could be done was done when
+      ;; the buffer was created, so any non-conformity at this point is
+      ;; deliberate on the part of the user.
+      'undecided
+    (let ((guess (with-coding-priority '(utf-8)
+                   (coding-system-base
+                    (detect-coding-region (point-min) (point-max) t)))))
+      (cond
+       ;; Pure ASCII always comes back as undecided.
+       ((memq guess '(utf-8 undecided)) 'utf-8)
+       ;; UTF-16 with a signature is returned without a warning.
+       ((memq guess '(utf-16le-with-signature utf-16be-with-signature))
+        guess)
+       (t
+        (warn "File contents detected as %s.
+ Consider adding an xml declaration with the encoding specified,
+ or saving as utf-8, as mandated by the xml specification." guess)
+        guess)))))
;;;
(provide 'mule)