;;; mule.el --- basic commands for multilingual environment
-;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
+;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
;; Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-;; 2005, 2006, 2007
+;; 2005, 2006, 2007, 2008
;; National Institute of Advanced Industrial Science and Technology (AIST)
;; Registration Number H14PRO021
;; Copyright (C) 2003
(let* ((buffer
;; To avoid any autoloading, set default-major-mode to
;; fundamental-mode.
- ;; So that we don't get completely screwed if the
- ;; file is encoded in some complicated character set,
- ;; read it with real decoding, as a multibyte buffer,
- ;; even if this is a --unibyte Emacs session.
- (let ((default-major-mode 'fundamental-mode)
- (default-enable-multibyte-characters t))
+ (let ((default-major-mode 'fundamental-mode))
;; We can't use `generate-new-buffer' because files.el
;; is not yet loaded.
(get-buffer-create (generate-new-buffer-name " *load*"))))
(set-auto-coding-for-load t)
(inhibit-file-name-operation nil))
(with-current-buffer buffer
+ ;; So that we don't get completely screwed if the
+ ;; file is encoded in some complicated character set,
+ ;; read it with real decoding, as a multibyte buffer,
+ ;; even if this is a --unibyte Emacs session.
+ (set-buffer-multibyte t)
;; Don't let deactivate-mark remain set.
(let (deactivate-mark)
(insert-file-contents fullname))
;; .exe and .EXE are added to support archive-mode looking at DOS
;; self-extracting exe archives.
'(("\\.\\(\
-arc\\|zip\\|lzh\\|lha\\|zoo\\|[jew]ar\\|xpi\\|exe\\|rar\\|\
-ARC\\|ZIP\\|LZH\\|LHA\\|ZOO\\|[JEW]AR\\|XPI\\|EXE\\|RAR\\)\\'" . no-conversion)
+arc\\|zip\\|lzh\\|lha\\|zoo\\|[jew]ar\\|xpi\\|rar\\|\
+ARC\\|ZIP\\|LZH\\|LHA\\|ZOO\\|[JEW]AR\\|XPI\\|RAR\\)\\'"
+ . no-conversion-multibyte)
+ ("\\.\\(exe\\|EXE\\)\\'" . no-conversion)
("\\.\\(sx[dmicw]\\|odt\\|tar\\|tgz\\)\\'" . no-conversion)
("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion)
("\\.\\(jpe?g\\|png\\|gif\\|tiff?\\|p[bpgn]m\\)\\'" . no-conversion)
sym
(message "Warning: unknown coding system \"%s\"" match)
nil))
- 'utf-8)))))
+ ;; Files without an encoding tag should be UTF-8. But users
+ ;; may be naive about encodings, and have saved the file from
+ ;; another editor that does not help them get the encoding right.
+ ;; Detect the encoding and warn the user if it is detected as
+ ;; something other than UTF-8.
+ (let ((detected
+ (with-coding-priority '(utf-8)
+ (coding-system-base
+ (detect-coding-region (point-min) size t)))))
+ ;; Pure ASCII always comes back as undecided.
+ (if (memq detected '(utf-8 undecided))
+ 'utf-8
+ (warn "File contents detected as %s.
+ Consider adding an encoding attribute to the xml declaration,
+ or saving as utf-8, as mandated by the xml specification." detected)
+ detected)))))))
(defun sgml-html-meta-auto-coding-function (size)
"If the buffer has an HTML meta tag, use it to determine encoding.
(setq size (min (+ (point) size)
(save-excursion
;; Limit the search by the end of the HTML header.
- (or (search-forward "</head>" size t)
+ (or (search-forward "</head>" (+ (point) size) t)
;; In case of no header, search only 10 lines.
(forward-line 10))
(point))))
(message "Warning: unknown coding system \"%s\"" match)
nil)))))
+(defun xml-find-file-coding-system (args)
+  "Pick a coding system for an XML file that has no declaration.
+ARGS is a list whose first element names the file operation in
+progress.
+
+For `insert-file-contents', run coding detection over the accessible
+part of the current buffer with utf-8 at the top of the priority
+list.  A result of utf-8, or of `undecided' (what pure ASCII yields),
+gives `utf-8'; utf-16 with a signature, in either byte order, is
+returned as detected; anything else is returned after a warning.
+For any other operation, return `undecided'.
+
+Strictly speaking, the file should be utf-8, but mistakes are
+made, and there are genuine cases where XML fragments are saved,
+with the encoding properly specified in a master document, or
+added by processing software."
+  (if (not (eq (car args) 'insert-file-contents))
+      ;; Not reading a file: stay out of the way of whatever the user
+      ;; wants when saving the buffer.  The encoding was checked (or
+      ;; the user was warned) when the buffer was created, so any
+      ;; non-conformity at this point is the user's deliberate choice.
+      'undecided
+    (let ((guess (with-coding-priority '(utf-8)
+                   (coding-system-base
+                    (detect-coding-region (point-min) (point-max) t)))))
+      (cond
+       ;; Pure ASCII is always reported as undecided.
+       ((memq guess '(utf-8 undecided)) 'utf-8)
+       ;; UTF-16 with a signature is returned as is, without a warning.
+       ((memq guess '(utf-16le-with-signature utf-16be-with-signature))
+        guess)
+       (t
+        (warn "File contents detected as %s.
+ Consider adding an xml declaration with the encoding specified,
+ or saving as utf-8, as mandated by the xml specification." guess)
+        guess)))))
+
;;;
(provide 'mule)