;;; mule.el --- basic commands for multilingual environment
-;; Copyright (C) 1997-2011 Free Software Foundation, Inc.
+;; Copyright (C) 1997-2014 Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
;; National Institute of Advanced Industrial Science and Technology (AIST)
;;; Code:
+;; FIXME? Are these still relevant? Nothing uses them AFAICS.
(defconst mule-version "6.0 (HANACHIRUSATO)" "\
Version number and name of this version of MULE (multilingual environment).")
`:min-code'
-VALUE must be an integer specifying the mininum code point of the
+VALUE must be an integer specifying the minimum code point of the
charset. If omitted, it is calculated from `:code-space'. VALUE may
be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
the code point and LOW is the least significant 16 bits.
`:max-code'
-VALUE must be an integer specifying the maxinum code point of the
+VALUE must be an integer specifying the maximum code point of the
charset. If omitted, it is calculated from `:code-space'. VALUE may
be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
the code point and LOW is the least significant 16 bits.
VALUE must be a nonnegative integer that can be used as an invalid
code point of the charset. If the minimum code is 0 and the maximum
-code is greater than Emacs' maximum integer value, `:invalid-code'
+code is greater than Emacs's maximum integer value, `:invalid-code'
should not be omitted.
`:code-offset'
;; because that makes a bootstrapping problem
;; if you need to recompile all the Lisp files using interpreted code.
-(defun charset-id (charset)
+(defun charset-id (_charset)
"Always return 0. This is provided for backward compatibility."
+ (declare (obsolete nil "23.1"))
0)
-(make-obsolete 'charset-id "do not use it." "23.1")
-(defmacro charset-bytes (charset)
+(defmacro charset-bytes (_charset)
"Always return 0. This is provided for backward compatibility."
+ (declare (obsolete nil "23.1"))
0)
-(make-obsolete 'charset-bytes "do not use it." "23.1")
(defun get-charset-property (charset propname)
"Return the value of CHARSET's PROPNAME property.
(defun charset-list ()
"Return list of all charsets ever defined."
+ (declare (obsolete charset-list "23.1"))
charset-list)
-(make-obsolete 'charset-list "use variable `charset-list'." "23.1")
\f
;;; CHARACTER
(define-obsolete-function-alias 'char-valid-p 'characterp "23.1")
-(defun generic-char-p (char)
+(defun generic-char-p (_char)
"Always return nil. This is provided for backward compatibility."
+ (declare (obsolete nil "23.1"))
nil)
-(make-obsolete 'generic-char-p "generic characters no longer exist." "23.1")
(defun make-char-internal (charset-id &optional code1 code2)
(let ((charset (aref emacs-mule-charset-table charset-id)))
composition
euc-tw-shift
use-roman
- use-oldjis)
+ use-oldjis
+ 8-bit-level-4)
"List of symbols that control ISO-2022 encoder/decoder.
The value of the `:flags' attribute in the argument of the function
on decoding, and use locking-shift to invoke a graphic element on
encoding.
-If `single-shift' is specified, decode single-shift code correctly on
-decoding, and use single-shift to invoke a graphic element on encoding.
+If `single-shift' is specified, decode single-shift code
+correctly on decoding, and use single-shift to invoke a graphic
+element on encoding. See also `8-bit-level-4' specification.
If `designation' is specified, decode designation code correctly on
decoding, and use designation to designate a charset to a graphic
of ASCII.
If `use-oldjis' is specified, JIS0208-1976 is designated instead of
-JIS0208-1983.")
+JIS0208-1983.
+
+If `8-bit-level-4' is specified, the decoder assumes the
+implementation level \"4\" for 8-bit codes which means that GL is
+identified as the single-shift area. The default implementation
+level for 8-bit code is \"4A\" which means that GR is identified
+as the single-shift area.")
(defun define-coding-system (name docstring &rest props)
"Define NAME (a symbol) as a coding system with DOCSTRING and attributes.
VALUE must be a list of symbols that control the ISO-2022 converter.
Each must be a member of the list `coding-system-iso-2022-flags'
-\(which see). This attribute has a meaning only when `:coding-type'
+\(which see). This attribute is meaningful only when `:coding-type'
is `iso-2022'.
`:designation'
96 long can be designated to GN. If the first element is a charset,
that charset is initially designated to GN.
-This attribute has a meaning only when `:coding-type' is `iso-2022'.
+This attribute is meaningful only when `:coding-type' is `iso-2022'.
`:bom'
BOM, and on encoding, don't produce BOM bytes.
If the value is t, on decoding, skip the first two-byte as BOM, and on
-encoding, produce BOM bytes accoding to the value of `:endian'.
+encoding, produce BOM bytes according to the value of `:endian'.
If the value is cons, on decoding, check the first two-byte. If they
are 0xFE 0xFF, use the car part coding system of the value. If they
are 0xFF 0xFE, use the cdr part coding system of the value.
Otherwise, treat them as bytes for a normal character. On encoding,
-produce BOM bytes accoding to the value of `:endian'.
+produce BOM bytes according to the value of `:endian'.
-This attribute has a meaning only when `:coding-type' is `utf-16' or
+This attribute is meaningful only when `:coding-type' is `utf-16' or
`utf-8'.
`:endian'
VALUE must be `big' or `little' specifying big-endian and
little-endian respectively. The default value is `big'.
-This attribute has a meaning only when `:coding-type' is `utf-16'.
+This attribute is meaningful only when `:coding-type' is `utf-16'.
`:ccl-decoder'
VALUE is a symbol representing the registered CCL program used for
-decoding. This attribute has a meaning only when `:coding-type' is
+decoding. This attribute is meaningful only when `:coding-type' is
`ccl'.
`:ccl-encoder'
VALUE is a symbol representing the registered CCL program used for
-encoding. This attribute has a meaning only when `:coding-type' is
-`ccl'."
+encoding. This attribute is meaningful only when `:coding-type' is
+`ccl'.
+
+`:inhibit-null-byte-detection'
+
+VALUE non-nil means Emacs ignore null bytes on code detection.
+See the variable `inhibit-null-byte-detection'. This attribute
+is meaningful only when `:coding-type' is `undecided'.
+
+`:inhibit-iso-escape-detection'
+
+VALUE non-nil means Emacs ignores ISO-2022 escape sequences on
+code detection. See the variable `inhibit-iso-escape-detection'.
+This attribute is meaningful only when `:coding-type' is
+`undecided'.
+
+`:prefer-utf-8'
+
+VALUE non-nil means Emacs prefers UTF-8 on code detection for
+non-ASCII files. This attribute is meaningful only when
+`:coding-type' is `undecided'."
(let* ((common-attrs (mapcar 'list
'(:mnemonic
:coding-type
((eq coding-type 'ccl)
'(:ccl-decoder
:ccl-encoder
- :valids))))))
+ :valids))
+ ((eq coding-type 'undecided)
+ '(:inhibit-null-byte-detection
+ :inhibit-iso-escape-detection
+ :prefer-utf-8))))))
(dolist (slot common-attrs)
(setcdr slot (plist-get props (car slot))))
(and (vectorp eol-type-1) (vectorp eol-type-2)))))))
(defun add-to-coding-system-list (coding-system)
- "Add CODING-SYSTEM to `coding-system-list' while keeping it sorted."
+ "Add CODING-SYSTEM to variable `coding-system-list' while keeping it sorted."
(if (or (null coding-system-list)
(coding-system-lessp coding-system (car coding-system-list)))
(setq coding-system-list (cons coding-system coding-system-list))
eol-type)
"Define a new coding system CODING-SYSTEM (symbol).
This function is provided for backward compatibility."
+ (declare (obsolete define-coding-system "23.1"))
;; For compatibility with XEmacs, we check the type of TYPE. If it
;; is a symbol, perhaps, this function is called with XEmacs-style
;; arguments. Here, try to transform that kind of arguments to
(apply 'define-coding-system coding-system doc-string properties))
-(make-obsolete 'make-coding-system 'define-coding-system "23.1")
-
(defun merge-coding-systems (first second)
"Fill in any unspecified aspects of coding system FIRST from SECOND.
Return the resulting coding system."
(put (intern name) 'coding-system-define-form form)
(setq coding-system-alist (cons (list name) coding-system-alist)))))
-;; This variable is set in these three cases:
+;; This variable is set in these two cases:
;; (1) A file is read by a coding system specified explicitly.
-;; after-insert-file-set-coding sets the car of this value to
-;; coding-system-for-read, and sets the cdr to nil.
-;; (2) A buffer is saved.
-;; After writing, basic-save-buffer-1 sets the car of this value
-;; to last-coding-system-used.
-;; (3) set-buffer-file-coding-system is called.
+;; `after-insert-file-set-coding' sets the car of this value to
+;; `coding-system-for-read', and sets the cdr to nil.
+;; (2) `set-buffer-file-coding-system' is called.
;; The cdr of this value is set to the specified coding system.
-;; This variable is used for decoding in revert-buffer and encoding in
-;; select-safe-coding-system.
+;; This variable is used for decoding in `revert-buffer' and encoding
+;; in `select-safe-coding-system'.
+;;
+;; When saving a buffer, if `buffer-file-coding-system-explicit' is
+;; already non-nil, `basic-save-buffer-1' sets its CAR to the value of
+;; `last-coding-system-used'. (It used to set it unconditionally, but
+;; that seems unnecessary; see Bug#4533.)
+
(defvar buffer-file-coding-system-explicit nil
"The file coding system explicitly specified for the current buffer.
The value is a cons of coding systems for reading (decoding) and
(if (and coding-system buffer-file-coding-system (null force))
(setq coding-system
(merge-coding-systems coding-system buffer-file-coding-system)))
- (when (called-interactively-p 'interactive)
+ (when (and (called-interactively-p 'interactive)
+ (not (memq 'emacs (coding-system-get coding-system
+ :charset-list))))
;; Check whether save would succeed, and jump to the offending char(s)
;; if not.
(let ((css (find-coding-systems-region (point-min) (point-max))))
(if coding-system
(setq default-terminal-coding-system coding-system))
(set-terminal-coding-system-internal coding-system terminal)
- (redraw-frame (selected-frame)))
+ (redraw-frame))
(defvar default-keyboard-coding-system nil
"Default value of the keyboard coding system.
(t
(error "Unsupported coding system for keyboard: %s"
coding-system)))
- (when accept-8-bit
- (or saved-meta-mode
- (set-terminal-parameter terminal
- 'keyboard-coding-saved-meta-mode
- (cons (nth 2 (current-input-mode))
- nil)))
- (set-input-meta-mode 8))
+ (if accept-8-bit
+ (progn
+ (or saved-meta-mode
+ (set-terminal-parameter terminal
+ 'keyboard-coding-saved-meta-mode
+ (cons (nth 2 (current-input-mode))
+ nil)))
+ (set-input-meta-mode 8 terminal))
+ (when saved-meta-mode
+ (set-input-meta-mode (car saved-meta-mode) terminal)
+ (set-terminal-parameter terminal
+ 'keyboard-coding-saved-meta-mode
+ nil)))
;; Avoid end-of-line conversion.
(setq coding-system
(coding-system-change-eol-conversion coding-system 'unix)))
(when saved-meta-mode
- (set-input-meta-mode (car saved-meta-mode))
+ (set-input-meta-mode (car saved-meta-mode) terminal)
(set-terminal-parameter terminal
'keyboard-coding-saved-meta-mode
nil))))
:type '(coding-system :tag "Coding system")
:link '(info-link "(emacs)Terminal Coding")
:link '(info-link "(emacs)Unibyte Mode")
- :set (lambda (symbol value)
+ :set (lambda (_symbol value)
;; Don't load encoded-kb unnecessarily.
(if (or value (boundp 'encoded-kbd-setup-display))
(set-keyboard-coding-system value)
ARG is a list of coding categories ordered by priority.
This function is provided for backward compatibility."
+ (declare (obsolete set-coding-system-priority "23.1"))
(apply 'set-coding-system-priority
(mapcar #'(lambda (x) (symbol-value x)) arg)))
-(make-obsolete 'set-coding-priority 'set-coding-system-priority "23.1")
;;; X selections
;; Return an alist of CHARSET vs CTEXT-USAGE-INFO generated from
;; `ctext-non-standard-encodings' and a list specified by the key
-;; `ctext-non-standard-encodings' for the currrent language
+;; `ctext-non-standard-encodings' for the current language
;; environment. CTEXT-USAGE-INFO is one of the element of
;; `ctext-non-standard-encodings-alist' or nil. In the former case, a
;; character in CHARSET is encoded using extended segment. In the
;; latter case, a character in CHARSET is encoded using normal ISO2022
;; designation sequence. If a character is not in any of CHARSETs, it
-;; is encoded using UTF-8 encoding extention.
+;; is encoded using UTF-8 encoding extension.
(defun ctext-non-standard-encodings-table ()
(let* ((table (append ctext-non-standard-encodings
(insert 2)))
;; Encode this range as characters in CHARSET.
(put-text-property last-pos (point) 'charset charset))
- ;; Encode this range using UTF-8 encoding extention.
+ ;; Encode this range using UTF-8 encoding extension.
(encode-coding-region last-pos (point) 'mule-utf-8)
(save-excursion
(goto-char last-pos)
;;; FILE I/O
+;; TODO many elements of this list are also in inhibit-local-variables-regexps.
(defcustom auto-coding-alist
;; .exe and .EXE are added to support archive-mode looking at DOS
;; self-extracting exe archives.
ARC\\|ZIP\\|LZH\\|LHA\\|ZOO\\|[JEW]AR\\|XPI\\|RAR\\|7Z\\)\\'"
. no-conversion-multibyte)
("\\.\\(exe\\|EXE\\)\\'" . no-conversion)
- ("\\.\\(sx[dmicw]\\|odt\\|tar\\|tgz\\)\\'" . no-conversion)
+ ("\\.\\(sx[dmicw]\\|odt\\|tar\\|t[bg]z\\)\\'" . no-conversion)
("\\.\\(gz\\|Z\\|bz\\|bz2\\|xz\\|gpg\\)\\'" . no-conversion)
("\\.\\(jpe?g\\|png\\|gif\\|tiff?\\|p[bpgn]m\\)\\'" . no-conversion)
("\\.pdf\\'" . no-conversion)
- ("/#[^/]+#\\'" . emacs-mule)))
+ ("/#[^/]+#\\'" . utf-8-emacs-unix)))
"Alist of filename patterns vs corresponding coding systems.
Each element looks like (REGEXP . CODING-SYSTEM).
A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading.
:type '(repeat function))
(defvar set-auto-coding-for-load nil
- "Non-nil means look for `load-coding' property instead of `coding'.
-This is used for loading and byte-compiling Emacs Lisp files.")
+ "Non-nil means respect a \"unibyte: t\" entry in file local variables.
+Emacs binds this variable to t when loading or byte-compiling Emacs Lisp
+files.")
(defun auto-coding-alist-lookup (filename)
"Return the coding system specified by `auto-coding-alist' for FILENAME."
succeed, it checks to see if any function in `auto-coding-functions'
gives a match.
-If a coding system is specifed, the return value is a cons
+If a coding system is specified, the return value is a cons
\(CODING . SOURCE), where CODING is the specified coding system and
SOURCE is a symbol `auto-coding-alist', `auto-coding-regexp-alist',
`:coding', or `auto-coding-functions' indicating by what CODING is
(head-end (+ head-start (min size 1024)))
(tail-start (+ head-start (max (- size 3072) 0)))
(tail-end (+ head-start size))
- coding-system head-found tail-found pos char-trans)
+ coding-system head-found tail-found char-trans)
;; Try a short cut by searching for the string "coding:"
;; and for "unibyte:" at the head and tail of SIZE bytes.
(setq head-found (or (search-forward "coding:" head-end t)
(re-search-forward
"\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
head-end t))
+ (display-warning 'mule
+ (format "\"unibyte: t\" (in %s) is obsolete; \
+use \"coding: 'raw-text\" instead."
+ (file-relative-name filename))
+ :warning)
(setq coding-system 'raw-text))
(when (and (not coding-system)
(re-search-forward
(goto-char pos)
(when (and set-auto-coding-for-load
(re-search-forward re-unibyte tail-end t))
+ (display-warning 'mule "`unibyte: t' is obsolete; \
+use \"coding: 'raw-text\" instead." :warning)
(setq coding-system 'raw-text))
(when (and (not coding-system)
(re-search-forward re-coding tail-end t))
(let ((funcs auto-coding-functions)
(coding-system nil))
(while (and funcs (not coding-system))
- (setq coding-system (condition-case e
- (save-excursion
- (goto-char (point-min))
- (funcall (pop funcs) size))
- (error nil))))
+ (setq coding-system (ignore-errors
+ (save-excursion
+ (goto-char (point-min))
+ (funcall (pop funcs) size)))))
(if coding-system
(cons coding-system 'auto-coding-functions)))))
(setq ignore-relative-composition
(make-char-table 'ignore-relative-composition))
-(make-obsolete 'set-char-table-default
- "generic characters no longer exist." "23.1")
-
;;; Built-in auto-coding-functions:
(defun sgml-xml-auto-coding-function (size)
;; (allowing for whitespace at bob). Note: 'DOCTYPE NETSCAPE' is
;; useful for Mozilla bookmark files.
(when (and (re-search-forward "\\`[[:space:]\n]*\\(<!doctype[[:space:]\n]+\\(html\\|netscape\\)\\|<html\\)" size t)
- (re-search-forward "<meta\\s-+http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']text/\\sw+;\\s-*charset=\\(.+?\\)[\"']" size t))
- (let* ((match (match-string 1))
+ (re-search-forward "<meta\\s-+\\(http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']text/\\sw+;\\s-*\\)?charset=[\"']?\\(.+?\\)[\"'\\s-/>]" size t))
+ (let* ((match (match-string 2))
(sym (intern (downcase match))))
(if (coding-system-p sym)
sym