(message "Loading %s (source)..." file)
(message "Loading %s..." file)))
(when purify-flag
- (setq preloaded-file-list (cons file preloaded-file-list)))
+ (push file preloaded-file-list))
(unwind-protect
(let ((load-file-name fullname)
(set-auto-coding-for-load t)
"Return character specified by coded character set CCS and CODE-POINT in it.
Return nil if such a character is not supported.
Currently the only supported coded character set is `ucs' (ISO/IEC
-10646: Universal Multi-Octet Coded Character Set).
+10646: Universal Multi-Octet Coded Character Set), and the result is
+translated through the translation-table named
+`utf-translation-table-for-decode' or the translation-hash-table named
+`utf-subst-table-for-decode'.
Optional argument RESTRICTION specifies a way to map the pair of CCS
-and CODE-POINT to a character. Currently not supported and just ignored."
- (cond ((eq ccs 'ucs)
- (cond ((< code-point 160)
- code-point)
- ((< code-point 256)
- (make-char 'latin-iso8859-1 code-point))
- ((< code-point #x2500)
- (setq code-point (- code-point #x0100))
- (make-char 'mule-unicode-0100-24ff
- (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
- ((< code-point #x3400)
- (setq code-point (- code-point #x2500))
- (make-char 'mule-unicode-2500-33ff
- (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
- ((and (>= code-point #xe000) (< code-point #x10000))
- (setq code-point (- code-point #xe000))
- (make-char 'mule-unicode-e000-ffff
- (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
- ))))
+and CODE-POINT to a character. Currently not supported and just ignored."
+ (cond
+ ((eq ccs 'ucs)
+ (or (utf-lookup-subst-table-for-decode code-point)
+ (let ((c (cond
+ ((< code-point 160)
+ code-point)
+ ((< code-point 256)
+ (make-char 'latin-iso8859-1 code-point))
+ ((< code-point #x2500)
+ (setq code-point (- code-point #x0100))
+ (make-char 'mule-unicode-0100-24ff
+ (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
+ ((< code-point #x3400)
+ (setq code-point (- code-point #x2500))
+ (make-char 'mule-unicode-2500-33ff
+ (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
+ ((and (>= code-point #xe000) (< code-point #x10000))
+ (setq code-point (- code-point #xe000))
+ (make-char 'mule-unicode-e000-ffff
+ (+ (/ code-point 96) 32)
+ (+ (% code-point 96) 32))))))
+ (when c
+ (or (aref (get 'utf-translation-table-for-decode
+ 'translation-table) c)
+ c)))))))
(defun encode-char (char ccs &optional restriction)
"Return code-point in coded character set CCS that corresponds to CHAR.
Return nil if CHAR is not included in CCS.
Currently the only supported coded character set is `ucs' (ISO/IEC
-10646: Universal Multi-Octet Coded Character Set).
+10646: Universal Multi-Octet Coded Character Set), and CHAR is first
+translated through the translation-table named
+`utf-translation-table-for-encode' or the translation-hash-table named
+`utf-subst-table-for-encode'.
CHAR should be in one of these charsets:
ascii, latin-iso8859-1, mule-unicode-0100-24ff, mule-unicode-2500-33ff,
Optional argument RESTRICTION specifies a way to map CHAR to a
code-point in CCS. Currently not supported and just ignored."
(let* ((split (split-char char))
- (charset (car split)))
+ (charset (car split))
+ trans)
(cond ((eq ccs 'ucs)
- (cond ((eq charset 'ascii)
- char)
- ((eq charset 'latin-iso8859-1)
- (+ (nth 1 split) 128))
- ((eq charset 'mule-unicode-0100-24ff)
- (+ #x0100 (+ (* (- (nth 1 split) 32) 96)
- (- (nth 2 split) 32))))
- ((eq charset 'mule-unicode-2500-33ff)
- (+ #x2500 (+ (* (- (nth 1 split) 32) 96)
- (- (nth 2 split) 32))))
- ((eq charset 'mule-unicode-e000-ffff)
- (+ #xe000 (+ (* (- (nth 1 split) 32) 96)
- (- (nth 2 split) 32))))
- ((eq charset 'eight-bit-control)
- char))))))
+ (or (utf-lookup-subst-table-for-encode char)
+ (let ((table (get 'utf-translation-table-for-encode
+ 'translation-table)))
+ (setq trans (aref table char))
+ (if trans
+ (setq split (split-char trans)
+ charset (car split)))
+ (cond ((eq charset 'ascii)
+ (or trans char))
+ ((eq charset 'latin-iso8859-1)
+ (+ (nth 1 split) 128))
+ ((eq charset 'mule-unicode-0100-24ff)
+ (+ #x0100 (+ (* (- (nth 1 split) 32) 96)
+ (- (nth 2 split) 32))))
+ ((eq charset 'mule-unicode-2500-33ff)
+ (+ #x2500 (+ (* (- (nth 1 split) 32) 96)
+ (- (nth 2 split) 32))))
+ ((eq charset 'mule-unicode-e000-ffff)
+ (+ #xe000 (+ (* (- (nth 1 split) 32) 96)
+ (- (nth 2 split) 32))))
+ ((eq charset 'eight-bit-control)
+ char))))))))
\f
;; Coding system stuff
coding system whose eol-type is N."
(get coding-system 'eol-type))
+(defun coding-system-eol-type-mnemonic (coding-system)
+ "Return the string indicating end-of-line format of CODING-SYSTEM."
+ (let* ((eol-type (coding-system-eol-type coding-system))
+ (val (cond ((eq eol-type 0) eol-mnemonic-unix)
+ ((eq eol-type 1) eol-mnemonic-dos)
+ ((eq eol-type 2) eol-mnemonic-mac)
+ (t eol-mnemonic-undecided))))
+ (if (stringp val)
+ val
+ (char-to-string val))))
+
(defun coding-system-lessp (x y)
(cond ((eq x 'no-conversion) t)
((eq y 'no-conversion) nil)
(and (not (> (downcase c1) (downcase c2)))
(< c1 c2)))))))
+(defun coding-system-equal (coding-system-1 coding-system-2)
+ "Return t if and only if CODING-SYSTEM-1 and CODING-SYSTEM-2 are identical.
+Two coding systems are identical if two symbols are equal
+or one is an alias of the other."
+ (or (eq coding-system-1 coding-system-2)
+ (and (equal (coding-system-spec coding-system-1)
+ (coding-system-spec coding-system-2))
+ (let ((eol-type-1 (coding-system-eol-type coding-system-1))
+ (eol-type-2 (coding-system-eol-type coding-system-2)))
+ (or (eq eol-type-1 eol-type-2)
+ (and (vectorp eol-type-1) (vectorp eol-type-2)))))))
+
(defun add-to-coding-system-list (coding-system)
"Add CODING-SYSTEM to `coding-system-list' while keeping it sorted."
(if (or (null coding-system-list)
(make-char charset (+ i start) start)
(make-char charset (+ i start) (+ start chars -1)))))))
-(defun register-char-codings (coding-system safe-chars)
- "Add entries for CODING-SYSTEM to `char-coding-system-table'.
-If SAFE-CHARS is a char-table, its non-nil entries specify characters
-which CODING-SYSTEM encodes safely. If SAFE-CHARS is t, register
-CODING-SYSTEM as a general one which can encode all characters."
- (let ((general (char-table-extra-slot char-coding-system-table 0))
- ;; Charsets which have some members in the table, but not all
- ;; of them (i.e. not just a generic character):
- (partials (char-table-extra-slot char-coding-system-table 1)))
- (if (eq safe-chars t)
- (or (memq coding-system general)
- (set-char-table-extra-slot char-coding-system-table 0
- (cons coding-system general)))
- (map-char-table
- (lambda (key val)
- (if (and (>= key 128) val)
- (let ((codings (aref char-coding-system-table key))
- (charset (char-charset key)))
- (unless (memq coding-system codings)
- (if (and (generic-char-p key)
- (memq charset partials))
- ;; The generic char would clobber individual
- ;; entries already in the table. First save the
- ;; separate existing entries for all chars of the
- ;; charset (with the generic entry added, if
- ;; necessary).
- (let (entry existing)
- (map-charset-chars
- (lambda (start end)
- (while (<= start end)
- (setq entry (aref char-coding-system-table start))
- (when entry
- (push (cons
- start
- (if (memq coding-system entry)
- entry
- (cons coding-system entry)))
- existing))
- (setq start (1+ start))))
- charset)
- ;; Update the generic entry.
- (aset char-coding-system-table key
- (cons coding-system codings))
- ;; Override with the saved entries.
- (dolist (elt existing)
- (aset char-coding-system-table (car elt) (cdr elt))))
- (aset char-coding-system-table key
- (cons coding-system codings))
- (unless (or (memq charset partials)
- (generic-char-p key))
- (push charset partials)))))))
- safe-chars)
- (set-char-table-extra-slot char-coding-system-table 1 partials))))
+(defalias 'register-char-codings 'ignore "")
+(make-obsolete 'register-char-codings
+ "it exists just for backward compatibility, and does nothing."
+ "21.3")
+(defconst char-coding-system-table nil
+ "This is an obsolete variable.
+It exists just for backward compatibility, and the value is always nil.")
(defun make-subsidiary-coding-system (coding-system)
"Make subsidiary coding systems (eol-type variants) of CODING-SYSTEM."
The value is a symbol whose name is the `MIME-charset' parameter of
the coding system.
+ o mime-text-unsuitable
+
+ A non-nil value means the `mime-charset' property names a charset
+ which is unsuitable for the top-level media type \"text\".
+
o valid-codes (meaningful only for a coding system based on CCL)
The value is a list to indicate valid byte ranges of the encoded
In the former case, the integer value is a valid byte code. In the
latter case, the integers specify the range of valid byte codes.
+ o composition (meaningful only when TYPE is 0 or 2)
+
+ If the value is non-nil, the coding system preserves composition
+ information.
+
These properties are set in PLIST, a property list. This function
also sets properties `coding-category' and `alias-coding-systems'
automatically.
(if (and (symbolp val)
(get val 'translation-table))
(setq safe-chars (get val 'translation-table)))
- (register-char-codings coding-system safe-chars)
(setq val safe-chars)))
(plist-put plist prop val)))
;; The property `coding-category' may have been set differently
(error "Invalid EOL-TYPE spec:%S" eol-type))))
(put coding-system 'eol-type eol-type)
+ (define-coding-system-internal coding-system)
+
;; At last, register CODING-SYSTEM in `coding-system-list' and
;; `coding-system-alist'.
(add-to-coding-system-list coding-system)
coding-system)
+(put 'safe-chars 'char-table-extra-slots 0)
+
(defun define-coding-system-alias (alias coding-system)
"Define ALIAS as an alias for coding system CODING-SYSTEM."
(put alias 'coding-system (coding-system-spec coding-system))
- (nconc (coding-system-get alias 'alias-coding-systems) (list alias))
(add-to-coding-system-list alias)
(setq coding-system-alist (cons (list (symbol-name alias))
coding-system-alist))
(let ((eol-type (coding-system-eol-type coding-system)))
(if (vectorp eol-type)
- (put alias 'eol-type (make-subsidiary-coding-system alias))
+ (progn
+ (nconc (coding-system-get alias 'alias-coding-systems) (list alias))
+ (put alias 'eol-type (make-subsidiary-coding-system alias)))
(put alias 'eol-type eol-type))))
-(defun set-buffer-file-coding-system (coding-system &optional force)
+(defun merge-coding-systems (first second)
+ "Fill in any unspecified aspects of coding system FIRST from SECOND.
+Return the resulting coding system."
+ (let ((base (coding-system-base second))
+ (eol (coding-system-eol-type second)))
+ ;; If FIRST doesn't specify text conversion, merge with that of SECOND.
+ (if (eq (coding-system-base first) 'undecided)
+ (setq first (coding-system-change-text-conversion first base)))
+ ;; If FIRST doesn't specify eol conversion, merge with that of SECOND.
+ (if (and (vectorp (coding-system-eol-type first))
+ (numberp eol) (>= eol 0) (<= eol 2))
+ (setq first (coding-system-change-eol-conversion
+ first eol)))
+ first))
+
+(defun autoload-coding-system (symbol form)
+ "Define SYMBOL as a coding-system that is defined on demand.
+
+FROM is a form to evaluate to define the coding-system."
+ (put symbol 'coding-system-define-form form)
+ (setq coding-system-alist (cons (list (symbol-name symbol))
+ coding-system-alist)))
+
+(defun set-buffer-file-coding-system (coding-system &optional force nomodify)
"Set the file coding-system of the current buffer to CODING-SYSTEM.
This means that when you save the buffer, it will be converted
according to CODING-SYSTEM. For a list of possible values of CODING-SYSTEM,
use \\[list-coding-systems].
-If the buffer's previous file coding-system value specifies end-of-line
-conversion, and CODING-SYSTEM does not specify one, CODING-SYSTEM is
-merged with the already-specified end-of-line conversion.
-
-If the buffer's previous file coding-system value specifies text
-conversion, and CODING-SYSTEM does not specify one, CODING-SYSTEM is
-merged with the already-specified text conversion.
-
-However, if the optional prefix argument FORCE is non-nil, then
-CODING-SYSTEM is used exactly as specified.
+If CODING-SYSTEM leaves the text conversion unspecified, or if it
+leaves the end-of-line conversion unspecified, FORCE controls what to
+do. If FORCE is nil, get the unspecified aspect (or aspects) from the
+buffer's previous `buffer-file-coding-system' value (if it is
+specified there). Otherwise, leave it unspecified.
This marks the buffer modified so that the succeeding \\[save-buffer]
surely saves the buffer with CODING-SYSTEM. From a program, if you
-don't want to mark the buffer modified, just set the variable
-`buffer-file-coding-system' directly."
- (interactive "zCoding system for visited file (default, nil): \nP")
+don't want to mark the buffer modified, specify t for NOMODIFY.
+If you know exactly what coding system you want to use,
+just set the variable `buffer-file-coding-system' directly."
+ (interactive "zCoding system for saving file (default, nil): \nP")
(check-coding-system coding-system)
(if (and coding-system buffer-file-coding-system (null force))
- (let ((base (coding-system-base buffer-file-coding-system))
- (eol (coding-system-eol-type buffer-file-coding-system)))
- ;; If CODING-SYSTEM doesn't specify text conversion, merge
- ;; with that of buffer-file-coding-system.
- (if (eq (coding-system-base coding-system) 'undecided)
- (setq coding-system (coding-system-change-text-conversion
- coding-system base)))
- ;; If CODING-SYSTEM doesn't specify eol conversion, merge with
- ;; that of buffer-file-coding-system.
- (if (and (vectorp (coding-system-eol-type coding-system))
- (numberp eol) (>= eol 0) (<= eol 2))
- (setq coding-system (coding-system-change-eol-conversion
- coding-system eol)))))
+ (setq coding-system
+ (merge-coding-systems coding-system buffer-file-coding-system)))
(setq buffer-file-coding-system coding-system)
- (set-buffer-modified-p t)
+ ;; This is in case of an explicit call. Normally, `normal-mode' and
+ ;; `set-buffer-major-mode-hook' take care of setting the table.
+ (if (fboundp 'ucs-set-table-for-input) ; don't lose when building
+ (ucs-set-table-for-input))
+ (unless nomodify
+ (set-buffer-modified-p t))
(force-mode-line-update))
+(defun revert-buffer-with-coding-system (coding-system &optional force)
+ "Visit the current buffer's file again using coding system CODING-SYSTEM.
+For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
+
+If CODING-SYSTEM leaves the text conversion unspecified, or if it
+leaves the end-of-line conversion unspecified, FORCE controls what to
+do. If FORCE is nil, get the unspecified aspect (or aspects) from the
+buffer's previous `buffer-file-coding-system' value (if it is
+specified there). Otherwise, determine it from the file contents as
+usual for visiting a file."
+ (interactive "zCoding system for visited file (default, nil): \nP")
+ (check-coding-system coding-system)
+ (if (and coding-system buffer-file-coding-system (null force))
+ (setq coding-system
+ (merge-coding-systems coding-system buffer-file-coding-system)))
+ (let ((coding-system-for-read coding-system))
+ (revert-buffer)))
+
+(defun set-file-name-coding-system (coding-system)
+ "Set coding system for decoding and encoding file names to CODING-SYSTEM.
+It actually just set the variable `file-name-coding-system' (which
+see) to CODING-SYSTEM."
+ (interactive "zCoding system for file names (default, nil): ")
+ (check-coding-system coding-system)
+ (setq file-name-coding-system coding-system))
+
(defvar default-terminal-coding-system nil
"Default value for the terminal coding system.
This is normally set according to the selected language environment.
8-bit characters, you will have to use ESC to type Meta characters.
See Info node `Specify Coding' and Info node `Single-Byte Character Support'.
+On non-windowing terminals, this is set from the locale by default.
+
Setting this variable directly does not take effect;
-use either M-x customize or \\[set-keyboard-coding-system]."
+use either \\[customize] or \\[set-keyboard-coding-system]."
:type '(coding-system :tag "Coding system")
:link '(info-link "(emacs)Specify Coding")
:link '(info-link "(emacs)Single-Byte Character Support")
(if (or value (boundp 'encoded-kbd-mode))
(set-keyboard-coding-system value)
(set-default 'keyboard-coding-system nil))) ; must initialize
- :version "21.1"
+ :version "21.4"
:group 'keyboard
:group 'mule)
(defalias 'set-clipboard-coding-system 'set-selection-coding-system)
(defun set-selection-coding-system (coding-system)
- "Make CODING-SYSTEM used for communicating with other X clients .
+ "Make CODING-SYSTEM used for communicating with other X clients.
When sending or receiving text via cut_buffer, selection, and clipboard,
the text is encoded or decoded by CODING-SYSTEM."
(interactive "zCoding system for X selection: ")
;;; X selections
-(defvar non-standard-icccm-encodings-alist
- '(("ISO8859-15" . latin-iso8859-15)
- ("ISO8859-14" . latin-iso8859-14)
- ("KOI8-R" . koi8-r)
- ("BIG5-0" . big5))
- "Alist of font charset names defined by XLFD, and the corresponding Emacs
-charsets or coding systems.")
+(defvar ctext-non-standard-encodings-alist
+ '(("big5-0" big5 2 (chinese-big5-1 chinese-big5-2))
+ ("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
+ ("ISO8859-15" iso-8859-15 1 latin-iso8859-15))
+ "Alist of non-standard encoding names vs the corresponding usages in CTEXT.
+
+It controls how extended segments of a compound text are handled
+by the coding system `compound-text-with-extensions'.
+
+Each element has the form (ENCODING-NAME CODING-SYSTEM N-OCTET CHARSET).
+
+ENCODING-NAME is an encoding name of an \"extended segments\".
+
+CODING-SYSTEM is the coding-system to encode (or decode) the
+characters into (or from) the extended segment.
+
+N-OCTET is the number of octets (bytes) that encodes a character
+in the segment. It can be 0 (meaning the number of octets per
+character is variable), 1, 2, 3, or 4.
+
+CHARSET is a charater set containing characters that are encoded
+in the segment. It can be a list of character sets. It can also
+be a char-table, in which case characters that have non-nil value
+in the char-table are the target.
+
+On decoding CTEXT, all encoding names listed here are recognized.
+
+On encoding CTEXT, encoding names in the variable
+`ctext-non-standard-encodings' (which see) and in the information
+listed for the current language environment under the key
+`ctext-non-standard-encodings' are used.")
+
+(defvar ctext-non-standard-encodings
+ '("big5-0")
+ "List of non-standard encoding names used in extended segments of CTEXT.
+Each element must be one of the names listed in the variable
+`ctext-non-standard-encodings-alist' (which see).")
+
+(defvar ctext-non-standard-encodings-regexp
+ (string-to-multibyte
+ (concat
+ ;; For non-standard encodings.
+ "\\(\e%/[0-4][\200-\377][\200-\377]\\([^\002]+\\)\002\\)"
+ "\\|"
+ ;; For UTF-8 encoding.
+ "\\(\e%G[^\e]*\e%@\\)")))
;; Functions to support "Non-Standard Character Set Encodings" defined
-;; by the ICCCM spec. We support that by converting the leading
-;; sequence of the ``extended segment'' to the corresponding ISO-2022
-;; sequences (if the leading sequence names an Emacs charset), or decode
-;; the segment (if it names a coding system). Encoding does the reverse.
+;; by the COMPOUND-TEXT spec. They also support "The UTF-8 encoding"
+;; described in the section 7 of the documentation of COMPOUND-TEXT
+;; distributed with XFree86.
+
(defun ctext-post-read-conversion (len)
"Decode LEN characters encoded as Compound Text with Extended Segments."
- (buffer-disable-undo) ; minimize consing due to insertions and deletions
- (narrow-to-region (point) (+ (point) len))
(save-match-data
- (let ((pt (point-marker))
- (oldpt (point-marker))
- (newpt (make-marker))
- (modified-p (buffer-modified-p))
- (case-fold-search nil)
- last-coding-system-used
- encoding textlen chset)
- (while (re-search-forward
- "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002"
- nil 'move)
- (set-marker newpt (point))
- (set-marker pt (match-beginning 0))
- (setq encoding (match-string 3))
- (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
- (- (aref (match-string 2) 1) 128))
- (1+ (length encoding))))
- (setq
- chset (cdr (assoc-ignore-case encoding
- non-standard-icccm-encodings-alist)))
- (cond ((null chset)
- ;; This charset is not supported--leave this extended
- ;; segment unaltered and skip over it.
- (goto-char (+ (point) textlen)))
- ((charsetp chset)
- ;; If it's a charset, replace the leading escape sequence
- ;; with a standard ISO-2022 sequence. We will decode all
- ;; such segments later, in one go, when we exit the loop
- ;; or find an extended segment that names a coding
- ;; system, not a charset.
- (replace-match
- (concat "\\1"
- (if (= 0 (charset-iso-graphic-plane chset))
- ;; GL charsets
- (if (= 1 (charset-dimension chset)) "(" "$(")
- ;; GR charsets
- (if (= 96 (charset-chars chset))
- "-"
- (if (= 1 (charset-dimension chset)) ")" "$)")))
- (string (charset-iso-final-char chset)))
- t)
- (goto-char (+ (point) textlen)))
- ((coding-system-p chset)
- ;; If it's a coding system, we need to decode the segment
- ;; right away. But first, decode what we've skipped
- ;; across until now.
- (when (> pt oldpt)
- (decode-coding-region oldpt pt 'ctext-no-compositions))
- (delete-region pt newpt)
- (set-marker newpt (+ newpt textlen))
- (decode-coding-region pt newpt chset)
- (goto-char newpt)
- (set-marker oldpt newpt))))
- ;; Decode what's left.
- (when (> (point) oldpt)
- (decode-coding-region oldpt (point) 'ctext-no-compositions))
- ;; This buffer started as unibyte, because the string we get from
- ;; the X selection is a unibyte string. We must now make it
- ;; multibyte, so that the decoded text is inserted as multibyte
- ;; into its buffer.
- (set-buffer-multibyte t)
- (set-buffer-modified-p modified-p)
- (- (point-max) (point-min)))))
-
-;; If you add charsets here, be sure to modify the regexp used by
-;; ctext-pre-write-conversion to look up non-standard charsets.
-(defvar non-standard-designations-alist
- '(("$(0" . (big5 "big5-0" 2))
- ("$(1" . (big5 "big5-0" 2))
- ("-V" . (t "iso8859-10" 1))
- ("-Y" . (t "iso8859-13" 1))
- ("-_" . (t "iso8859-14" 1))
- ("-b" . (t "iso8859-15" 1))
- ("-f" . (t "iso8859-16" 1)))
- "Alist of ctext control sequences that introduce character sets which
-are not in the list of approved ICCCM encodings, and the corresponding
-coding system, identifier string, and number of octets per encoded
-character.
-
-Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ
-is the control sequence (sans the leading ESC) that introduces the character
-set in the text encoded by compound-text. ENCODING is a coding system
-symbol; if it is t, it means that the ctext coding system already encodes
-the text correctly, and only the leading control sequence needs to be altered.
-If ENCODING is a coding system, we need to re-encode the text with that
-coding system. CHARSET is the ICCCM name of the charset we need to put into
-the leading control sequence. NOCTETS is the number of octets (bytes) that
-encode each character in this charset. NOCTETS can be 0 (meaning the number
-of octets per character is variable), 1, 2, 3, or 4.")
+ (save-restriction
+ (let ((case-fold-search nil)
+ (in-workbuf (string= (buffer-name) " *code-converting-work*"))
+ last-coding-system-used
+ pos bytes)
+ (or in-workbuf
+ (narrow-to-region (point) (+ (point) len)))
+ (if in-workbuf
+ (set-buffer-multibyte t))
+ (while (re-search-forward ctext-non-standard-encodings-regexp
+ nil 'move)
+ (setq pos (match-beginning 0))
+ (if (match-beginning 1)
+ ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
+ (let* ((M (char-after (+ pos 4)))
+ (L (char-after (+ pos 5)))
+ (encoding (match-string 2))
+ (encoding-info (assoc-string
+ encoding
+ ctext-non-standard-encodings-alist t))
+ (coding (if encoding-info
+ (nth 1 encoding-info)
+ (setq encoding (intern (downcase encoding)))
+ (and (coding-system-p encoding)
+ encoding))))
+ (setq bytes (- (+ (* (- M 128) 128) (- L 128))
+ (- (point) (+ pos 6))))
+ (when coding
+ (delete-region pos (point))
+ (forward-char bytes)
+ (decode-coding-region (- (point) bytes) (point) coding)))
+ ;; ESC % G --UTF-8-BYTES-- ESC % @
+ (delete-char -3)
+ (delete-region pos (+ pos 3))
+ (decode-coding-region pos (point) 'utf-8))))
+ (goto-char (point-min))
+ (- (point-max) (point)))))
+
+;; Return a char table of extended segment usage for each character.
+;; Each value of the char table is nil, one of the elements of
+;; `ctext-non-standard-encodings-alist', or the symbol `utf-8'.
+
+(defun ctext-non-standard-encodings-table ()
+ (let ((table (make-char-table 'translation-table)))
+ (aset table (make-char 'mule-unicode-0100-24ff) 'utf-8)
+ (aset table (make-char 'mule-unicode-2500-33ff) 'utf-8)
+ (aset table (make-char 'mule-unicode-e000-ffff) 'utf-8)
+ (dolist (encoding (reverse
+ (append
+ (get-language-info current-language-environment
+ 'ctext-non-standard-encodings)
+ ctext-non-standard-encodings)))
+ (let* ((slot (assoc encoding ctext-non-standard-encodings-alist))
+ (charset (nth 3 slot)))
+ (if charset
+ (cond ((charsetp charset)
+ (aset table (make-char charset) slot))
+ ((listp charset)
+ (dolist (elt charset)
+ (aset table (make-char elt) slot)))
+ ((char-table-p charset)
+ (map-char-table #'(lambda (k v)
+ (if (and v (> k 128)) (aset table k slot)))
+ charset))))))
+ table))
(defun ctext-pre-write-conversion (from to)
"Encode characters between FROM and TO as Compound Text w/Extended Segments.
If FROM is a string, or if the current buffer is not the one set up for us
-by run_pre_post_conversion_on_str, generate a new temp buffer, insert the
+by encode-coding-string, generate a new temp buffer, insert the
text, and convert it in the temporary buffer. Otherwise, convert in-place."
- (cond ((and (string= (buffer-name) " *code-converting-work*")
- (not (stringp from)))
- ; Minimize consing due to subsequent insertions and deletions.
- (buffer-disable-undo)
- (narrow-to-region from to))
- (t
- (let ((buf (current-buffer)))
- (set-buffer (generate-new-buffer " *temp"))
- (buffer-disable-undo)
- (if (stringp from)
- (insert from)
- (insert-buffer-substring buf from to))
- (setq from (point-min) to (point-max)))))
- (encode-coding-region from to 'ctext-no-compositions)
- ;; Replace ISO-2022 charset designations with extended segments, for
- ;; those charsets that are not part of the official X registry.
(save-match-data
- (goto-char (point-min))
- (let ((newpt (make-marker))
- (case-fold-search nil)
- pt desig encode-info encoding chset noctets textlen)
- (set-buffer-multibyte nil)
- ;; The regexp below finds the leading sequences for big5 and
- ;; iso8859-1[03-6] charsets.
- (while (re-search-forward "\e\\(\$([01]\\|-[VY_bf]\\)" nil 'move)
- (setq desig (match-string 1)
- pt (point-marker)
- encode-info (cdr (assoc desig non-standard-designations-alist))
- encoding (car encode-info)
- chset (cadr encode-info)
- noctets (car (cddr encode-info)))
- (skip-chars-forward "^\e")
- (set-marker newpt (point))
- (cond
- ((eq encoding t) ; only the leading sequence needs to be changed
- (setq textlen (+ (- newpt pt) (length chset) 1))
- ;; Generate the ICCCM control sequence for an extended segment.
- (replace-match (format "\e%%/%d%c%c%s\ 2"
- noctets
- (+ (/ textlen 128) 128)
- (+ (% textlen 128) 128)
- chset)
- t t))
- ((coding-system-p encoding) ; need to recode the entire segment...
- (set-marker pt (match-beginning 0))
- (decode-coding-region pt newpt 'ctext-no-compositions)
- (set-buffer-multibyte t)
- (encode-coding-region pt newpt encoding)
- (set-buffer-multibyte nil)
- (setq textlen (+ (- newpt pt) (length chset) 1))
- (goto-char pt)
- (insert (format "\e%%/%d%c%c%s\ 2"
- noctets
- (+ (/ textlen 128) 128)
- (+ (% textlen 128) 128)
- chset))))
- (goto-char newpt))))
- (set-buffer-multibyte t)
+ ;; Setup a working buffer if necessary.
+ (cond ((stringp from)
+ (let ((buf (current-buffer)))
+ (set-buffer (generate-new-buffer " *temp"))
+ (set-buffer-multibyte (multibyte-string-p from))
+ (insert from)))
+ ((not (string= (buffer-name) " *code-converting-work*"))
+ (let ((buf (current-buffer))
+ (multibyte enable-multibyte-characters))
+ (set-buffer (generate-new-buffer " *temp"))
+ (set-buffer-multibyte multibyte)
+ (insert-buffer-substring buf from to))))
+
+ ;; Now we can encode the whole buffer.
+ (let ((encoding-table (ctext-non-standard-encodings-table))
+ last-coding-system-used
+ last-pos last-encoding-info
+ encoding-info end-pos)
+ (goto-char (setq last-pos (point-min)))
+ (setq end-pos (point-marker))
+ (while (re-search-forward "[^\000-\177]+" nil t)
+ ;; Found a sequence of non-ASCII characters.
+ (setq last-pos (match-beginning 0)
+ last-encoding-info (aref encoding-table (char-after last-pos)))
+ (set-marker end-pos (match-end 0))
+ (goto-char (1+ last-pos))
+ (catch 'tag
+ (while t
+ (setq encoding-info
+ (if (< (point) end-pos)
+ (aref encoding-table (following-char))))
+ (unless (eq last-encoding-info encoding-info)
+ (cond ((consp last-encoding-info)
+ ;; Encode the previous range using an extended
+ ;; segment.
+ (let ((encoding-name (car last-encoding-info))
+ (coding-system (nth 1 last-encoding-info))
+ (noctets (nth 2 last-encoding-info))
+ len)
+ (encode-coding-region last-pos (point) coding-system)
+ (setq len (+ (length encoding-name) 1
+ (- (point) last-pos)))
+ (save-excursion
+ (goto-char last-pos)
+ (insert (string-to-multibyte
+ (format "\e%%/%d%c%c%s\ 2"
+ noctets
+ (+ (/ len 128) 128)
+ (+ (% len 128) 128)
+ encoding-name))))))
+ ((eq last-encoding-info 'utf-8)
+ ;; Encode the previous range using UTF-8 encoding
+ ;; extention.
+ (encode-coding-region last-pos (point) 'mule-utf-8)
+ (save-excursion
+ (goto-char last-pos)
+ (insert "\e%G"))
+ (insert "\e%@")))
+ (setq last-pos (point)
+ last-encoding-info encoding-info))
+ (if (< (point) end-pos)
+ (forward-char 1)
+ (throw 'tag nil)))))
+ (set-marker end-pos nil)
+ (goto-char (point-min))))
;; Must return nil, as build_annotations_2 expects that.
nil)
sgml-html-meta-auto-coding-function)
"A list of functions which attempt to determine a coding system.
-Each function in this list should be written to operate on the current
-buffer, but should not modify it in any way. It should take one
-argument SIZE, past which it should not search. If a function
-succeeds in determining a coding system, it should return that coding
-system. Otherwise, it should return nil.
+Each function in this list should be written to operate on the
+current buffer, but should not modify it in any way. The buffer
+will contain undecoded text of parts of the file. Each function
+should take one argument, SIZE, which says how many
+characters (starting from point) it should look at.
+
+If one of these functions succeeds in determining a coding
+system, it should return that coding system. Otherwise, it
+should return nil.
-Any `coding:' tags present have a higher priority than the
-functions in this list."
+If a file has a `coding:' tag, that takes precedence over these
+functions, so they won't be called at all."
:group 'files
:group 'mule
:type '(repeat function))
(defun auto-coding-alist-lookup (filename)
"Return the coding system specified by `auto-coding-alist' for FILENAME."
(let ((alist auto-coding-alist)
- (case-fold-search (memq system-type '(vax-vms windows-nt ms-dos)))
+ (case-fold-search (memq system-type '(vax-vms windows-nt ms-dos cygwin)))
coding-system)
(while (and alist (not coding-system))
(if (string-match (car (car alist)) filename)
These bytes should include at least the first 1k of the file
and the last 3k of the file, but the middle may be omitted.
-It checks FILENAME against the variable `auto-coding-alist'. If
-FILENAME doesn't match any entries in the variable, it checks the
+The function checks FILENAME against the variable `auto-coding-alist'.
+If FILENAME doesn't match any entries in the variable, it checks the
contents of the current buffer following point against
`auto-coding-regexp-alist'. If no match is found, it checks for a
`coding:' tag in the first one or two lines following point. If no
-`coding:' tag is found, it checks for local variables list in the last
+`coding:' tag is found, it checks any local variables list in the last
3K bytes out of the SIZE bytes. Finally, if none of these methods
-succeed, then it checks to see if any function in
-`auto-coding-functions' gives a match.
+succeed, it checks to see if any function in `auto-coding-functions'
+gives a match.
-The return value is the specified coding system,
-or nil if nothing specified.
+The return value is the specified coding system, or nil if nothing is
+specified.
The variable `set-auto-coding-function' (which see) is set to this
function by default."
(setq coding-system nil)))))
;; If no coding: tag in the head, check the tail.
+ ;; Here we must pay attention to the case that the end-of-line
+ ;; is just "\r" and we can't use "^" nor "$" in regexp.
(when (and tail-found (not coding-system))
(goto-char tail-start)
- (search-forward "\n\^L" nil t)
+ (re-search-forward "[\r\n]\^L" nil t)
(if (re-search-forward
- "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
- ;; The prefix is what comes before "local variables:" in its
- ;; line. The suffix is what comes after "local variables:"
+ "[\r\n]\\([^[\r\n]*\\)[ \t]*Local Variables:[ \t]*\\([^\r\n]*\\)[\r\n]"
+ tail-end t)
+ ;; The prefix is what comes before "local variables:" in its
+ ;; line. The suffix is what comes after "local variables:"
;; in its line.
(let* ((prefix (regexp-quote (match-string 1)))
(suffix (regexp-quote (match-string 2)))
(re-coding
(concat
- "^" prefix
+ "[\r\n]" prefix
;; N.B. without the \n below, the regexp can
;; eat newlines.
- "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
- suffix "$"))
+ "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
+ suffix "[\r\n]"))
(re-unibyte
(concat
- "^" prefix
- "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
- suffix "$"))
+ "[\r\n]" prefix
+ "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
+ suffix "[\r\n]"))
(re-end
- (concat "^" prefix "[ \t]*End *:[ \t]*" suffix "$"))
- (pos (point)))
+ (concat "[\r\n]" prefix "[ \t]*End *:[ \t]*" suffix
+ "[\r\n]?"))
+ (pos (1- (point))))
+ (forward-char -1) ; skip back \r or \n.
(re-search-forward re-end tail-end 'move)
(setq tail-end (point))
(goto-char pos)
(setq set-auto-coding-function 'set-auto-coding)
-(defun after-insert-file-set-buffer-file-coding-system (inserted)
- "Set `buffer-file-coding-system' of current buffer after text is inserted."
+;; This variable is set in these two cases:
+;; (1) A file is read by a coding system specified explicitly.
+;; after-insert-file-set-coding sets this value to
+;; coding-system-for-read.
+;; (2) A buffer is saved.
+;; After writing, basic-save-buffer-1 sets this value to
+;; last-coding-system-used.
+;; This variable is used for decoding in revert-buffer.
+(defvar buffer-file-coding-system-explicit nil
+ "The file coding system explicitly specified for the current buffer.
+Internal use only.")
+(make-variable-buffer-local 'buffer-file-coding-system-explicit)
+(put 'buffer-file-coding-system-explicit 'permanent-local t)
+
+(defun after-insert-file-set-coding (inserted &optional visit)
+ "Set `buffer-file-coding-system' of current buffer after text is inserted.
+INSERTED is the number of characters that were inserted, as figured
+in the situation before this function. Return the number of characters
+inserted, as figured in the situation after. The two numbers can be
+different if the buffer has become unibyte.
+The optional second arg VISIT non-nil means that we are visiting a file."
+ (if (and visit
+ coding-system-for-read
+ (not (eq coding-system-for-read 'auto-save-coding)))
+ (setq buffer-file-coding-system-explicit coding-system-for-read))
(if last-coding-system-used
(let ((coding-system
(find-new-buffer-file-coding-system last-coding-system-used))
(modified-p (buffer-modified-p)))
(when coding-system
- (set-buffer-file-coding-system coding-system t)
+ ;; Tell set-buffer-file-coding-system not to mark the file
+ ;; as modified; we just read it, and it's supposed to be unmodified.
+ ;; Marking it modified would try to lock it, which would
+ ;; check the modtime, and we don't want to do that again now.
+ (set-buffer-file-coding-system coding-system t t)
(if (and enable-multibyte-characters
(or (eq coding-system 'no-conversion)
(eq (coding-system-type coding-system) 5))
(= (buffer-size) inserted))
;; For coding systems no-conversion and raw-text...,
;; edit the buffer as unibyte.
- (let ((pos-byte (position-bytes (+ (point) inserted))))
+ (let ((pos-marker (copy-marker (+ (point) inserted)))
+ ;; Prevent locking.
+ (buffer-file-name nil))
(set-buffer-multibyte nil)
- (setq inserted (- pos-byte (position-bytes (point))))))
+ (setq inserted (- pos-marker (point)))))
(set-buffer-modified-p modified-p))))
inserted)
-(add-hook 'after-insert-file-functions
- 'after-insert-file-set-buffer-file-coding-system)
-
;; The coding-spec and eol-type of coding-system returned is decided
;; independently in the following order.
;; 1. That of buffer-file-coding-system locally bound.
(cons (cons regexp coding-system)
network-coding-system-alist)))))))
+(defun decode-coding-inserted-region (from to filename
+ &optional visit beg end replace)
+ "Decode the region between FROM and TO as if it is read from file FILENAME.
+The idea is that the text between FROM and TO was just inserted somehow.
+Optional arguments VISIT, BEG, END, and REPLACE are the same as those
+of the function `insert-file-contents'.
+Part of the job of this function is setting `buffer-undo-list' appropriately."
+ (save-excursion
+ (save-restriction
+ (let ((coding coding-system-for-read)
+ undo-list-saved)
+ (if visit
+ ;; Temporarily turn off undo recording, if we're decoding the
+ ;; text of a visited file.
+ (setq buffer-undo-list t)
+ ;; Otherwise, if we can recognize the undo elt for the insertion,
+ ;; remove it and get ready to replace it later.
+ ;; In the mean time, turn off undo recording.
+ (let ((last (car-safe buffer-undo-list)))
+ (if (and (consp last) (eql (car last) from) (eql (cdr last) to))
+ (setq undo-list-saved (cdr buffer-undo-list)
+ buffer-undo-list t))))
+ (narrow-to-region from to)
+ (goto-char (point-min))
+ (or coding
+ (setq coding (funcall set-auto-coding-function
+ filename (- (point-max) (point-min)))))
+ (or coding
+ (setq coding (car (find-operation-coding-system
+ 'insert-file-contents
+ filename visit beg end replace))))
+ (if (coding-system-p coding)
+ (or enable-multibyte-characters
+ (setq coding
+ (coding-system-change-text-conversion coding 'raw-text)))
+ (setq coding nil))
+ (if coding
+ (decode-coding-region (point-min) (point-max) coding)
+ (setq last-coding-system-used coding))
+ ;; If we're decoding the text of a visited file,
+ ;; the undo list should start out empty.
+ (if visit
+ (setq buffer-undo-list nil)
+ ;; If we decided to replace the undo entry for the insertion,
+ ;; do so now.
+ (if undo-list-saved
+ (setq buffer-undo-list
+ (cons (cons from (point-max)) undo-list-saved))))))))
+
+(defun recode-region (start end new-coding coding)
+ "Re-decode the region (previously decoded by CODING) by NEW-CODING."
+ (interactive
+ (list (region-beginning) (region-end)
+ (read-coding-system "Text was really in: ")
+ (let ((coding (or buffer-file-coding-system last-coding-system-used)))
+ (read-coding-system
+ (concat "But was interpreted as"
+ (if coding (format " (default %S): " coding) ": "))
+ coding))))
+ (or (and new-coding coding)
+ (error "Coding system not specified"))
+ ;; Check it before we encode the region.
+ (check-coding-system new-coding)
+ (save-restriction
+ (narrow-to-region start end)
+ (encode-coding-region (point-min) (point-max) coding)
+ (decode-coding-region (point-min) (point-max) new-coding)))
+
(defun make-translation-table (&rest args)
"Make a translation table from arguments.
A translation table is a char table intended for character
(put symbol 'translation-table-id id)
id))
+(defun translate-region (start end table)
+ "From START to END, translate characters according to TABLE.
+TABLE is a string or a char-table.
+If TABLE is a string, the Nth character in it is the mapping
+for the character with code N.
+If TABLE is a char-table, the element for character N is the mapping
+for the character with code N.
+It returns the number of characters changed."
+ (interactive
+ (list (region-beginning)
+ (region-end)
+ (let (table l)
+ (dotimes (i (length translation-table-vector))
+ (if (consp (aref translation-table-vector i))
+ (push (list (symbol-name
+ (car (aref translation-table-vector i)))) l)))
+ (if (not l)
+ (error "No translation table defined"))
+ (while (not table)
+ (setq table (completing-read "Translation table: " l nil t)))
+ (intern table))))
+ (if (symbolp table)
+ (let ((val (get table 'translation-table)))
+ (or (char-table-p val)
+ (error "Invalid translation table name: %s" table))
+ (setq table val)))
+ (translate-region-internal start end table))
+
(put 'with-category-table 'lisp-indent-function 1)
-(defmacro with-category-table (category-table &rest body)
- `(let ((current-category-table (category-table)))
- (set-category-table ,category-table)
- (unwind-protect
- (progn ,@body)
- (set-category-table current-category-table))))
+(defmacro with-category-table (table &rest body)
+ "Evaluate BODY with category table of current buffer set to TABLE.
+The category table of the current buffer is saved, BODY is evaluated,
+then the saved table is restored, even in case of an abnormal exit.
+Value is what BODY returns."
+ (let ((old-table (make-symbol "old-table"))
+ (old-buffer (make-symbol "old-buffer")))
+ `(let ((,old-table (category-table))
+ (,old-buffer (current-buffer)))
+ (unwind-protect
+ (progn
+ (set-category-table ,table)
+ ,@body)
+ (save-current-buffer
+ (set-buffer ,old-buffer)
+ (set-category-table ,old-table))))))
+
+(defun define-translation-hash-table (symbol table)
+ "Define SYMBOL as the name of the hash translation TABLE for use in CCL.
+
+Analogous to `define-translation-table', but updates
+`translation-hash-table-vector' and the table is for use in the CCL
+`lookup-integer' and `lookup-character' functions."
+ (unless (and (symbolp symbol)
+ (hash-table-p table))
+ (error "Bad args to define-translation-hash-table"))
+ (let ((len (length translation-hash-table-vector))
+ (id 0)
+ done)
+ (put symbol 'translation-hash-table table)
+ (while (not done)
+ (if (>= id len)
+ (setq translation-hash-table-vector
+ (vconcat translation-hash-table-vector [nil])))
+ (let ((slot (aref translation-hash-table-vector id)))
+ (if (or (not slot)
+ (eq (car slot) symbol))
+ (progn
+ (aset translation-hash-table-vector id (cons symbol table))
+ (setq done t))
+ (setq id (1+ id)))))
+ (put symbol 'translation-hash-table-id id)
+ id))
;;; Initialize some variables.
(defun sgml-xml-auto-coding-function (size)
"Determine whether the buffer is XML, and if so, its encoding.
This function is intended to be added to `auto-coding-functions'."
- (when (re-search-forward "\\`[[:space:]\n]*<\\?xml")
+ (setq size (+ (point) size))
+ (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" size t)
(let ((end (save-excursion
;; This is a hack.
(re-search-forward "\"\\s-*\\?>" size t))))
(defun sgml-html-meta-auto-coding-function (size)
"If the buffer has an HTML meta tag, use it to determine encoding.
This function is intended to be added to `auto-coding-functions'."
- (setq size (min size
+ (setq size (min (+ (point) size)
;; Only search forward 10 lines
(save-excursion
(forward-line 10)
(point))))
- (when (and (search-forward "<html>" size t)
+ (when (and (search-forward "<html" size t)
(re-search-forward "<meta\\s-+http-equiv=\"content-type\"\\s-+content=\"text/\\sw+;\\s-*charset=\\(.+?\\)\"" size t))
(let* ((match (match-string 1))
(sym (intern (downcase match))))
;;;
(provide 'mule)
+;;; arch-tag: 9aebaa6e-0e8a-40a9-b857-cb5d04a39e7c
;;; mule.el ends here