]> code.delx.au - gnu-emacs/blob - lisp/international/mule.el
(set-selection-coding-system): Renamed from set-clipboard-coding-system.
[gnu-emacs] / lisp / international / mule.el
1 ;;; mule.el --- basic commands for mulitilingual environment
2
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5
6 ;; Keywords: mule, multilingual, character set, coding system
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Code:
26
27 (defconst mule-version "4.0 (HANANOEN)" "\
28 Version number and name of this version of MULE (multilingual environment).")
29
30 (defconst mule-version-date "1998.7.1" "\
31 Distribution date of this version of MULE (multilingual environment).")
32
33 (defun load-with-code-conversion (fullname file &optional noerror nomessage)
34 "Execute a file of Lisp code named FILE whose absolute name is FULLNAME.
35 The file contents are decoded before evaluation if necessary.
36 If optional second arg NOERROR is non-nil,
37 report no error if FILE doesn't exist.
38 Print messages at start and end of loading unless
39 optional third arg NOMESSAGE is non-nil.
40 Return t if file exists."
41 (if (null (file-readable-p fullname))
42 (and (null noerror)
43 (signal 'file-error (list "Cannot open load file" file)))
44 ;; Read file with code conversion, and then eval.
45 (let* ((buffer
46 ;; To avoid any autoloading, set default-major-mode to
47 ;; fundamental-mode.
48 ;; So that we don't get completely screwed if the
49 ;; file is encoded in some complicated character set,
50 ;; read it with real decoding, as a multibyte buffer,
51 ;; even if this is a --unibyte Emacs session.
52 (let ((default-major-mode 'fundamental-mode)
53 (default-enable-multibyte-characters t))
54 ;; We can't use `generate-new-buffer' because files.el
55 ;; is not yet loaded.
56 (get-buffer-create (generate-new-buffer-name " *load*"))))
57 (load-in-progress t)
58 (source (save-match-data (string-match "\\.el\\'" fullname))))
59 (unless nomessage
60 (if source
61 (message "Loading %s (source)..." file)
62 (message "Loading %s..." file)))
63 (when purify-flag
64 (setq preloaded-file-list (cons file preloaded-file-list)))
65 (unwind-protect
66 (let ((load-file-name fullname)
67 (set-auto-coding-for-load t)
68 (inhibit-file-name-operation nil))
69 (save-excursion
70 (set-buffer buffer)
71 (insert-file-contents fullname)
72 ;; Make `kill-buffer' quiet.
73 (set-buffer-modified-p nil))
74 ;; Have the original buffer current while we eval.
75 (eval-buffer buffer nil file
76 ;; If this Emacs is running with --unibyte,
77 ;; convert multibyte strings to unibyte
78 ;; after reading them.
79 ;; (not default-enable-multibyte-characters)
80 ))
81 (let (kill-buffer-hook kill-buffer-query-functions)
82 (kill-buffer buffer)))
83 (let ((hook (assoc file after-load-alist)))
84 (when hook
85 (mapcar (function eval) (cdr hook))))
86 (unless (or nomessage noninteractive)
87 (if source
88 (message "Loading %s (source)...done" file)
89 (message "Loading %s...done" file)))
90 t)))
91
92 ;; API (Application Program Interface) for charsets.
93
94 ;; Return t if OBJ is a quoted symbol
95 ;; and the symbol is the name of a standard charset.
96 (defsubst charset-quoted-standard-p (obj)
97 (and (listp obj) (eq (car obj) 'quote)
98 (symbolp (car-safe (cdr obj)))
99 (let ((vector (get (car-safe (cdr obj)) 'charset)))
100 (and (vectorp vector)
101 (< (aref vector 0) 160)))))
102
103 (defsubst charsetp (object)
104 "T is OBJECT is a charset."
105 (and (symbolp object) (vectorp (get object 'charset))))
106
107 (defsubst charset-info (charset)
108 "Return a vector of information of CHARSET.
109 The elements of the vector are:
110 CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
111 LEADING-CODE-BASE, LEADING-CODE-EXT,
112 ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
113 REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
114 PLIST,
115 where
116 CHARSET-ID (integer) is the identification number of the charset.
117 DIMENSION (integer) is the number of bytes to represent a character of
118 the charset: 1 or 2.
119 CHARS (integer) is the number of characters in a dimension: 94 or 96.
120 BYTE (integer) is the length of multi-byte form of a character in
121 the charset: one of 1, 2, 3, and 4.
122 WIDTH (integer) is the number of columns a character in the charset
123 occupies on the screen: one of 0, 1, and 2.
124 DIRECTION (integer) is the rendering direction of characters in the
125 charset when rendering. If 0, render from right to left, else
126 render from left to right.
127 LEADING-CODE-BASE (integer) is the base leading-code for the
128 charset.
129 LEADING-CODE-EXT (integer) is the extended leading-code for the
130 charset. All charsets of less than 0xA0 has the value 0.
131 ISO-FINAL-CHAR (character) is the final character of the
132 corresponding ISO 2022 charset.
133 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
134 while encoding to variants of ISO 2022 coding system, one of the
135 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
136 REVERSE-CHARSET (integer) is the charset which differs only in
137 LEFT-TO-RIGHT value from the charset. If there's no such a
138 charset, the value is -1.
139 SHORT-NAME (string) is the short name to refer to the charset.
140 LONG-NAME (string) is the long name to refer to the charset
141 DESCRIPTION (string) is the description string of the charset.
142 PLIST (property list) may contain any type of information a user
143 want to put and get by functions `put-charset-property' and
144 `get-charset-property' respectively."
145 (get charset 'charset))
146
147 (defmacro charset-id (charset)
148 "Return charset identification number of CHARSET."
149 (if (charset-quoted-standard-p charset)
150 (aref (charset-info (nth 1 charset)) 0)
151 `(aref (charset-info ,charset) 0)))
152
153 (defmacro charset-bytes (charset)
154 "Return bytes of CHARSET.
155 See the function `charset-info' for more detail."
156 (if (charset-quoted-standard-p charset)
157 (aref (charset-info (nth 1 charset)) 1)
158 `(aref (charset-info ,charset) 1)))
159
160 (defmacro charset-dimension (charset)
161 "Return dimension of CHARSET.
162 See the function `charset-info' for more detail."
163 (if (charset-quoted-standard-p charset)
164 (aref (charset-info (nth 1 charset)) 2)
165 `(aref (charset-info ,charset) 2)))
166
167 (defmacro charset-chars (charset)
168 "Return character numbers contained in a dimension of CHARSET.
169 See the function `charset-info' for more detail."
170 (if (charset-quoted-standard-p charset)
171 (aref (charset-info (nth 1 charset)) 3)
172 `(aref (charset-info ,charset) 3)))
173
174 (defmacro charset-width (charset)
175 "Return width (how many column occupied on a screen) of CHARSET.
176 See the function `charset-info' for more detail."
177 (if (charset-quoted-standard-p charset)
178 (aref (charset-info (nth 1 charset)) 4)
179 `(aref (charset-info ,charset) 4)))
180
181 (defmacro charset-direction (charset)
182 "Return direction of CHARSET.
183 See the function `charset-info' for more detail."
184 (if (charset-quoted-standard-p charset)
185 (aref (charset-info (nth 1 charset)) 5)
186 `(aref (charset-info ,charset) 5)))
187
188 (defmacro charset-iso-final-char (charset)
189 "Return final char of CHARSET.
190 See the function `charset-info' for more detail."
191 (if (charset-quoted-standard-p charset)
192 (aref (charset-info (nth 1 charset)) 8)
193 `(aref (charset-info ,charset) 8)))
194
195 (defmacro charset-iso-graphic-plane (charset)
196 "Return graphic plane of CHARSET.
197 See the function `charset-info' for more detail."
198 (if (charset-quoted-standard-p charset)
199 (aref (charset-info (nth 1 charset)) 9)
200 `(aref (charset-info ,charset) 9)))
201
202 (defmacro charset-reverse-charset (charset)
203 "Return reverse charset of CHARSET.
204 See the function `charset-info' for more detail."
205 (if (charset-quoted-standard-p charset)
206 (aref (charset-info (nth 1 charset)) 10)
207 `(aref (charset-info ,charset) 10)))
208
209 (defmacro charset-short-name (charset)
210 "Return short name of CHARSET.
211 See the function `charset-info' for more detail."
212 (if (charset-quoted-standard-p charset)
213 (aref (charset-info (nth 1 charset)) 11)
214 `(aref (charset-info ,charset) 11)))
215
216 (defmacro charset-long-name (charset)
217 "Return long name of CHARSET.
218 See the function `charset-info' for more detail."
219 (if (charset-quoted-standard-p charset)
220 (aref (charset-info (nth 1 charset)) 12)
221 `(aref (charset-info ,charset) 12)))
222
223 (defmacro charset-description (charset)
224 "Return descriptoin of CHARSET.
225 See the function `charset-info' for more detail."
226 (if (charset-quoted-standard-p charset)
227 (aref (charset-info (nth 1 charset)) 13)
228 `(aref (charset-info ,charset) 13)))
229
230 (defmacro charset-plist (charset)
231 "Return list charset property of CHARSET.
232 See the function `charset-info' for more detail."
233 (if (charset-quoted-standard-p charset)
234 `(aref ,(charset-info (nth 1 charset)) 14)
235 `(aref (charset-info ,charset) 14)))
236
237 (defun set-charset-plist (charset plist)
238 "Set CHARSET's property list to PLIST, and retrun PLIST."
239 (aset (charset-info charset) 14 plist))
240
241 (defun make-char (charset &optional c1 c2)
242 "Return a character of CHARSET and position-codes CODE1 and CODE2.
243 CODE1 and CODE2 are optional, but if you don't supply
244 sufficient position-codes, return a generic character which stands for
245 all characters or group of characters in the character sets.
246 A generic character can be used to index a char table (e.g. syntax-table)."
247 (make-char-internal (charset-id charset) c1 c2))
248
249 (put 'make-char 'byte-compile
250 (function
251 (lambda (form)
252 (let ((charset (nth 1 form)))
253 (if (charset-quoted-standard-p charset)
254 (byte-compile-normal-call
255 (cons 'make-char-internal
256 (cons (charset-id (nth 1 charset)) (nthcdr 2 form))))
257 (byte-compile-normal-call
258 (cons 'make-char-internal
259 (cons (list 'charset-id charset) (nthcdr 2 form)))))))))
260
261 (defun charset-list ()
262 "Return list of charsets ever defined.
263
264 This function is provided for backward compatibility.
265 Now we have the variable `charset-list'."
266 charset-list)
267
268 (defsubst generic-char-p (char)
269 "Return t if and only if CHAR is a generic character.
270 See also the documentation of make-char."
271 (and (>= char 0400)
272 (let ((l (split-char char)))
273 (and (or (= (nth 1 l) 0) (eq (nth 2 l) 0))
274 (not (eq (car l) 'composition))))))
275
276 \f
277 ;; Coding system staffs
278
279 ;; Coding system is a symbol that has the property `coding-system'.
280 ;;
281 ;; The value of the property `coding-system' is a vector of the
282 ;; following format:
283 ;; [TYPE MNEMONIC DOC-STRING PLIST FLAGS]
284 ;; We call this vector as coding-spec. See comments in src/coding.c
285 ;; for more detail.
286
287 (defconst coding-spec-type-idx 0)
288 (defconst coding-spec-mnemonic-idx 1)
289 (defconst coding-spec-doc-string-idx 2)
290 (defconst coding-spec-plist-idx 3)
291 (defconst coding-spec-flags-idx 4)
292
293 ;; PLIST is a property list of a coding system. To share PLIST among
294 ;; alias coding systems, a coding system has PLIST in coding-spec
295 ;; instead of having it in normal property list of Lisp symbol.
296 ;; Here's a list of coding system properties currently being used.
297 ;;
298 ;; o coding-category
299 ;;
300 ;; The value is a coding category the coding system belongs to. The
301 ;; function `make-coding-system' and `define-coding-system-alias' sets
302 ;; this value automatically.
303 ;;
304 ;; o alias-coding-systems
305 ;;
306 ;; The value is a list of coding systems of the same alias group. The
307 ;; first element is the coding system made at first, which we call as
308 ;; `base coding system'. The function `make-coding-system' and
309 ;; `define-coding-system-alias' set this value automatically.
310 ;;
311 ;; o post-read-conversion
312 ;;
313 ;; The value is a function to call after some text is inserted and
314 ;; decoded by the coding system itself and before any functions in
315 ;; `after-insert-functions' are called. The arguments to this
316 ;; function is the same as those of a function in
317 ;; `after-insert-functions', i.e. LENGTH of a text while putting point
318 ;; at the head of the text to be decoded
319 ;;
320 ;; o pre-write-conversion
321 ;;
322 ;; The value is a function to call after all functions in
323 ;; `write-region-annotate-functions' and `buffer-file-format' are
324 ;; called, and before the text is encoded by the coding system itself.
325 ;; The arguments to this function is the same as those of a function
326 ;; in `write-region-annotate-functions', i.e. FROM and TO specifying
327 ;; region of a text.
328 ;;
329 ;; o translation-table-for-decode
330 ;;
331 ;; The value is a translation table to be applied on decoding. See
332 ;; the function `make-translation-table' for the format of translation
333 ;; table.
334 ;;
335 ;; o translation-table-for-encode
336 ;;
337 ;; The value is a translation table to be applied on encoding.
338 ;;
339 ;; o safe-charsets
340 ;;
341 ;; The value is a list of charsets safely supported by the coding
342 ;; system. The value t means that all charsets Emacs handles are
343 ;; supported. Even if some charset is not in this list, it doesn't
344 ;; mean that the charset can't be encoded in the coding system,
345 ;; instead, it just means that some other receiver of a text encoded
346 ;; in the coding system won't be able to handle that charset.
347 ;;
348 ;; o mime-charset
349 ;;
350 ;; The value is a symbol of which name is `MIME-charset' parameter of
351 ;; the coding system.
352
353 ;; Return coding-spec of CODING-SYSTEM
354 (defsubst coding-system-spec (coding-system)
355 (get (check-coding-system coding-system) 'coding-system))
356
357 (defun coding-system-type (coding-system)
358 "Return the coding type of CODING-SYSTEM.
359 A coding type is an integer value indicating the encoding method
360 of CODING-SYSTEM. See the function `make-coding-system' for more detail."
361 (aref (coding-system-spec coding-system) coding-spec-type-idx))
362
363 (defun coding-system-mnemonic (coding-system)
364 "Return the mnemonic character of CODING-SYSTEM.
365 The mnemonic character of a coding system is used in mode line
366 to indicate the coding system. If the arg is nil, return ?-."
367 (let ((spec (coding-system-spec coding-system)))
368 (if spec (aref spec coding-spec-mnemonic-idx) ?-)))
369
370 (defun coding-system-doc-string (coding-system)
371 "Return the documentation string for CODING-SYSTEM."
372 (aref (coding-system-spec coding-system) coding-spec-doc-string-idx))
373
374 (defun coding-system-plist (coding-system)
375 "Return the property list of CODING-SYSTEM."
376 (aref (coding-system-spec coding-system) coding-spec-plist-idx))
377
378 (defun coding-system-flags (coding-system)
379 "Return `flags' of CODING-SYSTEM.
380 A `flags' of a coding system is a vector of length 32 indicating detailed
381 information of a coding system. See the function `make-coding-system'
382 for more detail."
383 (aref (coding-system-spec coding-system) coding-spec-flags-idx))
384
385 (defun coding-system-get (coding-system prop)
386 "Extract a value from CODING-SYSTEM's property list for property PROP."
387 (plist-get (coding-system-plist coding-system) prop))
388
389 (defun coding-system-put (coding-system prop val)
390 "Change value in CODING-SYSTEM's property list PROP to VAL."
391 (let ((plist (coding-system-plist coding-system)))
392 (if plist
393 (plist-put plist prop val)
394 (aset (coding-system-spec coding-system) coding-spec-plist-idx
395 (list prop val)))))
396
397 (defun coding-system-category (coding-system)
398 "Return the coding category of CODING-SYSTEM."
399 (coding-system-get coding-system 'coding-category))
400
401 (defun coding-system-base (coding-system)
402 "Return the base coding system of CODING-SYSTEM.
403 A base coding system is what made by `make-coding-system'.
404 Any alias nor subsidiary coding systems are not base coding system."
405 (car (coding-system-get coding-system 'alias-coding-systems)))
406
407 (defalias 'coding-system-parent 'coding-system-base)
408 (make-obsolete 'coding-system-parent 'coding-system-base)
409
410 ;; Coding system also has a property `eol-type'.
411 ;;
412 ;; This property indicates how the coding system handles end-of-line
413 ;; format. The value is integer 0, 1, 2, or a vector of three coding
414 ;; systems. Each integer value 0, 1, and 2 indicates the format of
415 ;; end-of-line LF, CRLF, and CR respectively. A vector value
416 ;; indicates that the format of end-of-line should be detected
417 ;; automatically. Nth element of the vector is the subsidiary coding
418 ;; system whose `eol-type' property is N.
419
420 (defun coding-system-eol-type (coding-system)
421 "Return eol-type of CODING-SYSTEM.
422 An eol-type is integer 0, 1, 2, or a vector of coding systems.
423
424 Integer values 0, 1, and 2 indicate a format of end-of-line; LF,
425 CRLF, and CR respectively.
426
427 A vector value indicates that a format of end-of-line should be
428 detected automatically. Nth element of the vector is the subsidiary
429 coding system whose eol-type is N."
430 (get coding-system 'eol-type))
431
432 ;; Make subsidiary coding systems (eol-type variants) of CODING-SYSTEM.
433 (defun make-subsidiary-coding-system (coding-system)
434 (let ((coding-spec (coding-system-spec coding-system))
435 (subsidiaries (vector (intern (format "%s-unix" coding-system))
436 (intern (format "%s-dos" coding-system))
437 (intern (format "%s-mac" coding-system))))
438 (i 0)
439 temp)
440 (while (< i 3)
441 (put (aref subsidiaries i) 'coding-system coding-spec)
442 (put (aref subsidiaries i) 'eol-type i)
443 (setq coding-system-list
444 (cons (aref subsidiaries i) coding-system-list))
445 (setq coding-system-alist
446 (cons (list (symbol-name (aref subsidiaries i)))
447 coding-system-alist))
448 (setq i (1+ i)))
449 subsidiaries))
450
451 (defun make-coding-system (coding-system type mnemonic doc-string
452 &optional flags properties)
453 "Define a new coding system CODING-SYSTEM (symbol).
454 Remaining arguments are TYPE, MNEMONIC, DOC-STRING, FLAGS (optional),
455 and PROPERTIES (optional) which construct a coding-spec of CODING-SYSTEM
456 in the following format:
457 [TYPE MNEMONIC DOC-STRING PLIST FLAGS]
458
459 TYPE is an integer value indicating the type of the coding system as follows:
460 0: Emacs internal format,
461 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PC,
462 2: ISO-2022 including many variants,
463 3: Big5 used mainly on Chinese PC,
464 4: private, CCL programs provide encoding/decoding algorithm,
465 5: Raw-text, which means that text contains random 8-bit codes.
466
467 MNEMONIC is a character to be displayed on mode line for the coding system.
468
469 DOC-STRING is a documentation string for the coding system.
470
471 FLAGS specifies more detailed information of the coding system as follows:
472
473 If TYPE is 2 (ISO-2022), FLAGS is a list of these elements:
474 CHARSET0, CHARSET1, CHARSET2, CHARSET3, SHORT-FORM,
475 ASCII-EOL, ASCII-CNTL, SEVEN, LOCKING-SHIFT, SINGLE-SHIFT,
476 USE-ROMAN, USE-OLDJIS, NO-ISO6429, INIT-BOL, DESIGNATION-BOL,
477 SAFE, ACCEPT-LATIN-EXTRA-CODE.
478 CHARSETn are character sets initially designated to Gn graphic registers.
479 If CHARSETn is nil, Gn is never used.
480 If CHARSETn is t, Gn can be used but nothing designated initially.
481 If CHARSETn is a list of character sets, those character sets are
482 designated to Gn on output, but nothing designated to Gn initially.
483 SHORT-FORM non-nil means use short designation sequence on output.
484 ASCII-EOL non-nil means designate ASCII to g0 at end of line on output.
485 ASCII-CNTL non-nil means designate ASCII to g0 before control codes and
486 SPACE on output.
487 SEVEN non-nil means use 7-bit code only on output.
488 LOCKING-SHIFT non-nil means use locking-shift.
489 SINGLE-SHIFT non-nil means use single-shift.
490 USE-ROMAN non-nil means designate JIS0201-1976-Roman instead of ASCII.
491 USE-OLDJIS non-nil means designate JIS0208-1976 instead of JIS0208-1983.
492 NO-ISO6429 non-nil means not use ISO6429's direction specification.
493 INIT-BOL non-nil means any designation state is assumed to be reset
494 to initial at each beginning of line on output.
495 DESIGNATION-BOL non-nil means designation sequences should be placed
496 at beginning of line on output.
497 SAFE non-nil means convert unsafe characters to `?' on output.
498 Unsafe characters are what not specified in SAFE-CHARSET.
499 ACCEPT-LATIN-EXTRA-CODE non-nil means code-detection routine accepts
500 a code specified in `latin-extra-code-table' (which see) as a valid
501 code of the coding system.
502
503 If TYPE is 4 (private), FLAGS should be a cons of CCL programs, for
504 decoding and encoding. CCL programs should be specified by their
505 symbols.
506
507 PROPERTIES is an alist of properties vs the corresponding values.
508 These properties are set in PLIST, a property list. This function
509 also sets properties `coding-category' and `alias-coding-systems'
510 automatically.
511
512 Kludgy features for backward compatibility:
513
514 1. If TYPE is 4 and car or cdr of FLAGS is a vector, the vector is
515 treated as a compiled CCL code.
516
517 2. If PROPERTIES is just a list of character sets, the list is set as
518 a value of `safe-charsets' in PLIST."
519 (if (memq coding-system coding-system-list)
520 (error "Coding system %s already exists" coding-system))
521
522 ;; Set a value of `coding-system' property.
523 (let ((coding-spec (make-vector 5 nil))
524 (no-initial-designation t)
525 (no-alternative-designation t)
526 coding-category)
527 (if (or (not (integerp type)) (< type 0) (> type 5))
528 (error "TYPE argument must be 0..5"))
529 (if (or (not (integerp mnemonic)) (<= mnemonic ? ) (> mnemonic 127))
530 (error "MNEMONIC arguemnt must be an ASCII printable character."))
531 (aset coding-spec coding-spec-type-idx type)
532 (aset coding-spec coding-spec-mnemonic-idx mnemonic)
533 (aset coding-spec coding-spec-doc-string-idx
534 (if (stringp doc-string) doc-string ""))
535 (cond ((= type 0)
536 (setq coding-category 'coding-category-emacs-mule))
537 ((= type 1)
538 (setq coding-category 'coding-category-sjis))
539 ((= type 2) ; ISO2022
540 (let ((i 0)
541 (vec (make-vector 32 nil))
542 (g1-designation nil))
543 (while (< i 4)
544 (let ((charset (car flags)))
545 (if (and no-initial-designation
546 (> i 0)
547 (or (charsetp charset)
548 (and (consp charset)
549 (charsetp (car charset)))))
550 (setq no-initial-designation nil))
551 (if (charsetp charset)
552 (if (= i 1) (setq g1-designation charset))
553 (if (consp charset)
554 (let ((tail charset)
555 elt)
556 (while tail
557 (setq elt (car tail))
558 (if (eq elt t)
559 (setq no-alternative-designation nil)
560 (if (and elt (not (charsetp elt)))
561 (error "Invalid charset: %s" elt)))
562 (setq tail (cdr tail)))
563 (setq g1-designation (car charset)))
564 (if charset
565 (if (eq charset t)
566 (setq no-alternative-designation nil)
567 (error "Invalid charset: %s" charset)))))
568 (aset vec i charset))
569 (setq flags (cdr flags) i (1+ i)))
570 (while (and (< i 32) flags)
571 (aset vec i (car flags))
572 (setq flags (cdr flags) i (1+ i)))
573 (aset coding-spec 4 vec)
574 (setq coding-category
575 (if (aref vec 8) ; Use locking-shift.
576 (or (and (aref vec 7) 'coding-category-iso-7-else)
577 'coding-category-iso-8-else)
578 (if (aref vec 7) ; 7-bit only.
579 (if (aref vec 9) ; Use single-shift.
580 'coding-category-iso-7-else
581 (if no-alternative-designation
582 'coding-category-iso-7-tight
583 'coding-category-iso-7))
584 (if (or no-initial-designation
585 (not no-alternative-designation))
586 'coding-category-iso-8-else
587 (if (and (charsetp g1-designation)
588 (= (charset-dimension g1-designation) 2))
589 'coding-category-iso-8-2
590 'coding-category-iso-8-1)))))))
591 ((= type 3)
592 (setq coding-category 'coding-category-big5))
593 ((= type 4) ; private
594 (setq coding-category 'coding-category-binary)
595 (if (not (consp flags))
596 (error "Invalid FLAGS argument for TYPE 4 (CCL)")
597 (let ((decoder (check-ccl-program
598 (car flags)
599 (intern (format "%s-decoder" coding-system))))
600 (encoder (check-ccl-program
601 (cdr flags)
602 (intern (format "%s-encoder" coding-system)))))
603 (if (and decoder encoder)
604 (aset coding-spec 4 (cons decoder encoder))
605 (error "Invalid FLAGS argument for TYPE 4 (CCL)")))))
606 (t ; i.e. (= type 5)
607 (setq coding-category 'coding-category-raw-text)))
608
609 (let ((plist (list 'coding-category coding-category
610 'alias-coding-systems (list coding-system))))
611 (if no-initial-designation
612 (plist-put plist 'no-initial-designation t))
613 (if (and properties
614 (or (eq properties t)
615 (not (consp (car properties)))))
616 ;; In the old version, the arg PROPERTIES is a list to be
617 ;; set in PLIST as a value of property `safe-charsets'.
618 (plist-put plist 'safe-charsets properties)
619 (while properties
620 (plist-put plist (car (car properties)) (cdr (car properties)))
621 (setq properties (cdr properties))))
622 (aset coding-spec coding-spec-plist-idx plist))
623 (put coding-system 'coding-system coding-spec)
624 (put coding-category 'coding-systems
625 (cons coding-system (get coding-category 'coding-systems))))
626
627 ;; Next, set a value of `eol-type' property. The value is a vector
628 ;; of subsidiary coding systems, each corresponds to a coding system
629 ;; for the detected end-of-line format.
630 (put coding-system 'eol-type
631 (if (or (<= type 3) (= type 5))
632 (make-subsidiary-coding-system coding-system)
633 0))
634
635 ;; At last, register CODING-SYSTEM in `coding-system-list' and
636 ;; `coding-system-alist'.
637 (setq coding-system-list (cons coding-system coding-system-list))
638 (setq coding-system-alist (cons (list (symbol-name coding-system))
639 coding-system-alist))
640 coding-system)
641
642 (defun define-coding-system-alias (alias coding-system)
643 "Define ALIAS as an alias for coding system CODING-SYSTEM."
644 (put alias 'coding-system (coding-system-spec coding-system))
645 (nconc (coding-system-get alias 'alias-coding-systems) (list alias))
646 (setq coding-system-list (cons alias coding-system-list))
647 (setq coding-system-alist (cons (list (symbol-name alias))
648 coding-system-alist))
649 (let ((eol-type (coding-system-eol-type coding-system)))
650 (if (vectorp eol-type)
651 (put alias 'eol-type (make-subsidiary-coding-system alias))
652 (put alias 'eol-type eol-type))))
653
654 (defun set-buffer-file-coding-system (coding-system &optional force)
655 "Set the file coding-system of the current buffer to CODING-SYSTEM.
656 This means that when you save the buffer, it will be converted
657 according to CODING-SYSTEM. For a list of possible values of CODING-SYSTEM,
658 use \\[list-coding-systems].
659
660 If the buffer's previous file coding-system value specifies end-of-line
661 conversion, and CODING-SYSTEM does not specify one, CODING-SYSTEM is
662 merged with the already-specified end-of-line conversion.
663 However, if the optional prefix argument FORCE is non-nil,
664 then CODING-SYSTEM is used exactly as specified."
665 (interactive "zCoding system for visited file (default, nil): \nP")
666 (check-coding-system coding-system)
667 (if (null force)
668 (let ((x (coding-system-eol-type buffer-file-coding-system))
669 (y (coding-system-eol-type coding-system)))
670 (if (and (numberp x) (>= x 0) (<= x 2) (vectorp y))
671 (setq coding-system (aref y x)))))
672 (setq buffer-file-coding-system coding-system)
673 (set-buffer-modified-p t)
674 (force-mode-line-update))
675
676 (defvar default-terminal-coding-system nil
677 "Default value for the terminal coding system.
678 This is normally set according to the selected language environment.
679 See also the command `set-terminal-coding-system'.")
680
681 (defun set-terminal-coding-system (coding-system)
682 "Set coding system of your terminal to CODING-SYSTEM.
683 All text output to the terminal will be encoded
684 with the specified coding system.
685 For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
686 The default is determined by the selected language environment
687 or by the previous use of this command."
688 (interactive
689 (list (let ((default (if (and (not (terminal-coding-system))
690 default-terminal-coding-system)
691 default-terminal-coding-system)))
692 (read-coding-system
693 (format "Coding system for terminal display (default, %s): "
694 default)
695 default))))
696 (if (and (not coding-system)
697 (not (terminal-coding-system)))
698 (setq coding-system default-terminal-coding-system))
699 (if coding-system
700 (setq default-terminal-coding-system coding-system))
701 (set-terminal-coding-system-internal coding-system)
702 (redraw-frame (selected-frame)))
703
704 (defvar default-keyboard-coding-system nil
705 "Default value of the keyboard coding system.
706 This is normally set according to the selected language environment.
707 See also the command `set-keyboard-coding-system'.")
708
709 (defun set-keyboard-coding-system (coding-system)
710 "Set coding system for keyboard input to CODING-SYSTEM.
711 In addition, this command enables Encoded-kbd minor mode.
712 \(If CODING-SYSTEM is nil, Encoded-kbd mode is turned off.)
713 For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
714 The default is determined by the selected language environment
715 or by the previous use of this command."
716 (interactive
717 (list (let ((default (if (and (not (keyboard-coding-system))
718 default-keyboard-coding-system)
719 default-keyboard-coding-system)))
720 (read-coding-system
721 (format "Coding system for keyboard input (default, %s): "
722 default)
723 default))))
724 (if (and (not coding-system)
725 (not (keyboard-coding-system)))
726 (setq coding-system default-keyboard-coding-system))
727 (if coding-system
728 (setq default-keyboard-coding-system coding-system))
729 (set-keyboard-coding-system-internal coding-system)
730 (encoded-kbd-mode (if coding-system 1 0)))
731
732 (defun set-buffer-process-coding-system (decoding encoding)
733 "Set coding systems for the process associated with the current buffer.
734 DECODING is the coding system to be used to decode input from the process,
735 ENCODING is the coding system to be used to encode output to the process.
736
737 For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]."
738 (interactive
739 "zCoding-system for process input: \nzCoding-system for process output: ")
740 (let ((proc (get-buffer-process (current-buffer))))
741 (if (null proc)
742 (error "no process")
743 (check-coding-system decoding)
744 (check-coding-system encoding)
745 (set-process-coding-system proc decoding encoding)))
746 (force-mode-line-update))
747
748 (defun set-selection-coding-system (coding-system)
749 "Make CODING-SYSTEM used for communicating with other X clients .
750 When sending or receiving text via cut_buffer, selection, and clipboard,
751 the text is encoded or decoded by CODING-SYSTEM."
752 (check-coding-system coding-system)
753 (setq selection-coding-system coding-system))
754
755 (defun set-coding-priority (arg)
756 "Set priority of coding categories according to LIST.
757 LIST is a list of coding categories ordered by priority."
758 (let ((l arg)
759 (current-list (copy-sequence coding-category-list)))
760 ;; Check the varidity of ARG while deleting coding categories in
761 ;; ARG from CURRENT-LIST. We assume that CODING-CATEGORY-LIST
762 ;; contains all coding categories.
763 (while l
764 (if (or (null (get (car l) 'coding-category-index))
765 (null (memq (car l) current-list)))
766 (error "Invalid or duplicated element in argument: %s" arg))
767 (setq current-list (delq (car l) current-list))
768 (setq l (cdr l)))
769 ;; Update `coding-category-list' and return it.
770 (setq coding-category-list (append arg current-list))
771 (set-coding-priority-internal)))
772
773 ;;; FILE I/O
774
775 (defvar set-auto-coding-for-load nil
776 "Non-nil means look for `load-coding' property instead of `coding'.
777 This is used for loading and byte-compiling Emacs Lisp files.")
778
779 (defun set-auto-coding (size)
780 "Return coding system for a file of which SIZE bytes follow point.
781 These bytes should include at least the first 1k of the file
782 and the last 3k of the file, but the middle may be omitted.
783
784 It checks for a `coding:' tag in the first one or two lines following
785 point. If no `coding:' tag is found, it checks for alocal variables
786 list in the last 3K bytes out of the SIZE bytes.
787
788 The return value is the specified coding system,
789 or nil if nothing specified.
790
791 The variable `set-auto-coding-function' (which see) is set to this
792 function by default."
793 (let* ((case-fold-search t)
794 (head-start (point))
795 (head-end (+ head-start (min size 1024)))
796 (tail-start (+ head-start (max (- size 3072) 0)))
797 (tail-end (+ head-start size))
798 coding-system head-found tail-found pos)
799 ;; Try a short cut by searching for the string "coding:"
800 ;; and for "unibyte:" at th ehead and tail of SIZE bytes.
801 (setq head-found (or (search-forward "coding:" head-end t)
802 (search-forward "unibyte:" head-end t)))
803 (if (and head-found (> head-found tail-start))
804 ;; Head and tail are overlapped.
805 (setq tail-found head-found)
806 (goto-char tail-start)
807 (setq tail-found (or (search-forward "coding:" tail-end t)
808 (search-forward "unibyte:" tail-end t))))
809
810 ;; At first check the head.
811 (when head-found
812 (goto-char head-start)
813 (setq pos (re-search-forward "[\n\r]" head-end t))
814 (if (and pos
815 (= (char-after head-start) ?#)
816 (= (char-after (1+ head-start)) ?!))
817 ;; If the file begins with "#!" (exec interpreter magic),
818 ;; look for coding frobs in the first two lines. You cannot
819 ;; necessarily put them in the first line of such a file
820 ;; without screwing up the interpreter invocation.
821 (setq pos (search-forward "\n" head-end t)))
822 (if pos (setq head-end pos))
823 (when (< head-found head-end)
824 (goto-char head-start)
825 (when (and set-auto-coding-for-load
826 (re-search-forward
827 "-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
828 head-end t))
829 (setq coding-system 'raw-text))
830 (when (and (not coding-system)
831 (re-search-forward
832 "-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
833 head-end t))
834 (setq coding-system (intern (match-string 2)))
835 (or (coding-system-p coding-system)
836 (setq coding-system nil)))))
837
838 ;; If no coding: tag in the head, check the tail.
839 (when (and tail-found (not coding-system))
840 (goto-char tail-start)
841 (search-forward "\n\^L" nil t)
842 (if (re-search-forward
843 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
844 ;; The prefix is what comes before "local variables:" in its
845 ;; line. The suffix is what comes after "local variables:"
846 ;; in its line.
847 (let* ((prefix (regexp-quote (match-string 1)))
848 (suffix (regexp-quote (match-string 2)))
849 (re-coding (concat
850 "^" prefix
851 "coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
852 suffix "$"))
853 (re-unibyte (concat
854 "^" prefix
855 "unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
856 suffix "$"))
857 (re-end (concat
858 "^" prefix "end *:[ \t]*" suffix "$"))
859 (pos (point)))
860 (re-search-forward re-end tail-end 'move)
861 (setq tail-end (point))
862 (goto-char pos)
863 (when (and set-auto-coding-for-load
864 (re-search-forward re-unibyte tail-end t))
865 (setq coding-system 'raw-text))
866 (when (and (not coding-system)
867 (re-search-forward re-coding tail-end t))
868 (setq coding-system (intern (match-string 1)))
869 (or (coding-system-p coding-system)
870 (setq coding-system nil))))))
871 coding-system))
872
873 (setq set-auto-coding-function 'set-auto-coding)
874
875 ;; Set buffer-file-coding-system of the current buffer after some text
876 ;; is inserted.
877 (defun after-insert-file-set-buffer-file-coding-system (inserted)
878 (if last-coding-system-used
879 (let ((coding-system
880 (find-new-buffer-file-coding-system last-coding-system-used))
881 (modified-p (buffer-modified-p)))
882 (when coding-system
883 (set-buffer-file-coding-system coding-system)
884 (if (and (or (eq coding-system 'no-conversion)
885 (eq (coding-system-type coding-system) 5))
886 ;; If buffer was unmodified, we must be visiting it.
887 (not modified-p))
888 ;; For coding systems no-conversion and raw-text...,
889 ;; edit the buffer as unibyte.
890 (set-buffer-multibyte nil))
891 (set-buffer-modified-p modified-p))))
892 nil)
893
894 (add-hook 'after-insert-file-functions
895 'after-insert-file-set-buffer-file-coding-system)
896
897 ;; The coding-spec and eol-type of coding-system returned is decided
898 ;; independently in the following order.
899 ;; 1. That of buffer-file-coding-system locally bound.
900 ;; 2. That of CODING.
901
902 (defun find-new-buffer-file-coding-system (coding)
903 "Return a coding system for a buffer when a file of CODING is inserted.
904 The local variable `buffer-file-coding-system' of the current buffer
905 is set to the returned value.
906 Return nil if there's no need of setting new buffer-file-coding-system."
907 (let (local-coding local-eol
908 found-coding found-eol
909 new-coding new-eol)
910 (if (null coding)
911 ;; Nothing found about coding.
912 nil
913
914 ;; Get information of `buffer-file-coding-system' in LOCAL-EOL
915 ;; and LOCAL-CODING.
916 (setq local-eol (coding-system-eol-type buffer-file-coding-system))
917 (if (null (numberp local-eol))
918 ;; But eol-type is not yet set.
919 (setq local-eol nil))
920 (if (and buffer-file-coding-system
921 (not (eq (coding-system-type buffer-file-coding-system) t)))
922 ;; This is not `undecided'.
923 (setq local-coding (coding-system-base buffer-file-coding-system)))
924
925 (if (and (local-variable-p 'buffer-file-coding-system)
926 local-eol local-coding)
927 ;; The current buffer has already set full coding-system, we
928 ;; had better not change it.
929 nil
930
931 (setq found-eol (coding-system-eol-type coding))
932 (if (null (numberp found-eol))
933 ;; But eol-type is not found.
934 (setq found-eol nil))
935 (if (not (eq (coding-system-type coding) t))
936 ;; This is not `undecided'.
937 (setq found-coding (coding-system-base coding)))
938
939 ;; The local setting takes precedence over the found one.
940 (setq new-coding (or (and (local-variable-p 'buffer-file-coding-system)
941 local-coding)
942 found-coding
943 local-coding))
944 (setq new-eol (or (and (local-variable-p 'buffer-file-coding-system)
945 local-eol)
946 found-eol
947 local-eol))
948 (when (numberp new-eol)
949 (or new-coding
950 (setq new-coding 'undecided))
951 (if (vectorp (coding-system-eol-type new-coding))
952 (setq new-coding
953 (aref (coding-system-eol-type new-coding) new-eol))))
954 ;; Return a new coding system only when it is different from
955 ;; the current one.
956 (if (not (eq buffer-file-coding-system new-coding))
957 new-coding)))))
958
959 (defun modify-coding-system-alist (target-type regexp coding-system)
960 "Modify one of look up tables for finding a coding system on I/O operation.
961 There are three of such tables, `file-coding-system-alist',
962 `process-coding-system-alist', and `network-coding-system-alist'.
963
964 TARGET-TYPE specifies which of them to modify.
965 If it is `file', it affects `file-coding-system-alist' (which see).
966 If it is `process', it affects `process-coding-system-alist' (which see).
967 If it is `network', it affects `network-codign-system-alist' (which see).
968
969 REGEXP is a regular expression matching a target of I/O operation.
970 The target is a file name if TARGET-TYPE is `file', a program name if
971 TARGET-TYPE is `process', or a network service name or a port number
972 to connect to if TARGET-TYPE is `network'.
973
974 CODING-SYSTEM is a coding system to perform code conversion on the I/O
975 operation, or a cons cell (DECODING . ENCODING) specifying the coding systems
976 for decoding and encoding respectively,
977 or a function symbol which, when called, returns such a cons cell."
978 (or (memq target-type '(file process network))
979 (error "Invalid target type: %s" target-type))
980 (or (stringp regexp)
981 (and (eq target-type 'network) (integerp regexp))
982 (error "Invalid regular expression: %s" regexp))
983 (if (symbolp coding-system)
984 (if (not (fboundp coding-system))
985 (progn
986 (check-coding-system coding-system)
987 (setq coding-system (cons coding-system coding-system))))
988 (check-coding-system (car coding-system))
989 (check-coding-system (cdr coding-system)))
990 (cond ((eq target-type 'file)
991 (let ((slot (assoc regexp file-coding-system-alist)))
992 (if slot
993 (setcdr slot coding-system)
994 (setq file-coding-system-alist
995 (cons (cons regexp coding-system)
996 file-coding-system-alist)))))
997 ((eq target-type 'process)
998 (let ((slot (assoc regexp process-coding-system-alist)))
999 (if slot
1000 (setcdr slot coding-system)
1001 (setq process-coding-system-alist
1002 (cons (cons regexp coding-system)
1003 process-coding-system-alist)))))
1004 (t
1005 (let ((slot (assoc regexp network-coding-system-alist)))
1006 (if slot
1007 (setcdr slot coding-system)
1008 (setq network-coding-system-alist
1009 (cons (cons regexp coding-system)
1010 network-coding-system-alist)))))))
1011
1012 (defun make-translation-table (&rest args)
1013 "Make a translation table (char table) from arguments.
1014 Each argument is a list of the form (FROM . TO),
1015 where FROM is a character to be translated to TO.
1016
1017 FROM can be a generic character (see `make-char'). In this case, TO is
1018 a generic character containing the same number of characters, or a
1019 ordinary character. If FROM and TO are both generic characters, all
1020 characters belonging to FROM are translated to characters belonging to TO
1021 without changing their position code(s)."
1022 (let ((table (make-char-table 'translation-table))
1023 revlist)
1024 (while args
1025 (let ((elts (car args)))
1026 (while elts
1027 (let* ((from (car (car elts)))
1028 (from-i 0) ; degree of freedom of FROM
1029 (from-rev (nreverse (split-char from)))
1030 (to (cdr (car elts)))
1031 (to-i 0) ; degree of freedom of TO
1032 (to-rev (nreverse (split-char to))))
1033 ;; Check numbers of heading 0s in FROM-REV and TO-REV.
1034 (while (eq (car from-rev) 0)
1035 (setq from-i (1+ from-i) from-rev (cdr from-rev)))
1036 (while (eq (car to-rev) 0)
1037 (setq to-i (1+ to-i) to-rev (cdr to-rev)))
1038 (if (and (/= from-i to-i) (/= to-i 0))
1039 (error "Invalid character pair (%d . %d)" from to))
1040 ;; If we have already translated TO to TO-ALT, FROM should
1041 ;; also be translated to TO-ALT. But, this is only if TO
1042 ;; is a generic character or TO-ALT is not a generic
1043 ;; character.
1044 (let ((to-alt (aref table to)))
1045 (if (and to-alt
1046 (or (> to-i 0) (not (generic-char-p to-alt))))
1047 (setq to to-alt)))
1048 (if (> from-i 0)
1049 (set-char-table-default table from to)
1050 (aset table from to))
1051 ;; If we have already translated some chars to FROM, they
1052 ;; should also be translated to TO.
1053 (let ((l (assq from revlist)))
1054 (if l
1055 (let ((ch (car l)))
1056 (setcar l to)
1057 (setq l (cdr l))
1058 (while l
1059 (aset table ch to)
1060 (setq l (cdr l)) ))))
1061 ;; Now update REVLIST.
1062 (let ((l (assq to revlist)))
1063 (if l
1064 (setcdr l (cons from (cdr l)))
1065 (setq revlist (cons (list to from) revlist)))))
1066 (setq elts (cdr elts))))
1067 (setq args (cdr args)))
1068 ;; Return TABLE just created.
1069 table))
1070
1071 (defun define-translation-table (symbol &rest args)
1072 "Define SYMBOL as a name of translation table makde by ARGS.
1073
1074 See the documentation of the function `make-translation-table' for the
1075 meaning of ARGS.
1076
1077 This function sets properties `translation-table' and
1078 `translation-table-id' of SYMBOL to the created table itself and
1079 identification number of the table respectively."
1080 (let ((table (apply 'make-translation-table args))
1081 (len (length translation-table-vector))
1082 (id 0)
1083 (done nil))
1084 (put symbol 'translation-table table)
1085 (while (not done)
1086 (if (>= id len)
1087 (setq translation-table-vector
1088 (vconcat translation-table-vector (make-vector len nil))))
1089 (let ((slot (aref translation-table-vector id)))
1090 (if (or (not slot)
1091 (eq (car slot) symbol))
1092 (progn
1093 (aset translation-table-vector id (cons symbol table))
1094 (setq done t))))
1095 (setq id (1+ id)))
1096 (put symbol 'translation-table-id id)
1097 id))
1098
1099 ;;; Initialize some variables.
1100
1101 (put 'use-default-ascent 'char-table-extra-slots 0)
1102 (setq use-default-ascent (make-char-table 'use-default-ascent))
1103 (put 'ignore-relative-composition 'char-table-extra-slots 0)
1104 (setq ignore-relative-composition
1105 (make-char-table 'ignore-relative-composition))
1106
1107 ;;;
1108 (provide 'mule)
1109
1110 ;;; mule.el ends here