ucs-mule-to-mule-unicode)
(define-translation-table 'utf-translation-table-for-encode)))
(set-default s v))
- :version "21.4"
+ :version "22.1"
:type 'boolean
:group 'mule)
(defconst utf-translate-cjk-charsets '(chinese-gb2312
chinese-big5-1 chinese-big5-2
japanese-jisx0208 japanese-jisx0212
+ katakana-jisx0201
korean-ksc5601)
"List of charsets supported by `utf-translate-cjk-mode'.")
-(defconst utf-translate-cjk-unicode-range
- '((#x2e80 . #xd7a3)
- (#xff00 . #xffef))
- "List of Unicode code ranges supported by `utf-translate-cjk-mode'.")
+(defvar utf-translate-cjk-lang-env nil
+ "Language environment in which tables for `utf-translate-cjk-mode' is loaded.
+The value nil means that the tables are not yet loaded.")
+
+(defvar utf-translate-cjk-unicode-range)
+
+;; String generated from utf-translate-cjk-unicode-range. It is
+;; suitable for an argument to skip-chars-forward.
+(defvar utf-translate-cjk-unicode-range-string nil)
+
+(defun utf-translate-cjk-set-unicode-range (range)
+ (setq utf-translate-cjk-unicode-range range)
+ (setq utf-translate-cjk-unicode-range-string
+ (let ((decode-char-no-trans
+ #'(lambda (x)
+ (cond ((< x #x100) (make-char 'latin-iso8859-1 x))
+ ((< x #x2500)
+ (setq x (- x #x100))
+ (make-char 'mule-unicode-0100-24ff
+ (+ (/ x 96) 32) (+ (% x 96) 32)))
+ ((< x #x3400)
+ (setq x (- x #x2500))
+ (make-char 'mule-unicode-2500-33ff
+ (+ (/ x 96) 32) (+ (% x 96) 32)))
+ (t
+ (setq x (- x #xe000))
+ (make-char 'mule-unicode-e000-ffff
+ (+ (/ x 96) 32) (+ (% x 96) 32))))))
+ ranges from to)
+ (dolist (elt range)
+ (setq from (max #xA0 (car elt)) to (min #xffff (cdr elt)))
+ (if (and (>= to #x3400) (< to #xE000))
+ (setq to #x33FF))
+ (cond ((< from #x100)
+ (if (>= to #xE000)
+ (setq ranges (cons (cons #xE000 to) ranges)
+ to #x33FF))
+ (if (>= to #x2500)
+ (setq ranges (cons (cons #x2500 to) ranges)
+ to #x24FF))
+ (if (>= to #x100)
+ (setq ranges (cons (cons #x100 to) ranges)
+ to #xFF)))
+ ((< from #x2500)
+ (if (>= to #xE000)
+ (setq ranges (cons (cons #xE000 to) ranges)
+ to #x33FF))
+ (if (>= to #x2500)
+ (setq ranges (cons (cons #x2500 to) ranges)
+ to #x24FF)))
+ ((< from #x3400)
+ (if (>= to #xE000)
+ (setq ranges (cons (cons #xE000 to) ranges)
+ to #x33FF))))
+ (if (<= from to)
+ (setq ranges (cons (cons from to) ranges))))
+ (mapconcat #'(lambda (x)
+ (format "%c-%c"
+ (funcall decode-char-no-trans (car x))
+ (funcall decode-char-no-trans (cdr x))))
+ ranges "")))
+ ;; These forces loading and settting tables for
+ ;; utf-translate-cjk-mode.
+ (setq utf-translate-cjk-lang-env nil
+ ucs-mule-cjk-to-unicode (make-hash-table :test 'eq)
+ ucs-unicode-to-mule-cjk (make-hash-table :test 'eq)))
+
+(defcustom utf-translate-cjk-unicode-range '((#x2e80 . #xd7a3)
+ (#xff00 . #xffef))
+ "List of Unicode code ranges supported by `utf-translate-cjk-mode'.
+Setting this variable directly does not take effect;
+use either \\[customize] or the function
+`utf-translate-cjk-set-unicode-range'."
+ :version "22.1"
+ :type '(repeat (cons integer integer))
+ :set (lambda (symbol value)
+ (utf-translate-cjk-set-unicode-range value))
+ :group 'mule)
;; Return non-nil if CODE-POINT is in `utf-translate-cjk-unicode-range'.
(defsubst utf-translate-cjk-substitutable-p (code-point)
(setq elt nil)))
elt))
-(defvar utf-translate-cjk-lang-env nil
- "Language environment in which tables for `utf-translate-cjk-mode' is loaded.
-The value nil means that the tables are not yet loaded.")
-
(defun utf-translate-cjk-load-tables ()
"Load tables for `utf-translate-cjk-mode'."
;; Fixme: Allow the use of the CJK charsets to be
;; Load the files explicitly, to avoid having to keep
;; around the large tables they contain (as well as the
;; ones which get built).
- (cond ((string= "Korean" current-language-environment)
- (load "subst-jis")
- (load "subst-big5")
- (load "subst-gb2312")
- (load "subst-ksc"))
- ((string= "Chinese-BIG5" current-language-environment)
- (load "subst-jis")
- (load "subst-ksc")
- (load "subst-gb2312")
- (load "subst-big5"))
- ((string= "Chinese-GB" current-language-environment)
- (load "subst-jis")
- (load "subst-ksc")
- (load "subst-big5")
- (load "subst-gb2312"))
- (t
- (load "subst-ksc")
- (load "subst-gb2312")
- (load "subst-big5")
- (load "subst-jis"))) ; jis covers as much as big5, gb2312
+ ;; Here we bind coding-system-for-read to nil so that coding tags
+ ;; in the files are respected even if the files are not yet
+ ;; byte-compiled
+ (let ((coding-system-for-read nil))
+ (cond ((string= "Korean" current-language-environment)
+ (load "subst-jis")
+ (load "subst-big5")
+ (load "subst-gb2312")
+ (load "subst-ksc"))
+ ((string= "Chinese-BIG5" current-language-environment)
+ (load "subst-jis")
+ (load "subst-ksc")
+ (load "subst-gb2312")
+ (load "subst-big5"))
+ ((string= "Chinese-GB" current-language-environment)
+ (load "subst-jis")
+ (load "subst-ksc")
+ (load "subst-big5")
+ (load "subst-gb2312"))
+ (t
+ (load "subst-ksc")
+ (load "subst-gb2312")
+ (load "subst-big5")
+ (load "subst-jis")))) ; jis covers as much as big5, gb2312
(when redefined
(define-translation-hash-table 'utf-subst-table-for-decode
by the above coding systems, you can customize the user option
`utf-translate-cjk-mode' to nil."
:init-value t
- :version "21.4"
+ :version "22.1"
:type 'boolean
:group 'mule
:global t
;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
((r1 &= #x3F)
(r1 |= ((r0 & #x1F) << 6))
- ;; Now r2 holds scalar value. We don't have to check
+ ;; Now r1 holds scalar value. We don't have to check
;; `overlong sequence' because r0 >= 0xC2.
(if (r1 >= 256)
;; mule-unicode-0100-24ff (< 0800)
- ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
- (r1 -= #x0100)
- (r2 = (((r1 / 96) + 32) << 7))
- (r1 %= 96)
- (r1 += (r2 + 32))
- (translate-character
- utf-translation-table-for-decode r0 r1)
+ ((r0 = r1)
+ (lookup-integer utf-subst-table-for-decode r0 r1)
+ (if (r7 == 0)
+ ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
+ (r1 -= #x0100)
+ (r2 = (((r1 / 96) + 32) << 7))
+ (r1 %= 96)
+ (r1 += (r2 + 32))
+ (translate-character
+ utf-translation-table-for-decode r0 r1)))
(write-multibyte-character r0 r1)
(read r0)
(repeat))
(if (r1 >= 160)
;; latin-iso8859-1
- ((r1 -= 128)
- (write-multibyte-character r6 r1)
+ ((r0 = r1)
+ (lookup-integer utf-subst-table-for-decode r0 r1)
+ (if (r7 == 0)
+ ((r1 -= 128)
+ (write-multibyte-character r6 r1))
+ ((write-multibyte-character r0 r1)))
(read r0)
(repeat))
;; eight-bit-control
(if (r3 < #x2500)
;; mule-unicode-0100-24ff (>= 0800)
- ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
- (r3 -= #x0100)
- (r3 //= 96)
- (r1 = (r7 + 32))
- (r1 += ((r3 + 32) << 7))
- (translate-character
- utf-translation-table-for-decode r0 r1)
+ ((r0 = r3)
+ (lookup-integer utf-subst-table-for-decode r0 r1)
+ (if (r7 == 0)
+ ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
+ (r3 -= #x0100)
+ (r3 //= 96)
+ (r1 = (r7 + 32))
+ (r1 += ((r3 + 32) << 7))
+ (translate-character
+ utf-translation-table-for-decode r0 r1)))
(write-multibyte-character r0 r1)
(read r0)
(repeat)))
(defcustom utf-8-compose-scripts nil
"*Non-nil means compose various scripts on decoding utf-8 text."
:group 'mule
- :version "21.4"
+ :version "22.1"
:type 'boolean)
(defun utf-8-post-read-conversion (length)
hash-table ch)
(set-buffer-multibyte t)
(when utf-translate-cjk-mode
- (if (not utf-translate-cjk-lang-env)
- ;; Check these characters:
- ;; "U+2e80-U+33ff", "U+ff00-U+ffef"
- ;; We may have to translate them to CJK charsets.
- (let ((range2 "\e$,29@\e(B-\e$,2G\7f\e$,3r`\e(B-\e$,3u/\e(B"))
- (skip-chars-forward (concat range range2))
- (unless (eobp)
- (utf-translate-cjk-load-tables)
- (setq range (concat range range2)))
- (setq hash-table (get 'utf-subst-table-for-decode
- 'translation-hash-table)))))
+ (unless utf-translate-cjk-lang-env
+ ;; Check these characters in utf-translate-cjk-range.
+ ;; We may have to translate them to CJK charsets.
+ (skip-chars-forward
+ (concat range utf-translate-cjk-unicode-range-string))
+ (unless (eobp)
+ (utf-translate-cjk-load-tables)
+ (setq range
+ (concat range utf-translate-cjk-unicode-range-string)))
+ (setq hash-table (get 'utf-subst-table-for-decode
+ 'translation-hash-table))))
(while (and (skip-chars-forward range)
(not (eobp)))
(setq ch (following-char))