(setq l (cdr l))))
\f
-(setq utf-translate-cjk-mode saved-utf-translate-cjk-mode)
-(makunbound 'saved-utf-translate-cjk-mode)
+;; CJK double width characters: give every code point range listed
+;; below the value 2 in `char-width-table'.
+(dolist (cjk-range '((#x1100 . #x11FF)
+                     (#x2E80 . #x9FAF)
+                     (#xAC00 . #xD7AF)
+                     (#xF900 . #xFAFF)
+                     (#xFE30 . #xFE4F)
+                     (#xFF00 . #xFF5F)
+                     (#xFFE0 . #xFFEF)
+                     (#x20000 . #x2AFFF)
+                     (#x2F800 . #x2FFFF)))
+  ;; Each element is already a (FROM . TO) cons, so it is handed to
+  ;; `set-char-table-range' directly as the range argument.
+  (set-char-table-range char-width-table cjk-range 2))
+;; Fixme: Doing this affects non-CJK characters through unification,
+;; but presumably CJK users expect those characters to be
+;; double-width when using these charsets.
+;; (map-charset-chars
+;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
+;; 'japanese-jisx0208)
+;; (map-charset-chars
+;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
+;; 'japanese-jisx0212)
+;; (map-charset-chars
+;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
+;; 'japanese-jisx0213-1)
+;; (map-charset-chars
+;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
+;; 'japanese-jisx0213-2)
+;; (map-charset-chars
+;; (lambda (range ignore) (set-char-table-range char-width-table range 2))
+;; 'korean-ksc5601)
+
+;; Other double width: every character range of the charsets named
+;; below gets the value 2 in `char-width-table'.
+(dolist (charset '(ethiopic tibetan indian-2-column arabic-2-column))
+  (map-charset-chars
+   ;; The second callback argument is not used.
+   (lambda (chars unused)
+     (set-char-table-range char-width-table chars 2))
+   charset))
+
+;; Optimize the standard case table, the width table, the standard
+;; category table and the standard syntax table, in that order.
+(mapc #'optimize-char-table
+      (list (standard-case-table)
+            char-width-table
+            (standard-category-table)
+            (standard-syntax-table)))
+
+;; The Unicode blocks actually extend past some of these ranges with
+;; undefined codepoints.
+;;
+;; Each entry below is (FROM TO SCRIPT).  The range FROM..TO is tagged
+;; with SCRIPT in `char-script-table', and the distinct script names,
+;; in order of first appearance, are stored in extra slot 0 of that
+;; table.
+(let (seen-scripts)
+  (dolist (entry
+           '((#x0000 #x007F latin)
+             (#x00A0 #x036F latin)
+             (#x0370 #x03E1 greek)
+             (#x03E2 #x03EF coptic)
+             (#x03F0 #x03F3 greek)
+             (#x0400 #x04FF cyrillic)
+             (#x0530 #x058F armenian)
+             (#x0590 #x05FF hebrew)
+             (#x0600 #x06FF arabic)
+             (#x0700 #x074F syriac)
+             (#x0780 #x07BF thaana)
+             (#x0900 #x097F devanagari)
+             (#x0980 #x09FF bengali)
+             (#x0A00 #x0A7F gurmukhi)
+             (#x0A80 #x0AFF gujarati)
+             (#x0B00 #x0B7F oriya)
+             (#x0B80 #x0BFF tamil)
+             (#x0C00 #x0C7F telugu)
+             (#x0C80 #x0CFF kannada)
+             (#x0D00 #x0D7F malayalam)
+             (#x0D80 #x0DFF sinhala)
+             (#x0E00 #x0E5F thai)
+             (#x0E80 #x0EDF lao)
+             (#x0F00 #x0FFF tibetan)
+             (#x1000 #x105F myanmar)
+             (#x10A0 #x10FF georgian)
+             (#x1100 #x11FF hangul)
+             (#x1200 #x137F ethiopic)
+             (#x13A0 #x13FF cherokee)
+             (#x1400 #x167F canadian-aboriginal)
+             (#x1680 #x169F ogham)
+             (#x16A0 #x16FF runic)
+             (#x1780 #x17FF khmer)
+             (#x1800 #x18AF mongolian)
+             (#x1E00 #x1EFF latin)
+             (#x1F00 #x1FFF greek)
+             (#x2000 #x27FF symbol)
+             (#x2800 #x28FF braille)
+             (#x2E80 #x2FDF han)
+             (#x2FF0 #x2FFF ideographic-description)
+             (#x3000 #x303F cjk-misc)
+             (#x3040 #x30FF kana)
+             (#x3100 #x312F bopomofo)
+             (#x3130 #x318F hangul)
+             (#x3190 #x319F kanbun)
+             (#x31A0 #x31BF bopomofo)
+             (#x3400 #x9FAF han)
+             (#xA000 #xA4CF yi)
+             (#xAC00 #xD7AF hangul)
+             (#xF900 #xFAFF han)
+             (#xFB1D #xFB4F hebrew)
+             (#xFB50 #xFDFF arabic)
+             (#xFE70 #xFEFC arabic)
+             (#xFF00 #xFF5F cjk-misc)
+             (#xFF61 #xFF9F kana)
+             (#xFFE0 #xFFE6 cjk-misc)
+             (#x20000 #x2AFFF han)
+             (#x2F800 #x2FFFF han)))
+    (let ((from (car entry))
+          (to (cadr entry))
+          (script (nth 2 entry)))
+      (set-char-table-range char-script-table (cons from to) script)
+      ;; Record each script once; the list is built newest-first here
+      ;; and reversed once the loop is done.
+      (unless (memq script seen-scripts)
+        (push script seen-scripts))))
+  (set-char-table-extra-slot char-script-table 0 (nreverse seen-scripts)))
+
+;; Tag every character range of the `tibetan' charset with the
+;; `tibetan' script in `char-script-table'.
+(map-charset-chars
+ (lambda (chars unused)
+   (set-char-table-range char-script-table chars 'tibetan))
+ 'tibetan)
+
+\f
+;;; Setting word boundary.
+
+(defun next-word-boundary-han (pos limit)
+  "Return a word boundary position found by scanning from POS toward LIMIT.
+If POS is not greater than LIMIT, scan forward from POS: skip a run of
+characters of category C, then a run of characters of category H, and
+return the position reached.  A run that is absent is not skipped.
+Otherwise scan backward: move POS back one character at a time while
+it is greater than LIMIT and the character before it has script `han'
+in `char-script-table', and return the resulting position."
+  (if (<= pos limit)
+      (save-excursion
+        (goto-char pos)
+        ;; Use the match data only when `looking-at' succeeded.  After
+        ;; a failed match it is stale (or absent), so moving to
+        ;; `match-end' would jump to an unrelated position or signal
+        ;; an error.
+        (if (looking-at "\\cC+")
+            (goto-char (match-end 0)))
+        (if (looking-at "\\cH+")
+            (goto-char (match-end 0)))
+        (point))
+    (while (and (> pos limit)
+                (eq (aref char-script-table (char-after (1- pos))) 'han))
+      (setq pos (1- pos)))
+    pos))
+
+;; Return a word boundary position found by scanning from POS toward
+;; LIMIT.  The category letters tested below are K (Katakana),
+;; H (Hiragana), C (Chinese) and A (Alpha numeric), as named in the
+;; comments on `word-separating-categories' in this file; the meaning
+;; of category k is not shown here.
+;;
+;; When POS is not greater than LIMIT the scan goes forward and skips,
+;; in this order, a run of category K, a run of category H and a run
+;; of category k, each only if present.  Otherwise the scan goes
+;; backward and never moves POS while it is not greater than LIMIT,
+;; except for the single step noted below.
+;; NOTE(review): the backward branch calls `char-after' on POS itself,
+;; so it presumably expects POS to be the position of a character
+;; rather than the end of the buffer -- confirm against the caller.
+(defun next-word-boundary-kana (pos limit)
+ (if (<= pos limit)
+ (save-excursion
+ (goto-char pos)
+ (if (looking-at "\\cK+")
+ (goto-char (match-end 0)))
+ (if (looking-at "\\cH+")
+ (goto-char (match-end 0)))
+ (if (looking-at "\\ck+")
+ (goto-char (match-end 0)))
+ (point))
+ (let ((category-set (char-category-set (char-after pos)))
+ category)
+ ;; Case 1: the character at POS is K or k.  Step back over the
+ ;; preceding run of K/k characters.
+ (if (or (aref category-set ?K) (aref category-set ?k))
+ (while (and (> pos limit)
+ ;; This `setq' is evaluated for its side effect: it loads the
+ ;; category set of the previous character for the test on the
+ ;; next line.
+ (setq category-set
+ (char-category-set (char-after (1- pos))))
+ (or (aref category-set ?K) (aref category-set ?k)))
+ (setq pos (1- pos)))
+ ;; Case 2 (all remaining forms are the else branch): first step
+ ;; back over the preceding run of H characters.
+ (while (and (> pos limit)
+ (aref (setq category-set
+ (char-category-set (char-after (1- pos)))) ?H))
+ (setq pos (1- pos)))
+ ;; CATEGORY-SET now holds the set examined last: that of the
+ ;; character which ended the H run, or an earlier value if the loop
+ ;; stopped because POS reached LIMIT.  Pick C, K or A from it, in
+ ;; that priority order.
+ (setq category (cond ((aref category-set ?C) ?C)
+ ((aref category-set ?K) ?K)
+ ((aref category-set ?A) ?A)))
+ (when category
+ ;; Step onto that character unconditionally (this step is not
+ ;; checked against LIMIT), then continue back over the rest of the
+ ;; run sharing its category.
+ (setq pos (1- pos))
+ (while (and (> pos limit)
+ (aref (char-category-set (char-after (1- pos)))
+ category))
+ (setq pos (1- pos)))))
+ pos)))
+
+;; For every entry of `char-script-table' whose script is `han' or
+;; `kana', install the matching boundary function for the same
+;; characters in `find-word-boundary-function-table'.  Entries with
+;; any other script are left alone.
+(map-char-table
+ (lambda (chars script)
+   (let ((boundary-fn
+          (cdr (assq script
+                     '((han . next-word-boundary-han)
+                       (kana . next-word-boundary-kana))))))
+     (when boundary-fn
+       (set-char-table-range find-word-boundary-function-table
+                             chars boundary-fn))))
+ char-script-table)
+
+;; Set `word-combining-categories' to a list of (CATEGORY . CATEGORY)
+;; pairs.
+;; NOTE(review): presumably these are the category pairs between which
+;; no word boundary is placed -- confirm against the variable's
+;; documentation.
+(setq word-combining-categories
- '((?l . ?l)))
++ '((?l . ?l)
++ (?C . ?C)
++ (?C . ?H)
++ (?C . ?K)))
+
+;; Category pairs for the 2-byte character sets.  Category letters:
+;; A = Alpha numeric, H = Hiragana, K = Katakana, C = Chinese.
+(setq word-separating-categories
+      '(;; Alpha numeric paired with Katakana, Chinese
+        (?A . ?K) (?A . ?C)
+        ;; Hiragana paired with Alpha numeric, Katakana, Chinese
+        (?H . ?A) (?H . ?K) (?H . ?C)
+        ;; Katakana paired with Alpha numeric, Chinese
+        (?K . ?A) (?K . ?C)
+        ;; Chinese paired with Alpha numeric, Katakana
+        (?C . ?A) (?C . ?K)))
;;; Local Variables:
-;;; coding: iso-2022-7bit
+;;; coding: utf-8-emacs
;;; End:
;;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d