+ (let ((l charset-list)
+ charset)
+ (while l
+ (setq charset (car l) l (cdr l))
+ (princ (format "%03d:%s:%d:%d:%d:%d:%d:%d:%d:%s\n"
+ (charset-id charset)
+ charset
+ (charset-dimension charset)
+ (charset-chars charset)
+ (charset-bytes charset)
+ (charset-width charset)
+ (charset-direction charset)
+ (charset-iso-final-char charset)
+ (charset-iso-graphic-plane charset)
+ (charset-description charset))))))
+
+;; Table of the non-ISO charsets known to the charset listing commands.
+;; Entries are looked up by name with `assq'.  Entries for the DOS
+;; codepages are consed onto this list by the form that follows the
+;; codepage decoder below.
+(defvar non-iso-charset-alist
+ `((viscii
+ (ascii vietnamese-viscii-lower vietnamese-viscii-upper)
+ viet-viscii-nonascii-translation-table
+ ((0 255)))
+ (koi8-r
+ (ascii cyrillic-iso8859-5)
+ cyrillic-koi8-r-nonascii-translation-table
+ ((32 255)))
+ (alternativnyj
+ (ascii cyrillic-iso8859-5)
+ cyrillic-alternativnyj-nonascii-translation-table
+ ((32 255)))
+ ;; big5: one-byte codes 32..127, plus two-byte codes whose first byte
+ ;; is in A1..FE and whose second byte is in 40..7E or A1..FE.
+ ;; NOTE(review): `decode-big5-char' is presumably a function rather
+ ;; than a translation table symbol -- confirm against its definition.
+ (big5
+ (ascii chinese-big5-1 chinese-big5-2)
+ decode-big5-char
+ ((32 127)
+ ((?\xA1 ?\xFE) . (?\x40 ?\x7E ?\xA1 ?\xFE))))
+ ;; sjis: one-byte codes 32..127 and A1..DF, plus two-byte codes whose
+ ;; first byte is in 81..9F or E0..EF and whose second byte is in
+ ;; 40..7E or 80..FC.
+ (sjis
+ (ascii katakana-jisx0201 japanese-jisx0208)
+ decode-sjis-char
+ ((32 127 ?\xA1 ?\xDF)
+ ((?\x81 ?\x9F ?\xE0 ?\xEF) . (?\x40 ?\x7E ?\x80 ?\xFC)))))
+ "Alist of non-ISO charset names vs the corresponding information.
+
+Non-ISO charsets are what Emacs can read (or write) by mapping to (or
+from) some Emacs' charsets that correspond to ISO charsets.
+
+Each element has the following format:
+ (NON-ISO-CHARSET CHARSET-LIST TRANSLATION-METHOD [ CODE-RANGE ])
+
+NON-ISO-CHARSET is a name (symbol) of the non-ISO charset.
+
+CHARSET-LIST is a list of Emacs' charsets into which characters of
+NON-ISO-CHARSET are mapped.
+
+TRANSLATION-METHOD is a translation table (symbol) to translate a
+character code of NON-ISO-CHARSET to the corresponding Emacs character
+code. It can also be a function to call with one argument, a
+character code in NON-ISO-CHARSET.
+
+CODE-RANGE specifies the valid code ranges of NON-ISO-CHARSET.
+It is a list of RANGEs, where each RANGE is of the form:
+ (FROM1 TO1 FROM2 TO2 ...)
+or
+ ((FROM1-1 TO1-1 FROM1-2 TO1-2 ...) . (FROM2-1 TO2-1 FROM2-2 TO2-2 ...))
+In the first form, valid codes are between FROM1 and TO1, or FROM2 and
+TO2, or...
+The second form is used for 2-byte codes. The car part is the ranges
+of the first byte, and the cdr part is the ranges of the second byte.")
+
+
+;; Decode the character whose code is CODE in codepage CODEPAGE (a
+;; number).  The value is the decoded character itself: the string
+;; produced by the decoder is reduced with `string-to-char'.
+
+(defun decode-codepage-char (codepage code)
+  ;; Codepage N is served by the coding system named cpN.
+  (let ((cp-coding (intern (format "cp%d" codepage))))
+    ;; Call `codepage-setup' for CODEPAGE when that coding system does
+    ;; not exist yet.
+    (if (not (coding-system-p cp-coding))
+        (codepage-setup codepage))
+    (let ((decoded (decode-coding-string (char-to-string code) cp-coding)))
+      (string-to-char decoded))))
+
+
+;; Add DOS codepages to `non-iso-charset-alist'.
+;; A codepage whose cpNNN entry is already present is skipped, so
+;; evaluating this form again (e.g. when the file is reloaded, which
+;; does not reset the `defvar') does not pile up duplicate entries.
+
+(let ((tail (cp-supported-codepages))
+      elt name)
+  (while tail
+    (setq elt (car tail) tail (cdr tail))
+    ;; Now ELT is (CODEPAGE . CHARSET), where CODEPAGE is a string
+    ;; (e.g. "850"), CHARSET is a charset that characters in CODEPAGE
+    ;; are mapped to.
+    (setq name (intern (concat "cp" (car elt))))
+    (or (assq name non-iso-charset-alist)
+        (setq non-iso-charset-alist
+              (cons (list name
+                          (list 'ascii (cdr elt))
+                          ;; The translation method is a function that
+                          ;; decodes CODE through this codepage.
+                          `(lambda (code)
+                             (decode-codepage-char ,(string-to-int (car elt))
+                                                   code))
+                          ;; Valid codes are the single bytes 0..255.
+                          (list (list 0 255)))
+                    non-iso-charset-alist)))))
+
+
+;; Minibuffer history list for charset names.  `read-charset' passes
+;; this symbol to `completing-read' as its history variable.
+(defvar charset-history nil)
+
+
+;;;###autoload
+(defun read-charset (prompt &optional default-value initial-input)
+  "Read the name of a character set in the minibuffer; return it as a symbol.
+PROMPT is the prompt string.  The completion candidates are the Emacs
+character sets listed in the variable `charset-list' plus the non-ISO
+character sets listed in the variable `non-iso-charset-alist'.
+
+DEFAULT-VALUE, if non-nil, is the default value.
+INITIAL-INPUT, if non-nil, is a string inserted in the minibuffer initially.
+Both are handed to `completing-read'; see its documentation for the
+detailed meanings of these arguments.
+
+The value is nil if the string that was read is empty."
+  (let* ((iso-names (mapcar (lambda (cs) (list (symbol-name cs)))
+                            charset-list))
+         (non-iso-names (mapcar (lambda (entry)
+                                  (list (symbol-name (car entry))))
+                                non-iso-charset-alist))
+         ;; Completion table: one (NAME) element per known charset.
+         (candidates (append iso-names non-iso-names))
+         (answer (completing-read prompt candidates
+                                  nil t initial-input 'charset-history
+                                  default-value)))
+    (and (> (length answer) 0)
+         (intern answer))))
+
+
+;; Insert a table of the characters of CHARSET whose codes run from
+;; MIN to MAX, sixteen codes per line, under a header of column labels.
+;; If dimension of CHARSET is two (i.e. 2-byte charset), ROW is the
+;; first byte (block index) of the characters, and MIN and MAX are the
+;; second bytes of the characters.  If the dimension is one, ROW
+;; should be 0.
+;; For a non-ISO charset, CHARSET is a translation table (symbol) or a
+;; function to get Emacs' character codes that correspond to the
+;; characters to list.
+;; When a translation table is used, entries that are not characters
+;; and entries that are control codes (below 32, or 127..255) are shown
+;; as blanks.
+
+(defun list-block-of-chars (charset row min max)
+  (let ((trans-table (and (symbolp charset)
+                          (not (charsetp charset))
+                          (get charset 'translation-table)))
+        i ch)
+    ;; Header: a rule of dashes, then the column labels 0..F.
+    (insert-char ?- (+ 4 (* 3 16)))
+    (insert "\n ")
+    (setq i 0)
+    (while (< i 16)
+      (insert (format "%3X" i))
+      (setq i (1+ i)))
+    ;; Start at the first code of the 16-code line that contains MIN,
+    ;; so that the columns line up with the header.
+    (setq i (* (/ min 16) 16))
+    (while (<= i max)
+      (if (= (% i 16) 0)
+          (insert (format "\n%3Xx" (/ (+ (* row 256) i) 16))))
+      (setq ch (cond ((< i min)
+                      ;; Padding before MIN on the first line.
+                      32)
+                     ((charsetp charset)
+                      (if (= row 0)
+                          (make-char charset i)
+                        (make-char charset row i)))
+                     (trans-table
+                      (aref trans-table i))
+                     (t (funcall charset (+ (* row 256) i)))))
+      ;; The original guard tested `(char-table-p charset)', which can
+      ;; never hold because CHARSET is a symbol or a function here;
+      ;; test the translation table that was actually consulted.
+      (if (and trans-table
+               (or (not (integerp ch))
+                   (< ch 32)
+                   (and (>= ch 127) (<= ch 255))))
+          ;; Don't insert a control code (or a non-character entry).
+          (setq ch 32))
+      (indent-to (+ (* (% i 16) 3) 6))
+      (insert ch)
+      (setq i (1+ i))))
+  (insert "\n"))
+
+
+;; Insert a listing of every character in ISO charset CHARSET: a title
+;; line followed by one block (dimension 1) or one block per row
+;; (otherwise), each produced by `list-block-of-chars'.
+
+(defun list-iso-charset-chars (charset)
+  (let ((dimension (charset-dimension charset))
+        (n-chars (charset-chars charset))
+        (graphic-plane (charset-iso-graphic-plane charset))
+        lo hi)
+    (insert (format "Characters in the charset %s.\n" charset))
+
+    ;; Work out the range LO..HI of valid code values.
+    (cond ((eq charset 'eight-bit-control)
+           (setq lo 128 hi 159))
+          ((eq charset 'eight-bit-graphic)
+           (setq lo 160 hi 255))
+          (t
+           (setq lo (if (= n-chars 94) 33 32)
+                 hi (if (= n-chars 94) 126 127))
+           ;; A non-zero graphic plane moves the range up by 128.
+           (unless (= graphic-plane 0)
+             (setq lo (+ lo 128) hi (+ hi 128)))))
+
+    (if (= dimension 1)
+        (list-block-of-chars charset 0 lo hi)
+      ;; The same range is used for the row (first code) and for the
+      ;; codes within each row.
+      (let ((row lo))
+        (while (<= row hi)
+          (list-block-of-chars charset row lo hi)
+          (setq row (1+ row)))))))
+
+
+;; Insert a listing of every character in non-ISO charset CHARSET,
+;; which must have an entry in `non-iso-charset-alist'; signal an error
+;; otherwise.  The code ranges of that entry are walked and each block
+;; is produced by `list-block-of-chars' with the entry's translation
+;; method.
+
+(defun list-non-iso-charset-chars (charset)
+  (let ((entry (assq charset non-iso-charset-alist)))
+    (if (null entry)
+        (error "Unknown external charset: %s" charset))
+    (let ((targets (nth 1 entry))
+          (method (nth 2 entry))
+          (range-specs (nth 3 entry)))
+      (insert (format "Characters in non-ISO charset %s.\n" charset))
+      (insert "They are mapped to: "
+              (mapconcat #'symbol-name targets ", ")
+              "\n")
+      (dolist (spec range-specs)
+        (if (integerp (car spec))
+            ;; SPEC is (FROM1 TO1 FROM2 TO2 ...): one block per pair.
+            (let ((pairs spec))
+              (while pairs
+                (list-block-of-chars method 0 (car pairs) (nth 1 pairs))
+                (setq pairs (nthcdr 2 pairs))))
+          ;; SPEC is ((FROM1-1 TO1-1 ...) . (FROM2-1 TO2-1 ...)): for
+          ;; every first byte, one block per second-byte pair.
+          (let ((first-bytes (car spec))
+                (second-bytes (cdr spec))
+                row last-row cols)
+            (while first-bytes
+              (setq row (car first-bytes)
+                    last-row (nth 1 first-bytes)
+                    first-bytes (nthcdr 2 first-bytes))
+              (while (<= row last-row)
+                (setq cols second-bytes)
+                (while cols
+                  (list-block-of-chars method row (car cols) (nth 1 cols))
+                  (setq cols (nthcdr 2 cols)))
+                (setq row (1+ row))))))))))
+
+
+;;;###autoload
+(defun list-charset-chars (charset)
+  "Show the characters of the character set CHARSET in the *Help* buffer.
+CHARSET is either an Emacs charset or the name of a non-ISO charset
+found in `non-iso-charset-alist'; anything else signals an error."
+  (interactive (list (read-charset "Character set: ")))
+  (with-output-to-temp-buffer "*Help*"
+    (with-current-buffer standard-output
+      ;; The listing is inserted as multibyte text.
+      (set-buffer-multibyte t)
+      (if (charsetp charset)
+          (list-iso-charset-chars charset)
+        (if (assq charset non-iso-charset-alist)
+            (list-non-iso-charset-chars charset)
+          (error "Invalid charset %s" charset))))))
+
+
+;;;###autoload
+(defun describe-character-set (charset)
+  "Show a description of the character set CHARSET in the *Help* buffer.
+CHARSET must be an Emacs charset; otherwise an error is signaled."
+  ;; Bind `non-iso-charset-alist' to nil while reading, so that
+  ;; `read-charset' offers only the names from `charset-list'.
+  (interactive (list (let ((non-iso-charset-alist nil))
+                       (read-charset "Charset: "))))
+  (if (not (charsetp charset))
+      (error "Invalid charset: %S" charset))
+  ;; INFO is the vector returned by `charset-info'; the slots read
+  ;; below are labelled by the text printed next to them.
+  (let ((info (charset-info charset)))
+    (with-output-to-temp-buffer "*Help*"
+      (save-excursion
+        (set-buffer standard-output)
+        (insert "Character set: " (symbol-name charset)
+                (format " (ID:%d)\n\n" (aref info 0)))
+        ;; Slot 13 holds the description string.
+        (insert (aref info 13) "\n\n")
+        (insert "number of contained characters: "
+                (cond ((= (aref info 2) 1)
+                       (format "%d\n" (aref info 3)))
+                      (t
+                       (format "%dx%d\n" (aref info 3) (aref info 3)))))
+        (insert "the final char of ISO2022's designation sequence: ")
+        (insert (if (>= (aref info 8) 0)
+                    (format "`%c'\n" (aref info 8))
+                  "not assigned\n"))
+        (insert (format "width (how many columns on screen): %d\n"
+                        (aref info 4)))
+        (insert (format "internal multibyte sequence: %s\n"
+                        (charset-multibyte-form-string charset)))
+        (let ((preferred (plist-get (aref info 14) 'preferred-coding-system)))
+          (if preferred
+              (progn
+                (insert (format "preferred coding system: %s\n" preferred))
+                ;; Move back onto the name just inserted so that the
+                ;; cross-reference button is made from that match.
+                (search-backward (symbol-name preferred))
+                (help-xref-button 0 #'describe-coding-system preferred
+                                  "mouse-2, RET: describe this coding system"))))
+        (help-setup-xref (list #'describe-character-set charset)
+                         (interactive-p))))))
+
+;;;###autoload
+(defun describe-char-after (&optional pos)
+  "Display information about the character at POS in the current buffer.
+POS defaults to point.
+The information includes character code, charset and code points in it,
+syntax, category, how the character is encoded in a file,
+which font is being used for displaying the character.
+
+Signal an error if POS is at or beyond the end of the accessible
+portion of the buffer."
+  (interactive)
+  (or pos
+      (setq pos (point)))
+  (if (>= pos (point-max))
+      (error "No character at point"))
+  (let* ((char (char-after pos))
+         (charset (char-charset char))
+         ;; Look for a composition at POS (not at point), so that the
+         ;; report stays consistent when POS is given explicitly.
+         (composition (find-composition pos nil nil t))
+         (composed (if composition (buffer-substring (car composition)
+                                                     (nth 1 composition))))
+         ;; Remember the multibyteness of the described buffer; the
+         ;; output buffer is set to match it below.
+         (multibyte-p enable-multibyte-characters)
+         item-list max-width)
+    ;; ITEM-LIST is a list of (LABEL STRING...) rows to print.
+    (if (eq charset 'unknown)
+        (setq item-list
+              `(("character"
+                 ,(format "%s (0%o, %d, 0x%x) -- invalid character code"
+                          (if (< char 256)
+                              (single-key-description char)
+                            (char-to-string char))
+                          char char char))))
+      (setq item-list
+            `(("character"
+               ,(format "%s (0%o, %d, 0x%x)" (if (< char 256)
+                                                 (single-key-description char)
+                                               (char-to-string char))
+                        char char char))
+              ("charset"
+               ,(symbol-name charset)
+               ,(format "(%s)" (charset-description charset)))
+              ("code point"
+               ,(let ((split (split-char char)))
+                  (if (= (charset-dimension charset) 1)
+                      (format "%d" (nth 1 split))
+                    (format "%d %d" (nth 1 split) (nth 2 split)))))
+              ("syntax"
+               ,(nth 2 (assq (char-syntax char) syntax-code-table)))
+              ("category"
+               ,@(let ((category-set (char-category-set char)))
+                   (if (not category-set)
+                       '("-- none --")
+                     (mapcar #'(lambda (x) (format "%c:%s "
+                                                   x (category-docstring x)))
+                             (category-set-mnemonics category-set)))))
+              ("buffer code"
+               ,(encoded-string-description
+                 (string-as-unibyte (char-to-string char)) nil))
+              ("file code"
+               ,@(let* ((coding buffer-file-coding-system)
+                        (encoded (encode-coding-char char coding)))
+                   (if encoded
+                       (list (encoded-string-description encoded coding)
+                             (format "(encoded by coding system %S)" coding))
+                     (list "not encodable by coding system"
+                           (symbol-name coding)))))
+              ;; Last row: the font on a graphic display, otherwise the
+              ;; code sent to the terminal.  The font is looked up at
+              ;; POS, the position being described.
+              ,(if (display-graphic-p (selected-frame))
+                   (list "font" (or (internal-char-font pos)
+                                    "-- none --"))
+                 (list "terminal code"
+                       (let* ((coding (terminal-coding-system))
+                              (encoded (encode-coding-char char coding)))
+                         (if encoded
+                             (encoded-string-description encoded coding)
+                           "not encodable")))))))
+    ;; Width of the longest label; labels are right-aligned to it.
+    (setq max-width (apply #'max (mapcar #'(lambda (x) (length (car x)))
+                                         item-list)))
+    (with-output-to-temp-buffer "*Help*"
+      (save-excursion
+        (set-buffer standard-output)
+        (set-buffer-multibyte multibyte-p)
+        (let ((formatter (format "%%%ds:" max-width)))
+          (dolist (elt item-list)
+            (insert (format formatter (car elt)))
+            (dolist (clm (cdr elt))
+              ;; Wrap to a new, indented line when the next column
+              ;; would reach the frame width.
+              (when (>= (+ (current-column) (string-width clm) 1)
+                        (frame-width))
+                (insert "\n")
+                (indent-to (1+ max-width)))
+              (insert " " clm))
+            (insert "\n")))
+        (when composition
+          (insert "\nComposed with the following character(s) "
+                  (mapconcat (lambda (x) (format "`%c'" x))
+                             (substring composed 1)
+                             ", ")
+                  " to form `" composed "'")
+          (if (nth 3 composition)
+              (insert ".\n")
+            (insert "\nby the rule ("
+                    (mapconcat (lambda (x)
+                                 (format (if (consp x) "%S" "?%c") x))
+                               (nth 2 composition)
+                               " ")
+                    ").\n"
+                    "See the variable `reference-point-alist' for the meaning of the rule.\n")))))))
+