]> code.delx.au - gnu-emacs/blob - share/emacs/site-lisp/w3m/w3m-ccl.el
epa-file: suppress file-locking question on M-x revert-buffer
[gnu-emacs] / share / emacs / site-lisp / w3m / w3m-ccl.el
1 ;;; w3m-ccl.el --- CCL programs to process Unicode and internal characters.
2
3 ;; Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007
4 ;; TSUCHIYA Masatoshi <tsuchiya@namazu.org>
5
6 ;; Authors: TSUCHIYA Masatoshi <tsuchiya@namazu.org>,
7 ;; ARISAWA Akihiro <ari@mbf.sphere.ne.jp>
8 ;; Keywords: w3m, WWW, hypermedia
9
10 ;; This file is a part of emacs-w3m.
11
12 ;; This program is free software; you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; any later version.
16
17 ;; This program is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
21
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with this program; see the file COPYING. If not, write to
24 ;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 ;; Boston, MA 02110-1301, USA.
26
27 ;;; Commentary:
28
29 ;; This file contains CCL programs to process Unicode and internal
30 ;; characters of w3m. For more detail about emacs-w3m, see:
31 ;;
32 ;; http://emacs-w3m.namazu.org/
33
34 ;;; MEMO:
35
36 ;; It is possible to support multi scripts without Mule-UCS. For more
37 ;; detail, see [emacs-w3m:01950]
38
39 ;;; Code:
40
;; Make the CCL support library available both while compiling and
;; while loading this file: `pccl' when running under XEmacs, `ccl'
;; everywhere else.
(eval-and-compile
  (if (featurep 'xemacs)
      (require 'pccl)
    (require 'ccl)))
47
48 ;;; CCL programs:
49
;; Compile-time shim: when `charset-id' is not defined but
;; `charset-id-internal' is, define `charset-id' as a macro that
;; expands into a call to `charset-id-internal'.
(eval-when-compile
  (unless (fboundp 'charset-id)
    (when (fboundp 'charset-id-internal)
      (defmacro charset-id (charset)
        "Return charset identification number of CHARSET."
        (list 'charset-id-internal charset)))))
56
;; Everything in this form is needed at compile time as well, because
;; the `define-ccl-program' forms later in the file splice these
;; values into their program templates.
(eval-and-compile
  (defconst w3m-internal-characters-alist
    '((?\x90 . ? )   ; ANSP (used for an empty anchor)
      (?\x91 . ? )   ; IMSP (blank around an image)
      (?\xa0 . ? ))  ; NBSP (non-breakable space)
    "Alist of internal characters v.s. ASCII characters.")

  (defun w3m-ccl-write-repeat (charset &optional r0 r1)
    "Return a list of CCL statements that emit one character of CHARSET.
R0 and R1 are symbols naming the CCL registers to use.  R0 defaults
to the symbol `r0'; R1 defaults to `r1', or to `r0' when R0 is `r1'.

When `ccl-compile-write-multibyte-character' is fbound, the returned
statements mask R1 to its low seven bits, merge the low seven bits of
R0 (shifted left by 7) into R1 unless CHARSET is `latin-iso8859-1' or
`katakana-jisx0201', store the id of CHARSET in R0, and end with
`write-multibyte-character' followed by `repeat'.

Otherwise the statements write the id of CHARSET, then R0 (with the
same two-charset exception), and end with `write-repeat' of R1."
    (let* ((lead (or r0 'r0))
           (trail (or r1 (if (eq lead 'r1) 'r0 'r1)))
           ;; Non-nil for the charsets handled with a single register.
           (single-byte (memq charset '(latin-iso8859-1 katakana-jisx0201)))
           (id (charset-id charset)))
      (if (fboundp 'ccl-compile-write-multibyte-character)
          `((,trail &= ?\x7f)
            ,@(and (not single-byte)
                   `((,trail |= ((,lead & ?\x7f) << 7))))
            (,lead = ,id)
            (write-multibyte-character ,lead ,trail)
            (repeat))
        `((write ,id)
          ,@(and (not single-byte)
                 `((write ,lead)))
          (write-repeat ,trail)))))

  ;; The value is nil when `ccl-compile-read-multibyte-character' is
  ;; not fbound.
  (defconst w3m-ccl-write-euc-japan-character
    (and (fboundp 'ccl-compile-read-multibyte-character)
         `((read-multibyte-character r1 r0)
           ;; (1) ASCII characters
           (if (r1 == ,(charset-id 'ascii))
               (write-repeat r0))
           ;; (2) Latin part of Japanese JISX0201.1976: written as is,
           ;; i.e. converted to ASCII.
           (if (r1 == ,(charset-id 'latin-jisx0201))
               (write-repeat r0))
           ;; (3) Characters of Japanese JISX0208 (either revision):
           ;; split the code in r0 into two 7-bit halves and set the
           ;; high bit of each byte.
           (r2 = (r1 == ,(charset-id 'japanese-jisx0208-1978)))
           (if ((r1 == ,(charset-id 'japanese-jisx0208)) | r2)
               ((r1 = ((r0 & 127) | 128))
                (r0 = ((r0 >> 7) | 128))
                (write r0)
                (write-repeat r1)))
           ;; (4) Katakana part of Japanese JISX0201.1976: byte 0x8e
           ;; followed by the code with its high bit set.
           (if (r1 == ,(charset-id 'katakana-jisx0201))
               ((r0 |= 128)
                (write ?\x8e)
                (write-repeat r0)))))
    "CCL program to write characters represented in `euc-japan'.")

  ;; The value is nil when `ccl-compile-read-multibyte-character' is
  ;; not fbound.
  (defconst w3m-ccl-write-iso-latin-1-character
    (and (fboundp 'ccl-compile-read-multibyte-character)
         `((read-multibyte-character r1 r0)
           ;; (1) ASCII characters
           (if (r1 == ,(charset-id 'ascii))
               (write-repeat r0))
           ;; (2) Latin part of Japanese JISX0201.1976: written as is,
           ;; i.e. converted to ASCII.
           (if (r1 == ,(charset-id 'latin-jisx0201))
               (write-repeat r0))
           ;; (3) Latin-1 characters: set the high bit.
           (if (r1 == ,(charset-id 'latin-iso8859-1))
               ((r0 |= ?\x80)
                (write-repeat r0)))))
    "CCL program to write characters represented in `iso-latin-1'.")

  (defconst w3m-ccl-generate-ncr
    `((r1 = 0)
      (r2 = 0)
      ;; Move the 4-bit groups of r0 into r1 in reverse order; r2 ends
      ;; up holding the number of groups minus one.
      (loop
       (r1 = (r1 << 4))
       (r1 |= (r0 & 15))
       (r0 = (r0 >> 4))
       (if (r0 == 0)
           (break)
         ((r2 += 1)
          (repeat))))
      (write "&#x")
      ;; Emit one lower-case hexadecimal digit per 4-bit group of r1,
      ;; lowest group first, and terminate the reference with `;'.
      (loop
       (branch (r1 & 15)
               ,@(mapcar (lambda (digit)
                           (list 'write digit))
                         "0123456789abcdef"))
       (r1 = (r1 >> 4))
       (if (r2 == 0)
           ((write ?\;)
            (break))
         ((r2 -= 1)
          (repeat))))
      ;; NOTE(review): presumably restarts the enclosing loop of the
      ;; program this list gets spliced into -- no user of this
      ;; constant is visible in this file.
      (repeat))
    "CCL program to generate a string which represents a UCS codepoint
in NCR (Numeric Character References)."))
149
;; CCL decoder for the EUC-JP byte stream.  The leading 2 is the
;; buffer magnification slot of the CCL program (see the documentation
;; of `define-ccl-program').  Each pass of the loop reads one byte
;; into r0 and dispatches on its value.
(define-ccl-program w3m-euc-japan-decoder
  `(2
    (loop
     (read r0)
     ;; Process normal EUC characters.
     ;; Bytes below 0x80 are copied through unchanged.
     (if (r0 < ?\x80)
         (write-repeat r0))
     ;; Bytes above 0xa0 start a two-byte `japanese-jisx0208' character.
     (if (r0 > ?\xa0)
         ((read r1)
          ,@(w3m-ccl-write-repeat 'japanese-jisx0208)))
     ;; 0x8e introduces a one-byte `katakana-jisx0201' character.
     (if (r0 == ?\x8e)
         ((read r1)
          ,@(w3m-ccl-write-repeat 'katakana-jisx0201)))
     ;; 0x8f introduces a two-byte `japanese-jisx0212' character.
     (if (r0 == ?\x8f)
         ((read r0)
          (read r1)
          ,@(w3m-ccl-write-repeat 'japanese-jisx0212)))
     ;; Process internal characters used in w3m: one clause per entry
     ;; of `w3m-internal-characters-alist', writing the replacement.
     ,@(mapcar (lambda (entry)
                 (list 'if
                       (list 'r0 '== (car entry))
                       (list 'write-repeat (cdr entry))))
               w3m-internal-characters-alist)
     ;; Any other byte is written unchanged.
     (write-repeat r0))))
173
;; Define a pass-through encoder (every byte read is written back
;; unchanged), but only when the symbol does not already carry a
;; `ccl-program-idx' property.
(when (null (get 'w3m-euc-japan-encoder 'ccl-program-idx))
  (define-ccl-program w3m-euc-japan-encoder
    '(1
      (loop
       (read r0)
       (write-repeat r0)))))
177
;; CCL decoder for the ISO Latin-1 byte stream.  The leading 2 is the
;; buffer magnification slot of the CCL program (see the documentation
;; of `define-ccl-program').  Each pass of the loop reads one byte
;; into r0 and dispatches on its value.
(define-ccl-program w3m-iso-latin-1-decoder
  `(2
    (loop
     (read r0)
     ;; Process ASCII characters: bytes below 0x80 are copied through.
     (if (r0 < ?\x80)
         (write-repeat r0))
     ;; Process Latin-1 characters.  `r1' is passed as the first
     ;; register, so the generated statements keep the byte in r0 and
     ;; use r1 for the charset id.
     (if (r0 > ?\xa0)
         ,(w3m-ccl-write-repeat 'latin-iso8859-1 'r1))
     ;; Process internal characters used in w3m: one clause per entry
     ;; of `w3m-internal-characters-alist', writing the replacement.
     ,@(mapcar (lambda (entry)
                 (list 'if
                       (list 'r0 '== (car entry))
                       (list 'write-repeat (cdr entry))))
               w3m-internal-characters-alist)
     ;; Any other byte is written unchanged.
     (write-repeat r0))))
194
;; Define a pass-through encoder (every byte read is written back
;; unchanged), but only when the symbol does not already carry a
;; `ccl-program-idx' property.
(when (null (get 'w3m-iso-latin-1-encoder 'ccl-program-idx))
  (define-ccl-program w3m-iso-latin-1-encoder
    '(1
      (loop
       (read r0)
       (write-repeat r0)))))


;; Announce the feature so that (require 'w3m-ccl) succeeds.
(provide 'w3m-ccl)
201
202 ;;; w3m-ccl.el ends here