]> code.delx.au - gnu-emacs/blob - lisp/term/internal.el
term/internal.el (IT-display-table-setup): Don't overstep
[gnu-emacs] / lisp / term / internal.el
1 ;;; internal.el --- support for PC internal terminal -*- coding: raw-text; -*-
2
3 ;; Copyright (C) 1993, 1994, 1998 Free Software Foundation, Inc.
4
5 ;; Author: Morten Welinder <terra@diku.dk>
6
7 ;; This file is part of GNU Emacs.
8
9 ;; GNU Emacs is free software; you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; any later version.
13
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
18
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs; see the file COPYING. If not, write to the
21 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 ;; Boston, MA 02111-1307, USA.
23
24 ;;; Code:
25
26 ;; ---------------------------------------------------------------------------
27 ;; keyboard setup -- that's simple!
;; No interrupt-driven input, no flow control, and META = 0 (neither
;; t nor nil) so that 8-bit keyboard input is passed through as is.
(set-input-mode nil nil 0)

;; Translate function-key symbols into the ASCII characters people
;; usually expect.
(define-key function-key-map [backspace] "\177") ; Normal behaviour for BS
(define-key function-key-map [delete] "\C-d")    ; ... and Delete
(define-key function-key-map [tab] [?\t])
(define-key function-key-map [linefeed] [?\n])
;; `clear' is form feed (C-l, code 12).  It used to be translated to 11
;; (C-k), which contradicted the `ascii-character' property set below.
(define-key function-key-map [clear] [12])
(define-key function-key-map [return] [13])
(define-key function-key-map [escape] [?\e])

;; The same keys with the Meta modifier.
(define-key function-key-map [M-backspace] [?\M-\d])
(define-key function-key-map [M-delete] [?\M-\d])
(define-key function-key-map [M-tab] [?\M-\t])
(define-key function-key-map [M-linefeed] [?\M-\n])
(define-key function-key-map [M-clear] [?\M-\014]) ; Meta + form feed (12)
(define-key function-key-map [M-return] [?\M-\015])
(define-key function-key-map [M-escape] [?\M-\e])

;; Record the ASCII equivalent of each function-key symbol.
(put 'backspace 'ascii-character 127)
(put 'delete 'ascii-character 127)
(put 'tab 'ascii-character ?\t)
(put 'linefeed 'ascii-character ?\n)
(put 'clear 'ascii-character 12)
(put 'return 'ascii-character 13)
(put 'escape 'ascii-character ?\e)
50
51 ;; ----------------------------------------------------------------------
52 ;; DOS display setup
53 ;; =================
54 ;;
55 ;; DOS can only support a single font. On most systems (with the
56 ;; possible exception of Far Eastern DOS versions), this means that
57 ;; two character sets are available at any given time: the ASCII
58 ;; charset, and a single national charset, usually mapped to codes
59 ;; above 128 (i.e., with 8th bit set). Which national charset is
60 ;; supported depends on the codepage loaded by the system when it
61 ;; boots; usually, this codepage cannot be changed without
62 ;; rebooting.
63 ;;
64 ;; Since each codepage can usually display characters of a single
65 ;; MULE charset, Emacs can display a single MULE charset with the
66 ;; glyphs of the current codepage. The mapping from DOS codepages
67 ;; to MULE charsets is established by the charset property of the
68 ;; cpNNN-decode-table variables in codepage.el, which also
69 ;; defines translation tables for each such pair, and a bunch of
70 ;; functions to generate coding systems that use those translation
71 ;; tables to convert codepage-encoded text to the appropriate MULE
72 ;; charset and back. When Emacs starts on DOS, it automatically
73 ;; sets its default coding systems for file I/O and terminal output
74 ;; according to the current DOS codepage, given by the
75 ;; `dos-codepage' variable.
76 ;;
77 ;; This leaves us with the problem of displaying character sets
78 ;; other than the one which maps directly into the current codepage.
79 ;; The following functions and variables handle this nuisance by
80 ;; defining a display table where each character that doesn't have a
81 ;; glyph in some codepage is mapped to a string which represents it.
82 ;; For example, a small c with cedilla is mapped to the string
83 ;; "{,c}" (the braces serve as a sign that this is a single
84 ;; character). A nice feature of the display tables is that Emacs
85 ;; knows that the string represents a single character, and thus
86 ;; cursor motion works as you'd expect: a single `C-f' moves past
87 ;; the entire string which represents a single character.
88 ;; ----------------------------------------------------------------------
89
(defvar IT-character-translations
  '(
    (latin-iso8859-1
     . [255 "!I" "|c" "Pd" "$$" "Ye" "|" "SE" "\"" "(c)"
        "_a" "<<" "~" "--" "(R)" "'-" "^o" "+-" "^2" "^3"
        "'" "u" ".P" "^." "'," "^1" "_o" ">>" "1/4" "1/2"
        "3/4" "?I" "`A" "A'" "A^" "~A" "\"A" "Ao" "AE" ",C"
        "`E" "E'" "E^" "\"E" "`I" "I'" "I^" "\"I" "D-" "~N"
        "`O" "O'" "O^" "~O" "\"O" "*x" "/O" "`U" "U'" "U^"
        "\"U" "Y'" "TH" "ss" "`a" "a'" "a^" "~a" "\"a" "ao"
        "ae" ",c" "`e" "e'" "e^" "\"e" "`i" "i'" "i^" "\"i"
        "d-" "~n" "`o" "o'" "o^" "~o" "\"o" "-:" "/o" "`u"
        "u'" "u^" "\"u" "y'" "th" "\"y"]
     )
    (latin-iso8859-2
     . [255 "A;" "'(" "/L" "$$" "L<" "S'" "SE" "\"" "S<"
        ",S" "T<" "Z'" "--" "Z<" "Z^." "^o" "a;" "';" "/l"
        "'" "l<" "s'" "'<" "'," "s<" ",s" "t<" "z'" "'"
        "z<" "z^." "R'" "A'" "A^" "A(" "\"A" "L'" "C'" ",C"
        "C<" "E'" "E;" "E:" "E<" "I'" "I^" "D<" "/D" "N'"
        "N<" "O'" "O^" "O''" "\"O" "*x" "R<" "U^0" "U'" "U''"
        "\"U" "Y'" ",T" "ss" "r'" "a'" "a^" "a(" "\"a" "l'"
        "c'" ",c" "c<" "e'" "e;" "\"e" "e<" "i'" "i^" "d<"
        "/d" "n'" "n<" "o'" "o^" "o''" "\"o" "-:" "r<" "u^0"
        "u'" "u''" "\"u" "y'" ",t" "'."]
     )
    (latin-iso8859-3
     . [255 "/H" "'(" "Pd" "$$" " " "H^" "SE" "\"" "I^."
        ",S" "G(" "J^" "--" " " "Z^." "^o" "/h" "^2" "^3"
        "'" "u" "h^" "." "'," "i^." ",s" "g(" "j^" "1/2"
        " " "z^." "`A" "A'" "A^" " " "\"A" "C^." "C^" ",C"
        "`E" "E'" "E^" "\"E" "`I" "I'" "I^" "\"I" " " "~N"
        "`O" "O'" "O^" "G^." "\"O" "*x" "G^" "`U" "U'" "U^"
        "\"U" "U(" "S^" "ss" "`a" "a'" "a^" " " "\"a" "c^."
        "c^" ",c" "`e" "e'" "e^" "\"e" "`i" "i'" "i^" "\"i"
        " " "~n" "`o" "o'" "o^" "g^." "\"o" "-:" "g^" "`u"
        "u'" "u^" "\"u" "u(" "s^" "^."]
     )
    (latin-iso8859-4
     . [255 "A;" "kk" ",R" "$$" "?I" ",L" "SE" "\"" "S<"
        "E-" ",G" "/T" "--" "Z<" "'-" "^o" "a;" "';" ",r"
        "'" "~i" ",l" "'<" "'," "s<" "e-" ",g" "/t" "NG"
        "z<" "ng" "A-" "A'" "A^" "~A" "\"A" "Ao" "AE" "I;"
        "C<" "E'" "E;" "\"E" "E^." "I'" "I^" "I-" "/D" ",N"
        "O-" ",K" "O^" "~O" "\"O" "*x" "/O" "U;" "U'" "U^"
        "\"U" "~U" "U-" "ss" "a-" "a'" "a^" "~a" "\"a" "ao"
        "ae" "i;" "c<" "e'" "e;" "\"e" "e^." "i'" "i^" "i-"
        "/d" ",n" "o-" ",k" "o^" "~o" "\"o" "-:" "/o" "u;"
        "u'" "u^" "\"u" "~u" "u-" "^."]
     )
    (cyrillic-iso8859-5
     . [255 "\"E" "Dj" "Gj" "IE" "Dz" "Ii" "Ji" "JE" "Lj"
        "Nj" "Ts" "Kj" 240 "V%" "Dzh" 65 "B=" 66 226
        68 69 "Z%" 51 85 "J=" 75 "L=" 77 72
        79 "P=" 80 67 84 89 232 88 "C=" "C%"
        "S%" "Sc" "=\"" "Y=" "%\"" "Ee" "Yu" "Ya" 97 98
        "v=" "g=" 103 101 "z%" "z=" 117 "j=" 107 "l="
        "m=" "n=" 111 110 112 99 "t=" 121 "f=" 120
        "c=" "c%" "s%" "sc" "='" "y=" "%'" "ee" "yu" "ya"
        "N0" "\"e" "dj" "gj" "ie" "dz" "ii" "ji" "je" "lj"
        "nj" "ts" "kj" 21 "v%" "dzh"]
     )
    (arabic-iso8859-6
     . [255 nil nil nil "$$" nil nil nil nil nil
        nil nil ",+" "--" nil nil nil nil nil nil
        nil nil nil nil nil nil nil ";+" nil nil
        nil "?+" nil "H'" "aM" "aH" "wH" "ah" "yH"
        "a+" "b+" "tm" "t+" "tk" "g+" "hk" "x+" "d+" "dk"
        "r+" "z+" "s+" "sn" "c+" "dd" "tj" "zH" "e+" "i+"
        nil nil nil nil nil "++" "f+" "q+" "k+" "l+"
        "m+" "n+" "h+" "w+" "j+" "y+" ":+" "\"+" "=+" "/+"
        "'+" "1+" "3+" "0+" nil nil nil nil nil nil
        nil nil nil nil nil nil nil]
     )
    (greek-iso8859-7
     . [255 "9'" "'9" "Pd" nil nil "|" "SE" "\"" "(c)"
        nil "<<" "~" "--" nil "-M" "^o" "+-" "^2" "^3"
        "'" "'%" "A%" "^." "E%" "Y%" "I%" ">>" "O%" "1/2"
        "U%" "W%" "i3" "A*" "B*" "G*" "D*" "E*" "Z*" "Y*"
        "H*" "I*" "K*" "L*" "M*" "N*" "C*" "O*" "P*" "R*"
        nil "S*" "T*" "U*" "F*" "X*" "Q*" "W*" "J*" "V*"
        "a%" "e%" "y%" "i%" "u3" "a*" "b*" "g*" "d*" "e*"
        "z*" "y*" "h*" "i*" "k*" "l*" "m*" "n*" "c*" "o*"
        "p*" "r*" "*s" "s*" "t*" "u*" "f*" "x*" "q*" "w*"
        "j*" "v*" "o%" "u%" "w%" nil]
     )
    (hebrew-iso8859-8
     . [255 nil "|c" "Pd" "$$" "Ye" "|" "SE" "\"" "(c)"
        "*x" "<<" "~" "--" "(R)" "'-" "^o" "+-" "^2" "^3"
        "'" "u" ".P" "^." "'," "^1" "-:" ">>" "1/4" "1/2"
        "3/4" nil nil nil nil nil nil nil nil nil
        nil nil nil nil nil nil nil nil nil nil
        nil nil nil nil nil nil nil nil nil nil
        nil nil nil "=2" "A+" "B+" "G+" "D+" "H+" "W+"
        "Z+" "X+" "Tj" "J+" "K%" "K+" "L+" "M%" "M+" "N%"
        "N+" "S+" "E+" "P%" "P+" "Zj" "ZJ" "Q+" "R+" "Sh"
        "T+" nil nil nil nil nil]
     )
    ;; ISO 8859-9 differs from ISO 8859-1 only at the six positions
    ;; 0xD0, 0xDD, 0xDE, 0xF0, 0xFD and 0xFE (the Turkish letters).
    ;; Positions 0xEC and 0xEF are therefore i-grave and i-diaeresis,
    ;; as in Latin-1; they used to hold the Latin-4 surrogates "e^."
    ;; and "i-".
    (latin-iso8859-9
     . [255 "!I" "|c" "Pd" "$$" "Ye" "|" "SE" "\"" "(c)"
        "_a" "<<" "~" "--" "(R)" "'-" "^o" "+-" "^2" "^3"
        "'" "u" ".P" "^." "'," "^1" "_o" ">>" "1/4" "1/2"
        "3/4" "?I" "`A" "A'" "A^" "~A" "\"A" "Ao" "AE" ",C"
        "`E" "E'" "E^" "\"E" "`I" "I'" "I^" "\"I" "G(" "~N"
        "`O" "O'" "O^" "~O" "\"O" "*x" "/O" "`U" "U'" "U^"
        "\"U" "I^." ",S" "ss" "`a" "a'" "a^" "~a" "\"a" "ao"
        "ae" ",c" "`e" "e'" "e^" "\"e" "`i" "i'" "i^" "\"i"
        "g(" "~n" "`o" "o'" "o^" "~o" "\"o" "-:" "/o" "`u"
        "u'" "u^" "\"u" "i^." ",s" "\"y"]
     )
    )
  "Alist of MULE ISO-8859 charsets and their DOS display surrogates.
Each element has the form (CHARSET . VECTOR).  VECTOR lists, in code
order, what to display for each character of CHARSET on a DOS
terminal which does not have the corresponding glyph built into the
installed codepage.  A string element is displayed as is (wrapped in
braces when it is longer than one character), a number is displayed
as the character with that code, and nil means that no surrogate is
provided.  See `IT-display-table-setup'.")
205
(defun IT-display-table-setup (codepage &optional table)
  "Fill display table TABLE with surrogates for a DOS terminal.
CODEPAGE names the codepage whose built-in glyphs the terminal
supports (for example \"cp437\").

If TABLE is nil or omitted, `standard-display-table' is used."
  (let* ((remaining IT-character-translations)
         (dtable (or table standard-display-table))
         (native-charset (cp-charset-for-codepage codepage))
         (offset (cp-offset-for-codepage codepage))
         (decode-vec
          (symbol-value (intern-soft (format "%s-decode-table" codepage))))
         (decode-len (length decode-vec))
         (code offset))
    ;; A previous call may have been made for a different codepage.
    ;; Undo its effects by restoring the entries of the built-in
    ;; charset to their pristine (nil) state.
    (while (< code 256)
      (aset dtable (make-char native-charset code) nil)
      (setq code (1+ code)))
    ;; Install the surrogates, one charset at a time.
    (let ((base (logand offset 127)))
      (while remaining
        (let* ((entry (car remaining))
               (charset (car entry))
               (glyphs (cdr entry))
               (nglyphs (length glyphs))
               (foreign (not (equal charset native-charset)))
               (idx 0)
               surrogate)
          (while (< idx nglyphs)
            (setq surrogate (aref glyphs idx))
            ;; A surrogate is installed for every character of a
            ;; charset other than the built-in one; for the built-in
            ;; charset, only where the decode table has no entry.
            (when (and surrogate
                       (or foreign
                           (>= idx decode-len)
                           (null (aref decode-vec idx))))
              (aset dtable (make-char charset (+ idx base))
                    (vconcat
                     (cond ((numberp surrogate)
                            (char-to-string surrogate))
                           ;; Braces mark a multi-character string as
                           ;; standing for a single character.
                           ((> (length surrogate) 1)
                            (concat "{" surrogate "}"))
                           (t surrogate)))))
            (setq idx (1+ idx))))
        (setq remaining (cdr remaining))))))
247
(defun dos-cpNNN-setup (codepage)
  "Configure the MULE environment for the DOS codepage CODEPAGE.

The coding system cpNNN (NNN being the value of the argument CODEPAGE)
is created first, and is then used to set up the display tables and
the language environment options as appropriate."
  (let* ((cp (format "cp%s" codepage))
         (charset (cp-charset-for-codepage cp))
         (offset (cp-offset-for-codepage cp)))
    (cp-make-coding-systems-for-codepage cp charset offset)
    ;; `nonascii-translation-table' is not assigned here; this is done
    ;; by `set-language-environment'.
    ;;(setq nonascii-translation-table
    ;;      (symbol-value (intern (concat cp "-nonascii-translation-table"))))
    (set-language-environment (cp-language-for-codepage cp))
    (let ((dos-coding (intern (concat cp "-dos"))))
      (set-default-coding-systems dos-coding)
      (set-selection-coding-system dos-coding)
      (setq default-terminal-coding-system (intern (concat cp "-unix")))
      (set-terminal-coding-system default-terminal-coding-system)
      (IT-display-table-setup cp)
      (prefer-coding-system dos-coding))
    (cond
     ;; Multibyte version only: the unibyte version should not convert
     ;; non-ASCII characters at all.
     (default-enable-multibyte-characters
      (setq unibyte-display-via-language-environment t))
     ;; The unibyte version behaves as Emacs 19 did.  In particular, it
     ;; uses and displays native codepage-specific glyphs for non-ASCII
     ;; characters.  For this to work correctly, the correspondence
     ;; between lower-case letters and their upper-case brethren must
     ;; be established as appropriate for the codepage in use, which is
     ;; what the code below does.
     ;; (In the multibyte mode, the appropriate tables are prepared
     ;; elsewhere, since multibyte Emacs uses normal MULE character
     ;; sets, which are supported on all platforms.)
     (t
      (let* ((set-syntax
              (function
               (lambda (ch sy)
                 (modify-syntax-entry ch sy text-mode-syntax-table)
                 (if (boundp 'tex-mode-syntax-table)
                     (modify-syntax-entry ch sy tex-mode-syntax-table))
                 (modify-syntax-entry ch sy (standard-syntax-table)))))
             (case-tab (standard-case-table))
             ;; Each string below is a sequence of letter pairs, lower
             ;; case first and upper case second.  They will look
             ;; funny on terminals which display other code pages; in
             ;; particular, what is displayed as blanks or triangles
             ;; is not what they look like at all!  (Use `C-x =' to
             ;; see what they really are.)
             (letters
              (cond
               ((= codepage 850)
                "\87\80\81\9a\82\90\83\84\8e\85·\86\8fÆÇ µ\88Ò\89Ó\8aÔ\8bØ\8c×\8dÞ¡Ö\91\92\93â\94\99\95ã¢à\9b\9d\96ê£é\97ë\98Yìí¡I£é¤¥ÐÑçè")
               ((= codepage 865)
                "\87\80\81\9a\82\90\83A\84\8e\85A\86\8f\88E\89E\8aE\8bI\8cI\8dI\91\92\93O\94\99\95O\96U£U\98Y\9b\9d A¡I¢O£U¤¥")
               ;; default is 437
               (t "\87\80\81\9a\82\90\83A\84\8e\85A\86\8f\88E\89E\8aE\8bI\8cI\8dI\91\92\93O\94\99\95O\96U£U\98Y A¡I¢O£U¤¥")))
             (code 128))

        ;; Give every 8-bit character symbol syntax.
        (while (< code 256)
          (funcall set-syntax code "_")
          (setq code (1+ code)))

        ;; Walk the string two characters at a time and register each
        ;; pair whose upper-case member is a non-ASCII character.
        (let ((pos 0)
              (end (length letters))
              lower upper)
          (while (< pos end)
            (setq lower (aref letters pos)
                  upper (aref letters (1+ pos)))
            (if (> upper 127)
                (set-case-syntax-pair upper lower case-tab))
            (setq pos (+ pos 2))))

        ;; Install the case table in every existing buffer, and make
        ;; it the standard one.
        (save-excursion
          (let ((buffers (buffer-list)))
            (while buffers
              (set-buffer (car buffers))
              (set-case-table case-tab)
              (setq buffers (cdr buffers)))))
        (set-standard-case-table case-tab))))
    ;; Some codepages have sporadic support for Latin-1, Greek, and
    ;; symbol glyphs, which don't belong to their native character
    ;; set.  It's a nuisance to have all those glyphs here, for all
    ;; the codepages (for starters, I don't even have references for
    ;; all the codepages).  So a hook is provided for those who want
    ;; to squeeze every bit of support out of their terminal/font.
    (run-hooks 'dos-codepage-setup-hook)))
329
330 ;; FIXME: Korean and Chinese codepages should be added here, but I
331 ;; don't know which coding systems they support. The codepages in
332 ;; question are 934, 936, 938, 944, and 948.
(defvar cjk-codepages-alist
  (list (list 932 "Japanese" 'japanese-shift-jis))
  "Alist of Far-Eastern codepages and what Emacs should use for them.
Each element has the form (CODEPAGE LANGUAGE CODING-SYSTEM), where
LANGUAGE is the name of the associated language environment and
CODING-SYSTEM is the base name of the supported coding system.")
337
(defun dos-codepage-setup ()
  "Set up the MULE environment as appropriate for the installed DOS codepage.

Coding systems, display tables, and the language environment options
are all set according to the current value of `dos-codepage'.

This function is run automatically at startup via the `term-setup-hook'
list.  You can (and should) also run it whenever the value of
`dos-codepage' changes."
  (interactive)
  (let* ((desc (cdr (assq dos-codepage cjk-codepages-alist)))
         (lang (car desc))
         (coding (nth 1 desc)))
    (cond
     ;; Not one of the Far-Eastern codepages: use the generic setup.
     ((null desc)
      (dos-cpNNN-setup dos-codepage))
     ;; We've got one of the Far-Eastern codepages which support
     ;; MULE native coding systems directly.
     (t
      (let ((coding-dos (intern (format "%s-dos" coding)))
            (coding-unix (intern (format "%s-unix" coding))))
        (set-language-environment lang)
        (set-selection-coding-system coding-dos)
        (setq file-name-coding-system coding-unix)
        (setq default-terminal-coding-system coding-unix)
        (set-terminal-coding-system default-terminal-coding-system)
        ;; Assume they support non-ASCII Latin characters like the IBM
        ;; codepage 437 does.
        (IT-display-table-setup "cp437")
        (prefer-coding-system coding-dos)
        (if default-enable-multibyte-characters
            (setq unibyte-display-via-language-environment t)))))))
370
;; The terminal and other codepage-related setup is deferred until
;; after the terminal is set up and the user's .emacs is processed,
;; because people might define their `dos-codepage-setup-hook' there.
(add-hook 'term-setup-hook (function dos-codepage-setup))

;; In multibyte mode, unibyte buffers should be displayed using the
;; terminal coding system, so that they display correctly on the DOS
;; terminal; in unibyte mode, all 8-bit characters should be seen
;; verbatim.  In both cases, the entire range of 8-bit characters must
;; arrive at our display code verbatim.
(standard-display-8bit 127 255)
382
383 ;;; internal.el ends here
384