1 ;;; european.el --- support for European languages -*- coding: iso-2022-7bit; -*-
3 ;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
4 ;; 2008 Free Software Foundation, Inc.
5 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 ;; 2005, 2006, 2007, 2008
7 ;; National Institute of Advanced Industrial Science and Technology (AIST)
8 ;; Registration Number H14PRO021
10 ;; National Institute of Advanced Industrial Science and Technology (AIST)
11 ;; Registration Number H13PRO009
13 ;; Keywords: multilingual, European
15 ;; This file is part of GNU Emacs.
17 ;; GNU Emacs is free software; you can redistribute it and/or modify
18 ;; it under the terms of the GNU General Public License as published by
19 ;; the Free Software Foundation; either version 3, or (at your option)
22 ;; GNU Emacs is distributed in the hope that it will be useful,
23 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
24 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 ;; GNU General Public License for more details.
27 ;; You should have received a copy of the GNU General Public License
28 ;; along with GNU Emacs; see the file COPYING. If not, write to the
29 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
30 ;; Boston, MA 02110-1301, USA.
34 ;; For European scripts, all the ISO Latin character sets are
35 ;; supported, along with various others.
39 ;; Latin-1 (ISO-8859-1)
41 (set-language-info-alist
42 "Latin-1" '((charset iso-8859-1)
43 (coding-system iso-latin-1 iso-latin-9 windows-1252)
44 (coding-priority iso-latin-1)
45 (nonascii-translation . iso-8859-1)
46 (unibyte-display . iso-latin-1)
47 (input-method . "latin-1-prefix")
49 . "Hello, Hej, Tere, Hei, Bonjour, Gr
\e$(D+d)N
\e(B Gott, Ciao,
\e$(D"B
\e(BHola!")
51 This language environment is a generic one for the Latin-1 (ISO-8859-1)
52 character set which supports the following European languages:
53 Albanian, Basque, Breton, Catalan, Danish, Dutch, English, Faeroese,
54 Finnish, French (with restrictions -- see Latin-9), Frisian, Galician,
55 German, Greenlandic, Icelandic, Irish Gaelic (new orthography),
56 Italian, Latin, Luxemburgish, Norwegian, Portuguese, Rhaeto-Romanic,
57 Scottish Gaelic, Spanish, and Swedish.
58 We also have specific language environments for the following languages:
60 For German, \"German\".
61 For French, \"French\".
62 For Italian, \"Italian\".
63 For Slovenian, \"Slovenian\".
64 For Spanish, \"Spanish\".
66 Latin-1 also covers several written languages outside Europe, including
67 Indonesian/Malay, Tagalog (Philippines), Swahili and Afrikaans."))
71 ;; Latin-2 (ISO-8859-2)
73 (define-coding-system 'iso-latin-2
74 "ISO 2022 based 8-bit encoding for Latin-2 (MIME:ISO-8859-2)."
77 :charset-list '(iso-8859-2)
78 :mime-charset 'iso-8859-2)
80 (define-coding-system-alias 'iso-8859-2 'iso-latin-2)
81 (define-coding-system-alias 'latin-2 'iso-latin-2)
83 (set-language-info-alist
84 "Latin-2" '((charset iso-8859-2)
85 (coding-system iso-latin-2 windows-1250)
86 (coding-priority iso-latin-2)
87 (nonascii-translation . iso-8859-2)
88 (unibyte-display . iso-latin-2)
89 (input-method . "latin-2-prefix")
91 This language environment is a generic one for the Latin-2 (ISO-8859-2)
92 character set which supports the following languages:
93 Albanian, Czech, English, German, Hungarian, Polish, Romanian,
94 Serbo-Croatian or Croatian, Slovak, Slovene, Sorbian (upper and lower),
96 We also have specific language environments for the following languages:
98 For Croatian, \"Croatian\".
99 For Polish, \"Polish\".
100 For Romanian, \"Romanian\".
101 For Slovak, \"Slovak\"."))
105 ;; Latin-3 (ISO-8859-3)
107 (define-coding-system 'iso-latin-3
108 "ISO 2022 based 8-bit encoding for Latin-3 (MIME:ISO-8859-3)."
109 :coding-type 'charset
111 :charset-list '(iso-8859-3)
112 :mime-charset 'iso-8859-3)
114 (define-coding-system-alias 'iso-8859-3 'iso-latin-3)
115 (define-coding-system-alias 'latin-3 'iso-latin-3)
117 (set-language-info-alist
118 "Latin-3" '((charset iso-8859-3)
119 (coding-system iso-latin-3)
120 (coding-priority iso-latin-3)
121 (nonascii-translation . iso-8859-3)
122 (unibyte-display . iso-latin-3)
123 (input-method . "latin-3-prefix")
125 These languages are supported with the Latin-3 (ISO-8859-3) character set:
126 Afrikaans, Catalan, Dutch, English, Esperanto, French, Galician,
127 German, Italian, Maltese, Spanish, and Turkish."))
131 ;; Latin-4 (ISO-8859-4)
133 (define-coding-system 'iso-latin-4
134 "ISO 2022 based 8-bit encoding for Latin-4 (MIME:ISO-8859-4)."
135 :coding-type 'charset
137 :charset-list '(iso-8859-4)
138 :mime-charset 'iso-8859-4)
140 (define-coding-system-alias 'iso-8859-4 'iso-latin-4)
141 (define-coding-system-alias 'latin-4 'iso-latin-4)
143 (set-language-info-alist
144 "Latin-4" '((charset iso-8859-4)
145 (coding-system iso-8859-4)
146 (coding-priority iso-8859-4)
147 (nonascii-translation . iso-8859-4)
148 (unibyte-display . iso-8859-4)
149 (input-method . "latin-4-postfix")
151 These languages are supported with the Latin-4 (ISO-8859-4) character set:
152 Danish, English, Estonian, Finnish, German, Greenlandic, Lappish,
153 Latvian, Lithuanian, and Norwegian."))
157 ;; Latin-5 (ISO-8859-9)
159 (define-coding-system 'iso-latin-5
160 "ISO 2022 based 8-bit encoding for Latin-5 (MIME:ISO-8859-9)."
161 :coding-type 'charset
163 :charset-list '(iso-8859-9)
164 :mime-charset 'iso-8859-9)
166 (define-coding-system-alias 'iso-8859-9 'iso-latin-5)
167 (define-coding-system-alias 'latin-5 'iso-latin-5)
;; Generic language environment for the Latin-5 (ISO-8859-9) character set.
169 (set-language-info-alist
170 "Latin-5" '((charset iso-8859-9)
171 (coding-system iso-latin-5)
172 (coding-priority iso-latin-5)
173 (nonascii-translation . iso-8859-9)
174 (unibyte-display . iso-latin-5)
175 (input-method . "latin-5-postfix")
;; The first line of the string must not end in a backslash: the Lisp
;; reader drops backslash-newline, which glued the two sentences together
;; ("Support for Latin-5.See also ...").  A plain newline keeps them apart.
176 (documentation . "Support for Latin-5.
177 See also the Turkish environment."))
181 ;; Latin-6 (ISO-8859-10)
183 (define-coding-system 'iso-latin-6
184 "ISO 2022 based 8-bit encoding for Latin-6 (MIME:ISO-8859-10)."
185 :coding-type 'charset
187 :charset-list '(iso-8859-10)
188 :mime-charset 'iso-8859-10)
190 (define-coding-system-alias 'iso-8859-10 'iso-latin-6)
191 (define-coding-system-alias 'latin-6 'iso-latin-6)
193 (set-language-info-alist
194 "Latin-6" '((charset iso-8859-10)
195 (coding-system iso-latin-6)
196 (coding-priority iso-latin-6)
197 (nonascii-translation . iso-8859-10)
198 (unibyte-display . iso-latin-6)
199 ;; Fixme: input method.
200 (documentation . "Support for generic Latin-6 (Northern European)."))
204 ;; Latin-7 (ISO-8859-13)
206 (define-coding-system 'iso-latin-7
207 "ISO 2022 based 8-bit encoding for Latin-7 (MIME:ISO-8859-13)."
208 :coding-type 'charset
210 :charset-list '(iso-8859-13)
211 :mime-charset 'iso-8859-13)
213 (define-coding-system-alias 'iso-8859-13 'iso-latin-7)
214 (define-coding-system-alias 'latin-7 'iso-latin-7)
216 (set-language-info-alist
217 "Latin-7" '((charset iso-8859-13)
218 (coding-system iso-latin-7)
219 (coding-priority iso-latin-7)
220 (nonascii-translation . iso-8859-13)
221 (unibyte-display . iso-latin-7)
222 ;; Fixme: input method.
223 (documentation . "Support for generic Latin-7 (Baltic Rim)."))
226 ;; Latin-8 (ISO-8859-14)
228 (define-coding-system 'iso-latin-8
229 "ISO 2022 based 8-bit encoding for Latin-8 (MIME:ISO-8859-14)."
230 :coding-type 'charset
231 ;; `W' for `Welsh', since `C' for `Celtic' is taken.
233 :charset-list '(iso-8859-14)
234 :mime-charset 'iso-8859-14)
236 (define-coding-system-alias 'iso-8859-14 'iso-latin-8)
237 (define-coding-system-alias 'latin-8 'iso-latin-8)
239 (set-language-info-alist
240 "Latin-8" '((charset iso-8859-14)
241 (coding-system iso-latin-8)
242 (coding-priority iso-latin-8)
243 (nonascii-translation . iso-8859-14)
244 (unibyte-display . iso-latin-8)
245 (input-method . "latin-8-prefix")
246 ;; Fixme: Welsh/Ga{e}lic greetings
247 (sample-text . "
\e,_"
\e(B
\e$(D+q
\e(B
\e$(D*t
\e(B")
249 This language environment is a generic one for the Latin-8 (ISO-8859-14)
250 character set which supports the Celtic languages, including those not
251 covered by other ISO-8859 character sets:
252 Welsh, Manx Gaelic and Irish Gaelic (old orthography)."))
255 ;; Latin-9 (ISO-8859-15)
257 (define-coding-system 'iso-latin-9
258 "ISO 2022 based 8-bit encoding for Latin-9 (MIME:ISO-8859-15)."
259 :coding-type 'charset
262 :charset-list '(iso-8859-15)
263 :mime-charset 'iso-8859-15)
265 (define-coding-system-alias 'iso-8859-15 'iso-latin-9)
266 (define-coding-system-alias 'latin-9 'iso-latin-9)
267 (define-coding-system-alias 'latin-0 'iso-latin-9)
269 (set-language-info-alist
270 "Latin-9" '((charset iso-8859-15)
271 (coding-system iso-latin-9)
272 (coding-priority iso-latin-9)
273 (nonascii-translation . iso-8859-15)
274 (unibyte-display . iso-latin-9)
275 (input-method . "latin-9-prefix")
277 . "AVE.
\e$(D*^+^*v+v)-)M*s
\e(B
\e$(Q)!
\e(B")
279 This language environment is a generic one for the Latin-9 (ISO-8859-15)
280 character set which supports the same languages as Latin-1 with the
281 addition of the Euro sign and some additional French and Finnish letters.
282 Latin-9 is sometimes nicknamed `Latin-0'."))
285 (set-language-info-alist
286 "Esperanto" '((tutorial . "TUTORIAL.eo")
287 (charset ascii latin-iso8859-3)
288 (coding-system iso-latin-3)
289 (coding-priority iso-latin-3)
290 (nonascii-translation . latin-iso8859-3)
291 (unibyte-syntax . "latin-3")
292 (unibyte-display . iso-latin-3)
293 (input-method . "latin-3-prefix")
294 (documentation . "Support for Esperanto with ISO-8859-3 character set."))
298 (define-coding-system 'windows-1250
299 "windows-1250 (Central European) encoding (MIME: WINDOWS-1250)"
300 :coding-type 'charset
302 :charset-list '(windows-1250)
303 :mime-charset 'windows-1250)
304 (define-coding-system-alias 'cp1250 'windows-1250)
306 (define-coding-system 'windows-1252
307 "windows-1252 (Western European) encoding (MIME: WINDOWS-1252)"
308 :coding-type 'charset
310 :charset-list '(windows-1252)
311 :mime-charset 'windows-1252)
312 (define-coding-system-alias 'cp1252 'windows-1252)
314 (define-coding-system 'windows-1254
315 "windows-1254 (Turkish) encoding (MIME: WINDOWS-1254)"
316 :coding-type 'charset
318 :charset-list '(windows-1254)
319 :mime-charset 'windows-1254)
320 (define-coding-system-alias 'cp1254 'windows-1254)
322 (define-coding-system 'windows-1257
323 "windows-1257 (Baltic) encoding (MIME: WINDOWS-1257)"
324 :coding-type 'charset
326 :charset-list '(windows-1257)
327 :mime-charset 'windows-1257)
328 (define-coding-system-alias 'cp1257 'windows-1257)
330 (define-coding-system 'cp850
331 "DOS codepage 850 (Western European)"
332 :coding-type 'charset
334 :charset-list '(cp850)
335 :mime-charset 'cp850)
336 (define-coding-system-alias 'ibm850 'cp850)
338 (define-coding-system 'cp852
339 "DOS codepage 852 (Slavic)"
340 :coding-type 'charset
342 :charset-list '(cp852)
343 :mime-charset 'cp852)
344 (define-coding-system-alias 'ibm852 'cp852)
346 (define-coding-system 'cp857
347 "DOS codepage 857 (Turkish)"
348 :coding-type 'charset
350 :charset-list '(cp857)
351 :mime-charset 'cp857)
352 (define-coding-system-alias 'ibm857 'cp857)
354 (define-coding-system 'cp858
355 "Codepage 858 (Multilingual Latin I + Euro)"
356 :coding-type 'charset
358 :charset-list '(cp858)
359 :mime-charset 'cp858)
361 (define-coding-system 'cp860
362 "DOS codepage 860 (Portuguese)"
363 :coding-type 'charset
365 :charset-list '(cp860)
366 :mime-charset 'cp860)
367 (define-coding-system-alias 'ibm860 'cp860)
369 (define-coding-system 'cp861
370 "DOS codepage 861 (Icelandic)"
371 :coding-type 'charset
373 :charset-list '(cp861)
374 :mime-charset 'cp861)
375 (define-coding-system-alias 'ibm861 'cp861)
377 (define-coding-system 'cp863
378 "DOS codepage 863 (French Canadian)"
379 :coding-type 'charset
381 :charset-list '(cp863)
382 :mime-charset 'cp863)
383 (define-coding-system-alias 'ibm863 'cp863)
385 (define-coding-system 'cp865
386 "DOS codepage 865 (Norwegian/Danish)"
387 :coding-type 'charset
389 :charset-list '(cp865)
390 :mime-charset 'cp865)
391 (define-coding-system-alias 'ibm865 'cp865)
393 (define-coding-system 'cp437
395 :coding-type 'charset
397 :charset-list '(cp437)
398 :mime-charset 'cp437)
399 (define-coding-system-alias 'ibm437 'cp437)
401 (set-language-info-alist
402 "Dutch" '((tutorial . "TUTORIAL.nl")
404 (coding-system iso-latin-1 iso-latin-9)
405 (coding-priority iso-latin-1)
406 (nonascii-translation . iso-8859-1)
407 (unibyte-display . iso-latin-1)
408 (input-method . "dutch")
409 (sample-text . "Er is een aantal manieren waarop je dit kan doen")
411 This language environment is almost the same as Latin-1,
412 but it selects the Dutch tutorial and input method."))
415 (set-language-info-alist
416 "German" '((tutorial . "TUTORIAL.de")
418 (coding-system iso-latin-1 iso-latin-9)
419 (coding-priority iso-latin-1)
420 (nonascii-translation . iso-8859-1)
421 (input-method . "german-postfix")
422 (unibyte-display . iso-latin-1)
424 German (Deutsch Nord) Guten Tag
425 German (Deutsch S
\e$(D+d
\e(Bd) Gr
\e$(D+d)N
\e(B Gott")
427 This language environment is almost the same as Latin-1,
428 but sets the default input method to \"german-postfix\".
429 Additionally, it selects the German tutorial."))
432 (set-language-info-alist
433 "French" '((tutorial . "TUTORIAL.fr")
435 (coding-system iso-latin-1 iso-latin-9)
436 (coding-priority iso-latin-1)
437 (nonascii-translation . iso-8859-1)
438 (unibyte-display . iso-latin-1)
439 (input-method . "latin-1-prefix")
440 (sample-text . "French (Fran
\e$(D+.
\e(Bais) Bonjour, Salut")
442 This language environment is almost the same as Latin-1,
443 but it selects the French tutorial and input method."))
446 (set-language-info-alist
447 "Italian" '((tutorial . "TUTORIAL.it")
449 (coding-system iso-latin-1 iso-latin-9)
450 (coding-priority iso-latin-1)
451 (nonascii-translation . iso-8859-1)
452 (unibyte-display . iso-latin-1)
453 (input-method . "italian-postfix")
454 (sample-text . "Salve, ciao!")
456 This language environment is almost the same as Latin-1,
457 but sets the default input method to \"italian-postfix\".
458 Additionally, it selects the Italian tutorial."))
461 (set-language-info-alist
462 "Slovenian" '((charset iso-8859-2)
463 (coding-system . (iso-8859-2 windows-1250))
464 (coding-priority . (iso-8859-2))
465 (nonascii-translation . iso-8859-2)
466 (input-method . "slovenian")
467 (unibyte-display . iso-8859-2)
468 (tutorial . "TUTORIAL.sl")
469 (sample-text . "
\e$(D*v
\e(Belimo vam uspe
\e$(D+^
\e(Ben dan!")
471 This language environment is almost the same as Latin-2,
472 but it selects the Slovenian tutorial and input method."))
475 (set-language-info-alist
476 "Spanish" '((tutorial . "TUTORIAL.es")
478 (coding-system iso-latin-1 iso-latin-9)
479 (coding-priority iso-latin-1)
480 (input-method . "spanish-postfix")
481 (nonascii-translation . iso-8859-1)
482 (unibyte-display . iso-latin-1)
483 (sample-text . "Spanish (Espa
\e$(D+P
\e(Bol)
\e$(D"B
\e(BHola!")
485 This language environment is almost the same as Latin-1,
486 but it sets the default input method to \"spanish-postfix\",
487 and it selects the Spanish tutorial."))
490 ;; For Turkish, the character set ISO-8859-9 (Latin-5) is used.
491 ;; Before the introduction of ISO-8859-9 in 1988, however, ISO-8859-3
492 ;; (Latin-3) was used for Turkish.  Those who use Latin-3 for Turkish
493 ;; should use the "Latin-3" language environment.
;; Language environment for Turkish, based on ISO-8859-9 (Latin-5).
;; Besides the usual charset/coding-system entries it registers a pair of
;; functions that switch the Turkish case rules on when the environment
;; is entered and off again when it is left.
495 (set-language-info-alist
496 "Turkish" '((charset iso-8859-9)
497 (coding-system iso-latin-5 windows-1254 iso-latin-3)
498 (coding-priority iso-latin-5)
499 (nonascii-translation . iso-8859-9)
500 (unibyte-display . iso-latin-5)
501 (input-method . "turkish-postfix")
502 (sample-text . "Turkish (T
\e$(D+d
\e(Brk
\e$(D+.
\e(Be) Merhaba")
;; The two hooks need distinct keys.  With `setup-function' given twice,
;; an alist lookup only ever sees the first entry, so the disable function
;; was never run on leaving the environment; it belongs under
;; `exit-function'.
503 (setup-function . turkish-case-conversion-enable)
504 (exit-function . turkish-case-conversion-disable)
505 (documentation . "Support for Turkish.
506 Differs from the Latin-5 environment in using the `turkish-postfix' input
507 method and applying Turkish case rules for the characters i, I,
\e$(D)E
\e(B,
\e$(D*D
\e(B.")))
;; Referenced as the `setup-function' of the "Turkish" language
;; environment.  Takes no arguments.
;; NOTE(review): the two escape-encoded characters are presumably the
;; dotted capital I and the dotless small i -- confirm against a decoded
;; copy of the file.
509 (defun turkish-case-conversion-enable ()
510 "Set up Turkish case conversion of `i' and `I' into `
\e$(D*D
\e(B' and `
\e$(D)E
\e(B'."
;; Both pairings are made in the table returned by `standard-case-table'.
511 (let ((table (standard-case-table)))
;; Upper case = first character named in the docstring, lower case = `i'.
512 (set-case-syntax-pair ?
\e$(D*D
\e(B ?i table)
;; Upper case = `I', lower case = second character named in the docstring.
513 (set-case-syntax-pair ?I ?
\e$(D)E
\e(B table)))
;; Counterpart of `turkish-case-conversion-enable'; both are referenced
;; from the "Turkish" language environment entry.  Takes no arguments.
515 (defun turkish-case-conversion-disable ()
516 "Set up normal (non-Turkish) case conversion of `i' into `I'."
;; As in the enable function, the table from `standard-case-table' is used.
517 (let ((table (standard-case-table)))
;; Re-establish `I' (upper) / `i' (lower) as a case pair.
518 (set-case-syntax-pair ?I ?i table)
;; Give each of the two Turkish-specific letters plain word syntax ("w")
;; through `set-case-syntax' instead of a case pairing.
519 (set-case-syntax ?
\e$(D*D
\e(B "w" table)
520 (set-case-syntax ?
\e$(D)E
\e(B "w" table)))
522 ;; Polish ISO 8859-2 environment.
523 ;; Maintainer: Wlodek Bzyl <matwb@univ.gda.pl>
524 ;; Keywords: multilingual, Polish
526 (set-language-info-alist
527 "Polish" '((charset iso-8859-2)
528 (coding-system iso-8859-2 windows-1250)
529 (coding-priority iso-8859-2)
530 (input-method . "polish-slash")
531 (nonascii-translation . iso-8859-2)
532 (unibyte-display . iso-8859-2)
533 (tutorial . "TUTORIAL.pl")
534 (sample-text . "P
\e$(D+Q
\e(Bjd
\e$(D+u
\e(B, ki
\e$(D+M
\e(B-
\e$(D+w
\e(Be t
\e$(D+8
\e(B chmurno
\e$(D+\++
\e(B w g
\e$(D)H+(
\e(Bb flaszy")
538 (set-language-info-alist
539 "Welsh" `((coding-system utf-8 latin-8) ; the input method is Unicode-based
540 (coding-priority utf-8 latin-8)
541 (nonascii-translation . iso-8859-14)
542 (input-method . "welsh")
543 (documentation . "Support for Welsh, using Unicode."))
546 (set-language-info-alist
547 "Latin-6" `((coding-system latin-6)
548 (coding-priority latin-6)
549 (nonascii-translation . ,(get 'decode-iso-latin-6 'translation-table))
550 (input-method . "latin-prefix")
551 (features code-pages)
552 (documentation . "Support for Latin-6."))
555 (set-language-info-alist
556 "Latin-7" `((coding-system latin-7)
557 (coding-priority latin-7)
558 (nonascii-translation . iso-8859-13)
559 (input-method . "latin-prefix")
560 (documentation . "Support for Latin-7, e.g. Latvian, Lithuanian."))
563 (set-language-info-alist
564 "Lithuanian" `((coding-system latin-7 windows-1257)
565 (coding-priority latin-7)
566 (nonascii-translation . iso-8859-13)
567 (input-method . "lithuanian-keyboard")
568 (documentation . "Support for Lithuanian."))
571 (set-language-info-alist
572 "Latvian" `((coding-system latin-7 windows-1257)
573 (coding-priority latin-7)
574 (nonascii-translation . iso-8859-13)
575 (input-method . "latvian-keyboard")
576 (documentation . "Support for Latvian."))
579 (set-language-info-alist
580 "Swedish" '((tutorial . "TUTORIAL.sv")
582 (coding-system iso-latin-1)
583 (coding-priority iso-latin-1)
584 (nonascii-translation . iso-8859-1)
585 (unibyte-display . iso-latin-1)
586 (sample-text . "Goddag Hej")
587 (documentation . "Support for Swedish"))
590 (set-language-info-alist
591 "Croatian" '((charset iso-8859-2)
592 (coding-system iso-8859-2)
593 (coding-priority iso-8859-2)
594 (input-method . "croatian")
595 (nonascii-translation . iso-8859-2)
596 (unibyte-display . iso-8859-2)
597 (documentation . "Support for Croatian with Latin-2 encoding."))
600 (set-language-info-alist
601 "Brazilian Portuguese" '((tutorial . "TUTORIAL.pt_BR")
603 (coding-system iso-latin-1 iso-latin-9)
604 (coding-priority iso-latin-1)
605 (nonascii-translation . iso-8859-1)
606 (unibyte-display . iso-8859-1)
607 (input-method . "latin-1-prefix")
609 (documentation . "Support for Brazilian Portuguese."))
613 (define-coding-system 'mac-roman
614 "Mac Roman Encoding (MIME:MACINTOSH)."
615 :coding-type 'charset
617 :charset-list '(mac-roman)
618 :mime-charset 'macintosh)
620 (define-coding-system 'next
622 :coding-type 'charset
624 :charset-list '(next)
;; Charset-based coding system built on the `hp-roman8' charset.
;; The docstring previously misspelled the vendor name ("Hewlet-Packard").
627 (define-coding-system 'hp-roman8
628 "Hewlett-Packard roman-8 encoding (MIME:ROMAN-8)"
629 :coding-type 'charset
631 :charset-list '(hp-roman8)
632 :mime-charset 'hp-roman8)
633 (define-coding-system-alias 'roman8 'hp-roman8)
635 (define-coding-system 'adobe-standard-encoding
636 "Adobe `standard' encoding for PostScript"
637 :coding-type 'charset
639 :charset-list '(adobe-standard-encoding)
640 :mime-charset 'adobe-standard-encoding)
643 ;; For automatic composing of diacritics and combining marks.
644 (dolist (range '( ;; combining diacritical marks
645 (#x0300 #x0314 (tc . bc))
647 (#x0316 #x0319 (bc . tc))
649 (#x031B #x0320 (bc . tc))
652 (#x0323 #x0333 (bc . tc))
653 (#x0334 #x0338 (Bc . Bc))
654 (#x0339 #x033C (bc . tc))
655 (#x033D #x033F (tc . bc))
658 (#x0342 #x0344 (tc . bc))
661 (#x0347 #x0349 (bc . tc))
662 (#x034A #x034C (tc . bc))
663 (#x034D #x034E (bc . tc))
664 ;; combining diacritical marks for symbols
665 (#x20D0 #x20D1 (tc . bc))
666 (#x20D2 #x20D3 (Bc . Bc))
667 (#x20D4 #x20D7 (tc . bc))
668 (#x20D8 #x20DA (Bc . Bc))
669 (#x20DB #x20DC (tc . bc))
670 (#x20DD #x20E0 (Bc . Bc))
672 (#x20E2 #x20E3 (Bc . Bc))))
673 (let* ((from (car range))
674 (to (if (= (length range) 3)
677 (composition (car (last range))))
679 (put-char-code-property from 'diacritic-composition composition)
680 (aset composition-function-table from 'diacritic-composition-function)
681 (setq from (1+ from)))))
;; Regexp searched for by `diacritic-compose-region' and
;; `diacritic-compose-string'.  `\C^' matches one character that is not in
;; category `^'; `\c^+' matches one or more following characters that are.
;; NOTE(review): category `^' is presumably the combining-mark category
;; -- confirm against the category table.
683 (defconst diacritic-composition-pattern "\\C^\\c^+")
685 (defun diacritic-compose-region (beg end)
686 "Compose diacritic characters in the region.
687 When called from a program, expects two arguments,
688 positions (integers or markers) specifying the region."
691 (narrow-to-region beg end)
692 (goto-char (point-min))
693 (while (re-search-forward diacritic-composition-pattern nil t)
694 (if (= (char-syntax (char-after (match-beginning 0))) ?w)
695 (compose-region (match-beginning 0) (match-end 0))))))
697 (defun diacritic-compose-string (string)
698 "Compose diacritic characters in STRING and return the resulting string."
700 (while (setq idx (string-match diacritic-composition-pattern string idx))
701 (if (= (char-syntax (aref string idx)) ?w)
702 (compose-string string idx (match-end 0)))
703 (setq idx (match-end 0))))
;; Convenience wrapper: passes the range from (point-min) to (point-max)
;; to `diacritic-compose-region', so a narrowed buffer is only processed
;; within its accessible portion.
706 (defun diacritic-compose-buffer ()
707 "Compose diacritic characters in the current buffer."
709 (diacritic-compose-region (point-min) (point-max)))
711 (defun diacritic-composition-function (pos to font-object string)
712 "Compose diacritic text around POS.
713 Optional 2nd argument STRING, if non-nil, is a string containing text
716 The return value is the end position of composed characters,
717 or nil if no characters are composed."
721 (let ((ch (aref string pos))
722 start end components ch composition)
723 (when (= (char-syntax ch) ?w)
730 (setq ch (aref string pos)
732 (get-char-code-property ch
733 'diacritic-composition)))
734 (setq components (cons ch (cons composition components))
736 (compose-string string start pos (nreverse components))
738 (if (>= pos (point-min))
739 (let ((ch (char-after pos))
740 start end components composition)
741 (when (= (char-syntax ch) ?w)
748 (setq ch (char-after pos)
750 (get-char-code-property ch 'diacritic-composition)))
751 (setq components (cons ch (cons composition components))
753 (compose-region start pos (nreverse components))
758 ;;; arch-tag: 9e018b12-fb02-4120-907b-9adeaf84b5c2
759 ;;; european.el ends here