1 ;;; european.el --- support for European languages -*- coding: iso-2022-7bit; -*-
3 ;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2006
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
7 ;; National Institute of Advanced Industrial Science and Technology (AIST)
8 ;; Registration Number H14PRO021
10 ;; National Institute of Advanced Industrial Science and Technology (AIST)
11 ;; Registration Number H13PRO009
13 ;; Keywords: multilingual, European
15 ;; This file is part of GNU Emacs.
17 ;; GNU Emacs is free software; you can redistribute it and/or modify
18 ;; it under the terms of the GNU General Public License as published by
19 ;; the Free Software Foundation; either version 2, or (at your option)
22 ;; GNU Emacs is distributed in the hope that it will be useful,
23 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
24 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 ;; GNU General Public License for more details.
27 ;; You should have received a copy of the GNU General Public License
28 ;; along with GNU Emacs; see the file COPYING. If not, write to the
29 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
30 ;; Boston, MA 02110-1301, USA.
34 ;; For European scripts, all the ISO Latin character sets are
35 ;; supported, along with various others.
39 ;; Latin-1 (ISO-8859-1)
41 (set-language-info-alist
42 "Latin-1" '((charset iso-8859-1)
43 (coding-system iso-latin-1 iso-latin-9 windows-1252)
44 (coding-priority iso-latin-1)
45 (nonascii-translation . iso-8859-1)
46 (unibyte-display . iso-latin-1)
47 (input-method . "latin-1-prefix")
49 . "Hello, Hej, Tere, Hei, Bonjour, Gr
\e$(D+d)N
\e(B Gott, Ciao,
\e$(D"B
\e(BHola!")
51 This language environment is a generic one for the Latin-1 (ISO-8859-1)
52 character set which supports the following European languages:
53 Albanian, Basque, Breton, Catalan, Danish, Dutch, English, Faeroese,
54 Finnish, French (with restrictions -- see Latin-9), Frisian, Galician,
55 German, Greenlandic, Icelandic, Irish Gaelic (new orthography),
56 Italian, Latin, Luxemburgish, Norwegian, Portuguese, Rhaeto-Romanic,
57 Scottish Gaelic, Spanish, and Swedish.
58 We also have specific language environments for the following languages:
60 For German, \"German\".
61 For French, \"French\".
62 For Italian, \"Italian\".
63 For Slovenian, \"Slovenian\".
64 For Spanish, \"Spanish\".
66 Latin-1 also covers several written languages outside Europe, including
67 Indonesian/Malay, Tagalog (Philippines), Swahili and Afrikaans."))
71 ;; Latin-2 (ISO-8859-2)
;; Define the `iso-latin-2' coding system over the iso-8859-2 charset,
;; advertised under the MIME charset name iso-8859-2.  Like every other
;; ISO-8859 coding system defined in this file it is a plain charset-based
;; encoding, so `:coding-type 'charset' is stated explicitly.
73 (define-coding-system 'iso-latin-2
74 "ISO 2022 based 8-bit encoding for Latin-2 (MIME:ISO-8859-2)."
75 :coding-type 'charset
77 :charset-list '(iso-8859-2)
78 :mime-charset 'iso-8859-2)
;; `iso-8859-2' and `latin-2' are alternative names for `iso-latin-2'.
80 (define-coding-system-alias 'iso-8859-2 'iso-latin-2)
81 (define-coding-system-alias 'latin-2 'iso-latin-2)
83 (set-language-info-alist
84 "Latin-2" '((charset iso-8859-2)
85 (coding-system iso-latin-2 windows-1250)
86 (coding-priority iso-latin-2)
87 (nonascii-translation . iso-8859-2)
88 (unibyte-display . iso-latin-2)
89 (input-method . "latin-2-prefix")
91 This language environment is a generic one for the Latin-2 (ISO-8859-2)
92 character set which supports the following languages:
93 Albanian, Czech, English, German, Hungarian, Polish, Romanian,
94 Serbo-Croatian or Croatian, Slovak, Slovene, Sorbian (upper and lower),
96 We also have specific language environments for the following languages:
98 For Croatian, \"Croatian\".
99 For Polish, \"Polish\".
100 For Romanian, \"Romanian\".
101 For Slovak, \"Slovak\"."))
105 ;; Latin-3 (ISO-8859-3)
;; Define the charset-based `iso-latin-3' coding system over the
;; iso-8859-3 charset, advertised under the MIME charset name iso-8859-3.
107 (define-coding-system 'iso-latin-3
108 "ISO 2022 based 8-bit encoding for Latin-3 (MIME:ISO-8859-3)."
109 :coding-type 'charset
111 :charset-list '(iso-8859-3)
112 :mime-charset 'iso-8859-3)
;; `iso-8859-3' and `latin-3' are alternative names for `iso-latin-3'.
114 (define-coding-system-alias 'iso-8859-3 'iso-latin-3)
115 (define-coding-system-alias 'latin-3 'iso-latin-3)
117 (set-language-info-alist
118 "Latin-3" '((charset iso-8859-3)
119 (coding-system iso-latin-3)
120 (coding-priority iso-latin-3)
121 (nonascii-translation . iso-8859-3)
122 (unibyte-display . iso-latin-3)
123 (input-method . "latin-3-prefix")
125 These languages are supported with the Latin-3 (ISO-8859-3) character set:
126 Afrikaans, Catalan, Dutch, English, Esperanto, French, Galician,
127 German, Italian, Maltese, Spanish, and Turkish."))
131 ;; Latin-4 (ISO-8859-4)
;; Define the charset-based `iso-latin-4' coding system over the
;; iso-8859-4 charset, advertised under the MIME charset name iso-8859-4.
133 (define-coding-system 'iso-latin-4
134 "ISO 2022 based 8-bit encoding for Latin-4 (MIME:ISO-8859-4)."
135 :coding-type 'charset
137 :charset-list '(iso-8859-4)
138 :mime-charset 'iso-8859-4)
;; `iso-8859-4' and `latin-4' are alternative names for `iso-latin-4'.
140 (define-coding-system-alias 'iso-8859-4 'iso-latin-4)
141 (define-coding-system-alias 'latin-4 'iso-latin-4)
143 (set-language-info-alist
144 "Latin-4" '((charset iso-8859-4)
145 (coding-system iso-8859-4)
146 (coding-priority iso-8859-4)
147 (nonascii-translation . iso-8859-4)
148 (unibyte-display . iso-8859-4)
149 (input-method . "latin-4-postfix")
151 These languages are supported with the Latin-4 (ISO-8859-4) character set:
152 Danish, English, Estonian, Finnish, German, Greenlandic, Lappish,
153 Latvian, Lithuanian, and Norwegian."))
157 ;; Latin-5 (ISO-8859-9)
;; Define the charset-based `iso-latin-5' coding system.  Note that
;; Latin-5 corresponds to ISO-8859-9, not ISO-8859-5: both the charset
;; list and the MIME charset name use iso-8859-9.
159 (define-coding-system 'iso-latin-5
160 "ISO 2022 based 8-bit encoding for Latin-5 (MIME:ISO-8859-9)."
161 :coding-type 'charset
163 :charset-list '(iso-8859-9)
164 :mime-charset 'iso-8859-9)
;; `iso-8859-9' and `latin-5' are alternative names for `iso-latin-5'.
166 (define-coding-system-alias 'iso-8859-9 'iso-latin-5)
167 (define-coding-system-alias 'latin-5 'iso-latin-5)
169 (set-language-info-alist
170 "Latin-5" '((charset iso-8859-9)
171 (coding-system iso-latin-5)
172 (coding-priority iso-latin-5)
173 (nonascii-translation . iso-8859-9)
174 (unibyte-display . iso-latin-5)
175 (input-method . "latin-5-postfix")
176 (documentation . "Support for Latin-5.\
177 See also the Turkish environment."))
181 ;; Latin-6 (ISO-8859-10)
;; Define the charset-based `iso-latin-6' coding system.  Latin-6
;; corresponds to ISO-8859-10, which is the name used for both the
;; charset list and the MIME charset.
183 (define-coding-system 'iso-latin-6
184 "ISO 2022 based 8-bit encoding for Latin-6 (MIME:ISO-8859-10)."
185 :coding-type 'charset
187 :charset-list '(iso-8859-10)
188 :mime-charset 'iso-8859-10)
;; `iso-8859-10' and `latin-6' are alternative names for `iso-latin-6'.
190 (define-coding-system-alias 'iso-8859-10 'iso-latin-6)
191 (define-coding-system-alias 'latin-6 'iso-latin-6)
193 (set-language-info-alist
194 "Latin-6" '((charset iso-8859-10)
195 (coding-system iso-latin-6)
196 (coding-priority iso-latin-6)
197 (nonascii-translation . iso-8859-10)
198 (unibyte-display . iso-latin-6)
199 ;; Fixme: input method.
200 (documentation . "Support for generic Latin-6 (Northern European)."))
204 ;; Latin-7 (ISO-8859-13)
;; Define the charset-based `iso-latin-7' coding system.  Latin-7
;; corresponds to ISO-8859-13, which is the name used for both the
;; charset list and the MIME charset.
206 (define-coding-system 'iso-latin-7
207 "ISO 2022 based 8-bit encoding for Latin-7 (MIME:ISO-8859-13)."
208 :coding-type 'charset
210 :charset-list '(iso-8859-13)
211 :mime-charset 'iso-8859-13)
;; `iso-8859-13' and `latin-7' are alternative names for `iso-latin-7'.
213 (define-coding-system-alias 'iso-8859-13 'iso-latin-7)
214 (define-coding-system-alias 'latin-7 'iso-latin-7)
216 (set-language-info-alist
217 "Latin-7" '((charset iso-8859-13)
218 (coding-system iso-latin-7)
219 (coding-priority iso-latin-7)
220 (nonascii-translation . iso-8859-13)
221 (unibyte-display . iso-latin-7)
222 ;; Fixme: input method.
223 (documentation . "Support for generic Latin-7 (Baltic Rim)."))
226 ;; Latin-8 (ISO-8859-14)
;; Define the charset-based `iso-latin-8' coding system.  Latin-8
;; corresponds to ISO-8859-14, which is the name used for both the
;; charset list and the MIME charset.
228 (define-coding-system 'iso-latin-8
229 "ISO 2022 based 8-bit encoding for Latin-8 (MIME:ISO-8859-14)."
230 :coding-type 'charset
;; NOTE(review): the remark below explains a mode-line mnemonic choice,
;; but no `:mnemonic' attribute is visible in this form -- confirm.
231 ;; `W' for `Welsh', since `C' for `Celtic' is taken.
233 :charset-list '(iso-8859-14)
234 :mime-charset 'iso-8859-14)
;; `iso-8859-14' and `latin-8' are alternative names for `iso-latin-8'.
236 (define-coding-system-alias 'iso-8859-14 'iso-latin-8)
237 (define-coding-system-alias 'latin-8 'iso-latin-8)
239 (set-language-info-alist
240 "Latin-8" '((charset iso-8859-14)
241 (coding-system iso-latin-8)
242 (coding-priority iso-latin-8)
243 (nonascii-translation . iso-8859-14)
244 (unibyte-display . iso-latin-8)
245 (input-method . "latin-8-prefix")
246 ;; Fixme: Welsh/Ga{e}lic greetings
247 (sample-text . "
\e,_"
\e(B
\e$(D+q
\e(B
\e$(D*t
\e(B")
249 This language environment is a generic one for the Latin-8 (ISO-8859-14)
250 character set which supports the Celtic languages, including those not
251 covered by other ISO-8859 character sets:
252 Welsh, Manx Gaelic and Irish Gaelic (old orthography)."))
255 ;; Latin-9 (ISO-8859-15)
;; Define the charset-based `iso-latin-9' coding system.  Latin-9
;; corresponds to ISO-8859-15, which is the name used for both the
;; charset list and the MIME charset.
257 (define-coding-system 'iso-latin-9
258 "ISO 2022 based 8-bit encoding for Latin-9 (MIME:ISO-8859-15)."
259 :coding-type 'charset
262 :charset-list '(iso-8859-15)
263 :mime-charset 'iso-8859-15)
;; Alternative names for `iso-latin-9'; `latin-0' is included as a third
;; alias in addition to the usual `iso-8859-N' / `latin-N' pair.
265 (define-coding-system-alias 'iso-8859-15 'iso-latin-9)
266 (define-coding-system-alias 'latin-9 'iso-latin-9)
267 (define-coding-system-alias 'latin-0 'iso-latin-9)
269 (set-language-info-alist
270 "Latin-9" '((charset iso-8859-15)
271 (coding-system iso-latin-9)
272 (coding-priority iso-latin-9)
273 (nonascii-translation . iso-8859-15)
274 (unibyte-display . iso-latin-9)
275 (input-method . "latin-9-prefix")
277 . "AVE.
\e$(D*^+^*v+v)-)M*s
\e(B
\e$(Q)!
\e(B")
279 This language environment is a generic one for the Latin-9 (ISO-8859-15)
280 character set which supports the same languages as Latin-1 with the
281 addition of the Euro sign and some additional French and Finnish letters.
282 Latin-9 is sometimes nicknamed `Latin-0'."))
;; Windows code pages.  Each definition below is a charset-based coding
;; system whose charset list and MIME charset use the coding system's own
;; name; each is followed by a `cpNNNN' alias for the same coding system.
;; windows-1250: Central European.
285 (define-coding-system 'windows-1250
286 "windows-1250 (Central European) encoding (MIME: WINDOWS-1250)"
287 :coding-type 'charset
289 :charset-list '(windows-1250)
290 :mime-charset 'windows-1250)
291 (define-coding-system-alias 'cp1250 'windows-1250)
;; windows-1252: Western European.
293 (define-coding-system 'windows-1252
294 "windows-1252 (Western European) encoding (MIME: WINDOWS-1252)"
295 :coding-type 'charset
297 :charset-list '(windows-1252)
298 :mime-charset 'windows-1252)
299 (define-coding-system-alias 'cp1252 'windows-1252)
;; windows-1254: Turkish.
301 (define-coding-system 'windows-1254
302 "windows-1254 (Turkish) encoding (MIME: WINDOWS-1254)"
303 :coding-type 'charset
305 :charset-list '(windows-1254)
306 :mime-charset 'windows-1254)
307 (define-coding-system-alias 'cp1254 'windows-1254)
;; windows-1257: Baltic.
309 (define-coding-system 'windows-1257
310 "windows-1257 (Baltic) encoding (MIME: WINDOWS-1257)"
311 :coding-type 'charset
313 :charset-list '(windows-1257)
314 :mime-charset 'windows-1257)
315 (define-coding-system-alias 'cp1257 'windows-1257)
;; DOS code pages.  Each definition below is a charset-based coding system
;; whose charset list and MIME charset use the coding system's own name.
;; All of them except cp858 are followed by an `ibmNNN' alias.
;; cp850: Western European.
317 (define-coding-system 'cp850
318 "DOS codepage 850 (Western European)"
319 :coding-type 'charset
321 :charset-list '(cp850)
322 :mime-charset 'cp850)
323 (define-coding-system-alias 'ibm850 'cp850)
;; cp852: Slavic.
325 (define-coding-system 'cp852
326 "DOS codepage 852 (Slavic)"
327 :coding-type 'charset
329 :charset-list '(cp852)
330 :mime-charset 'cp852)
331 (define-coding-system-alias 'ibm852 'cp852)
;; cp857: Turkish.
333 (define-coding-system 'cp857
334 "DOS codepage 857 (Turkish)"
335 :coding-type 'charset
337 :charset-list '(cp857)
338 :mime-charset 'cp857)
339 (define-coding-system-alias 'ibm857 'cp857)
;; cp858: Multilingual Latin I plus the Euro sign.  No alias is defined
;; for this one here.
341 (define-coding-system 'cp858
342 "Codepage 858 (Multilingual Latin I + Euro)"
343 :coding-type 'charset
345 :charset-list '(cp858)
346 :mime-charset 'cp858)
;; cp860: Portuguese.
348 (define-coding-system 'cp860
349 "DOS codepage 860 (Portuguese)"
350 :coding-type 'charset
352 :charset-list '(cp860)
353 :mime-charset 'cp860)
354 (define-coding-system-alias 'ibm860 'cp860)
;; cp861: Icelandic.
356 (define-coding-system 'cp861
357 "DOS codepage 861 (Icelandic)"
358 :coding-type 'charset
360 :charset-list '(cp861)
361 :mime-charset 'cp861)
362 (define-coding-system-alias 'ibm861 'cp861)
;; cp863: French Canadian.
364 (define-coding-system 'cp863
365 "DOS codepage 863 (French Canadian)"
366 :coding-type 'charset
368 :charset-list '(cp863)
369 :mime-charset 'cp863)
370 (define-coding-system-alias 'ibm863 'cp863)
;; cp865: Norwegian/Danish.
372 (define-coding-system 'cp865
373 "DOS codepage 865 (Norwegian/Danish)"
374 :coding-type 'charset
376 :charset-list '(cp865)
377 :mime-charset 'cp865)
378 (define-coding-system-alias 'ibm865 'cp865)
;; Define the charset-based `cp437' coding system over the cp437 charset.
;; `define-coding-system' takes NAME, then DOCSTRING, then attribute/value
;; pairs; the docstring must be present so that `:coding-type' is read as
;; an attribute keyword rather than as the documentation argument.
380 (define-coding-system 'cp437
381 "DOS codepage 437"
382 :coding-type 'charset
384 :charset-list '(cp437)
385 :mime-charset 'cp437)
;; `ibm437' is an alternative name for `cp437'.
386 (define-coding-system-alias 'ibm437 'cp437)
388 (set-language-info-alist
389 "Dutch" '((tutorial . "TUTORIAL.nl")
391 (coding-system iso-latin-1 iso-latin-9)
392 (coding-priority iso-latin-1)
393 (nonascii-translation . iso-8859-1)
394 (unibyte-display . iso-latin-1)
395 (input-method . "dutch")
396 (sample-text . "Er is een aantal manieren waarop je dit kan doen")
398 This language environment is almost the same as Latin-1,
399 but it selects the Dutch tutorial and input method."))
402 (set-language-info-alist
403 "German" '((tutorial . "TUTORIAL.de")
405 (coding-system iso-latin-1 iso-latin-9)
406 (coding-priority iso-latin-1)
407 (nonascii-translation . iso-8859-1)
408 (input-method . "german-postfix")
409 (unibyte-display . iso-latin-1)
411 German (Deutsch Nord) Guten Tag
412 German (Deutsch S
\e$(D+d
\e(Bd) Gr
\e$(D+d)N
\e(B Gott")
414 This language environment is almost the same as Latin-1,
415 but sets the default input method to \"german-postfix\".
416 Additionally, it selects the German tutorial."))
419 (set-language-info-alist
420 "French" '((tutorial . "TUTORIAL.fr")
422 (coding-system iso-latin-1 iso-latin-9)
423 (coding-priority iso-latin-1)
424 (nonascii-translation . iso-8859-1)
425 (unibyte-display . iso-latin-1)
426 (input-method . "latin-1-prefix")
427 (sample-text . "French (Fran
\e$(D+.
\e(Bais) Bonjour, Salut")
429 This language environment is almost the same as Latin-1,
430 but it selects the French tutorial and input method."))
433 (set-language-info-alist
434 "Italian" '((tutorial . "TUTORIAL.it")
436 (coding-system iso-latin-1 iso-latin-9)
437 (coding-priority iso-latin-1)
438 (nonascii-translation . iso-8859-1)
439 (unibyte-display . iso-latin-1)
440 (input-method . "italian-postfix")
441 (sample-text . "Salve, ciao!")
443 This language environment is almost the same as Latin-1,
444 but sets the default input method to \"italian-postfix\".
445 Additionally, it selects the Italian tutorial."))
448 (set-language-info-alist
449 "Slovenian" '((charset iso-8859-2)
450 (coding-system . (iso-8859-2 windows-1250))
451 (coding-priority . (iso-8859-2))
452 (nonascii-translation . iso-8859-2)
453 (input-method . "slovenian")
454 (unibyte-display . iso-8859-2)
455 (tutorial . "TUTORIAL.sl")
456 (sample-text . "
\e$(D*v
\e(Belimo vam uspe
\e$(D+^
\e(Ben dan!")
458 This language environment is almost the same as Latin-2,
459 but it selects the Slovenian tutorial and input method."))
462 (set-language-info-alist
463 "Spanish" '((tutorial . "TUTORIAL.es")
465 (coding-system iso-latin-1 iso-latin-9)
466 (coding-priority iso-latin-1)
467 (input-method . "spanish-postfix")
468 (nonascii-translation . iso-8859-1)
469 (unibyte-display . iso-latin-1)
470 (sample-text . "Spanish (Espa
\e$(D+P
\e(Bol)
\e$(D"B
\e(BHola!")
472 This language environment is almost the same as Latin-1,
473 but it sets the default input method to \"spanish-postfix\",
474 and it selects the Spanish tutorial."))
477 ;; For Turkish, the character set ISO-8859-9 (Latin-5) is used. But,
478 ;; before the introduction of ISO-8859-9 in 1988, ISO-8859-3 (Latin-3)
479 ;; was used for Turkish. Those who use Latin-3 for Turkish should use
480 ;; "Latin-3" language environment.
;; Register the "Turkish" language environment.  It is based on
;; ISO-8859-9 (Latin-5), prefers the `iso-latin-5' coding system, and also
;; lists `windows-1254' and `iso-latin-3' as coding systems for Turkish.
482 (set-language-info-alist
483 "Turkish" '((charset iso-8859-9)
484 (coding-system iso-latin-5 windows-1254 iso-latin-3)
485 (coding-priority iso-latin-5)
486 (nonascii-translation . iso-8859-9)
487 (unibyte-display . iso-latin-5)
488 (input-method . "turkish-postfix")
489 (sample-text . "Turkish (T
\e$(D+d
\e(Brk
\e$(D+.
\e(Be) Merhaba")
;; `setup-function' is called when this environment is selected and
;; `exit-function' when it is left.  The two keys must differ: Turkish
;; case rules are installed on entry, and the ordinary `I'/`i' pairing is
;; restored on exit so the rules do not leak into other environments.
490 (setup-function . turkish-case-conversion-enable)
491 (exit-function . turkish-case-conversion-disable)
492 (documentation . "Support for Turkish.
493 Differs from the Latin-5 environment in using the `turkish-postfix' input
494 method and applying Turkish case rules for the characters i, I,
\e$(D)E
\e(B,
\e$(D*D
\e(B.")))
;; Installed as the `setup-function' of the "Turkish" language
;; environment defined in this file.  Takes no arguments.
496 (defun turkish-case-conversion-enable ()
497 "Set up Turkish case conversion of `i' and `I' into `
\e$(D*D
\e(B' and `
\e$(D)E
\e(B'."
;; The pairings are written into the table returned by
;; `standard-case-table', not into a buffer-specific copy.
498 (let ((table (standard-case-table)))
;; `set-case-syntax-pair' takes (UC LC TABLE).  First pairing: the
;; Turkish capital letter becomes the upper-case form of `i'.
499 (set-case-syntax-pair ?
\e$(D*D
\e(B ?i table)
;; Second pairing: `I' becomes the upper-case form of the Turkish
;; small letter.
500 (set-case-syntax-pair ?I ?
\e$(D)E
\e(B table)))
;; Counterpart of `turkish-case-conversion-enable'; referenced from the
;; "Turkish" language environment entry in this file.  Takes no arguments.
502 (defun turkish-case-conversion-disable ()
503 "Set up normal (non-Turkish) case conversion of `i' into `I'."
;; As in the enable function, the table returned by `standard-case-table'
;; is the one that gets modified.
504 (let ((table (standard-case-table)))
;; Restore the ordinary pairing: `I' is the upper-case form of `i'.
505 (set-case-syntax-pair ?I ?i table)
;; Give each of the two Turkish-specific letters plain word syntax ("w")
;; via `set-case-syntax', which registers a character without pairing it
;; with another case form.
506 (set-case-syntax ?
\e$(D*D
\e(B "w" table)
507 (set-case-syntax ?
\e$(D)E
\e(B "w" table)))
509 ;; Polish ISO 8859-2 environment.
510 ;; Maintainer: Wlodek Bzyl <matwb@univ.gda.pl>
511 ;; Keywords: multilingual, Polish
513 (set-language-info-alist
514 "Polish" '((charset iso-8859-2)
515 (coding-system iso-8859-2 windows-1250)
516 (coding-priority iso-8859-2)
517 (input-method . "polish-slash")
518 (nonascii-translation . iso-8859-2)
519 (unibyte-display . iso-8859-2)
520 (tutorial . "TUTORIAL.pl")
521 (sample-text . "P
\e$(D+Q
\e(Bjd
\e$(D+u
\e(B, ki
\e$(D+M
\e(B-
\e$(D+w
\e(Be t
\e$(D+8
\e(B chmurno
\e$(D+\++
\e(B w g
\e$(D)H+(
\e(Bb flaszy")
525 (set-language-info-alist
526 "Welsh" `((coding-system utf-8 latin-8) ; the input method is Unicode-based
527 (coding-priority utf-8 latin-8)
528 (nonascii-translation . iso-8859-14)
529 (input-method . "welsh")
530 (documentation . "Support for Welsh, using Unicode."))
533 (set-language-info-alist
534 "Latin-6" `((coding-system latin-6)
535 (coding-priority latin-6)
536 (nonascii-translation . ,(get 'decode-iso-latin-6 'translation-table))
537 (input-method . "latin-prefix")
538 (features code-pages)
539 (documentation . "Support for Latin-6."))
542 (set-language-info-alist
543 "Latin-7" `((coding-system latin-7)
544 (coding-priority latin-7)
545 (nonascii-translation . iso-8859-13)
546 (input-method . "latin-prefix")
547 (documentation . "Support for Latin-7, e.g. Latvian, Lithuanian."))
550 (set-language-info-alist
551 "Lithuanian" `((coding-system latin-7 windows-1257)
552 (coding-priority latin-7)
553 (nonascii-translation . iso-8859-13)
554 (input-method . "lithuanian-keyboard")
555 (documentation . "Support for Lithuanian."))
558 (set-language-info-alist
559 "Latvian" `((coding-system latin-7 windows-1257)
560 (coding-priority latin-7)
561 (nonascii-translation . iso-8859-13)
562 (input-method . "latvian-keyboard")
563 (documentation . "Support for Latvian."))
566 (set-language-info-alist
567 "Swedish" '((tutorial . "TUTORIAL.sv")
569 (coding-system iso-latin-1)
570 (coding-priority iso-latin-1)
571 (nonascii-translation . iso-8859-1)
572 (unibyte-display . iso-latin-1)
573 (sample-text . "Goddag Hej")
574 (documentation . "Support for Swedish"))
577 (set-language-info-alist
578 "Croatian" '((charset iso-8859-2)
579 (coding-system iso-8859-2)
580 (coding-priority iso-8859-2)
581 (input-method . "croatian")
582 (nonascii-translation . iso-8859-2)
583 (unibyte-display . iso-8859-2)
584 (documentation . "Support for Croatian with Latin-2 encoding."))
587 (set-language-info-alist
588 "Brazilian Portuguese" '((tutorial . "TUTORIAL.pt_BR")
590 (coding-system iso-latin-1 iso-latin-9)
591 (coding-priority iso-latin-1)
592 (nonascii-translation . iso-8859-1)
593 (unibyte-display . iso-8859-1)
594 (input-method . "latin-1-prefix")
596 (documentation . "Support for Brazilian Portuguese."))
;; Define the charset-based `mac-roman' coding system over the mac-roman
;; charset.  Unlike most definitions in this file, the MIME charset name
;; (`macintosh') differs from the coding system's own name.
600 (define-coding-system 'mac-roman
601 "Mac Roman Encoding (MIME:MACINTOSH)."
602 :coding-type 'charset
604 :charset-list '(mac-roman)
605 :mime-charset 'macintosh)
607 (define-coding-system 'next
609 :coding-type 'charset
611 :charset-list '(next)
;; Define the charset-based `hp-roman8' coding system over the hp-roman8
;; charset, advertised under the MIME charset name hp-roman8.
614 (define-coding-system 'hp-roman8
615 "Hewlett-Packard roman-8 encoding (MIME:ROMAN-8)"
616 :coding-type 'charset
618 :charset-list '(hp-roman8)
619 :mime-charset 'hp-roman8)
;; `roman8' is an alternative name for `hp-roman8'.
620 (define-coding-system-alias 'roman8 'hp-roman8)
;; Define the charset-based `adobe-standard-encoding' coding system; the
;; charset list and the MIME charset both use the coding system's own name.
622 (define-coding-system 'adobe-standard-encoding
623 "Adobe `standard' encoding for PostScript"
624 :coding-type 'charset
626 :charset-list '(adobe-standard-encoding)
627 :mime-charset 'adobe-standard-encoding)
630 ;; For automatic composing of diacritics and combining marks.
631 (dolist (range '( ;; combining diacritical marks
632 (#x0300 #x0314 (tc . bc))
634 (#x0316 #x0319 (bc . tc))
636 (#x031B #x0320 (bc . tc))
639 (#x0323 #x0333 (bc . tc))
640 (#x0334 #x0338 (Bc . Bc))
641 (#x0339 #x033C (bc . tc))
642 (#x033D #x033F (tc . bc))
645 (#x0342 #x0344 (tc . bc))
648 (#x0347 #x0349 (bc . tc))
649 (#x034A #x034C (tc . bc))
650 (#x034D #x034E (bc . tc))
651 ;; combining diacritical marks for symbols
652 (#x20D0 #x20D1 (tc . bc))
653 (#x20D2 #x20D3 (Bc . Bc))
654 (#x20D4 #x20D7 (tc . bc))
655 (#x20D8 #x20DA (Bc . Bc))
656 (#x20DB #x20DC (tc . bc))
657 (#x20DD #x20E0 (Bc . Bc))
659 (#x20E2 #x20E3 (Bc . Bc))))
660 (let* ((from (car range))
661 (to (if (= (length range) 3)
664 (composition (car (last range))))
666 (put-char-code-property from 'diacritic-composition composition)
667 (aset composition-function-table from 'diacritic-composition-function)
668 (setq from (1+ from)))))
;; Regexp searched for by `diacritic-compose-region' and
;; `diacritic-compose-string'.  As a regexp it reads `\C^\c^+': one
;; character that is NOT in category `^', followed by one or more
;; characters that ARE in category `^'.
;; NOTE(review): category `^' is presumably the combining-diacritic
;; category -- confirm with `describe-categories'.
670 (defconst diacritic-composition-pattern "\\C^\\c^+")
672 (defun diacritic-compose-region (beg end)
673 "Compose diacritic characters in the region.
674 When called from a program, expects two arguments,
675 positions (integers or markers) specifying the region."
678 (narrow-to-region beg end)
679 (goto-char (point-min))
680 (while (re-search-forward diacritic-composition-pattern nil t)
681 (if (= (char-syntax (char-after (match-beginning 0))) ?w)
682 (compose-region (match-beginning 0) (match-end 0))))))
684 (defun diacritic-compose-string (string)
685 "Compose diacritic characters in STRING and return the resulting string."
687 (while (setq idx (string-match diacritic-composition-pattern string idx))
688 (if (= (char-syntax (aref string idx)) ?w)
689 (compose-string string idx (match-end 0)))
690 (setq idx (match-end 0))))
;; Convenience wrapper with no arguments: it hands the span from
;; `point-min' to `point-max' to `diacritic-compose-region'.
693 (defun diacritic-compose-buffer ()
694 "Compose diacritic characters in the current buffer."
;; `point-min'/`point-max' bound the accessible portion, so an active
;; narrowing limits what gets composed.
696 (diacritic-compose-region (point-min) (point-max)))
698 (defun diacritic-composition-function (pos &optional string)
699 "Compose diacritic text around POS.
700 Optional 2nd argument STRING, if non-nil, is a string containing text
703 The return value is the end position of composed characters,
704 or nil if no characters are composed."
708 (let ((ch (aref string pos))
709 start end components ch composition)
710 (when (= (char-syntax ch) ?w)
717 (setq ch (aref string pos)
719 (get-char-code-property ch
720 'diacritic-composition)))
721 (setq components (cons ch (cons composition components))
723 (compose-string string start pos (nreverse components))
725 (if (>= pos (point-min))
726 (let ((ch (char-after pos))
727 start end components composition)
728 (when (= (char-syntax ch) ?w)
735 (setq ch (char-after pos)
737 (get-char-code-property ch 'diacritic-composition)))
738 (setq components (cons ch (cons composition components))
740 (compose-region start pos (nreverse components))
745 ;;; arch-tag: 9e018b12-fb02-4120-907b-9adeaf84b5c2
746 ;;; european.el ends here