1 ;;; european.el --- support for European languages -*- coding: iso-2022-7bit; -*-
3 ;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004
4 ;; Free Software Foundation, Inc.
5 ;; Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H14PRO021
9 ;; National Institute of Advanced Industrial Science and Technology (AIST)
10 ;; Registration Number H13PRO009
12 ;; Keywords: multilingual, European
14 ;; This file is part of GNU Emacs.
16 ;; GNU Emacs is free software; you can redistribute it and/or modify
17 ;; it under the terms of the GNU General Public License as published by
18 ;; the Free Software Foundation; either version 2, or (at your option)
21 ;; GNU Emacs is distributed in the hope that it will be useful,
22 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 ;; GNU General Public License for more details.
26 ;; You should have received a copy of the GNU General Public License
27 ;; along with GNU Emacs; see the file COPYING. If not, write to the
28 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
29 ;; Boston, MA 02111-1307, USA.
33 ;; For European scripts, all the ISO Latin character sets are
34 ;; supported, along with various others.
38 ;; Latin-1 (ISO-8859-1)
40 (set-language-info-alist
41 "Latin-1" '((charset iso-8859-1)
42 (coding-system iso-latin-1 iso-latin-9 windows-1252)
43 (coding-priority iso-latin-1)
44 (nonascii-translation . iso-8859-1)
45 (unibyte-display . iso-latin-1)
46 (input-method . "latin-1-prefix")
48 . "Hello, Hej, Tere, Hei, Bonjour, Gr
\e$(D+d)N
\e(B Gott, Ciao,
\e$(D"B
\e(BHola!")
50 This language environment is a generic one for the Latin-1 (ISO-8859-1)
51 character set which supports the following European languages:
52 Albanian, Basque, Breton, Catalan, Danish, Dutch, English, Faeroese,
53 Finnish, French (with restrictions -- see Latin-9), Frisian, Galician,
54 German, Greenlandic, Icelandic, Irish Gaelic (new orthography),
55 Italian, Latin, Luxemburgish, Norwegian, Portuguese, Rhaeto-Romanic,
56 Scottish Gaelic, Spanish, and Swedish.
57 We also have specific language environments for the following languages:
59 For German, \"German\".
60 For French, \"French\".
61 For Italian, \"Italian\".
62 For Slovenian, \"Slovenian\".
63 For Spanish, \"Spanish\".
65 Latin-1 also covers several written languages outside Europe, including
66 Indonesian/Malay, Tagalog (Philippines), Swahili and Afrikaans."))
70 ;; Latin-2 (ISO-8859-2)
72 (define-coding-system 'iso-latin-2
73 "ISO 2022 based 8-bit encoding for Latin-2 (MIME:ISO-8859-2)."
76 :charset-list '(iso-8859-2)
77 :mime-charset 'iso-8859-2)
79 (define-coding-system-alias 'iso-8859-2 'iso-latin-2)
80 (define-coding-system-alias 'latin-2 'iso-latin-2)
82 (set-language-info-alist
83 "Latin-2" '((charset iso-8859-2)
84 (coding-system iso-latin-2 windows-1250)
85 (coding-priority iso-latin-2)
86 (nonascii-translation . iso-8859-2)
87 (unibyte-display . iso-latin-2)
88 (input-method . "latin-2-prefix")
90 This language environment is a generic one for the Latin-2 (ISO-8859-2)
91 character set which supports the following languages:
92 Albanian, Czech, English, German, Hungarian, Polish, Romanian,
93 Serbo-Croatian or Croatian, Slovak, Slovene, Sorbian (upper and lower),
95 We also have specific language environments for the following languages:
97 For Croatian, \"Croatian\".
98 For Polish, \"Polish\".
99 For Romanian, \"Romanian\".
100 For Slovak, \"Slovak\"."))
104 ;; Latin-3 (ISO-8859-3)
106 (define-coding-system 'iso-latin-3
107 "ISO 2022 based 8-bit encoding for Latin-3 (MIME:ISO-8859-3)."
108 :coding-type 'charset
110 :charset-list '(iso-8859-3)
111 :mime-charset 'iso-8859-3)
113 (define-coding-system-alias 'iso-8859-3 'iso-latin-3)
114 (define-coding-system-alias 'latin-3 'iso-latin-3)
116 (set-language-info-alist
117 "Latin-3" '((charset iso-8859-3)
118 (coding-system iso-latin-3)
119 (coding-priority iso-latin-3)
120 (nonascii-translation . iso-8859-3)
121 (unibyte-display . iso-latin-3)
122 (input-method . "latin-3-prefix")
124 These languages are supported with the Latin-3 (ISO-8859-3) character set:
125 Afrikaans, Catalan, Dutch, English, Esperanto, French, Galician,
126 German, Italian, Maltese, Spanish, and Turkish."))
130 ;; Latin-4 (ISO-8859-4)
132 (define-coding-system 'iso-latin-4
133 "ISO 2022 based 8-bit encoding for Latin-4 (MIME:ISO-8859-4)."
134 :coding-type 'charset
136 :charset-list '(iso-8859-4)
137 :mime-charset 'iso-8859-4)
139 (define-coding-system-alias 'iso-8859-4 'iso-latin-4)
140 (define-coding-system-alias 'latin-4 'iso-latin-4)
142 (set-language-info-alist
143 "Latin-4" '((charset iso-8859-4)
144 (coding-system iso-8859-4)
145 (coding-priority iso-8859-4)
146 (nonascii-translation . iso-8859-4)
147 (unibyte-display . iso-8859-4)
148 (input-method . "latin-4-postfix")
150 These languages are supported with the Latin-4 (ISO-8859-4) character set:
151 Danish, English, Estonian, Finnish, German, Greenlandic, Lappish,
152 Latvian, Lithuanian, and Norwegian."))
156 ;; Latin-5 (ISO-8859-9)
158 (define-coding-system 'iso-latin-5
159 "ISO 2022 based 8-bit encoding for Latin-5 (MIME:ISO-8859-9)."
160 :coding-type 'charset
162 :charset-list '(iso-8859-9)
163 :mime-charset 'iso-8859-9)
165 (define-coding-system-alias 'iso-8859-9 'iso-latin-5)
166 (define-coding-system-alias 'latin-5 'iso-latin-5)
;; Register the "Latin-5" (ISO-8859-9) language environment.  The
;; documentation string separates its two sentences with a real
;; newline: a backslash-newline inside a Lisp string is dropped by the
;; reader and would fuse them into "Latin-5.See also".
168 (set-language-info-alist
169 "Latin-5" '((charset iso-8859-9)
170 (coding-system iso-latin-5)
171 (coding-priority iso-latin-5)
172 (nonascii-translation . iso-8859-9)
173 (unibyte-display . iso-latin-5)
174 (input-method . "latin-5-postfix")
175 (documentation . "Support for Latin-5.
176 See also the Turkish environment."))
180 ;; Latin-6 (ISO-8859-10)
182 (define-coding-system 'iso-latin-6
183 "ISO 2022 based 8-bit encoding for Latin-6 (MIME:ISO-8859-10)."
184 :coding-type 'charset
186 :charset-list '(iso-8859-10)
187 :mime-charset 'iso-8859-10)
189 (define-coding-system-alias 'iso-8859-10 'iso-latin-6)
190 (define-coding-system-alias 'latin-6 'iso-latin-6)
192 (set-language-info-alist
193 "Latin-6" '((charset iso-8859-10)
194 (coding-system iso-latin-6)
195 (coding-priority iso-latin-6)
196 (nonascii-translation . iso-8859-10)
197 (unibyte-display . iso-latin-6)
198 ;; Fixme: input method.
199 (documentation . "Support for generic Latin-6 (Northern European)."))
203 ;; Latin-7 (ISO-8859-13)
205 (define-coding-system 'iso-latin-7
206 "ISO 2022 based 8-bit encoding for Latin-7 (MIME:ISO-8859-13)."
207 :coding-type 'charset
209 :charset-list '(iso-8859-13)
210 :mime-charset 'iso-8859-13)
212 (define-coding-system-alias 'iso-8859-13 'iso-latin-7)
213 (define-coding-system-alias 'latin-7 'iso-latin-7)
215 (set-language-info-alist
216 "Latin-7" '((charset iso-8859-13)
217 (coding-system iso-latin-7)
218 (coding-priority iso-latin-7)
219 (nonascii-translation . iso-8859-13)
220 (unibyte-display . iso-latin-7)
221 ;; Fixme: input method.
222 (documentation . "Support for generic Latin-7 (Baltic Rim)."))
225 ;; Latin-8 (ISO-8859-14)
227 (define-coding-system 'iso-latin-8
228 "ISO 2022 based 8-bit encoding for Latin-8 (MIME:ISO-8859-14)."
229 :coding-type 'charset
230 ;; `W' for `Welsh', since `C' for `Celtic' is taken.
232 :charset-list '(iso-8859-14)
233 :mime-charset 'iso-8859-14)
235 (define-coding-system-alias 'iso-8859-14 'iso-latin-8)
236 (define-coding-system-alias 'latin-8 'iso-latin-8)
238 (set-language-info-alist
239 "Latin-8" '((charset iso-8859-14)
240 (coding-system iso-latin-8)
241 (coding-priority iso-latin-8)
242 (nonascii-translation . iso-8859-14)
243 (unibyte-display . iso-latin-8)
244 (input-method . "latin-8-prefix")
245 ;; Fixme: Welsh/Ga{e}lic greetings
246 (sample-text . "
\e,_"
\e(B
\e$(D+q
\e(B
\e$(D*t
\e(B")
248 This language environment is a generic one for the Latin-8 (ISO-8859-14)
249 character set which supports the Celtic languages, including those not
250 covered by other ISO-8859 character sets:
251 Welsh, Manx Gaelic and Irish Gaelic (old orthography)."))
254 ;; Latin-9 (ISO-8859-15)
256 (define-coding-system 'iso-latin-9
257 "ISO 2022 based 8-bit encoding for Latin-9 (MIME:ISO-8859-15)."
258 :coding-type 'charset
261 :charset-list '(iso-8859-15)
262 :mime-charset 'iso-8859-15)
264 (define-coding-system-alias 'iso-8859-15 'iso-latin-9)
265 (define-coding-system-alias 'latin-9 'iso-latin-9)
266 (define-coding-system-alias 'latin-0 'iso-latin-9)
268 (set-language-info-alist
269 "Latin-9" '((charset iso-8859-15)
270 (coding-system iso-latin-9)
271 (coding-priority iso-latin-9)
272 (nonascii-translation . iso-8859-15)
273 (unibyte-display . iso-latin-9)
274 (input-method . "latin-9-prefix")
276 . "AVE.
\e$(D*^+^*v+v)-)M*s
\e(B
\e$(Q)!
\e(B")
278 This language environment is a generic one for the Latin-9 (ISO-8859-15)
279 character set which supports the same languages as Latin-1 with the
280 addition of the Euro sign and some additional French and Finnish letters.
281 Latin-9 is sometimes nicknamed `Latin-0'."))
284 (define-coding-system 'windows-1250
285 "windows-1250 (Central European) encoding (MIME: WINDOWS-1250)"
286 :coding-type 'charset
288 :charset-list '(windows-1250)
289 :mime-charset 'windows-1250)
290 (define-coding-system-alias 'cp1250 'windows-1250)
292 (define-coding-system 'windows-1252
293 "windows-1252 (Western European) encoding (MIME: WINDOWS-1252)"
294 :coding-type 'charset
296 :charset-list '(windows-1252)
297 :mime-charset 'windows-1252)
298 (define-coding-system-alias 'cp1252 'windows-1252)
300 (define-coding-system 'windows-1254
301 "windows-1254 (Turkish) encoding (MIME: WINDOWS-1254)"
302 :coding-type 'charset
304 :charset-list '(windows-1254)
305 :mime-charset 'windows-1254)
306 (define-coding-system-alias 'cp1254 'windows-1254)
308 (define-coding-system 'windows-1257
309 "windows-1257 (Baltic) encoding (MIME: WINDOWS-1257)"
310 :coding-type 'charset
312 :charset-list '(windows-1257)
313 :mime-charset 'windows-1257)
314 (define-coding-system-alias 'cp1257 'windows-1257)
316 (define-coding-system 'cp850
317 "DOS codepage 850 (Western European)"
318 :coding-type 'charset
320 :charset-list '(cp850)
321 :mime-charset 'cp850)
322 (define-coding-system-alias 'ibm850 'cp850)
324 (define-coding-system 'cp852
325 "DOS codepage 852 (Slavic)"
326 :coding-type 'charset
328 :charset-list '(cp852)
329 :mime-charset 'cp852)
330 (define-coding-system-alias 'ibm852 'cp852)
332 (define-coding-system 'cp857
333 "DOS codepage 857 (Turkish)"
334 :coding-type 'charset
336 :charset-list '(cp857)
337 :mime-charset 'cp857)
338 (define-coding-system-alias 'ibm857 'cp857)
340 (define-coding-system 'cp858
341 "Codepage 858 (Multilingual Latin I + Euro)"
342 :coding-type 'charset
344 :charset-list '(cp858)
345 :mime-charset 'cp858)
347 (define-coding-system 'cp860
348 "DOS codepage 860 (Portuguese)"
349 :coding-type 'charset
351 :charset-list '(cp860)
352 :mime-charset 'cp860)
353 (define-coding-system-alias 'ibm860 'cp860)
355 (define-coding-system 'cp861
356 "DOS codepage 861 (Icelandic)"
357 :coding-type 'charset
359 :charset-list '(cp861)
360 :mime-charset 'cp861)
361 (define-coding-system-alias 'ibm861 'cp861)
363 (define-coding-system 'cp863
364 "DOS codepage 863 (French Canadian)"
365 :coding-type 'charset
367 :charset-list '(cp863)
368 :mime-charset 'cp863)
369 (define-coding-system-alias 'ibm863 'cp863)
371 (define-coding-system 'cp865
372 "DOS codepage 865 (Norwegian/Danish)"
373 :coding-type 'charset
375 :charset-list '(cp865)
376 :mime-charset 'cp865)
377 (define-coding-system-alias 'ibm865 'cp865)
379 (define-coding-system 'cp437
381 :coding-type 'charset
383 :charset-list '(cp437)
384 :mime-charset 'cp437)
385 (define-coding-system-alias 'ibm437 'cp437)
387 (set-language-info-alist
388 "Dutch" '((tutorial . "TUTORIAL.nl")
390 (coding-system iso-latin-1 iso-latin-9)
391 (coding-priority iso-latin-1)
392 (nonascii-translation . iso-8859-1)
393 (unibyte-display . iso-latin-1)
394 (input-method . "dutch")
395 (sample-text . "Er is een aantal manieren waarop je dit kan doen")
397 This language environment is almost the same as Latin-1,
398 but it selects the Dutch tutorial and input method."))
401 (set-language-info-alist
402 "German" '((tutorial . "TUTORIAL.de")
404 (coding-system iso-latin-1 iso-latin-9)
405 (coding-priority iso-latin-1)
406 (nonascii-translation . iso-8859-1)
407 (input-method . "german-postfix")
408 (unibyte-display . iso-latin-1)
410 German (Deutsch Nord) Guten Tag
411 German (Deutsch S
\e$(D+d
\e(Bd) Gr
\e$(D+d)N
\e(B Gott")
413 This language environment is almost the same as Latin-1,
414 but sets the default input method to \"german-postfix\".
415 Additionally, it selects the German tutorial."))
418 (set-language-info-alist
419 "French" '((tutorial . "TUTORIAL.fr")
421 (coding-system iso-latin-1 iso-latin-9)
422 (coding-priority iso-latin-1)
423 (nonascii-translation . iso-8859-1)
424 (unibyte-display . iso-latin-1)
425 (input-method . "latin-1-prefix")
426 (sample-text . "French (Fran
\e$(D+.
\e(Bais) Bonjour, Salut")
428 This language environment is almost the same as Latin-1,
429 but it selects the French tutorial and input method."))
432 (set-language-info-alist
433 "Italian" '((tutorial . "TUTORIAL.it")
435 (coding-system iso-latin-1 iso-latin-9)
436 (coding-priority iso-latin-1)
437 (nonascii-translation . iso-8859-1)
438 (unibyte-display . iso-latin-1)
439 (input-method . "italian-postfix")
440 (sample-text . "Salve, ciao!")
442 This language environment is almost the same as Latin-1,
443 but sets the default input method to \"italian-postfix\".
444 Additionally, it selects the Italian tutorial."))
447 (set-language-info-alist
448 "Slovenian" '((charset iso-8859-2)
449 (coding-system . (iso-8859-2 windows-1250))
450 (coding-priority . (iso-8859-2))
451 (nonascii-translation . iso-8859-2)
452 (input-method . "slovenian")
453 (unibyte-display . iso-8859-2)
454 (tutorial . "TUTORIAL.sl")
455 (sample-text . "
\e$(D*v
\e(Belimo vam uspe
\e$(D+^
\e(Ben dan!")
457 This language environment is almost the same as Latin-2,
458 but it selects the Slovenian tutorial and input method."))
461 (set-language-info-alist
462 "Spanish" '((tutorial . "TUTORIAL.es")
464 (coding-system iso-latin-1 iso-latin-9)
465 (coding-priority iso-latin-1)
466 (input-method . "spanish-postfix")
467 (nonascii-translation . iso-8859-1)
468 (unibyte-display . iso-latin-1)
469 (sample-text . "Spanish (Espa
\e$(D+P
\e(Bol)
\e$(D"B
\e(BHola!")
471 This language environment is almost the same as Latin-1,
472 but it sets the default input method to \"spanish-postfix\",
473 and it selects the Spanish tutorial."))
476 ;; Turkish uses the character set ISO-8859-9 (Latin-5).  However,
477 ;; before ISO-8859-9 was introduced in 1988, ISO-8859-3 (Latin-3)
478 ;; was used for Turkish.  Those who still use Latin-3 for Turkish
479 ;; should select the "Latin-3" language environment.
481 (set-language-info-alist
482 "Turkish" '((charset iso-8859-9)
483 (coding-system iso-latin-5 windows-1254 iso-latin-3)
484 (coding-priority iso-latin-5)
485 (nonascii-translation . iso-8859-9)
486 (unibyte-display . iso-latin-5)
487 (input-method . "turkish-postfix")
488 (sample-text . "Turkish (T
\e$(D+d
\e(Brk
\e$(D+.
\e(Be) Merhaba")
491 (set-case-syntax-pair ?I ?
\e$(D)E
\e(B (standard-case-table))
492 (set-case-syntax-pair ?
\e$(D*D
\e(B ?i (standard-case-table))))
495 (set-case-syntax-pair ?I ?i (standard-case-table))
496 (set-case-syntax ?
\e$(D)E
\e(B "w" (standard-case-table))
497 (set-case-syntax ?
\e$(D*D
\e(B "w" (standard-case-table))))
498 (documentation . "Support for Turkish.
499 Differs from the Latin-5 environment in using the `turkish-postfix' input
500 method and applying Turkish case rules for the characters i, I,
\e$(D)E
\e(B,
\e$(D*D
\e(B.")))
502 ;; Polish ISO 8859-2 environment.
503 ;; Maintainer: Wlodek Bzyl <matwb@univ.gda.pl>
504 ;; Keywords: multilingual, Polish
506 (set-language-info-alist
507 "Polish" '((charset iso-8859-2)
508 (coding-system iso-8859-2 windows-1250)
509 (coding-priority iso-8859-2)
510 (input-method . "polish-slash")
511 (nonascii-translation . iso-8859-2)
512 (unibyte-display . iso-8859-2)
513 (tutorial . "TUTORIAL.pl")
514 (sample-text . "P
\e$(D+Q
\e(Bjd
\e$(D+u
\e(B, ki
\e$(D+M
\e(B-
\e$(D+w
\e(Be t
\e$(D+8
\e(B chmurno
\e$(D+\++
\e(B w g
\e$(D)H+(
\e(Bb flaszy")
518 (set-language-info-alist
519 "Welsh" `((coding-system utf-8 latin-8) ; the input method is Unicode-based
520 (coding-priority utf-8 latin-8)
521 (nonascii-translation . iso-8859-14)
522 (input-method . "welsh")
523 (documentation . "Support for Welsh, using Unicode."))
;; Second registration of the "Latin-6" language environment.
;; NOTE(review): "Latin-6" is also registered earlier in this file with
;; (nonascii-translation . iso-8859-10), (unibyte-display . iso-latin-6)
;; and a different documentation string.  Both calls target the same
;; environment name -- confirm which values are meant to take effect.
;; NOTE(review): the backquoted form evaluates
;; (get 'decode-iso-latin-6 'translation-table) when this form is
;; evaluated; nothing in this file sets that property.  Presumably it
;; comes from the `code-pages' feature named below -- verify that it is
;; available at that point, otherwise the value is nil.
526 (set-language-info-alist
527 "Latin-6" `((coding-system latin-6)
528 (coding-priority latin-6)
529 (nonascii-translation . ,(get 'decode-iso-latin-6 'translation-table))
530 (input-method . "latin-prefix")
531 (features code-pages)
532 (documentation . "Support for Latin-6."))
;; Second registration of the "Latin-7" language environment.
;; NOTE(review): "Latin-7" is also registered earlier in this file with
;; (coding-system iso-latin-7), (unibyte-display . iso-latin-7) and a
;; different documentation string.  Both calls target the same
;; environment name -- confirm which values are meant to take effect.
;; `latin-7' is an alias of `iso-latin-7' defined earlier in this file.
535 (set-language-info-alist
536 "Latin-7" `((coding-system latin-7)
537 (coding-priority latin-7)
538 (nonascii-translation . iso-8859-13)
539 (input-method . "latin-prefix")
540 (documentation . "Support for Latin-7, e.g. Latvian, Lithuanian."))
543 (set-language-info-alist
544 "Lithuanian" `((coding-system latin-7 windows-1257)
545 (coding-priority latin-7)
546 (nonascii-translation . iso-8859-13)
547 (input-method . "lithuanian-keyboard")
548 (documentation . "Support for Lithuanian."))
551 (set-language-info-alist
552 "Latvian" `((coding-system latin-7 windows-1257)
553 (coding-priority latin-7)
554 (nonascii-translation . iso-8859-13)
555 (input-method . "latvian-keyboard")
556 (documentation . "Support for Latvian."))
559 (set-language-info-alist
560 "Swedish" '((tutorial . "TUTORIAL.sv")
562 (coding-system iso-latin-1)
563 (coding-priority iso-latin-1)
564 (nonascii-translation . iso-8859-1)
565 (unibyte-display . iso-latin-1)
566 (sample-text . "Goddag Hej")
567 (documentation . "Support for Swedish"))
570 (set-language-info-alist
571 "Croatian" '((charset iso-8859-2)
572 (coding-system iso-8859-2)
573 (coding-priority iso-8859-2)
574 (input-method . "croatian")
575 (nonascii-translation . iso-8859-2)
576 (unibyte-display . iso-8859-2)
577 (documentation . "Support for Croatian with Latin-2 encoding."))
580 (set-language-info-alist
581 "Brazilian Portuguese" '((tutorial . "TUTORIAL.pt_BR")
583 (coding-system iso-latin-1 iso-latin-9)
584 (coding-priority iso-latin-1)
585 (nonascii-translation . iso-8859-1)
586 (unibyte-display . iso-8859-1)
587 (input-method . "latin-1-prefix")
589 (documentation . "Support for Brazilian Portuguese."))
593 (define-coding-system 'mac-roman
594 "Mac Roman Encoding (MIME:MACINTOSH)."
595 :coding-type 'charset
597 :charset-list '(mac-roman)
598 :mime-charset 'macintosh)
600 (define-coding-system 'next
602 :coding-type 'charset
604 :charset-list '(next)
;; Coding system for HP Roman-8 text, converted through the single
;; charset `hp-roman8' (`:coding-type' is `charset').
607 (define-coding-system 'hp-roman8
608 "Hewlett-Packard roman-8 encoding (MIME:ROMAN-8)"
609 :coding-type 'charset
611 :charset-list '(hp-roman8)
612 :mime-charset 'hp-roman8)
613 (define-coding-system-alias 'roman8 'hp-roman8)
615 (define-coding-system 'adobe-standard-encoding
616 "Adobe `standard' encoding for PostScript"
617 :coding-type 'charset
619 :charset-list '(adobe-standard-encoding)
620 :mime-charset 'adobe-standard-encoding)
623 ;; For automatic composing of diacritics and combining marks.
624 (dolist (range '( ;; combining diacritical marks
625 (#x0300 #x0314 (tc . bc))
627 (#x0316 #x0319 (bc . tc))
629 (#x031B #x0320 (bc . tc))
632 (#x0323 #x0333 (bc . tc))
633 (#x0334 #x0338 (Bc . Bc))
634 (#x0339 #x033C (bc . tc))
635 (#x033D #x033F (tc . bc))
638 (#x0342 #x0344 (tc . bc))
641 (#x0347 #x0349 (bc . tc))
642 (#x034A #x034C (tc . bc))
643 (#x034D #x034E (bc . tc))
644 ;; combining diacritical marks for symbols
645 (#x20D0 #x20D1 (tc . bc))
646 (#x20D2 #x20D3 (Bc . Bc))
647 (#x20D4 #x20D7 (tc . bc))
648 (#x20D8 #x20DA (Bc . Bc))
649 (#x20DB #x20DC (tc . bc))
650 (#x20DD #x20E0 (Bc . Bc))
652 (#x20E2 #x20E3 (Bc . Bc))))
653 (let* ((from (car range))
654 (to (if (= (length range) 3)
657 (composition (car (last range))))
659 (put-char-code-property from 'diacritic-composition composition)
660 (aset composition-function-table from 'diacritic-composition-function)
661 (setq from (1+ from)))))
;; Regexp used by `diacritic-compose-region' and
;; `diacritic-compose-string' to find text to compose: one character
;; that is NOT in category `^' (the `\C^' part) followed by one or more
;; characters that ARE in category `^' (the `\c^+' part).
;; Presumably `^' is the combining-diacritic category -- confirm with
;; `describe-categories'.
663 (defconst diacritic-composition-pattern "\\C^\\c^+")
665 (defun diacritic-compose-region (beg end)
666 "Compose diacritic characters in the region.
667 When called from a program, expects two arguments,
668 positions (integers or markers) specifying the region."
671 (narrow-to-region beg end)
672 (goto-char (point-min))
673 (while (re-search-forward diacritic-composition-pattern nil t)
674 (if (= (char-syntax (char-after (match-beginning 0))) ?w)
675 (compose-region (match-beginning 0) (match-end 0))))))
677 (defun diacritic-compose-string (string)
678 "Compose diacritic characters in STRING and return the resulting string."
680 (while (setq idx (string-match diacritic-composition-pattern string idx))
681 (if (= (char-syntax (aref string idx)) ?w)
682 (compose-string string idx (match-end 0)))
683 (setq idx (match-end 0))))
686 (defun diacritic-compose-buffer ()
687 "Compose diacritic characters in the current buffer."
689 (diacritic-compose-region (point-min) (point-max)))
691 (defun diacritic-composition-function (pos &optional string)
692 "Compose diacritic text around POS.
693 Optional 2nd argument STRING, if non-nil, is a string containing text
696 The return value is the end position of composed characters,
697 or nil if no characters are composed."
701 (let ((ch (aref string pos))
702 start end components ch composition)
703 (when (= (char-syntax ch) ?w)
710 (setq ch (aref string pos)
712 (get-char-code-property ch
713 'diacritic-composition)))
714 (setq components (cons ch (cons composition components))
716 (compose-string string start pos (nreverse components))
718 (if (>= pos (point-min))
719 (let ((ch (char-after pos))
720 start end components composition)
721 (when (= (char-syntax ch) ?w)
728 (setq ch (char-after pos)
730 (get-char-code-property ch 'diacritic-composition)))
731 (setq components (cons ch (cons composition components))
733 (compose-region start pos (nreverse components))
738 ;;; arch-tag: 9e018b12-fb02-4120-907b-9adeaf84b5c2
739 ;;; european.el ends here