1 ;;; european.el --- support for European languages -*- coding: iso-2022-7bit; -*-
3 ;; Copyright (C) 1995, 1997, 2001 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
7 ;; National Institute of Advanced Industrial Science and Technology (AIST)
8 ;; Registration Number H13PRO009
10 ;; Keywords: multilingual, European
12 ;; This file is part of GNU Emacs.
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 2, or (at your option)
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27 ;; Boston, MA 02111-1307, USA.
31 ;; For European scripts, all the ISO Latin character sets are
32 ;; supported, along with various others.
36 ;; Latin-1 (ISO-8859-1)
;; Register the `Latin-1' language environment: charset iso-8859-1,
;; coding systems iso-latin-1, iso-latin-9 and windows-1252 (iso-latin-1
;; has priority), and `latin-1-prefix' as the input method.
;; NOTE(review): the embedded line numbers skip (44->46, 46->48, 64->68),
;; so the keys introducing the greeting and description text, and the
;; end of the form, seem to be missing from this copy -- confirm
;; against the upstream file.
38 (set-language-info-alist
39 "Latin-1" '((charset iso-8859-1)
40 (coding-system iso-latin-1 iso-latin-9 windows-1252)
41 (coding-priority iso-latin-1)
42 (nonascii-translation . iso-8859-1)
43 (unibyte-display . iso-latin-1)
44 (input-method . "latin-1-prefix")
46 . "Hello, Hej, Tere, Hei, Bonjour, Gr
\e$(D+d)N
\e(B Gott, Ciao,
\e$(D"B
\e(BHola!")
48 This language environment is a generic one for the Latin-1 (ISO-8859-1)
49 character set which supports the following European languages:
50 Albanian, Basque, Breton, Catalan, Danish, Dutch, English, Faeroese,
51 Finnish, French (with restrictions -- see Latin-9), Frisian, Galician,
52 German, Greenlandic, Icelandic, Irish Gaelic (new orthography),
53 Italian, Latin, Luxemburgish, Norwegian, Portuguese, Rhaeto-Romanic,
54 Scottish Gaelic, Spanish, and Swedish.
55 We also have specific language environments for the following languages:
57 For German, \"German\".
58 For French, \"French\".
59 For Italian, \"Italian\".
60 For Slovenian, \"Slovenian\".
61 For Spanish, \"Spanish\".
63 Latin-1 also covers several written languages outside Europe, including
64 Indonesian/Malay, Tagalog (Philippines), Swahili and Afrikaans."))
68 ;; Latin-2 (ISO-8859-2)
;; Define the `iso-latin-2' coding system over the iso-8859-2 charset
;; (MIME name iso-8859-2), then make `iso-8859-2' and `latin-2'
;; aliases for it.
;; NOTE(review): embedded lines 72-73 are absent from this copy, so
;; some keyword arguments may be missing here -- confirm upstream.
70 (define-coding-system 'iso-latin-2
71 "ISO 2022 based 8-bit encoding for Latin-2 (MIME:ISO-8859-2)."
74 :charset-list '(iso-8859-2)
75 :mime-charset 'iso-8859-2)
77 (define-coding-system-alias 'iso-8859-2 'iso-latin-2)
78 (define-coding-system-alias 'latin-2 'iso-latin-2)
;; Register the `Latin-2' language environment: charset iso-8859-2,
;; coding systems iso-latin-2 and windows-1250 (iso-latin-2 has
;; priority), and `latin-2-prefix' as the input method.
;; NOTE(review): embedded lines 87, 92, 94 and 99 are absent, so the
;; key that introduces the description text and the end of the form
;; are not visible in this copy.
80 (set-language-info-alist
81 "Latin-2" '((charset iso-8859-2)
82 (coding-system iso-latin-2 windows-1250)
83 (coding-priority iso-latin-2)
84 (nonascii-translation . iso-8859-2)
85 (unibyte-display . iso-latin-2)
86 (input-method . "latin-2-prefix")
88 This language environment is a generic one for the Latin-2 (ISO-8859-2)
89 character set which supports the following languages:
90 Albanian, Czech, English, German, Hungarian, Polish, Romanian,
91 Serbo-Croatian or Croatian, Slovak, Slovene, Sorbian (upper and lower),
93 We also have specific language environments for the following languages:
95 For Croatian, \"Croatian\".
96 For Polish, \"Polish\".
97 For Romanian, \"Romanian\".
98 For Slovak, \"Slovak\"."))
102 ;; Latin-3 (ISO-8859-3)
;; Define the charset-type coding system `iso-latin-3' over the
;; iso-8859-3 charset (MIME name iso-8859-3), then make `iso-8859-3'
;; and `latin-3' aliases for it.
;; NOTE(review): embedded line 107 is absent from this copy.
104 (define-coding-system 'iso-latin-3
105 "ISO 2022 based 8-bit encoding for Latin-3 (MIME:ISO-8859-3)."
106 :coding-type 'charset
108 :charset-list '(iso-8859-3)
109 :mime-charset 'iso-8859-3)
111 (define-coding-system-alias 'iso-8859-3 'iso-latin-3)
112 (define-coding-system-alias 'latin-3 'iso-latin-3)
;; Register the `Latin-3' language environment: charset iso-8859-3,
;; coding system iso-latin-3, input method `latin-3-prefix'.
;; NOTE(review): embedded line 121 (between the input method and the
;; description text) and the end of the form are absent from this copy.
114 (set-language-info-alist
115 "Latin-3" '((charset iso-8859-3)
116 (coding-system iso-latin-3)
117 (coding-priority iso-latin-3)
118 (nonascii-translation . iso-8859-3)
119 (unibyte-display . iso-latin-3)
120 (input-method . "latin-3-prefix")
122 These languages are supported with the Latin-3 (ISO-8859-3) character set:
123 Afrikaans, Catalan, Dutch, English, Esperanto, French, Galician,
124 German, Italian, Maltese, Spanish, and Turkish."))
128 ;; Latin-4 (ISO-8859-4)
;; Define the charset-type coding system `iso-latin-4' over the
;; iso-8859-4 charset (MIME name iso-8859-4), then make `iso-8859-4'
;; and `latin-4' aliases for it.
;; NOTE(review): embedded line 133 is absent from this copy.
130 (define-coding-system 'iso-latin-4
131 "ISO 2022 based 8-bit encoding for Latin-4 (MIME:ISO-8859-4)."
132 :coding-type 'charset
134 :charset-list '(iso-8859-4)
135 :mime-charset 'iso-8859-4)
137 (define-coding-system-alias 'iso-8859-4 'iso-latin-4)
138 (define-coding-system-alias 'latin-4 'iso-latin-4)
;; Register the `Latin-4' language environment: charset iso-8859-4,
;; input method `latin-4-postfix'.  Unlike the neighbouring Latin-N
;; entries, the coding-system keys here name the alias `iso-8859-4'
;; rather than `iso-latin-4'.
;; NOTE(review): embedded line 147 and the end of the form are absent
;; from this copy.
140 (set-language-info-alist
141 "Latin-4" '((charset iso-8859-4)
142 (coding-system iso-8859-4)
143 (coding-priority iso-8859-4)
144 (nonascii-translation . iso-8859-4)
145 (unibyte-display . iso-8859-4)
146 (input-method . "latin-4-postfix")
148 These languages are supported with the Latin-4 (ISO-8859-4) character set:
149 Danish, English, Estonian, Finnish, German, Greenlandic, Lappish,
150 Latvian, Lithuanian, and Norwegian."))
154 ;; Latin-5 (ISO-8859-9)
;; Define the charset-type coding system `iso-latin-5' over the
;; iso-8859-9 charset (MIME name iso-8859-9), then make `iso-8859-9'
;; and `latin-5' aliases for it.
;; NOTE(review): embedded line 159 is absent from this copy.
156 (define-coding-system 'iso-latin-5
157 "ISO 2022 based 8-bit encoding for Latin-5 (MIME:ISO-8859-9)."
158 :coding-type 'charset
160 :charset-list '(iso-8859-9)
161 :mime-charset 'iso-8859-9)
163 (define-coding-system-alias 'iso-8859-9 'iso-latin-5)
164 (define-coding-system-alias 'latin-5 'iso-latin-5)
;; Register the `Latin-5' language environment: charset iso-8859-9,
;; coding system iso-latin-5, input method `latin-5-postfix'.
;; The description used to end its first line with a backslash
;; followed by a newline.  The Lisp reader drops that pair, so the two
;; sentences were fused with no separator.  An explicit \n escape now
;; keeps them on separate lines.
;; NOTE(review): the embedded numbering jumps from 174 to 178, so the
;; end of this form is not visible in this copy and is left untouched.
166 (set-language-info-alist
167 "Latin-5" '((charset iso-8859-9)
168 (coding-system iso-latin-5)
169 (coding-priority iso-latin-5)
170 (nonascii-translation . iso-8859-9)
171 (unibyte-display . iso-latin-5)
172 (input-method . "latin-5-postfix")
173 (documentation . "Support for Latin-5.\nSee also the Turkish environment."))
178 ;; Latin-6 (ISO-8859-10)
;; Define the charset-type coding system `iso-latin-6' over the
;; iso-8859-10 charset (MIME name iso-8859-10), then make `iso-8859-10'
;; and `latin-6' aliases for it.
;; NOTE(review): embedded line 183 is absent from this copy.
180 (define-coding-system 'iso-latin-6
181 "ISO 2022 based 8-bit encoding for Latin-6 (MIME:ISO-8859-10)."
182 :coding-type 'charset
184 :charset-list '(iso-8859-10)
185 :mime-charset 'iso-8859-10)
187 (define-coding-system-alias 'iso-8859-10 'iso-latin-6)
188 (define-coding-system-alias 'latin-6 'iso-latin-6)
;; Register the `Latin-6' language environment: charset iso-8859-10,
;; coding system iso-latin-6.  No input method is set (see the Fixme).
;; NOTE(review): `Latin-6' is registered a second time further down in
;; this file (embedded line 524) with different values; presumably the
;; later call takes effect -- confirm which one is intended.
190 (set-language-info-alist
191 "Latin-6" '((charset iso-8859-10)
192 (coding-system iso-latin-6)
193 (coding-priority iso-latin-6)
194 (nonascii-translation . iso-8859-10)
195 (unibyte-display . iso-latin-6)
196 ;; Fixme: input method.
197 (documentation . "Support for generic Latin-6 (Northern European)."))
201 ;; Latin-7 (ISO-8859-13)
;; Define the charset-type coding system `iso-latin-7' over the
;; iso-8859-13 charset (MIME name iso-8859-13), then make `iso-8859-13'
;; and `latin-7' aliases for it.
;; NOTE(review): embedded line 206 is absent from this copy.
203 (define-coding-system 'iso-latin-7
204 "ISO 2022 based 8-bit encoding for Latin-7 (MIME:ISO-8859-13)."
205 :coding-type 'charset
207 :charset-list '(iso-8859-13)
208 :mime-charset 'iso-8859-13)
210 (define-coding-system-alias 'iso-8859-13 'iso-latin-7)
211 (define-coding-system-alias 'latin-7 'iso-latin-7)
;; Register the `Latin-7' language environment: charset iso-8859-13,
;; coding system iso-latin-7.  No input method is set (see the Fixme).
;; NOTE(review): `Latin-7' is registered a second time further down in
;; this file (embedded line 533) with different values; presumably the
;; later call takes effect -- confirm which one is intended.
213 (set-language-info-alist
214 "Latin-7" '((charset iso-8859-13)
215 (coding-system iso-latin-7)
216 (coding-priority iso-latin-7)
217 (nonascii-translation . iso-8859-13)
218 (unibyte-display . iso-latin-7)
219 ;; Fixme: input method.
220 (documentation . "Support for generic Latin-7 (Baltic Rim)."))
223 ;; Latin-8 (ISO-8859-14)
;; Define the charset-type coding system `iso-latin-8' over the
;; iso-8859-14 charset (MIME name iso-8859-14), then make `iso-8859-14'
;; and `latin-8' aliases for it.
;; NOTE(review): embedded line 229 is absent from this copy, so the
;; keyword that the `W' remark below refers to is not visible here.
225 (define-coding-system 'iso-latin-8
226 "ISO 2022 based 8-bit encoding for Latin-8 (MIME:ISO-8859-14)."
227 :coding-type 'charset
228 ;; `W' for `Welsh', since `C' for `Celtic' is taken.
230 :charset-list '(iso-8859-14)
231 :mime-charset 'iso-8859-14)
233 (define-coding-system-alias 'iso-8859-14 'iso-latin-8)
234 (define-coding-system-alias 'latin-8 'iso-latin-8)
;; Register the `Latin-8' language environment: charset iso-8859-14,
;; coding system iso-latin-8, input method `latin-8-prefix'.
;; NOTE(review): the sample text is a run of escape sequences in this
;; copy, and embedded line 245 plus the end of the form are absent.
236 (set-language-info-alist
237 "Latin-8" '((charset iso-8859-14)
238 (coding-system iso-latin-8)
239 (coding-priority iso-latin-8)
240 (nonascii-translation . iso-8859-14)
241 (unibyte-display . iso-latin-8)
242 (input-method . "latin-8-prefix")
243 ;; Fixme: Welsh/Ga{e}lic greetings
244 (sample-text . "
\e,_"
\e(B
\e$(D+q
\e(B
\e$(D*t
\e(B")
246 This language environment is a generic one for the Latin-8 (ISO-8859-14)
247 character set which supports the Celtic languages, including those not
248 covered by other ISO-8859 character sets:
249 Welsh, Manx Gaelic and Irish Gaelic (old orthography)."))
252 ;; Latin-9 (ISO-8859-15)
;; Define the charset-type coding system `iso-latin-9' over the
;; iso-8859-15 charset (MIME name iso-8859-15), then make
;; `iso-8859-15', `latin-9' and `latin-0' aliases for it.
;; NOTE(review): embedded lines 257-258 are absent from this copy.
254 (define-coding-system 'iso-latin-9
255 "ISO 2022 based 8-bit encoding for Latin-9 (MIME:ISO-8859-15)."
256 :coding-type 'charset
259 :charset-list '(iso-8859-15)
260 :mime-charset 'iso-8859-15)
262 (define-coding-system-alias 'iso-8859-15 'iso-latin-9)
263 (define-coding-system-alias 'latin-9 'iso-latin-9)
264 (define-coding-system-alias 'latin-0 'iso-latin-9)
;; Register the `Latin-9' language environment: charset iso-8859-15,
;; coding system iso-latin-9, input method `latin-9-prefix'.
;; NOTE(review): embedded lines 273 and 275 and the end of the form
;; are absent, so the keys introducing the two text blocks below are
;; not visible in this copy.
266 (set-language-info-alist
267 "Latin-9" '((charset iso-8859-15)
268 (coding-system iso-latin-9)
269 (coding-priority iso-latin-9)
270 (nonascii-translation . iso-8859-15)
271 (unibyte-display . iso-latin-9)
272 (input-method . "latin-9-prefix")
274 . "AVE.
\e$(D*^+^*v+v)-)M*s
\e(B
\e$(Q)!
\e(B")
276 This language environment is a generic one for the Latin-9 (ISO-8859-15)
277 character set which supports the same languages as Latin-1 with the
278 addition of the Euro sign and some additional French and Finnish letters.
279 Latin-9 is sometimes nicknamed `Latin-0'."))
;; Charset-type coding systems for the Windows code pages 1250, 1252,
;; 1254 and 1257.  Each one is built on the charset of the same name
;; and gets a `cpNNNN' alias.
;; NOTE(review): one embedded line is absent from each definition
;; (285, 293, 301, 309) -- confirm against the upstream file.
282 (define-coding-system 'windows-1250
283 "windows-1250 (Central European) encoding (MIME: WINDOWS-1250)"
284 :coding-type 'charset
286 :charset-list '(windows-1250)
287 :mime-charset 'windows-1250)
288 (define-coding-system-alias 'cp1250 'windows-1250)
290 (define-coding-system 'windows-1252
291 "windows-1252 (Western European) encoding (MIME: WINDOWS-1252)"
292 :coding-type 'charset
294 :charset-list '(windows-1252)
295 :mime-charset 'windows-1252)
296 (define-coding-system-alias 'cp1252 'windows-1252)
298 (define-coding-system 'windows-1254
299 "windows-1254 (Turkish) encoding (MIME: WINDOWS-1254)"
300 :coding-type 'charset
302 :charset-list '(windows-1254)
303 :mime-charset 'windows-1254)
304 (define-coding-system-alias 'cp1254 'windows-1254)
306 (define-coding-system 'windows-1257
307 "windows-1257 (Baltic) encoding (MIME: WINDOWS-1257)"
308 :coding-type 'charset
310 :charset-list '(windows-1257)
311 :mime-charset 'windows-1257)
312 (define-coding-system-alias 'cp1257 'windows-1257)
;; Charset-type coding systems for the DOS code pages 850, 852, 857,
;; 858, 860, 861, 863, 865 and 437.  Each one is built on the charset
;; of the same name.  All except cp858 also get an `ibmNNN' alias.
;; NOTE(review): one embedded line is absent from each definition
;; (317, 325, ...), and cp437 additionally lacks its description line
;; (378) in this copy -- confirm against the upstream file.
314 (define-coding-system 'cp850
315 "DOS codepage 850 (Western European)"
316 :coding-type 'charset
318 :charset-list '(cp850)
319 :mime-charset 'cp850)
320 (define-coding-system-alias 'ibm850 'cp850)
322 (define-coding-system 'cp852
323 "DOS codepage 852 (Slavic)"
324 :coding-type 'charset
326 :charset-list '(cp852)
327 :mime-charset 'cp852)
328 (define-coding-system-alias 'ibm852 'cp852)
330 (define-coding-system 'cp857
331 "DOS codepage 857 (Turkish)"
332 :coding-type 'charset
334 :charset-list '(cp857)
335 :mime-charset 'cp857)
336 (define-coding-system-alias 'ibm857 'cp857)
338 (define-coding-system 'cp858
339 "Codepage 858 (Multilingual Latin I + Euro)"
340 :coding-type 'charset
342 :charset-list '(cp858)
343 :mime-charset 'cp858)
345 (define-coding-system 'cp860
346 "DOS codepage 860 (Portuguese)"
347 :coding-type 'charset
349 :charset-list '(cp860)
350 :mime-charset 'cp860)
351 (define-coding-system-alias 'ibm860 'cp860)
353 (define-coding-system 'cp861
354 "DOS codepage 861 (Icelandic)"
355 :coding-type 'charset
357 :charset-list '(cp861)
358 :mime-charset 'cp861)
359 (define-coding-system-alias 'ibm861 'cp861)
361 (define-coding-system 'cp863
362 "DOS codepage 863 (French Canadian)"
363 :coding-type 'charset
365 :charset-list '(cp863)
366 :mime-charset 'cp863)
367 (define-coding-system-alias 'ibm863 'cp863)
369 (define-coding-system 'cp865
370 "DOS codepage 865 (Norwegian/Danish)"
371 :coding-type 'charset
373 :charset-list '(cp865)
374 :mime-charset 'cp865)
375 (define-coding-system-alias 'ibm865 'cp865)
377 (define-coding-system 'cp437
379 :coding-type 'charset
381 :charset-list '(cp437)
382 :mime-charset 'cp437)
383 (define-coding-system-alias 'ibm437 'cp437)
;; Register the `Dutch' language environment: tutorial TUTORIAL.nl,
;; coding systems iso-latin-1 and iso-latin-9 (iso-latin-1 has
;; priority), input method `dutch'.
;; NOTE(review): embedded lines 387 and 394 and the end of the form
;; are absent from this copy.
385 (set-language-info-alist
386 "Dutch" '((tutorial . "TUTORIAL.nl")
388 (coding-system iso-latin-1 iso-latin-9)
389 (coding-priority iso-latin-1)
390 (nonascii-translation . iso-8859-1)
391 (unibyte-display . iso-latin-1)
392 (input-method . "dutch")
393 (sample-text . "Er is een aantal manieren waarop je dit kan doen")
395 This language environment is almost the same as Latin-1,
396 but it selects the Dutch tutorial and input method."))
;; Register the `German' language environment: tutorial TUTORIAL.de,
;; coding systems iso-latin-1 and iso-latin-9 (iso-latin-1 has
;; priority), input method `german-postfix'.
;; NOTE(review): embedded lines 401, 407 and 410 and the end of the
;; form are absent, so the keys introducing the greeting and the
;; description text are not visible in this copy.
399 (set-language-info-alist
400 "German" '((tutorial . "TUTORIAL.de")
402 (coding-system iso-latin-1 iso-latin-9)
403 (coding-priority iso-latin-1)
404 (nonascii-translation . iso-8859-1)
405 (input-method . "german-postfix")
406 (unibyte-display . iso-latin-1)
408 German (Deutsch Nord) Guten Tag
409 German (Deutsch S
\e$(D+d
\e(Bd) Gr
\e$(D+d)N
\e(B Gott")
411 This language environment is almost the same as Latin-1,
412 but sets the default input method to \"german-postfix\".
413 Additionally, it selects the German tutorial."))
;; Register the `French' language environment: tutorial TUTORIAL.fr,
;; coding systems iso-latin-1 and iso-latin-9 (iso-latin-1 has
;; priority), input method `latin-1-prefix'.
;; NOTE(review): embedded lines 418 and 425 and the end of the form
;; are absent from this copy.
416 (set-language-info-alist
417 "French" '((tutorial . "TUTORIAL.fr")
419 (coding-system iso-latin-1 iso-latin-9)
420 (coding-priority iso-latin-1)
421 (nonascii-translation . iso-8859-1)
422 (unibyte-display . iso-latin-1)
423 (input-method . "latin-1-prefix")
424 (sample-text . "French (Fran
\e$(D+.
\e(Bais) Bonjour, Salut")
426 This language environment is almost the same as Latin-1,
427 but it selects the French tutorial and input method."))
;; Register the `Italian' language environment: tutorial TUTORIAL.it,
;; coding systems iso-latin-1 and iso-latin-9 (iso-latin-1 has
;; priority), input method `italian-postfix'.
;; NOTE(review): embedded lines 432 and 439 and the end of the form
;; are absent from this copy.
430 (set-language-info-alist
431 "Italian" '((tutorial . "TUTORIAL.it")
433 (coding-system iso-latin-1 iso-latin-9)
434 (coding-priority iso-latin-1)
435 (nonascii-translation . iso-8859-1)
436 (unibyte-display . iso-latin-1)
437 (input-method . "italian-postfix")
438 (sample-text . "Salve, ciao!")
440 This language environment is almost the same as Latin-1,
441 but sets the default input method to \"italian-postfix\".
442 Additionally, it selects the Italian tutorial."))
;; Register the `Slovenian' language environment: charset iso-8859-2,
;; coding systems iso-8859-2 and windows-1250 (iso-8859-2 has
;; priority), input method `slovenian', tutorial TUTORIAL.sl.
;; The coding-system and coding-priority entries use dotted-pair
;; notation, which reads as the same list the other entries spell out.
;; NOTE(review): embedded line 454 and the end of the form are absent
;; from this copy.
445 (set-language-info-alist
446 "Slovenian" '((charset iso-8859-2)
447 (coding-system . (iso-8859-2 windows-1250))
448 (coding-priority . (iso-8859-2))
449 (nonascii-translation . iso-8859-2)
450 (input-method . "slovenian")
451 (unibyte-display . iso-8859-2)
452 (tutorial . "TUTORIAL.sl")
453 (sample-text . "
\e$(D*v
\e(Belimo vam uspe
\e$(D+^
\e(Ben dan!")
455 This language environment is almost the same as Latin-2,
456 but it selects the Slovenian tutorial and input method."))
;; Register the `Spanish' language environment: tutorial TUTORIAL.es,
;; coding systems iso-latin-1 and iso-latin-9 (iso-latin-1 has
;; priority), input method `spanish-postfix'.
;; NOTE(review): embedded lines 461 and 468 and the end of the form
;; are absent from this copy.
459 (set-language-info-alist
460 "Spanish" '((tutorial . "TUTORIAL.es")
462 (coding-system iso-latin-1 iso-latin-9)
463 (coding-priority iso-latin-1)
464 (input-method . "spanish-postfix")
465 (nonascii-translation . iso-8859-1)
466 (unibyte-display . iso-latin-1)
467 (sample-text . "Spanish (Espa
\e$(D+P
\e(Bol)
\e$(D"B
\e(BHola!")
469 This language environment is almost the same as Latin-1,
470 but it sets the default input method to \"spanish-postfix\",
471 and it selects the Spanish tutorial."))
474 ;; For Turkish, the character set ISO-8859-9 (Latin-5) is used. But,
475 ;; before the introduction of ISO-8859-9 in 1988, ISO-8859-3 (Latin-3)
476 ;; was used for Turkish. Those who use Latin-3 for Turkish should use
477 ;; "Latin-3" language environment.
;; Register the `Turkish' language environment: charset iso-8859-9,
;; coding systems iso-latin-5, windows-1254 and iso-latin-3
;; (iso-latin-5 has priority), input method `turkish-postfix'.
;; The visible `set-case-syntax-pair' and `set-case-syntax' calls
;; change entries in the standard case table.
;; NOTE(review): the entries that wrap those calls (embedded lines
;; 487-488 and 491-492) are absent from this copy; presumably one
;; group applies the Turkish case rules and the other restores the
;; defaults -- confirm against the upstream file.
479 (set-language-info-alist
480 "Turkish" '((charset iso-8859-9)
481 (coding-system iso-latin-5 windows-1254 iso-latin-3)
482 (coding-priority iso-latin-5)
483 (nonascii-translation . iso-8859-9)
484 (unibyte-display . iso-latin-5)
485 (input-method . "turkish-postfix")
486 (sample-text . "Turkish (T
\e$(D+d
\e(Brk
\e$(D+.
\e(Be) Merhaba")
489 (set-case-syntax-pair ?I ?
\e$(D)E
\e(B (standard-case-table))
490 (set-case-syntax-pair ?
\e$(D*D
\e(B ?i (standard-case-table))))
493 (set-case-syntax-pair ?I ?i (standard-case-table))
494 (set-case-syntax ?
\e$(D)E
\e(B "w" (standard-case-table))
495 (set-case-syntax ?
\e$(D*D
\e(B "w" (standard-case-table))))
496 (documentation . "Support for Turkish.
497 Differs from the Latin-5 environment in using the `turkish-postfix' input
498 method and applying Turkish case rules for the characters i, I,
\e$(D)E
\e(B,
\e$(D*D
\e(B.")))
500 ;; Polish ISO 8859-2 environment.
501 ;; Maintainer: Wlodek Bzyl <matwb@univ.gda.pl>
502 ;; Keywords: multilingual, Polish
;; Register the `Polish' language environment: charset iso-8859-2,
;; coding systems iso-8859-2 and windows-1250 (iso-8859-2 has
;; priority), input method `polish-slash', tutorial TUTORIAL.pl.
;; NOTE(review): the embedded numbering jumps from 512 to 516, so
;; whatever follows the sample text, including the end of the form, is
;; absent from this copy.
504 (set-language-info-alist
505 "Polish" '((charset iso-8859-2)
506 (coding-system iso-8859-2 windows-1250)
507 (coding-priority iso-8859-2)
508 (input-method . "polish-slash")
509 (nonascii-translation . iso-8859-2)
510 (unibyte-display . iso-8859-2)
511 (tutorial . "TUTORIAL.pl")
512 (sample-text . "P
\e$(D+Q
\e(Bjd
\e$(D+u
\e(B, ki
\e$(D+M
\e(B-
\e$(D+w
\e(Be t
\e$(D+8
\e(B chmurno
\e$(D+\++
\e(B w g
\e$(D)H+(
\e(Bb flaszy")
;; Register the `Welsh' language environment: coding systems utf-8 and
;; latin-8 with utf-8 first in the priority list, input method
;; `welsh'.  The list is backquoted but contains no unquoted element.
;; NOTE(review): the end of the form is absent from this copy.
516 (set-language-info-alist
517 "Welsh" `((coding-system utf-8 latin-8) ; the input method is Unicode-based
518 (coding-priority utf-8 latin-8)
519 (nonascii-translation . iso-8859-14)
520 (input-method . "welsh")
521 (documentation . "Support for Welsh, using Unicode."))
;; Second registration of `Latin-6' in this file: coding system
;; latin-6, input method `latin-prefix', feature `code-pages'.
;; Because the list is backquoted, the `get' call for the
;; nonascii-translation entry is evaluated when this form is loaded.
;; NOTE(review): an earlier form in this file (embedded line 190) also
;; registers `Latin-6', using the charset name iso-8859-10 for this
;; entry; presumably one of the two is stale -- confirm which is
;; intended.  The end of the form is absent from this copy.
524 (set-language-info-alist
525 "Latin-6" `((coding-system latin-6)
526 (coding-priority latin-6)
527 (nonascii-translation . ,(get 'decode-iso-latin-6 'translation-table))
528 (input-method . "latin-prefix")
529 (features code-pages)
530 (documentation . "Support for Latin-6."))
;; Second registration of `Latin-7' in this file: coding system
;; latin-7, input method `latin-prefix'.
;; NOTE(review): an earlier form in this file (embedded line 213) also
;; registers `Latin-7' and additionally sets charset and
;; unibyte-display; presumably one of the two is stale -- confirm
;; which is intended.  The end of the form is absent from this copy.
533 (set-language-info-alist
534 "Latin-7" `((coding-system latin-7)
535 (coding-priority latin-7)
536 (nonascii-translation . iso-8859-13)
537 (input-method . "latin-prefix")
538 (documentation . "Support for Latin-7, e.g. Latvian, Lithuanian."))
;; Register the `Lithuanian' and `Latvian' language environments.
;; Both use coding systems latin-7 and windows-1257 (latin-7 has
;; priority) and iso-8859-13 for nonascii-translation.  They differ
;; in the input method (`lithuanian-keyboard' / `latvian-keyboard')
;; and the description.
;; NOTE(review): the end of each form is absent from this copy.
541 (set-language-info-alist
542 "Lithuanian" `((coding-system latin-7 windows-1257)
543 (coding-priority latin-7)
544 (nonascii-translation . iso-8859-13)
545 (input-method . "lithuanian-keyboard")
546 (documentation . "Support for Lithuanian."))
549 (set-language-info-alist
550 "Latvian" `((coding-system latin-7 windows-1257)
551 (coding-priority latin-7)
552 (nonascii-translation . iso-8859-13)
553 (input-method . "latvian-keyboard")
554 (documentation . "Support for Latvian."))
;; Register the `Swedish' language environment: tutorial TUTORIAL.sv,
;; coding system iso-latin-1.  No input method entry is visible.
;; NOTE(review): embedded line 559 and the end of the form are absent
;; from this copy.
557 (set-language-info-alist
558 "Swedish" '((tutorial . "TUTORIAL.sv")
560 (coding-system iso-latin-1)
561 (coding-priority iso-latin-1)
562 (nonascii-translation . iso-8859-1)
563 (unibyte-display . iso-latin-1)
564 (sample-text . "Goddag Hej")
565 (documentation . "Support for Swedish"))
;; Register the `Croatian' language environment: charset iso-8859-2,
;; coding system iso-8859-2, input method `croatian'.
;; NOTE(review): the end of the form is absent from this copy.
568 (set-language-info-alist
569 "Croatian" '((charset iso-8859-2)
570 (coding-system iso-8859-2)
571 (coding-priority iso-8859-2)
572 (input-method . "croatian")
573 (nonascii-translation . iso-8859-2)
574 (unibyte-display . iso-8859-2)
575 (documentation . "Support for Croatian with Latin-2 encoding."))
;; Register the `Brazilian Portuguese' language environment: tutorial
;; TUTORIAL.pt_BR, coding systems iso-latin-1 and iso-latin-9
;; (iso-latin-1 has priority), input method `latin-1-prefix'.
;; NOTE(review): embedded lines 580 and 586 and the end of the form
;; are absent from this copy.
578 (set-language-info-alist
579 "Brazilian Portuguese" '((tutorial . "TUTORIAL.pt_BR")
581 (coding-system iso-latin-1 iso-latin-9)
582 (coding-priority iso-latin-1)
583 (nonascii-translation . iso-8859-1)
584 (unibyte-display . iso-8859-1)
585 (input-method . "latin-1-prefix")
587 (documentation . "Support for Brazilian Portuguese."))
;; Charset-type coding systems `mac-roman' (MIME name macintosh) and
;; `next', each built on the charset of the same name.
;; NOTE(review): the `next' definition lacks its description line and
;; its end in this copy (embedded lines 599, 601, 603-604 are absent),
;; and `mac-roman' lacks line 594 -- confirm against upstream.
591 (define-coding-system 'mac-roman
592 "Mac Roman Encoding (MIME:MACINTOSH)."
593 :coding-type 'charset
595 :charset-list '(mac-roman)
596 :mime-charset 'macintosh)
598 (define-coding-system 'next
600 :coding-type 'charset
602 :charset-list '(next)
;; Define the charset-type coding system `hp-roman8' over the charset
;; of the same name (MIME name hp-roman8), with `roman8' as an alias.
;; The description previously misspelled the vendor name as
;; `Hewlet-Packard'; it now reads `Hewlett-Packard'.
;; NOTE(review): embedded line 608 is absent from this copy and is
;; left as found.
605 (define-coding-system 'hp-roman8
606 "Hewlett-Packard roman-8 encoding (MIME:ROMAN-8)"
607 :coding-type 'charset
609 :charset-list '(hp-roman8)
610 :mime-charset 'hp-roman8)
611 (define-coding-system-alias 'roman8 'hp-roman8)
;; Define the charset-type coding system `adobe-standard-encoding'
;; over the charset of the same name; the MIME name is the same symbol.
;; NOTE(review): embedded line 616 is absent from this copy.
613 (define-coding-system 'adobe-standard-encoding
614 "Adobe `standard' encoding for PostScript"
615 :coding-type 'charset
617 :charset-list '(adobe-standard-encoding)
618 :mime-charset 'adobe-standard-encoding)
621 ;; For automatic composing of diacritics and combining marks.
;; Walk a table of code-point ranges.  For each entry, FROM is the
;; first element and COMPOSITION is the last element (a cons such as
;; (tc . bc)).  The visible body stores COMPOSITION as the
;; `diacritic-composition' property of FROM, installs
;; `diacritic-composition-function' for FROM in
;; `composition-function-table', and then increments FROM.
;; NOTE(review): several embedded lines are absent (e.g. 624, 628-629,
;; 653-654, 656), including some table rows, the rest of the TO
;; computation and the looping construct around the last three forms;
;; presumably they iterate FROM up to TO -- confirm upstream.
622 (dolist (range '( ;; combining diacritical marks
623 (#x0300 #x0314 (tc . bc))
625 (#x0316 #x0319 (bc . tc))
627 (#x031B #x0320 (bc . tc))
630 (#x0323 #x0333 (bc . tc))
631 (#x0334 #x0338 (Bc . Bc))
632 (#x0339 #x033C (bc . tc))
633 (#x033D #x033F (tc . bc))
636 (#x0342 #x0344 (tc . bc))
639 (#x0347 #x0349 (bc . tc))
640 (#x034A #x034C (tc . bc))
641 (#x034D #x034E (bc . tc))
642 ;; combining diacritical marks for symbols
643 (#x20D0 #x20D1 (tc . bc))
644 (#x20D2 #x20D3 (Bc . Bc))
645 (#x20D4 #x20D7 (tc . bc))
646 (#x20D8 #x20DA (Bc . Bc))
647 (#x20DB #x20DC (tc . bc))
648 (#x20DD #x20E0 (Bc . Bc))
650 (#x20E2 #x20E3 (Bc . Bc))))
651 (let* ((from (car range))
652 (to (if (= (length range) 3)
655 (composition (car (last range))))
657 (put-char-code-property from 'diacritic-composition composition)
658 (aset composition-function-table from 'diacritic-composition-function)
659 (setq from (1+ from)))))
;; Regexp used by the diacritic-compose functions in this file: one
;; character outside category `^' followed by one or more characters
;; in category `^'.
;; NOTE(review): category `^' presumably denotes combining marks --
;; confirm against the category table.
661 (defconst diacritic-composition-pattern "\\C^\\c^+")
;; Narrow to BEG..END, start from the beginning of that region, and
;; for every match of `diacritic-composition-pattern' whose first
;; character has word syntax, compose the matched text.
;; NOTE(review): embedded lines 667-668 are absent from this copy;
;; the closing parens suggest wrapper forms that are not visible here.
663 (defun diacritic-compose-region (beg end)
664 "Compose diacritic characters in the region.
665 When called from a program, expects two arguments,
666 positions (integers or markers) specifying the region."
669 (narrow-to-region beg end)
670 (goto-char (point-min))
671 (while (re-search-forward diacritic-composition-pattern nil t)
672 (if (= (char-syntax (char-after (match-beginning 0))) ?w)
673 (compose-region (match-beginning 0) (match-end 0))))))
;; Scan STRING for matches of `diacritic-composition-pattern',
;; resuming each search at the end of the previous match, and compose
;; every match whose first character has word syntax.
;; NOTE(review): embedded line 677 (presumably the binding of `idx')
;; and the lines after 681 are absent from this copy.
675 (defun diacritic-compose-string (string)
676 "Compose diacritic characters in STRING and return the resulting string."
678 (while (setq idx (string-match diacritic-composition-pattern string idx))
679 (if (= (char-syntax (aref string idx)) ?w)
680 (compose-string string idx (match-end 0)))
681 (setq idx (match-end 0))))
;; Convenience wrapper: run `diacritic-compose-region' over the whole
;; accessible portion of the buffer (point-min to point-max).
;; NOTE(review): embedded line 686 is absent from this copy.
684 (defun diacritic-compose-buffer ()
685 "Compose diacritic characters in the current buffer."
687 (diacritic-compose-region (point-min) (point-max)))
;; Composition callback that this file stores in
;; `composition-function-table' for the combining marks it registers.
;; It has two parallel branches, one reading characters from STRING
;; and one reading them from the buffer.
;; Fix: the `let' in the string branch listed `ch' twice, once bound
;; to the character at POS and once as a bare variable.  The later,
;; bare binding takes effect, so `ch' was nil when handed to
;; `char-syntax'.  The duplicate is removed, which matches the binding
;; list of the buffer branch.
;; NOTE(review): many embedded lines are absent from this copy (e.g.
;; 692-693, 696-698, 702-707, 720-725, 732-735); only the duplicated
;; binding is changed and every other visible line is left as found.
689 (defun diacritic-composition-function (pos &optional string)
690 "Compose diacritic text around POS.
691 Optional 2nd argument STRING, if non-nil, is a string containing text
694 The return value is the end position of composed characters,
695 or nil if no characters are composed."
699 (let ((ch (aref string pos))
700 start end components composition)
701 (when (= (char-syntax ch) ?w)
708 (setq ch (aref string pos)
710 (get-char-code-property ch
711 'diacritic-composition)))
712 (setq components (cons ch (cons composition components))
714 (compose-string string start pos (nreverse components))
716 (if (>= pos (point-min))
717 (let ((ch (char-after pos))
718 start end components composition)
719 (when (= (char-syntax ch) ?w)
726 (setq ch (char-after pos)
728 (get-char-code-property ch 'diacritic-composition)))
729 (setq components (cons ch (cons composition components))
731 (compose-region start pos (nreverse components))
736 ;;; arch-tag: 9e018b12-fb02-4120-907b-9adeaf84b5c2
737 ;;; european.el ends here