X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/edc64f42cd58b8f33204a3bf7a0323e8e7ab2a75..db8d59365b5ccdca367ace2d4df5b8a2242e5765:/lisp/language/european.el diff --git a/lisp/language/european.el b/lisp/language/european.el index 7944b2b001..b070fe7560 100644 --- a/lisp/language/european.el +++ b/lisp/language/european.el @@ -1,8 +1,10 @@ ;;; european.el --- support for European languages -*- coding: iso-2022-7bit; -*- -;; Copyright (C) 1995, 1997, 2001 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. -;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. +;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004 +;; Free Software Foundation, Inc. +;; Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H14PRO021 ;; Keywords: multilingual, European @@ -20,33 +22,22 @@ ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. ;;; Commentary: -;; For European scripts, character sets ISO8859-1,2,3,4,9,14,15 are -;; supported. +;; For European scripts, all the ISO Latin character sets are +;; supported, along with various others. ;;; Code: ;; Latin-1 (ISO-8859-1) -(make-coding-system - 'iso-latin-1 2 ?1 - "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)." - '(ascii latin-iso8859-1 nil nil - nil nil nil nil nil nil nil nil nil nil nil nil t) - '((safe-charsets ascii latin-iso8859-1) - (mime-charset . iso-8859-1))) - -(define-coding-system-alias 'iso-8859-1 'iso-latin-1) -(define-coding-system-alias 'latin-1 'iso-latin-1) - (set-language-info-alist "Latin-1" '((charset ascii latin-iso8859-1) (coding-system iso-latin-1) - (coding-priority iso-latin-1) + (coding-priority iso-latin-1 windows-1252) (nonascii-translation . latin-iso8859-1) (unibyte-syntax . "latin-1") (unibyte-display . iso-latin-1) @@ -64,13 +55,31 @@ character set which supports the following European languages: We also have specific language environments for the following languages: For Dutch, \"Dutch\". For German, \"German\". - For Spanish, \"Spanish\". For French, \"French\". + For Italian, \"Italian\". + For Slovenian, \"Slovenian\". + For Spanish, \"Spanish\". Latin-1 also covers several written languages outside Europe, including Indonesian/Malay, Tagalog (Philippines), Swahili and Afrikaans.")) '("European")) +(eval-and-compile + (setq + non-iso-charset-alist + (cp-make-coding-system + windows-1252 + [?\$,1tL(B nil ?\$,1rz(B ?\$,1!R(B ?\$,1r~(B ?\$,1s&(B ?\$,1s (B ?\$,1s!(B ?\$,1$f(B ?\$,1s0(B ?\$,1! (B ?\$,1s9(B ?\$,1 r(B nil ?\$,1!=(B nil nil + ?\$,1rx(B ?\$,1ry(B ?\$,1r|(B ?\$,1r}(B ?\$,1s"(B ?\$,1rs(B ?\$,1rt(B ?\$,1$|(B ?\$,1ub(B ?\$,1!!(B ?\$,1s:(B ?\$,1 s(B nil ?\$,1!>(B ?\$,1!8(B ?\,A (B ?\,A!(B + ?\,A"(B ?\,A#(B ?\,A$(B ?\,A%(B ?\,A&(B ?\,A'(B ?\,A((B ?\,A)(B ?\,A*(B ?\,A+(B ?\,A,(B ?\,A-(B ?\,A.(B ?\,A/(B ?\,A0(B ?\,A1(B ?\,A2(B + ?\,A3(B ?\,A4(B ?\,A5(B ?\,A6(B ?\,A7(B ?\,A8(B ?\,A9(B ?\,A:(B ?\,A;(B ?\,A<(B ?\,A=(B ?\,A>(B ?\,A?(B ?\,A@(B ?\,AA(B ?\,AB(B ?\,AC(B + ?\,AD(B ?\,AE(B ?\,AF(B ?\,AG(B ?\,AH(B ?\,AI(B ?\,AJ(B ?\,AK(B ?\,AL(B ?\,AM(B ?\,AN(B ?\,AO(B ?\,AP(B ?\,AQ(B ?\,AR(B ?\,AS(B ?\,AT(B + ?\,AU(B ?\,AV(B ?\,AW(B ?\,AX(B ?\,AY(B ?\,AZ(B ?\,A[(B ?\,A\(B ?\,A](B ?\,A^(B ?\,A_(B ?\,A`(B ?\,Aa(B ?\,Ab(B ?\,Ac(B ?\,Ad(B ?\,Ae(B + ?\,Af(B ?\,Ag(B ?\,Ah(B ?\,Ai(B ?\,Aj(B ?\,Ak(B ?\,Al(B ?\,Am(B ?\,An(B ?\,Ao(B ?\,Ap(B ?\,Aq(B ?\,Ar(B ?\,As(B ?\,At(B ?\,Au(B ?\,Av(B + ?\,Aw(B ?\,Ax(B ?\,Ay(B ?\,Az(B ?\,A{(B ?\,A|(B ?\,A}(B ?\,A~(B ?\,A(B]))) + +(define-coding-system-alias 'cp1252 'windows-1252) + ;; Latin-2 (ISO-8859-2) @@ -78,7 +87,7 @@ Indonesian/Malay, Tagalog (Philippines), Swahili and Afrikaans.")) 'iso-latin-2 2 ?2 "ISO 2022 based 8-bit encoding for Latin-2 (MIME:ISO-8859-2)." '(ascii latin-iso8859-2 nil nil - nil nil nil nil nil nil nil) + nil nil nil nil nil nil nil nil nil nil nil t) '((safe-charsets ascii latin-iso8859-2) (mime-charset . iso-8859-2))) @@ -101,6 +110,7 @@ character set which supports the following languages: and Swedish. We also have specific language environments for the following languages: For Czech, \"Czech\". + For Croatian, \"Croatian\". For Romanian, \"Romanian\". For Slovak, \"Slovak\".")) '("European")) @@ -112,7 +122,7 @@ We also have specific language environments for the following languages: 'iso-latin-3 2 ?3 "ISO 2022 based 8-bit encoding for Latin-3 (MIME:ISO-8859-3)." '(ascii latin-iso8859-3 nil nil - nil nil nil nil nil nil nil) + nil nil nil nil nil nil nil nil nil nil nil t) '((safe-charsets ascii latin-iso8859-3) (mime-charset . iso-8859-3))) @@ -140,7 +150,7 @@ These languages are supported with the Latin-3 (ISO-8859-3) character set: 'iso-latin-4 2 ?4 "ISO 2022 based 8-bit encoding for Latin-4 (MIME:ISO-8859-4)." '(ascii latin-iso8859-4 nil nil - nil nil nil nil nil nil nil) + nil nil nil nil nil nil nil nil nil nil nil t) '((safe-charsets ascii latin-iso8859-4) (mime-charset . iso-8859-4))) @@ -168,7 +178,7 @@ These languages are supported with the Latin-4 (ISO-8859-4) character set: 'iso-latin-5 2 ?9 "ISO 2022 based 8-bit encoding for Latin-5 (MIME:ISO-8859-9)." '(ascii latin-iso8859-9 nil nil - nil nil nil nil nil nil nil) + nil nil nil nil nil nil nil nil nil nil nil t) '((safe-charsets ascii latin-iso8859-9) (mime-charset . iso-8859-9))) @@ -194,7 +204,7 @@ These languages are supported with the Latin-4 (ISO-8859-4) character set: ; for `Celtic' is taken. "ISO 2022 based 8-bit encoding for Latin-8 (MIME:ISO-8859-14)." '(ascii latin-iso8859-14 nil nil - nil nil nil nil nil nil nil nil nil nil nil nil t) + nil nil nil nil nil nil nil nil nil nil nil t t) '((safe-charsets ascii latin-iso8859-14) (mime-charset . iso-8859-14))) @@ -224,7 +234,7 @@ covered by other ISO-8859 character sets: 'iso-latin-9 2 ?0 ; `0' for `Latin-0' "ISO 2022 based 8-bit encoding for Latin-9 (MIME:ISO-8859-15)." '(ascii latin-iso8859-15 nil nil - nil nil nil nil nil nil nil nil nil nil nil nil t) + nil nil nil nil nil nil nil nil nil nil nil t t) '((safe-charsets ascii latin-iso8859-15) (mime-charset . iso-8859-15))) @@ -249,13 +259,28 @@ addition of the Euro sign and some additional French and Finnish letters. Latin-9 is sometimes nicknamed `Latin-0'.")) '("European")) +(set-language-info-alist + "Dutch" '((tutorial . "TUTORIAL.nl") + (charset ascii latin-iso8859-1) + (coding-system iso-latin-1 iso-latin-9) + (coding-priority iso-latin-1) + (nonascii-translation . latin-iso8859-1) + (unibyte-syntax . "latin-1") + (unibyte-display . iso-latin-1) + (input-method . "dutch") + (sample-text . "Er is een aantal manieren waarop je dit kan doen") + (documentation . "\ +This language environment is almost the same as Latin-1, +but it selects the Dutch tutorial and input method.")) + '("European")) + (set-language-info-alist "German" '((tutorial . "TUTORIAL.de") (charset ascii latin-iso8859-1) (coding-system iso-latin-1 iso-latin-9) - (coding-priority iso-latin-1) + (coding-priority iso-latin-1 windows-1252) (input-method . "german-postfix") - (nonascii-translation . iso-latin-1) + (nonascii-translation . latin-iso8859-1) (unibyte-syntax . "latin-1") (unibyte-display . iso-latin-1) (sample-text . "\ @@ -279,7 +304,23 @@ Additionally, it selects the German tutorial.")) (sample-text . "French (Fran,Ag(Bais) Bonjour, Salut") (documentation . "\ This language environment is almost the same as Latin-1, -but it selects the French tutorial.")) +but it selects the French tutorial and input method.")) + '("European")) + +(set-language-info-alist + "Italian" '((tutorial . "TUTORIAL.it") + (charset ascii latin-iso8859-1) + (coding-system iso-latin-1 iso-latin-9) + (coding-priority iso-latin-1) + (nonascii-translation . latin-iso8859-1) + (unibyte-syntax . "latin-1") + (unibyte-display . iso-latin-1) + (input-method . "italian-postfix") + (sample-text . "Salve, ciao!") + (documentation . "\ +This language environment is almost the same as Latin-1, +but sets the default input method to \"italian-postfix\". +Additionally, it selects the Italian tutorial.")) '("European")) (set-language-info-alist @@ -287,12 +328,14 @@ but it selects the French tutorial.")) (coding-system . (iso-8859-2)) (coding-priority . (iso-8859-2)) (nonascii-translation . latin-iso8859-2) - (input-method . "latin-2-postfix") + (input-method . "slovenian") (unibyte-syntax . "latin-2") (unibyte-display . iso-8859-2) (tutorial . "TUTORIAL.sl") (sample-text . ",B.(Belimo vam uspe,B9(Ben dan!") - (documentation . t)) + (documentation . "\ +This language environment is almost the same as Latin-2, +but it selects the Slovenian tutorial and input method.")) '("European")) (set-language-info-alist @@ -301,7 +344,7 @@ but it selects the French tutorial.")) (coding-system iso-latin-1 iso-latin-9) (coding-priority iso-latin-1) (input-method . "spanish-postfix") - (nonascii-translation . iso-latin-1) + (nonascii-translation . latin-iso8859-1) (unibyte-syntax . "latin-1") (unibyte-display . iso-latin-1) (sample-text . "Spanish (Espa,Aq(Bol) ,A!(BHola!") @@ -311,21 +354,6 @@ but it sets the default input method to \"spanish-postfix\", and it selects the Spanish tutorial.")) '("European")) -(set-language-info-alist - "Dutch" '((tutorial . "TUTORIAL.nl") - (charset ascii latin-iso8859-1) - (coding-system iso-latin-1 iso-latin-9) - (coding-priority iso-latin-1) - (nonascii-translation . iso-latin-1) - (unibyte-syntax . "latin-1") - (unibyte-display . iso-latin-1) - (input-method . "dutch") - (sample-text . "Er is een aantal manieren waarop je dit kan doen") - (documentation . "\ -This language environment is almost the same as Latin-1, -but it selects the Dutch tutorial and input method.")) - '("European")) - ;; For Turkish, the character set ISO-8859-9 (Latin-5) is used. But, ;; before the introduction of ISO-8859-9 in 1988, ISO-8859-3 (Latin-3) ;; was used for Turkish. Those who use Latin-3 for Turkish should use @@ -362,16 +390,26 @@ but it selects the Dutch tutorial and input method.")) (set-language-info-alist "Welsh" `((coding-system utf-8 latin-8) ; the input method is Unicode-based (coding-priority utf-8 latin-8) - (nonascii-translation . iso-8859-14) + (nonascii-translation . latin-iso8859-14) (input-method . "welsh") (documentation . "Support for Welsh, using Unicode.")) '("European")) +(set-language-info-alist + "Latin-6" `((coding-system latin-6) + (coding-priority latin-6) + (nonascii-translation . ,(get 'decode-iso-latin-6 'translation-table)) + (input-method . "latin-prefix") + (features code-pages) + (documentation . "Support for Latin-6.")) + '("European")) + (set-language-info-alist "Latin-7" `((coding-system latin-7) (coding-priority latin-7) - (nonascii-translation . iso-8859-13) - ;; Fixme: input-method + (nonascii-translation . ,(get 'decode-iso-latin-7 + 'translation-table)) + (input-method . "latin-prefix") (features code-pages) (documentation . "Support for Latin-7, e.g. Latvian, Lithuanian.")) '("European")) @@ -380,6 +418,8 @@ but it selects the Dutch tutorial and input method.")) "Lithuanian" `((coding-system latin-7) (coding-priority latin-7) (input-method . "lithuanian-keyboard") + (nonascii-translation . ,(get 'decode-iso-latin-7 + 'translation-table)) (features code-pages) (documentation . "Support for Lithuanian.")) '("European")) @@ -388,9 +428,47 @@ but it selects the Dutch tutorial and input method.")) "Latvian" `((coding-system latin-7) (coding-priority latin-7) (input-method . "latvian-keyboard") + (nonascii-translation . ,(get 'decode-iso-latin-7 + 'translation-table)) (features code-pages) (documentation . "Support for Latvian.")) '("European")) + +(set-language-info-alist + "Swedish" '((tutorial . "TUTORIAL.sv") + (charset ascii latin-iso8859-1) + (coding-system iso-latin-1) + (coding-priority iso-latin-1) + (nonascii-translation . latin-iso8859-1) + (unibyte-syntax . "latin-1") + (unibyte-display . iso-latin-1) + (sample-text . "Goddag Hej") + (documentation . "Support for Swedish")) + '("European")) + +(set-language-info-alist + "Croatian" '((charset . (ascii latin-iso8859-2)) + (coding-system . (iso-8859-2)) + (coding-priority . (iso-8859-2)) + (input-method . "croatian") + (nonascii-translation . latin-iso8859-2) + (unibyte-syntax . "latin-2") + (unibyte-display . iso-8859-2) + (documentation . "Support for Croatian with Latin-2 encoding.")) + '("European")) + +(set-language-info-alist + "Brazilian Portuguese" '((tutorial . "TUTORIAL.pt_BR") + (charset ascii latin-iso8859-1) + (coding-system iso-latin-1 iso-latin-9) + (coding-priority iso-latin-1) + (nonascii-translation . latin-iso8859-1) + (unibyte-syntax . "latin-1") + (unibyte-display . iso-latin-1) + (input-method . "latin-1-prefix") + (sample-text . "Oi") + (documentation . "Support for Brazilian Portuguese.")) + '("European")) ;; Definitions for the Mac Roman character sets and coding system. ;; The Mac Roman encoding uses all 128 code points in the range 128 to @@ -543,7 +621,7 @@ but it selects the Dutch tutorial and input method.")) (setq translation-table (make-translation-table-from-vector encoding-vector)) (define-translation-table 'mac-roman-decoder translation-table) - (define-translation-table 'mac-roman-encoder + (define-translation-table 'mac-roman-encoder (char-table-extra-slot translation-table 0))) (define-ccl-program decode-mac-roman @@ -564,7 +642,12 @@ but it selects the Dutch tutorial and input method.")) `(1 ((loop (read-multibyte-character r0 r1) + (translate-character ucs-mule-to-mule-unicode r0 r1) (translate-character mac-roman-encoder r0 r1) + (if (r0 != ,(charset-id 'ascii)) + (if (r0 != ,(charset-id 'eight-bit-graphic)) + (if (r0 != ,(charset-id 'eight-bit-control)) + (r1 = ??)))) (write-repeat r1)))) "CCL program to encode Mac Roman") @@ -596,7 +679,7 @@ positions (integers or markers) specifying the region." (compose-string string idx (match-end 0)) (setq idx (match-end 0)))) string) - + (defun diacritic-compose-buffer () "Compose diacritic characters in the current buffer." (interactive) @@ -612,13 +695,12 @@ The text matches the regular expression PATTERN. Optional 4th argument STRING, if non-nil, is a string containing text to compose. -The return value is number of composed characters." - (if (< (1+ from) to) - (prog1 (- to from) - (if string - (compose-string string from to) - (compose-region from to)) - (- to from)))) +The return value is the number of composed characters." + (when (< (1+ from) to) + (if string + (compose-string string from to) + (compose-region from to)) + (- to from))) ;; Register a function to compose Unicode diacrtics and marks. (let ((patterns '(("\\C^\\c^+" . diacritic-composition-function)))) @@ -633,4 +715,5 @@ The return value is number of composed characters." (provide 'european) +;;; arch-tag: 9e018b12-fb02-4120-907b-9adeaf84b5c2 ;;; european.el ends here