X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/cd53f29e7debaaace0165cc2bc9cdd1bf29b311c..d5dc920668a85c56c4d3e54a6898bbd43bcb64a1:/lisp/language/european.el diff --git a/lisp/language/european.el b/lisp/language/european.el index 7ccb9fcf44..5063ded3c6 100644 --- a/lisp/language/european.el +++ b/lisp/language/european.el @@ -1,7 +1,10 @@ -;;; european.el --- European languages -*- coding: iso-2022-7bit; -*- +;;; european.el --- support for European languages -*- coding: iso-2022-7bit; -*- -;; Copyright (C) 1995, 1997, 2001 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. +;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004 +;; Free Software Foundation, Inc. +;; Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H14PRO021 ;; Keywords: multilingual, European @@ -19,28 +22,18 @@ ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. ;;; Commentary: -;; For Europeans, character sets ISO8859-1,2,3,4,9,14,15 are supported. +;; For European scripts, all the ISO Latin character sets are +;; supported, along with various others. ;;; Code: ;; Latin-1 (ISO-8859-1) -(make-coding-system - 'iso-latin-1 2 ?1 - "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)" - '(ascii latin-iso8859-1 nil nil - nil nil nil nil nil nil nil nil nil nil nil nil t) - '((safe-charsets ascii latin-iso8859-1) - (mime-charset . 
iso-8859-1))) - -(define-coding-system-alias 'iso-8859-1 'iso-latin-1) -(define-coding-system-alias 'latin-1 'iso-latin-1) - (set-language-info-alist "Latin-1" '((charset ascii latin-iso8859-1) (coding-system iso-latin-1) @@ -62,20 +55,36 @@ character set which supports the following European languages: We also have specific language environments for the following languages: For Dutch, \"Dutch\". For German, \"German\". + For French, \"French\". + For Italian, \"Italian\". + For Slovenian, \"Slovenian\". For Spanish, \"Spanish\". Latin-1 also covers several written languages outside Europe, including Indonesian/Malay, Tagalog (Philippines), Swahili and Afrikaans.")) '("European")) +(eval-and-compile + (setq + non-iso-charset-alist + (cp-make-coding-system + windows-1252 + [?\$,1tL(B nil ?\$,1rz(B ?\$,1!R(B ?\$,1r~(B ?\$,1s&(B ?\$,1s (B ?\$,1s!(B ?\$,1$f(B ?\$,1s0(B ?\$,1! (B ?\$,1s9(B ?\$,1 r(B nil ?\$,1!=(B nil nil + ?\$,1rx(B ?\$,1ry(B ?\$,1r|(B ?\$,1r}(B ?\$,1s"(B ?\$,1rs(B ?\$,1rt(B ?\$,1$|(B ?\$,1ub(B ?\$,1!!(B ?\$,1s:(B ?\$,1 s(B nil ?\$,1!>(B ?\$,1!8(B ?\,A (B ?\,A!(B + ?\,A"(B ?\,A#(B ?\,A$(B ?\,A%(B ?\,A&(B ?\,A'(B ?\,A((B ?\,A)(B ?\,A*(B ?\,A+(B ?\,A,(B ?\,A-(B ?\,A.(B ?\,A/(B ?\,A0(B ?\,A1(B ?\,A2(B + ?\,A3(B ?\,A4(B ?\,A5(B ?\,A6(B ?\,A7(B ?\,A8(B ?\,A9(B ?\,A:(B ?\,A;(B ?\,A<(B ?\,A=(B ?\,A>(B ?\,A?(B ?\,A@(B ?\,AA(B ?\,AB(B ?\,AC(B + ?\,AD(B ?\,AE(B ?\,AF(B ?\,AG(B ?\,AH(B ?\,AI(B ?\,AJ(B ?\,AK(B ?\,AL(B ?\,AM(B ?\,AN(B ?\,AO(B ?\,AP(B ?\,AQ(B ?\,AR(B ?\,AS(B ?\,AT(B + ?\,AU(B ?\,AV(B ?\,AW(B ?\,AX(B ?\,AY(B ?\,AZ(B ?\,A[(B ?\,A\(B ?\,A](B ?\,A^(B ?\,A_(B ?\,A`(B ?\,Aa(B ?\,Ab(B ?\,Ac(B ?\,Ad(B ?\,Ae(B + ?\,Af(B ?\,Ag(B ?\,Ah(B ?\,Ai(B ?\,Aj(B ?\,Ak(B ?\,Al(B ?\,Am(B ?\,An(B ?\,Ao(B ?\,Ap(B ?\,Aq(B ?\,Ar(B ?\,As(B ?\,At(B ?\,Au(B ?\,Av(B + ?\,Aw(B ?\,Ax(B ?\,Ay(B ?\,Az(B ?\,A{(B ?\,A|(B ?\,A}(B ?\,A~(B ?\,A(B]))) ;; Latin-2 (ISO-8859-2) (make-coding-system 'iso-latin-2 2 ?2 - "ISO 2022 based 8-bit encoding (MIME:ISO-8859-2)" + "ISO 2022 based 8-bit 
encoding for Latin-2 (MIME:ISO-8859-2)." '(ascii latin-iso8859-2 nil nil - nil nil nil nil nil nil nil) + nil nil nil nil nil nil nil nil nil nil nil t) '((safe-charsets ascii latin-iso8859-2) (mime-charset . iso-8859-2))) @@ -98,6 +107,7 @@ character set which supports the following languages: and Swedish. We also have specific language environments for the following languages: For Czech, \"Czech\". + For Croatian, \"Croatian\". For Romanian, \"Romanian\". For Slovak, \"Slovak\".")) '("European")) @@ -107,9 +117,9 @@ We also have specific language environments for the following languages: (make-coding-system 'iso-latin-3 2 ?3 - "ISO 2022 based 8-bit encoding (MIME:ISO-8859-3)" + "ISO 2022 based 8-bit encoding for Latin-3 (MIME:ISO-8859-3)." '(ascii latin-iso8859-3 nil nil - nil nil nil nil nil nil nil) + nil nil nil nil nil nil nil nil nil nil nil t) '((safe-charsets ascii latin-iso8859-3) (mime-charset . iso-8859-3))) @@ -135,9 +145,9 @@ These languages are supported with the Latin-3 (ISO-8859-3) character set: (make-coding-system 'iso-latin-4 2 ?4 - "ISO 2022 based 8-bit encoding (MIME:ISO-8859-4)" + "ISO 2022 based 8-bit encoding for Latin-4 (MIME:ISO-8859-4)." '(ascii latin-iso8859-4 nil nil - nil nil nil nil nil nil nil) + nil nil nil nil nil nil nil nil nil nil nil t) '((safe-charsets ascii latin-iso8859-4) (mime-charset . iso-8859-4))) @@ -151,7 +161,7 @@ These languages are supported with the Latin-3 (ISO-8859-3) character set: (nonascii-translation . latin-iso8859-4) (unibyte-syntax . "latin-4") (unibyte-display . iso-8859-4) - (input-method . "latin-4-prefix") + (input-method . "latin-4-postfix") (documentation . 
"\ These languages are supported with the Latin-4 (ISO-8859-4) character set: Danish, English, Estonian, Finnish, German, Greenlandic, Lappish, @@ -163,9 +173,9 @@ These languages are supported with the Latin-4 (ISO-8859-4) character set: (make-coding-system 'iso-latin-5 2 ?9 - "ISO 2022 based 8-bit encoding (MIME:ISO-8859-9)" + "ISO 2022 based 8-bit encoding for Latin-5 (MIME:ISO-8859-9)." '(ascii latin-iso8859-9 nil nil - nil nil nil nil nil nil nil) + nil nil nil nil nil nil nil nil nil nil nil t) '((safe-charsets ascii latin-iso8859-9) (mime-charset . iso-8859-9))) @@ -179,11 +189,8 @@ These languages are supported with the Latin-4 (ISO-8859-4) character set: (nonascii-translation . latin-iso8859-9) (unibyte-syntax . "latin-5") (unibyte-display . iso-latin-5) - (input-method . "latin-5-prefix") - (documentation . "\ -These languages are supported with the Latin-5 (ISO-8859-9) character set: -Bulgarian, Byelorussian, (Slavic) Macedonian, Russian, Serbian and -Ukranian.")) ; says ISO 8859-1 + (input-method . "latin-5-postfix") + (documentation . "Support for Turkish language.")) '("European")) @@ -192,9 +199,9 @@ Ukranian.")) ; says ISO 8859-1 (make-coding-system 'iso-latin-8 2 ?W ; `W' for `Welsh', since `C' ; for `Celtic' is taken. - "ISO 2022 based 8-bit encoding for Latin-8 (MIME:ISO-8859-14)" + "ISO 2022 based 8-bit encoding for Latin-8 (MIME:ISO-8859-14)." '(ascii latin-iso8859-14 nil nil - nil nil nil nil nil nil nil nil nil nil nil nil t) + nil nil nil nil nil nil nil nil nil nil nil t t) '((safe-charsets ascii latin-iso8859-14) (mime-charset . iso-8859-14))) @@ -214,17 +221,17 @@ Ukranian.")) ; says ISO 8859-1 (documentation . 
"\ This language environment is a generic one for the Latin-8 (ISO-8859-14) character set which supports the Celtic languages, including those not -covered by other ISO-8859 character sets: Welsh, Manx Gaelic and -Irish Gaelic (old orthography).")) +covered by other ISO-8859 character sets: + Welsh, Manx Gaelic and Irish Gaelic (old orthography).")) '("European")) ;; Latin-9 (ISO-8859-15) (make-coding-system 'iso-latin-9 2 ?0 ; `0' for `Latin-0' - "ISO 2022 based 8-bit encoding for Latin-9 (MIME:ISO-8859-15)" + "ISO 2022 based 8-bit encoding for Latin-9 (MIME:ISO-8859-15)." '(ascii latin-iso8859-15 nil nil - nil nil nil nil nil nil nil nil nil nil nil nil t) + nil nil nil nil nil nil nil nil nil nil nil t t) '((safe-charsets ascii latin-iso8859-15) (mime-charset . iso-8859-15))) @@ -249,13 +256,28 @@ addition of the Euro sign and some additional French and Finnish letters. Latin-9 is sometimes nicknamed `Latin-0'.")) '("European")) +(set-language-info-alist + "Dutch" '((tutorial . "TUTORIAL.nl") + (charset ascii latin-iso8859-1) + (coding-system iso-latin-1 iso-latin-9) + (coding-priority iso-latin-1) + (nonascii-translation . latin-iso8859-1) + (unibyte-syntax . "latin-1") + (unibyte-display . iso-latin-1) + (input-method . "dutch") + (sample-text . "Er is een aantal manieren waarop je dit kan doen") + (documentation . "\ +This language environment is almost the same as Latin-1, +but it selects the Dutch tutorial and input method.")) + '("European")) + (set-language-info-alist "German" '((tutorial . "TUTORIAL.de") (charset ascii latin-iso8859-1) - (coding-system iso-latin-1) + (coding-system iso-latin-1 iso-latin-9) (coding-priority iso-latin-1) (input-method . "german-postfix") - (nonascii-translation . iso-latin-1) + (nonascii-translation . latin-iso8859-1) (unibyte-syntax . "latin-1") (unibyte-display . iso-latin-1) (sample-text . "\ @@ -263,7 +285,39 @@ German (Deutsch Nord) Guten Tag German (Deutsch S,A|(Bd) Gr,A|_(B Gott") (documentation . 
"\ This language environment is almost the same as Latin-1, -but default input method is set to \"german-postfix\".")) +but sets the default input method to \"german-postfix\". +Additionally, it selects the German tutorial.")) + '("European")) + +(set-language-info-alist + "French" '((tutorial . "TUTORIAL.fr") + (charset ascii latin-iso8859-1) + (coding-system iso-latin-1 iso-latin-9) + (coding-priority iso-latin-1) + (nonascii-translation . latin-iso8859-1) + (unibyte-syntax . "latin-1") + (unibyte-display . iso-latin-1) + (input-method . "latin-1-prefix") + (sample-text . "French (Fran,Ag(Bais) Bonjour, Salut") + (documentation . "\ +This language environment is almost the same as Latin-1, +but it selects the French tutorial and input method.")) + '("European")) + +(set-language-info-alist + "Italian" '((tutorial . "TUTORIAL.it") + (charset ascii latin-iso8859-1) + (coding-system iso-latin-1 iso-latin-9) + (coding-priority iso-latin-1) + (nonascii-translation . latin-iso8859-1) + (unibyte-syntax . "latin-1") + (unibyte-display . iso-latin-1) + (input-method . "italian-postfix") + (sample-text . "Salve, ciao!") + (documentation . "\ +This language environment is almost the same as Latin-1, +but sets the default input method to \"italian-postfix\". +Additionally, it selects the Italian tutorial.")) '("European")) (set-language-info-alist @@ -271,42 +325,30 @@ but default input method is set to \"german-postfix\".")) (coding-system . (iso-8859-2)) (coding-priority . (iso-8859-2)) (nonascii-translation . latin-iso8859-2) - (input-method . "latin-2-postfix") + (input-method . "slovenian") (unibyte-syntax . "latin-2") (unibyte-display . iso-8859-2) (tutorial . "TUTORIAL.sl") (sample-text . ",B.(Belimo vam uspe,B9(Ben dan!") - (documentation . t)) + (documentation . "\ +This language environment is almost the same as Latin-2, +but it selects the Slovenian tutorial and input method.")) '("European")) (set-language-info-alist "Spanish" '((tutorial . 
"TUTORIAL.es") (charset ascii latin-iso8859-1) - (coding-system iso-latin-1) + (coding-system iso-latin-1 iso-latin-9) (coding-priority iso-latin-1) (input-method . "spanish-postfix") - (nonascii-translation . iso-latin-1) + (nonascii-translation . latin-iso8859-1) (unibyte-syntax . "latin-1") (unibyte-display . iso-latin-1) (sample-text . "Spanish (Espa,Aq(Bol) ,A!(BHola!") (documentation . "\ This language environment is almost the same as Latin-1, -but default input method is set to \"spanish-postfix\", -and select's the Spanish tutorial.")) - '("European")) - -(set-language-info-alist - "Dutch" '((tutorial . "TUTORIAL.nl") - (charset ascii latin-iso8859-1) - (coding-system iso-latin-1) - (coding-priority iso-latin-1) - (nonascii-translation . iso-latin-1) - (unibyte-syntax . "latin-1") - (unibyte-display . iso-latin-1) - (sample-text . "Er is een aantal manieren waarop je dit kan doen") - (documentation . "\ -This language environment is almost the same as Latin-1, -but select's the Dutch tutorial.")) +but it sets the default input method to \"spanish-postfix\", +and it selects the Spanish tutorial.")) '("European")) ;; For Turkish, the character set ISO-8859-9 (Latin-5) is used. But, @@ -316,7 +358,7 @@ but select's the Dutch tutorial.")) (set-language-info-alist "Turkish" '((charset ascii latin-iso8859-9) - (coding-system iso-latin-5) + (coding-system iso-latin-5 iso-latin-3) (coding-priority iso-latin-5) (nonascii-translation . latin-iso8859-9) (unibyte-syntax . "latin-5") @@ -338,10 +380,92 @@ but select's the Dutch tutorial.")) (unibyte-syntax . "latin-2") (unibyte-display . iso-8859-2) (tutorial . "TUTORIAL.pl") - (sample-text . ",B1!fFjJ3#qQsS6&?/<,(B") + (sample-text . "P,Bs(Bjd,B<(B, ki,Bq(B-,B?(Be t,Bj(B chmurno,B6f(B w g,B31(Bb flaszy") (documentation . t)) '("European")) +(set-language-info-alist + "Welsh" `((coding-system utf-8 latin-8) ; the input method is Unicode-based + (coding-priority utf-8 latin-8) + (nonascii-translation . 
latin-iso8859-14) + (input-method . "welsh") + (documentation . "Support for Welsh, using Unicode.")) + '("European")) + +(set-language-info-alist + "Latin-6" `((coding-system latin-6) + (coding-priority latin-6) + (nonascii-translation . ,(get 'decode-iso-latin-6 'translation-table)) + (input-method . "latin-prefix") + (features code-pages) + (documentation . "Support for Latin-6.")) + '("European")) + +(set-language-info-alist + "Latin-7" `((coding-system latin-7) + (coding-priority latin-7) + (nonascii-translation . ,(get 'decode-iso-latin-7 + 'translation-table)) + (input-method . "latin-prefix") + (features code-pages) + (documentation . "Support for Latin-7, e.g. Latvian, Lithuanian.")) + '("European")) + +(set-language-info-alist + "Lithuanian" `((coding-system latin-7) + (coding-priority latin-7) + (input-method . "lithuanian-keyboard") + (nonascii-translation . ,(get 'decode-iso-latin-7 + 'translation-table)) + (features code-pages) + (documentation . "Support for Lithuanian.")) + '("European")) + +(set-language-info-alist + "Latvian" `((coding-system latin-7) + (coding-priority latin-7) + (input-method . "latvian-keyboard") + (nonascii-translation . ,(get 'decode-iso-latin-7 + 'translation-table)) + (features code-pages) + (documentation . "Support for Latvian.")) + '("European")) + +(set-language-info-alist + "Swedish" '((tutorial . "TUTORIAL.sv") + (charset ascii latin-iso8859-1) + (coding-system iso-latin-1) + (coding-priority iso-latin-1) + (nonascii-translation . latin-iso8859-1) + (unibyte-syntax . "latin-1") + (unibyte-display . iso-latin-1) + (sample-text . "Goddag Hej") + (documentation . "Support for Swedish")) + '("European")) + +(set-language-info-alist + "Croatian" '((charset . (ascii latin-iso8859-2)) + (coding-system . (iso-8859-2)) + (coding-priority . (iso-8859-2)) + (input-method . "croatian") + (nonascii-translation . latin-iso8859-2) + (unibyte-syntax . "latin-2") + (unibyte-display . iso-8859-2) + (documentation . 
"Support for Croatian with Latin-2 encoding.")) + '("European")) + +(set-language-info-alist + "Brazilian Portuguese" '((tutorial . "TUTORIAL.pt_BR") + (charset ascii latin-iso8859-1) + (coding-system iso-latin-1 iso-latin-9) + (coding-priority iso-latin-1) + (nonascii-translation . latin-iso8859-1) + (unibyte-syntax . "latin-1") + (unibyte-display . iso-latin-1) + (input-method . "latin-1-prefix") + (sample-text . "Oi") + (documentation . "Support for Brazilian Portuguese.")) + '("European")) ;; Definitions for the Mac Roman character sets and coding system. ;; The Mac Roman encoding uses all 128 code points in the range 128 to @@ -494,7 +618,7 @@ but select's the Dutch tutorial.")) (setq translation-table (make-translation-table-from-vector encoding-vector)) (define-translation-table 'mac-roman-decoder translation-table) - (define-translation-table 'mac-roman-encoder + (define-translation-table 'mac-roman-encoder (char-table-extra-slot translation-table 0))) (define-ccl-program decode-mac-roman @@ -515,16 +639,78 @@ but select's the Dutch tutorial.")) `(1 ((loop (read-multibyte-character r0 r1) + (translate-character ucs-mule-to-mule-unicode r0 r1) (translate-character mac-roman-encoder r0 r1) + (if (r0 != ,(charset-id 'ascii)) + (if (r0 != ,(charset-id 'eight-bit-graphic)) + (if (r0 != ,(charset-id 'eight-bit-control)) + (r1 = ??)))) (write-repeat r1)))) "CCL program to encode Mac Roman") (make-coding-system - 'mac-roman 4 ?M "Mac Roman Encoding" + 'mac-roman 4 ?M + "Mac Roman Encoding (MIME:MACINTOSH)." '(decode-mac-roman . encode-mac-roman) - '((safe-chars . mac-roman-encoder) - (valid-codes (0 . 255)))) + (list (cons 'safe-chars (get 'mac-roman-encoder 'translation-table)) + '(valid-codes (0 . 255)) + '(mime-charset . macintosh))) ; per IANA, rfc1345 + +(defconst diacritic-composition-pattern "\\C^\\c^+") + +(defun diacritic-compose-region (beg end) + "Compose diacritic characters in the region. 
+When called from a program, expects two arguments, +positions (integers or markers) specifying the region." + (interactive "r") + (save-restriction + (narrow-to-region beg end) + (goto-char (point-min)) + (while (re-search-forward diacritic-composition-pattern nil t) + (compose-region (match-beginning 0) (match-end 0))))) + +(defun diacritic-compose-string (string) + "Compose diacritic characters in STRING and return the resulting string." + (let ((idx 0)) + (while (setq idx (string-match diacritic-composition-pattern string idx)) + (compose-string string idx (match-end 0)) + (setq idx (match-end 0)))) + string) + +(defun diacritic-compose-buffer () + "Compose diacritic characters in the current buffer." + (interactive) + (diacritic-compose-region (point-min) (point-max))) + +(defun diacritic-post-read-conversion (len) + (diacritic-compose-region (point) (+ (point) len)) + len) + +(defun diacritic-composition-function (from to pattern &optional string) + "Compose diacritic text in the region FROM and TO. +The text matches the regular expression PATTERN. +Optional 4th argument STRING, if non-nil, is a string containing text +to compose. + +The return value is the number of composed characters." + (when (< (1+ from) to) + (if string + (compose-string string from to) + (compose-region from to)) + (- to from))) + +;; Register a function to compose Unicode diacritics and marks. +(let ((patterns '(("\\C^\\c^+" . diacritic-composition-function)))) + (let ((c #x300)) + (while (<= c #x362) + (aset composition-function-table (decode-char 'ucs c) patterns) + (setq c (1+ c))) + (setq c #x20d0) + (while (<= c #x20e3) + (aset composition-function-table (decode-char 'ucs c) patterns) + (setq c (1+ c))))) (provide 'european) +;;; arch-tag: 9e018b12-fb02-4120-907b-9adeaf84b5c2 ;;; european.el ends here