-;; Definitions for the Mac Roman character sets and coding system.
-;; The Mac Roman encoding uses all 128 code points in the range 128 to
-;; 255 for actual characters. Emacs decodes them to one of the
-;; following character sets.
-;; ascii, latin-iso8859-1, mule-unicode-0100-24ff,
-;; mule-unicode-2500-33ff, mule-unicode-e000-ffff
-
-(let ;; Build and register the `mac-roman-decoder' / `mac-roman-encoder' translation tables.
- ((encoding-vector (make-vector 256 nil)) ;; slot N receives the character for Mac Roman byte N
- (i 0)
- (vec ;; mac-roman (128..255) -> UCS mapping
- [ #x00C4 ;; 128:LATIN CAPITAL LETTER A WITH DIAERESIS
- #x00C5 ;; 129:LATIN CAPITAL LETTER A WITH RING ABOVE
- #x00C7 ;; 130:LATIN CAPITAL LETTER C WITH CEDILLA
- #x00C9 ;; 131:LATIN CAPITAL LETTER E WITH ACUTE
- #x00D1 ;; 132:LATIN CAPITAL LETTER N WITH TILDE
- #x00D6 ;; 133:LATIN CAPITAL LETTER O WITH DIAERESIS
- #x00DC ;; 134:LATIN CAPITAL LETTER U WITH DIAERESIS
- #x00E1 ;; 135:LATIN SMALL LETTER A WITH ACUTE
- #x00E0 ;; 136:LATIN SMALL LETTER A WITH GRAVE
- #x00E2 ;; 137:LATIN SMALL LETTER A WITH CIRCUMFLEX
- #x00E4 ;; 138:LATIN SMALL LETTER A WITH DIAERESIS
- #x00E3 ;; 139:LATIN SMALL LETTER A WITH TILDE
- #x00E5 ;; 140:LATIN SMALL LETTER A WITH RING ABOVE
- #x00E7 ;; 141:LATIN SMALL LETTER C WITH CEDILLA
- #x00E9 ;; 142:LATIN SMALL LETTER E WITH ACUTE
- #x00E8 ;; 143:LATIN SMALL LETTER E WITH GRAVE
- #x00EA ;; 144:LATIN SMALL LETTER E WITH CIRCUMFLEX
- #x00EB ;; 145:LATIN SMALL LETTER E WITH DIAERESIS
- #x00ED ;; 146:LATIN SMALL LETTER I WITH ACUTE
- #x00EC ;; 147:LATIN SMALL LETTER I WITH GRAVE
- #x00EE ;; 148:LATIN SMALL LETTER I WITH CIRCUMFLEX
- #x00EF ;; 149:LATIN SMALL LETTER I WITH DIAERESIS
- #x00F1 ;; 150:LATIN SMALL LETTER N WITH TILDE
- #x00F3 ;; 151:LATIN SMALL LETTER O WITH ACUTE
- #x00F2 ;; 152:LATIN SMALL LETTER O WITH GRAVE
- #x00F4 ;; 153:LATIN SMALL LETTER O WITH CIRCUMFLEX
- #x00F6 ;; 154:LATIN SMALL LETTER O WITH DIAERESIS
- #x00F5 ;; 155:LATIN SMALL LETTER O WITH TILDE
- #x00FA ;; 156:LATIN SMALL LETTER U WITH ACUTE
- #x00F9 ;; 157:LATIN SMALL LETTER U WITH GRAVE
- #x00FB ;; 158:LATIN SMALL LETTER U WITH CIRCUMFLEX
- #x00FC ;; 159:LATIN SMALL LETTER U WITH DIAERESIS
- #x2020 ;; 160:DAGGER
- #x00B0 ;; 161:DEGREE SIGN
- #x00A2 ;; 162:CENT SIGN
- #x00A3 ;; 163:POUND SIGN
- #x00A7 ;; 164:SECTION SIGN
- #x2022 ;; 165:BULLET
- #x00B6 ;; 166:PILCROW SIGN
- #x00DF ;; 167:LATIN SMALL LETTER SHARP S
- #x00AE ;; 168:REGISTERED SIGN
- #x00A9 ;; 169:COPYRIGHT SIGN
- #x2122 ;; 170:TRADE MARK SIGN
- #x00B4 ;; 171:ACUTE ACCENT
- #x00A8 ;; 172:DIAERESIS
- #x2260 ;; 173:NOT EQUAL TO
- #x00C6 ;; 174:LATIN CAPITAL LETTER AE
- #x00D8 ;; 175:LATIN CAPITAL LETTER O WITH STROKE
- #x221E ;; 176:INFINITY
- #x00B1 ;; 177:PLUS-MINUS SIGN
- #x2264 ;; 178:LESS-THAN OR EQUAL TO
- #x2265 ;; 179:GREATER-THAN OR EQUAL TO
- #x00A5 ;; 180:YEN SIGN
- #x00B5 ;; 181:MICRO SIGN
- #x2202 ;; 182:PARTIAL DIFFERENTIAL
- #x2211 ;; 183:N-ARY SUMMATION
- #x220F ;; 184:N-ARY PRODUCT
- #x03C0 ;; 185:GREEK SMALL LETTER PI
- #x222B ;; 186:INTEGRAL
- #x00AA ;; 187:FEMININE ORDINAL INDICATOR
- #x00BA ;; 188:MASCULINE ORDINAL INDICATOR
- #x03A9 ;; 189:GREEK CAPITAL LETTER OMEGA
- #x00E6 ;; 190:LATIN SMALL LETTER AE
- #x00F8 ;; 191:LATIN SMALL LETTER O WITH STROKE
- #x00BF ;; 192:INVERTED QUESTION MARK
- #x00A1 ;; 193:INVERTED EXCLAMATION MARK
- #x00AC ;; 194:NOT SIGN
- #x221A ;; 195:SQUARE ROOT
- #x0192 ;; 196:LATIN SMALL LETTER F WITH HOOK
- #x2248 ;; 197:ALMOST EQUAL TO
- #x2206 ;; 198:INCREMENT
- #x00AB ;; 199:LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
- #x00BB ;; 200:RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
- #x2026 ;; 201:HORIZONTAL ELLIPSIS
- #x00A0 ;; 202:NO-BREAK SPACE
- #x00C0 ;; 203:LATIN CAPITAL LETTER A WITH GRAVE
- #x00C3 ;; 204:LATIN CAPITAL LETTER A WITH TILDE
- #x00D5 ;; 205:LATIN CAPITAL LETTER O WITH TILDE
- #x0152 ;; 206:LATIN CAPITAL LIGATURE OE
- #x0153 ;; 207:LATIN SMALL LIGATURE OE
- #x2013 ;; 208:EN DASH
- #x2014 ;; 209:EM DASH
- #x201C ;; 210:LEFT DOUBLE QUOTATION MARK
- #x201D ;; 211:RIGHT DOUBLE QUOTATION MARK
- #x2018 ;; 212:LEFT SINGLE QUOTATION MARK
- #x2019 ;; 213:RIGHT SINGLE QUOTATION MARK
- #x00F7 ;; 214:DIVISION SIGN
- #x25CA ;; 215:LOZENGE
- #x00FF ;; 216:LATIN SMALL LETTER Y WITH DIAERESIS
- #x0178 ;; 217:LATIN CAPITAL LETTER Y WITH DIAERESIS
- #x2044 ;; 218:FRACTION SLASH
- #x20AC ;; 219:EURO SIGN
- #x2039 ;; 220:SINGLE LEFT-POINTING ANGLE QUOTATION MARK
- #x203A ;; 221:SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
- #xFB01 ;; 222:LATIN SMALL LIGATURE FI
- #xFB02 ;; 223:LATIN SMALL LIGATURE FL
- #x2021 ;; 224:DOUBLE DAGGER
- #x00B7 ;; 225:MIDDLE DOT
- #x201A ;; 226:SINGLE LOW-9 QUOTATION MARK
- #x201E ;; 227:DOUBLE LOW-9 QUOTATION MARK
- #x2030 ;; 228:PER MILLE SIGN
- #x00C2 ;; 229:LATIN CAPITAL LETTER A WITH CIRCUMFLEX
- #x00CA ;; 230:LATIN CAPITAL LETTER E WITH CIRCUMFLEX
- #x00C1 ;; 231:LATIN CAPITAL LETTER A WITH ACUTE
- #x00CB ;; 232:LATIN CAPITAL LETTER E WITH DIAERESIS
- #x00C8 ;; 233:LATIN CAPITAL LETTER E WITH GRAVE
- #x00CD ;; 234:LATIN CAPITAL LETTER I WITH ACUTE
- #x00CE ;; 235:LATIN CAPITAL LETTER I WITH CIRCUMFLEX
- #x00CF ;; 236:LATIN CAPITAL LETTER I WITH DIAERESIS
- #x00CC ;; 237:LATIN CAPITAL LETTER I WITH GRAVE
- #x00D3 ;; 238:LATIN CAPITAL LETTER O WITH ACUTE
- #x00D4 ;; 239:LATIN CAPITAL LETTER O WITH CIRCUMFLEX
- #xF8FF ;; 240:Apple logo
- #x00D2 ;; 241:LATIN CAPITAL LETTER O WITH GRAVE
- #x00DA ;; 242:LATIN CAPITAL LETTER U WITH ACUTE
- #x00DB ;; 243:LATIN CAPITAL LETTER U WITH CIRCUMFLEX
- #x00D9 ;; 244:LATIN CAPITAL LETTER U WITH GRAVE
- #x0131 ;; 245:LATIN SMALL LETTER DOTLESS I
- #x02C6 ;; 246:MODIFIER LETTER CIRCUMFLEX ACCENT
- #x02DC ;; 247:SMALL TILDE
- #x00AF ;; 248:MACRON
- #x02D8 ;; 249:BREVE
- #x02D9 ;; 250:DOT ABOVE
- #x02DA ;; 251:RING ABOVE
- #x00B8 ;; 252:CEDILLA
- #x02DD ;; 253:DOUBLE ACUTE ACCENT
- #x02DB ;; 254:OGONEK
- #x02C7 ;; 255:CARON
- ])
- translation-table)
- (while (< i 128) ;; bytes 0..127 are stored unchanged
- (aset encoding-vector i i)
- (setq i (1+ i)))
- (while (< i 256) ;; bytes 128..255: look up VEC at (i - 128), convert with `decode-char'
- (aset encoding-vector i
- (decode-char 'ucs (aref vec (- i 128))))
- (setq i (1+ i)))
- (setq translation-table
- (make-translation-table-from-vector encoding-vector))
- (define-translation-table 'mac-roman-decoder translation-table)
- (define-translation-table 'mac-roman-encoder ;; the encoder is whatever sits in extra slot 0 of the decoder table
- (char-table-extra-slot translation-table 0)))
-
-(define-ccl-program decode-mac-roman ;; translates each input byte through `mac-roman-decoder'
- `(4
- ((loop
- (read r1) ;; r1 = value just read from the input
- (if (r1 < 128) ;; ASCII
- (r0 = ,(charset-id 'ascii))
- (if (r1 < 160) ;; 128..159
- (r0 = ,(charset-id 'eight-bit-control))
- (r0 = ,(charset-id 'eight-bit-graphic)))) ;; 160..255
- (translate-character mac-roman-decoder r0 r1) ;; r0 = charset id, r1 = code; both may be rewritten
- (write-multibyte-character r0 r1)
- (repeat))))
- "CCL program to decode Mac Roman")
-
-(define-ccl-program encode-mac-roman ;; translates each character through `mac-roman-encoder'
- `(1
- ((loop
- (read-multibyte-character r0 r1) ;; r0 = charset id, r1 = code within that charset
- (translate-character ucs-mule-to-mule-unicode r0 r1) ;; applied before the Mac Roman encoder table
- (translate-character mac-roman-encoder r0 r1)
- (if (r0 != ,(charset-id 'ascii))
- (if (r0 != ,(charset-id 'eight-bit-graphic))
- (if (r0 != ,(charset-id 'eight-bit-control))
- (r1 = ??)))) ;; charset is none of the three above: write a literal `?' instead
- (write-repeat r1))))
- "CCL program to encode Mac Roman")
-
-(make-coding-system ;; define the `mac-roman' coding system from the two CCL programs
- 'mac-roman 4 ?M
- "Mac Roman Encoding (MIME:MACINTOSH)."
- '(decode-mac-roman . encode-mac-roman) ;; (DECODER . ENCODER) pair of CCL program names
- (list (cons 'safe-chars (get 'mac-roman-encoder 'translation-table)) ;; safe-chars is the encoder translation table
- '(valid-codes (0 . 255))
- '(mime-charset . macintosh))) ; per IANA, rfc1345
-
-(defconst diacritic-composition-pattern "\\C^\\c^+") ;; one char NOT in category `^', then one or more chars in category `^' (presumably the combining-mark category -- confirm against the category table)
-
-(defun diacritic-compose-region (beg end)
-  "Compose every diacritic sequence found between BEG and END.
-Interactively, operate on the region.  From Lisp, BEG and END are
-positions (integers or markers) delimiting the text to scan.
-Sequences are located with `diacritic-composition-pattern'."
-  (interactive "r")
-  (save-restriction
-    (narrow-to-region beg end)
-    (goto-char (point-min))
-    ;; Each search resumes where the previous match ended, so the
-    ;; narrowed text is scanned once from start to finish.
-    (while (re-search-forward diacritic-composition-pattern nil t)
-      (let ((seq-start (match-beginning 0))
-            (seq-end (match-end 0)))
-        (compose-region seq-start seq-end)))))
-
-(defun diacritic-compose-string (string)
-  "Return STRING after composing each diacritic sequence it contains.
-Sequences are located with `diacritic-composition-pattern' and passed
-to `compose-string'; the value returned is STRING itself."
-  (let ((pos 0)
-        match-stop)
-    (while (setq pos (string-match diacritic-composition-pattern string pos))
-      (setq match-stop (match-end 0))
-      (compose-string string pos match-stop)
-      ;; Resume scanning right after the sequence just composed.
-      (setq pos match-stop)))
-  string)
-
-(defun diacritic-compose-buffer ()
-  "Compose diacritic sequences from `point-min' to `point-max'."
-  (interactive)
-  (let ((start (point-min))
-        (stop (point-max)))
-    (diacritic-compose-region start stop)))
-
-(defun diacritic-post-read-conversion (len)
-  "Compose diacritics in the LEN characters following point.
-The work is delegated to `diacritic-compose-region'.  Return LEN."
-  (let ((start (point)))
-    (diacritic-compose-region start (+ start len)))
-  len)
-
-(defun diacritic-composition-function (from to pattern &optional string)
-  "Compose the diacritic text between FROM and TO.
-PATTERN is the regular expression the text matched; it is accepted
-but not consulted here.
-If the optional 4th argument STRING is non-nil, the composition is
-applied to STRING with `compose-string'; otherwise it is applied to
-the current buffer with `compose-region'.
-
-Return the number of characters composed, or nil when TO is not
-greater than FROM + 1, in which case nothing is composed."
-  (cond ((not (< (1+ from) to))
-         ;; Fewer than two characters: nothing to compose.
-         nil)
-        (string
-         (compose-string string from to)
-         (- to from))
-        (t
-         (compose-region from to)
-         (- to from))))
-
-;; Register a function to compose Unicode diacritics and marks.
-;; The pattern comes from `diacritic-composition-pattern' so the regexp
-;; used for automatic composition cannot drift from the one used by the
-;; explicit diacritic-compose-* commands.  One PATTERNS list is shared by
-;; every table entry.
-(let ((patterns (list (cons diacritic-composition-pattern
-                            'diacritic-composition-function))))
-  ;; Each range is (FIRST . LAST), inclusive, given as UCS code points.
-  (dolist (range '((#x300 . #x362) (#x20d0 . #x20e3)))
-    (let ((c (car range)))
-      (while (<= c (cdr range))
-        (aset composition-function-table (decode-char 'ucs c) patterns)
-        (setq c (1+ c))))))