]> code.delx.au - gnu-emacs/blob - lisp/progmodes/ebnf-iso.el
*** empty log message ***
[gnu-emacs] / lisp / progmodes / ebnf-iso.el
1 ;;; ebnf-iso --- Parser for ISO EBNF
2
3 ;; Copyright (C) 1999 Vinicius Jose Latorre
4
5 ;; Author: Vinicius Jose Latorre <vinicius@cpqd.com.br>
6 ;; Maintainer: Vinicius Jose Latorre <vinicius@cpqd.com.br>
7 ;; Keywords: wp, ebnf, PostScript
8 ;; Time-stamp: <99/11/20 18:04:11 vinicius>
9 ;; Version: 1.4
10
11 ;; This file is *NOT* (yet?) part of GNU Emacs.
12
13 ;; This program is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; any later version.
17
18 ;; This program is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
22
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26 ;; Boston, MA 02111-1307, USA.
27
28 ;;; Commentary:
29
30 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31 ;;
32 ;;
33 ;; This is part of ebnf2ps package.
34 ;;
35 ;; This package defines a parser for ISO EBNF.
36 ;;
37 ;; See ebnf2ps.el for documentation.
38 ;;
39 ;;
40 ;; ISO EBNF Syntax
41 ;; ---------------
42 ;;
43 ;; See the URL:
44 ;; `http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html'
45 ;; ("International Standard of the ISO EBNF Notation").
46 ;;
47 ;;
48 ;; ISO EBNF = syntax rule, {syntax rule};
49 ;;
50 ;; syntax rule = meta identifier, '=', definition list, ';';
51 ;;
52 ;; definition list = single definition, {'|', single definition};
53 ;;
54 ;; single definition = term, {',', term};
55 ;;
56 ;; term = factor, ['-', exception];
57 ;;
58 ;; exception = factor (* without <meta identifier> *);
59 ;;
60 ;; factor = [integer, '*'], primary;
61 ;;
62 ;; primary = optional sequence | repeated sequence | special sequence
63 ;; | grouped sequence | meta identifier | terminal string
64 ;; | empty;
65 ;;
66 ;; empty = ;
67 ;;
68 ;; optional sequence = '[', definition list, ']';
69 ;;
70 ;; repeated sequence = '{', definition list, '}';
71 ;;
72 ;; grouped sequence = '(', definition list, ')';
73 ;;
74 ;; terminal string = "'", character - "'", {character - "'"}, "'"
75 ;; | '"', character - '"', {character - '"'}, '"';
76 ;;
77 ;; special sequence = '?', {character - '?'}, '?';
78 ;;
79 ;; meta identifier = letter, { letter | decimal digit | ' ' };
80 ;;
81 ;; integer = decimal digit, {decimal digit};
82 ;;
83 ;; comment = '(*', {comment symbol}, '*)';
84 ;;
85 ;; comment symbol = comment (* <== NESTED COMMENT *)
86 ;; | terminal string | special sequence | character;
87 ;;
88 ;; letter = ? A-Z a-z ?;
89 ;;
90 ;; decimal digit = ? 0-9 ?;
91 ;;
92 ;; character = letter | decimal digit
93 ;; | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{'
94 ;; | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_'
95 ;; | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~';
96 ;;
97 ;;
98 ;; There is also the following alternative representation:
99 ;;
100 ;; STANDARD ALTERNATIVE
101 ;; | ==> / or !
102 ;; [ ==> (/
103 ;; ] ==> /)
104 ;; { ==> (:
105 ;; } ==> :)
106 ;; ; ==> .
107 ;;
108 ;;
109 ;; Differences Between ISO EBNF And ebnf2ps ISO EBNF
110 ;; -------------------------------------------------
111 ;;
112 ;; ISO EBNF accepts the characters given by <character> production above,
113 ;; HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM FEED
114 ;; (^L); any other character is illegal.  However, ebnf2ps also accepts the
115 ;; European 8-bit accented characters (from \240 to \377).
116 ;;
117 ;;
118 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
119
120 ;;; code:
121
122
123 (require 'ebnf-otz)
124
125
(defvar ebnf-iso-lex nil
  "Text of the token most recently scanned by function `ebnf-iso-lex'.

The function itself returns only the type of the token.  For `integer',
`special', `terminal' and `non-terminal' tokens it stores the text of
the token in this variable.")
128
129
;; This is a variable, not a constant: `ebnf-iso-term' let-binds it.
(defvar ebnf-no-meta-identifier nil
  "Non-nil means a meta identifier is not allowed at this point.

`ebnf-iso-term' binds this to t while it parses the exception part of a
term, and function `ebnf-iso-lex' signals an error when it scans a meta
identifier while the value is non-nil.")
131
132 \f
133 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
134 ;; Syntactic analyzer
135
136
137 ;;; ISO EBNF = syntax rule, {syntax rule};
138
(defun ebnf-iso-parser (start)
  "Parse the ISO EBNF text that begins at buffer position START.

Rules are read with `ebnf-iso-syntax-rule' until function `ebnf-iso-lex'
reports `end-of-input'.  Return the list of productions for which
`ebnf-add-empty-rule-list' returned nil, most recently parsed first.
Signal an error if the very first token is `end-of-input'.  On normal
return point is put back where it was when the function was called."
  (let ((saved-point (point))
        (offset (1- start))
        (span (+ (- ebnf-limit start) 1))
        (productions nil)
        token parsed)
    (goto-char start)
    (setq token (ebnf-iso-lex))
    (if (eq token 'end-of-input)
        (error "Invalid ISO EBNF file format."))
    (while (not (eq token 'end-of-input))
      ;; progress, as a percentage of the text scanned so far
      (ebnf-message-float
       "Parsing...%s%%"
       (/ (* (- (point) offset) 100.0) span))
      ;; PARSED is (NEXT-TOKEN . PRODUCTION)
      (setq parsed (ebnf-iso-syntax-rule token)
            token (car parsed))
      (or (ebnf-add-empty-rule-list (cdr parsed))
          (setq productions (cons (cdr parsed) productions))))
    (goto-char saved-point)
    productions))
160
161
162 ;;; syntax rule = meta identifier, '=', definition list, ';';
163
(defun ebnf-iso-syntax-rule (token)
  "Parse one syntax rule whose first token, TOKEN, was already scanned.

The rule name is read from the variable `ebnf-iso-lex' and the pending
`ebnf-action' is recorded for the production; `ebnf-action' is then
reset to nil.  Return a cons whose car is the token that follows the
rule terminator and whose cdr is the object built by
`ebnf-make-production'.  Signal an error if TOKEN is not `non-terminal',
if `=' is missing, or if the rule is not terminated."
  (let ((name ebnf-iso-lex)
        (pending-action ebnf-action)
        definitions)
    (setq ebnf-action nil)
    (if (not (eq token 'non-terminal))
        (error "Invalid meta identifier syntax rule."))
    (if (not (eq (ebnf-iso-lex) 'equal))
        (error "Invalid syntax rule: missing `='."))
    ;; DEFINITIONS is (ENDING-TOKEN . BODY)
    (setq definitions (ebnf-iso-definition-list))
    (if (not (eq (car definitions) 'period))
        (error "Invalid syntax rule: missing `;' or `.'."))
    (ebnf-eps-add-production name)
    ;; scan the token after the terminator before building the production
    (let ((next-token (ebnf-iso-lex)))
      (cons next-token
            (ebnf-make-production name (cdr definitions) pending-action)))))
180
181
182 ;;; definition list = single definition, {'|', single definition};
183
(defun ebnf-iso-definition-list ()
  "Parse a definition list: single definitions separated by alternatives.

Every single definition that ends with the token `alternative' is
pushed on a list, so that list is in reverse source order.  The list
and the last result of `ebnf-iso-single-definition' are then handed to
`ebnf-token-alternative', whose value is returned."
  (let ((current (ebnf-iso-single-definition))
        (alternatives nil))
    (while (eq (car current) 'alternative)
      (setq alternatives (cons (cdr current) alternatives)
            current (ebnf-iso-single-definition)))
    (ebnf-token-alternative alternatives current)))
191
192
193 ;;; single definition = term, {',', term};
194
(defun ebnf-iso-single-definition ()
  "Parse a single definition: terms separated by `,'.

Return a cons whose car is the token that ended the definition.  The
cdr is nil when no term was found, the term itself when there is
exactly one, and otherwise the value of `ebnf-make-sequence' applied to
the terms in source order.  An empty last term is never made an element
of the sequence."
  (let (token seq term)
    ;; The loop ends at the first empty term, or at the first term that
    ;; is not followed by `,'.  Only terms followed by `,' are pushed.
    (while (and (setq term  (ebnf-iso-term (ebnf-iso-lex))
                      token (car term)
                      term  (cdr term))
                (eq token 'catenate))
      (setq seq (cons term seq)))
    ;; Push the last term too, unless it is empty; consing nil here would
    ;; put a bogus element at the end of a sequence like `a, b, ;'.
    (and term
         (setq seq (cons term seq)))
    (cons token
          (if (null (cdr seq))
              ;; no term at all, or only one term
              (car seq)
            ;; a real sequence
            (ebnf-make-sequence (nreverse seq))))))
214
215
216 ;;; term = factor, ['-', exception];
217 ;;;
218 ;;; exception = factor (* without <meta identifier> *);
219
(defun ebnf-iso-term (token)
  "Parse a term that starts at TOKEN: a factor with an optional exception.

When the factor is not followed by the token `except', return the
result of `ebnf-iso-factor' unchanged.  Otherwise scan and parse a
second factor with `ebnf-no-meta-identifier' bound to t, and return the
value of `ebnf-token-except' for the two factors."
  (let ((parsed (ebnf-iso-factor token)))
    (cond
     ;; factor - exception
     ((eq (car parsed) 'except)
      (let ((ebnf-no-meta-identifier t))
        (ebnf-token-except (cdr parsed)
                           (ebnf-iso-factor (ebnf-iso-lex)))))
     ;; plain factor
     (t
      parsed))))
228
229
230 ;;; factor = [integer, '*'], primary;
231
(defun ebnf-iso-factor (token)
  "Parse a factor that starts at TOKEN: an optional `INTEGER *' and a primary.

If TOKEN is not `integer', return the result of `ebnf-iso-primary' for
TOKEN.  Otherwise the repetition count is the text in the variable
`ebnf-iso-lex'; the next token must be `repeat', and the value of
`ebnf-token-repeat' for the count and the following primary is returned.
Signal an error if `*' is missing."
  (cond
   ((not (eq token 'integer))
    (ebnf-iso-primary token))
   (t
    ;; read the count before the lexer overwrites `ebnf-iso-lex'
    (let ((count ebnf-iso-lex))
      (if (not (eq (ebnf-iso-lex) 'repeat))
          (error "Missing `*'."))
      (ebnf-token-repeat count (ebnf-iso-primary (ebnf-iso-lex)))))))
239
240
241 ;;; primary = optional sequence | repeated sequence | special sequence
242 ;;; | grouped sequence | meta identifier | terminal string
243 ;;; | empty;
244 ;;;
245 ;;; empty = ;
246 ;;;
247 ;;; optional sequence = '[', definition list, ']';
248 ;;;
249 ;;; repeated sequence = '{', definition list, '}';
250 ;;;
251 ;;; grouped sequence = '(', definition list, ')';
252 ;;;
253 ;;; terminal string = "'", character - "'", {character - "'"}, "'"
254 ;;; | '"', character - '"', {character - '"'}, '"';
255 ;;;
256 ;;; special sequence = '?', {character - '?'}, '?';
257 ;;;
258 ;;; meta identifier = letter, { letter | decimal digit | ' ' };
259
(defun ebnf-iso-primary (token)
  "Parse a primary that starts at TOKEN.

Return a cons (NEXT-TOKEN . NODE).  For `terminal', `non-terminal' and
`special' the node is built from the text in the variable
`ebnf-iso-lex'.  For `begin-group', `begin-optional' and
`begin-zero-or-more' a definition list is parsed and must end with the
matching closing token, otherwise an error is signaled.  When a node
was built, NEXT-TOKEN is freshly scanned; when the node is nil (the
empty primary), TOKEN itself is returned as NEXT-TOKEN."
  (let ((node
         (cond
          ;; terminal string
          ((eq token 'terminal)
           (ebnf-make-terminal ebnf-iso-lex))
          ;; meta identifier
          ((eq token 'non-terminal)
           (ebnf-make-non-terminal ebnf-iso-lex))
          ;; special sequence
          ((eq token 'special)
           (ebnf-make-special ebnf-iso-lex))
          ;; grouped sequence
          ((eq token 'begin-group)
           (let ((group (ebnf-iso-definition-list)))
             (if (eq (car group) 'end-group)
                 (cdr group)
               (error "Missing `)'."))))
          ;; optional sequence
          ((eq token 'begin-optional)
           (let ((option (ebnf-iso-definition-list)))
             (if (eq (car option) 'end-optional)
                 (ebnf-token-optional (cdr option))
               (error "Missing `]' or `/)'."))))
          ;; repeated sequence
          ((eq token 'begin-zero-or-more)
           (let ((repetition (ebnf-iso-definition-list)))
             (if (eq (car repetition) 'end-zero-or-more)
                 (ebnf-make-zero-or-more (cdr repetition))
               (error "Missing `}' or `:)'.")))))))
    ;; any other token is the empty primary: NODE is nil
    (if node
        (cons (ebnf-iso-lex) node)
      (cons token nil))))
299
300 \f
301 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
302 ;; Lexical analyzer
303
304
(defconst ebnf-iso-token-table
  ;; Every slot starts as `error'; control characters and the codes from
  ;; \177 to \237 that are not overridden below keep that value.
  (let ((table (make-vector 256 'error))
        ;; (FIRST LAST TOKEN): inclusive character ranges
        (ranges '((?\040 ?\057 character)    ; printable characters
                  (?\060 ?\071 integer)      ; digits
                  (?\072 ?\100 character)
                  (?\101 ?\132 non-terminal) ; upper case letters
                  (?\133 ?\140 character)
                  (?\141 ?\172 non-terminal) ; lower case letters
                  (?\173 ?\176 character)
                  (?\240 ?\377 non-terminal))) ; European 8-bit characters
        ;; (CHARACTER . TOKEN): single characters set after the ranges
        (overrides '((?\013 . space)         ; [VT] vertical tab
                     (?\n . space)           ; [NL] linefeed
                     (?\r . space)           ; [CR] carriage return
                     (?\t . space)           ; [HT] horizontal tab
                     (?\040 . space)         ; [SP] space
                     (?\f . form-feed)       ; [FF] form feed
                     (?\" . double-terminal)
                     (?\' . single-terminal)
                     (?\? . special)
                     (?* . repeat)
                     (?, . catenate)
                     (?- . except)
                     (?= . equal)
                     (?\) . end-group)))
        code last)
    (while ranges
      (setq code (nth 0 (car ranges))
            last (nth 1 (car ranges)))
      (while (<= code last)
        (aset table code (nth 2 (car ranges)))
        (setq code (1+ code)))
      (setq ranges (cdr ranges)))
    (while overrides
      (aset table (car (car overrides)) (cdr (car overrides)))
      (setq overrides (cdr overrides)))
    table)
  "Vector of 256 elements that maps a character code to a lexical token.

The remaining syntax characters are set by `ebnf-iso-initialize'.")
358
359
(defun ebnf-iso-initialize ()
  "Set the syntax characters of `ebnf-iso-token-table'.

Each character listed below gets the token of the alternative
representation when `ebnf-iso-alternative-p' is non-nil, and the token
of the standard representation otherwise.  The vector is modified in
place.  The value returned is the token stored for `.'."
  ;; (CHARACTER ALTERNATIVE-TOKEN STANDARD-TOKEN)
  (let ((entries '((?\( left-parenthesis begin-parenthesis)
                   (?\[ character begin-optional)
                   (?\] character end-optional)
                   (?\{ character begin-zero-or-more)
                   (?\} character end-zero-or-more)
                   (?| character alternative)
                   (?\; character period)
                   (?/ slash character)
                   (?! alternative character)
                   (?: colon character)
                   (?. period character)))
        entry stored)
    (while entries
      (setq entry (car entries)
            entries (cdr entries)
            stored (aset ebnf-iso-token-table
                         (car entry)
                         (if ebnf-iso-alternative-p
                             (nth 1 entry)
                           (nth 2 entry)))))
    stored))
388
389
(defun ebnf-iso-lex ()
  "Lexical analyser for ISO EBNF.

Skip white space and comments that start at point, then return the type
of the next token as a symbol.  Return `end-of-input' when point
reaches `ebnf-limit'.

For `integer', `special', `terminal' and `non-terminal' tokens the text
of the token is stored in the variable `ebnf-iso-lex'.  A form feed sets
`ebnf-action' to `form-feed'.

Signal an error on an illegal character, and on a meta identifier
scanned while `ebnf-no-meta-identifier' is non-nil.

See documentation for variable `ebnf-iso-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; skip spaces and comments
      (while (if (> (following-char) 255)
                 ;; the token table has no entry for this character
                 (progn
                   (setq token 'error)
                   nil)
               (setq token (aref ebnf-iso-token-table (following-char)))
               (cond
                ((eq token 'space)
                 (skip-chars-forward " \013\n\r\t" ebnf-limit)
                 (< (point) ebnf-limit))
                ;; In both representations `(' may start a comment, so it
                ;; is consumed here and the next character is examined.
                ((or (eq token 'begin-parenthesis)
                     (eq token 'left-parenthesis))
                 (forward-char)
                 (if (/= (following-char) ?*)
                     ;; no comment
                     nil
                   ;; comment
                   (ebnf-iso-skip-comment)
                   t))
                ((eq token 'form-feed)
                 (forward-char)
                 (setq ebnf-action 'form-feed))
                (t nil)
                )))
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Illegal character."))
       ;; integer
       ((eq token 'integer)
        (setq ebnf-iso-lex (ebnf-buffer-substring "0-9"))
        'integer)
       ;; special: ?special?
       ((eq token 'special)
        (setq ebnf-iso-lex (concat "?"
                                   (ebnf-string " ->@-~" ?\? "special")
                                   "?"))
        'special)
       ;; terminal: "string"
       ((eq token 'double-terminal)
        (setq ebnf-iso-lex (ebnf-string " !#-~" ?\" "terminal"))
        'terminal)
       ;; terminal: 'string'
       ((eq token 'single-terminal)
        (setq ebnf-iso-lex (ebnf-string " -&(-~" ?\' "terminal"))
        'terminal)
       ;; non-terminal
       ((eq token 'non-terminal)
        (setq ebnf-iso-lex (ebnf-iso-normalize
                            (ebnf-trim-right
                             (ebnf-buffer-substring " 0-9A-Za-z\240-\377"))))
        (and ebnf-no-meta-identifier
             (error "Exception sequence should not contain a meta identifier."))
        'non-terminal)
       ;; begin optional, begin list or begin group
       ((eq token 'left-parenthesis)
        ;; The skip loop above already moved point past `(', so the
        ;; character after point is the one that tells `(/' and `(:'
        ;; from a plain `('.  Moving forward again here would skip it.
        (cond ((= (following-char) ?/)
               (forward-char)
               'begin-optional)
              ((= (following-char) ?:)
               (forward-char)
               'begin-zero-or-more)
              (t
               'begin-group)
              ))
       ;; end optional or alternative
       ((eq token 'slash)
        (forward-char)
        (if (/= (following-char) ?\))
            'alternative
          (forward-char)
          'end-optional))
       ;; end list
       ((eq token 'colon)
        (forward-char)
        (if (/= (following-char) ?\))
            'character
          (forward-char)
          'end-zero-or-more))
       ;; begin group; point is already past `('
       ((eq token 'begin-parenthesis)
        'begin-group)
       ;; miscellaneous
       (t
        (forward-char)
        token)
       ))))
490
491
(defconst ebnf-iso-comment-chars
  "^*(\000-\010\016-\037\177-\237"
  "Character set given to `skip-chars-forward' to scan comment text.

The set is negated: scanning stops at `*', at `(' and at the character
codes from \\000 to \\010, from \\016 to \\037 and from \\177 to \\237.")
493
494
(defun ebnf-iso-skip-comment ()
  "Skip an ISO EBNF comment, including the comments nested inside it.

Function `ebnf-iso-lex' calls this with point on the `*' of the opening
`(*'.  On return point is just after the matching `*)'.

The character right after the opening `(*' selects an action.  When
`ebnf-eps-executing' is non-nil, `[' adds and `]' removes the EPS
context named by `ebnf-iso-eps-filename'.  In every other case
`ebnf-action' is set to the element of `ebnf-comment-table' for that
character.

Signal an error if `ebnf-limit' is reached before the comment is closed,
or if scanning stops at a character other than `*' and `('."
  (forward-char)
  (let ((selector (following-char)))
    (cond
     ;; open EPS file
     ((and ebnf-eps-executing (= selector ?\[))
      (ebnf-eps-add-context (ebnf-iso-eps-filename)))
     ;; close EPS file
     ((and ebnf-eps-executing (= selector ?\]))
      (ebnf-eps-remove-context (ebnf-iso-eps-filename)))
     ;; any other action in comment
     (t
      (setq ebnf-action (aref ebnf-comment-table selector)))))
  ;; DEPTH counts the comments that are still open
  (let ((depth 1))
    (while (> depth 0)
      (skip-chars-forward ebnf-iso-comment-chars ebnf-limit)
      (if (>= (point) ebnf-limit)
          (error "Missing end of comment: `*)'."))
      (cond
       ;; a run of `*' closes a comment only when `)' follows it
       ((= (following-char) ?*)
        (skip-chars-forward "*" ebnf-limit)
        (if (= (following-char) ?\))
            (progn
              (forward-char)
              (setq depth (1- depth)))))
       ;; a run of `(' opens a nested comment only when `*' follows it
       ((= (following-char) ?\()
        (skip-chars-forward "(" ebnf-limit)
        (if (= (following-char) ?*)
            (progn
              (forward-char)
              (setq depth (1+ depth)))))
       (t
        (error "Illegal character."))))))
528
529
(defun ebnf-iso-eps-filename ()
  "Return the EPS file name written at the start of a comment.

Move point forward one character, then collect text up to the first of:
`ebnf-limit', a run of `*' that is followed by `)', a `(' that is
followed by `*', a newline, or any other character excluded by
`ebnf-iso-comment-chars'.  Point is left at the end of the name; for a
run of `*' that is one character before the `)'.  The text is returned
without text properties."
  (forward-char)
  (let ((start (point))
        (skip-set (concat ebnf-iso-comment-chars "\n"))
        (end nil))
    (while (null end)
      (skip-chars-forward skip-set ebnf-limit)
      (cond
       ((>= (point) ebnf-limit)
        (setq end (point)))
       ;; `*': the name ends here only if the run of `*' ends with `*)'
       ((= (following-char) ?*)
        (skip-chars-forward "*" ebnf-limit)
        (if (= (following-char) ?\))
            (progn
              (backward-char)
              (setq end (point)))))
       ;; `(': the name ends here only if a nested comment starts
       ((= (following-char) ?\()
        (forward-char)
        (if (= (following-char) ?*)
            (progn
              (backward-char)
              (setq end (point)))))
       ;; newline or another excluded character
       (t
        (setq end (point)))))
    (buffer-substring-no-properties start end)))
557
558
(defun ebnf-iso-normalize (str)
  "Return STR with each run of consecutive spaces reduced to one space.

STR itself is returned when `ebnf-iso-normalize-p' is nil, and also when
STR has no space that directly follows another space.  Otherwise the
value is a new string."
  (if (not ebnf-iso-normalize-p)
      str
    (let ((len (length str))
          (extra 0)
          (index 1))
      ;; count the spaces that directly follow another space
      (while (< index len)
        (and (= (aref str index) ?\040)
             (= (aref str (1- index)) ?\040)
             (setq extra (1+ extra)))
        (setq index (1+ index)))
      (if (zerop extra)
          ;; nothing to remove
          str
        ;; copy everything except the spaces counted above
        (let ((result (make-string (- len extra) ?\040))
              (out 0)
              (after-space nil)
              ch)
          (setq index 0)
          (while (< index len)
            (setq ch (aref str index))
            (if (and after-space (= ch ?\040))
                nil
              (aset result out ch)
              (setq out (1+ out)))
            (setq after-space (= ch ?\040)
                  index (1+ index)))
          result)))))
599
600 \f
601 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
602
603
604 (provide 'ebnf-iso)
605
606
607 ;;; ebnf-iso.el ends here