]> code.delx.au - gnu-emacs/blob - lisp/progmodes/ebnf-iso.el
Doc fix.
[gnu-emacs] / lisp / progmodes / ebnf-iso.el
1 ;;; ebnf-iso.el --- parser for ISO EBNF
2
3 ;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
4 ;; Free Software Foundation, Inc.
5
6 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8 ;; Time-stamp: <2004/02/22 14:24:55 vinicius>
9 ;; Keywords: wp, ebnf, PostScript
10 ;; Version: 1.7
11
12 ;; This file is part of GNU Emacs.
13
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 2, or (at your option)
17 ;; any later version.
18
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
23
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27 ;; Boston, MA 02111-1307, USA.
28
29 ;;; Commentary:
30
31 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32 ;;
33 ;;
34 ;; This is part of ebnf2ps package.
35 ;;
36 ;; This package defines a parser for ISO EBNF.
37 ;;
38 ;; See ebnf2ps.el for documentation.
39 ;;
40 ;;
41 ;; ISO EBNF Syntax
42 ;; ---------------
43 ;;
44 ;; See the URL:
45 ;; `http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html'
46 ;; ("International Standard of the ISO EBNF Notation").
47 ;;
48 ;;
49 ;; ISO EBNF = syntax rule, {syntax rule};
50 ;;
51 ;; syntax rule = meta identifier, '=', definition list, ';';
52 ;;
53 ;; definition list = single definition, {'|', single definition};
54 ;;
55 ;; single definition = term, {',', term};
56 ;;
57 ;; term = factor, ['-', exception];
58 ;;
59 ;; exception = factor (* without <meta identifier> *);
60 ;;
61 ;; factor = [integer, '*'], primary;
62 ;;
63 ;; primary = optional sequence | repeated sequence | special sequence
64 ;; | grouped sequence | meta identifier | terminal string
65 ;; | empty;
66 ;;
67 ;; empty = ;
68 ;;
69 ;; optional sequence = '[', definition list, ']';
70 ;;
71 ;; repeated sequence = '{', definition list, '}';
72 ;;
73 ;; grouped sequence = '(', definition list, ')';
74 ;;
75 ;; terminal string = "'", character - "'", {character - "'"}, "'"
76 ;; | '"', character - '"', {character - '"'}, '"';
77 ;;
78 ;; special sequence = '?', {character - '?'}, '?';
79 ;;
80 ;; meta identifier = letter, { letter | decimal digit | ' ' };
81 ;;
82 ;; integer = decimal digit, {decimal digit};
83 ;;
84 ;; comment = '(*', {comment symbol}, '*)';
85 ;;
86 ;; comment symbol = comment (* <== NESTED COMMENT *)
87 ;; | terminal string | special sequence | character;
88 ;;
89 ;; letter = ? A-Z a-z ?;
90 ;;
91 ;; decimal digit = ? 0-9 ?;
92 ;;
93 ;; character = letter | decimal digit
94 ;; | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{'
95 ;; | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_'
96 ;; | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~';
97 ;;
98 ;;
99 ;; There is also the following alternative representation:
100 ;;
101 ;; STANDARD ALTERNATIVE
102 ;; | ==> / or !
103 ;; [ ==> (/
104 ;; ] ==> /)
105 ;; { ==> (:
106 ;; } ==> :)
107 ;; ; ==> .
108 ;;
109 ;;
110 ;; Differences Between ISO EBNF And ebnf2ps ISO EBNF
111 ;; -------------------------------------------------
112 ;;
113 ;; ISO EBNF accepts the characters given by <character> production above,
114 ;; HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM FEED
115 ;; (^L), any other characters are illegal. But ebnf2ps accepts also the
116 ;; european 8-bit accentuated characters (from \240 to \377) and underscore.
117 ;;
118 ;;
119 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
120
121 ;;; Code:
122
123
124 (require 'ebnf-otz)
125
126
127 (defvar ebnf-iso-lex nil
128 "Value returned by `ebnf-iso-lex' function.")
129
130
131 (defvar ebnf-no-meta-identifier nil
132 "Used by `ebnf-iso-term' and `ebnf-iso-lex' functions.")
133
134 \f
135 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
136 ;; Syntactic analyzer
137
138
139 ;;; ISO EBNF = syntax rule, {syntax rule};
140
141 (defun ebnf-iso-parser (start)
142 "ISO EBNF parser."
143 (let ((total (+ (- ebnf-limit start) 1))
144 (bias (1- start))
145 (origin (point))
146 syntax-list token rule)
147 (goto-char start)
148 (setq token (ebnf-iso-lex))
149 (and (eq token 'end-of-input)
150 (error "Invalid ISO EBNF file format"))
151 (while (not (eq token 'end-of-input))
152 (ebnf-message-float
153 "Parsing...%s%%"
154 (/ (* (- (point) bias) 100.0) total))
155 (setq token (ebnf-iso-syntax-rule token)
156 rule (cdr token)
157 token (car token))
158 (or (ebnf-add-empty-rule-list rule)
159 (setq syntax-list (cons rule syntax-list))))
160 (goto-char origin)
161 syntax-list))
162
163
164 ;;; syntax rule = meta identifier, '=', definition list, ';';
165
166 (defun ebnf-iso-syntax-rule (token)
167 (let ((header ebnf-iso-lex)
168 (action ebnf-action)
169 body)
170 (setq ebnf-action nil)
171 (or (eq token 'non-terminal)
172 (error "Invalid meta identifier syntax rule"))
173 (or (eq (ebnf-iso-lex) 'equal)
174 (error "Invalid syntax rule: missing `='"))
175 (setq body (ebnf-iso-definition-list))
176 (or (eq (car body) 'period)
177 (error "Invalid syntax rule: missing `;' or `.'"))
178 (setq body (cdr body))
179 (ebnf-eps-add-production header)
180 (cons (ebnf-iso-lex)
181 (ebnf-make-production header body action))))
182
183
184 ;;; definition list = single definition, {'|', single definition};
185
186 (defun ebnf-iso-definition-list ()
187 (let (body sequence)
188 (while (eq (car (setq sequence (ebnf-iso-single-definition)))
189 'alternative)
190 (setq sequence (cdr sequence)
191 body (cons sequence body)))
192 (ebnf-token-alternative body sequence)))
193
194
195 ;;; single definition = term, {',', term};
196
197 (defun ebnf-iso-single-definition ()
198 (let (token seq term)
199 (while (and (setq term (ebnf-iso-term (ebnf-iso-lex))
200 token (car term)
201 term (cdr term))
202 (eq token 'catenate))
203 (setq seq (cons term seq)))
204 (cons token
205 (cond
206 ;; null sequence
207 ((null seq)
208 term)
209 ;; sequence with only one element
210 ((and (null term) (= (length seq) 1))
211 (car seq))
212 ;; a real sequence
213 (t
214 (ebnf-make-sequence (nreverse (cons term seq))))
215 ))))
216
217
218 ;;; term = factor, ['-', exception];
219 ;;;
220 ;;; exception = factor (* without <meta identifier> *);
221
222 (defun ebnf-iso-term (token)
223 (let ((factor (ebnf-iso-factor token)))
224 (if (not (eq (car factor) 'except))
225 ;; factor
226 factor
227 ;; factor - exception
228 (let ((ebnf-no-meta-identifier t))
229 (ebnf-token-except (cdr factor) (ebnf-iso-factor (ebnf-iso-lex)))))))
230
231
232 ;;; factor = [integer, '*'], primary;
233
234 (defun ebnf-iso-factor (token)
235 (if (eq token 'integer)
236 (let ((times ebnf-iso-lex))
237 (or (eq (ebnf-iso-lex) 'repeat)
238 (error "Missing `*'"))
239 (ebnf-token-repeat times (ebnf-iso-primary (ebnf-iso-lex))))
240 (ebnf-iso-primary token)))
241
242
243 ;;; primary = optional sequence | repeated sequence | special sequence
244 ;;; | grouped sequence | meta identifier | terminal string
245 ;;; | empty;
246 ;;;
247 ;;; empty = ;
248 ;;;
249 ;;; optional sequence = '[', definition list, ']';
250 ;;;
251 ;;; repeated sequence = '{', definition list, '}';
252 ;;;
253 ;;; grouped sequence = '(', definition list, ')';
254 ;;;
255 ;;; terminal string = "'", character - "'", {character - "'"}, "'"
256 ;;; | '"', character - '"', {character - '"'}, '"';
257 ;;;
258 ;;; special sequence = '?', {character - '?'}, '?';
259 ;;;
260 ;;; meta identifier = letter, {letter | decimal digit};
261
262 (defun ebnf-iso-primary (token)
263 (let ((primary
264 (cond
265 ;; terminal string
266 ((eq token 'terminal)
267 (ebnf-make-terminal ebnf-iso-lex))
268 ;; meta identifier
269 ((eq token 'non-terminal)
270 (ebnf-make-non-terminal ebnf-iso-lex))
271 ;; special sequence
272 ((eq token 'special)
273 (ebnf-make-special ebnf-iso-lex))
274 ;; grouped sequence
275 ((eq token 'begin-group)
276 (let ((body (ebnf-iso-definition-list)))
277 (or (eq (car body) 'end-group)
278 (error "Missing `)'"))
279 (cdr body)))
280 ;; optional sequence
281 ((eq token 'begin-optional)
282 (let ((body (ebnf-iso-definition-list)))
283 (or (eq (car body) 'end-optional)
284 (error "Missing `]' or `/)'"))
285 (ebnf-token-optional (cdr body))))
286 ;; repeated sequence
287 ((eq token 'begin-zero-or-more)
288 (let* ((body (ebnf-iso-definition-list))
289 (repeat (cdr body)))
290 (or (eq (car body) 'end-zero-or-more)
291 (error "Missing `}' or `:)'"))
292 (ebnf-make-zero-or-more repeat)))
293 ;; empty
294 (t
295 nil)
296 )))
297 (cons (if primary
298 (ebnf-iso-lex)
299 token)
300 primary)))
301
302 \f
303 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
304 ;; Lexical analyzer
305
306
307 (defconst ebnf-iso-token-table
308 ;; control character & 8-bit character are set to `error'
309 (let ((table (make-vector 256 'error))
310 (char ?\040))
311 ;; printable character
312 (while (< char ?\060)
313 (aset table char 'character)
314 (setq char (1+ char)))
315 ;; digits:
316 (while (< char ?\072)
317 (aset table char 'integer)
318 (setq char (1+ char)))
319 (while (< char ?\101)
320 (aset table char 'character)
321 (setq char (1+ char)))
322 ;; upper case letters:
323 (while (< char ?\133)
324 (aset table char 'non-terminal)
325 (setq char (1+ char)))
326 (while (< char ?\141)
327 (aset table char 'character)
328 (setq char (1+ char)))
329 ;; lower case letters:
330 (while (< char ?\173)
331 (aset table char 'non-terminal)
332 (setq char (1+ char)))
333 (while (< char ?\177)
334 (aset table char 'character)
335 (setq char (1+ char)))
336 ;; European 8-bit accentuated characters:
337 (setq char ?\240)
338 (while (< char ?\400)
339 (aset table char 'non-terminal)
340 (setq char (1+ char)))
341 ;; Override space characters:
342 (aset table ?\013 'space) ; [VT] vertical tab
343 (aset table ?\n 'space) ; [NL] linefeed
344 (aset table ?\r 'space) ; [CR] carriage return
345 (aset table ?\t 'space) ; [HT] horizontal tab
346 (aset table ?\ 'space) ; [SP] space
347 ;; Override form feed character:
348 (aset table ?\f 'form-feed) ; [FF] form feed
349 ;; Override other lexical characters:
350 (aset table ?_ 'non-terminal)
351 (aset table ?\" 'double-terminal)
352 (aset table ?\' 'single-terminal)
353 (aset table ?\? 'special)
354 (aset table ?* 'repeat)
355 (aset table ?, 'catenate)
356 (aset table ?- 'except)
357 (aset table ?= 'equal)
358 (aset table ?\) 'end-group)
359 table)
360 "Vector used to map characters to a lexical token.")
361
362
363 (defun ebnf-iso-initialize ()
364 "Initialize ISO EBNF token table."
365 (if ebnf-iso-alternative-p
366 ;; Override alternative lexical characters:
367 (progn
368 (aset ebnf-iso-token-table ?\( 'left-parenthesis)
369 (aset ebnf-iso-token-table ?\[ 'character)
370 (aset ebnf-iso-token-table ?\] 'character)
371 (aset ebnf-iso-token-table ?\{ 'character)
372 (aset ebnf-iso-token-table ?\} 'character)
373 (aset ebnf-iso-token-table ?| 'character)
374 (aset ebnf-iso-token-table ?\; 'character)
375 (aset ebnf-iso-token-table ?/ 'slash)
376 (aset ebnf-iso-token-table ?! 'alternative)
377 (aset ebnf-iso-token-table ?: 'colon)
378 (aset ebnf-iso-token-table ?. 'period))
379 ;; Override standard lexical characters:
380 (aset ebnf-iso-token-table ?\( 'begin-parenthesis)
381 (aset ebnf-iso-token-table ?\[ 'begin-optional)
382 (aset ebnf-iso-token-table ?\] 'end-optional)
383 (aset ebnf-iso-token-table ?\{ 'begin-zero-or-more)
384 (aset ebnf-iso-token-table ?\} 'end-zero-or-more)
385 (aset ebnf-iso-token-table ?| 'alternative)
386 (aset ebnf-iso-token-table ?\; 'period)
387 (aset ebnf-iso-token-table ?/ 'character)
388 (aset ebnf-iso-token-table ?! 'character)
389 (aset ebnf-iso-token-table ?: 'character)
390 (aset ebnf-iso-token-table ?. 'character)))
391
392
393 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
394 (defconst ebnf-iso-non-terminal-chars
395 (ebnf-range-regexp " 0-9A-Za-z_" ?\240 ?\377))
396
397
398 (defun ebnf-iso-lex ()
399 "Lexical analyser for ISO EBNF.
400
401 Return a lexical token.
402
403 See documentation for variable `ebnf-iso-lex'."
404 (if (>= (point) ebnf-limit)
405 'end-of-input
406 (let (token)
407 ;; skip spaces and comments
408 (while (if (> (following-char) 255)
409 (progn
410 (setq token 'error)
411 nil)
412 (setq token (aref ebnf-iso-token-table (following-char)))
413 (cond
414 ((eq token 'space)
415 (skip-chars-forward " \013\n\r\t" ebnf-limit)
416 (< (point) ebnf-limit))
417 ((or (eq token 'begin-parenthesis)
418 (eq token 'left-parenthesis))
419 (forward-char)
420 (if (/= (following-char) ?*)
421 ;; no comment
422 nil
423 ;; comment
424 (ebnf-iso-skip-comment)
425 t))
426 ((eq token 'form-feed)
427 (forward-char)
428 (setq ebnf-action 'form-feed))
429 (t nil)
430 )))
431 (cond
432 ;; end of input
433 ((>= (point) ebnf-limit)
434 'end-of-input)
435 ;; error
436 ((eq token 'error)
437 (error "Illegal character"))
438 ;; integer
439 ((eq token 'integer)
440 (setq ebnf-iso-lex (ebnf-buffer-substring "0-9"))
441 'integer)
442 ;; special: ?special?
443 ((eq token 'special)
444 (setq ebnf-iso-lex (concat (and ebnf-special-show-delimiter "?")
445 (ebnf-string " ->@-~" ?\? "special")
446 (and ebnf-special-show-delimiter "?")))
447 'special)
448 ;; terminal: "string"
449 ((eq token 'double-terminal)
450 (setq ebnf-iso-lex (ebnf-string " !#-~" ?\" "terminal"))
451 'terminal)
452 ;; terminal: 'string'
453 ((eq token 'single-terminal)
454 (setq ebnf-iso-lex (ebnf-string " -&(-~" ?\' "terminal"))
455 'terminal)
456 ;; non-terminal
457 ((eq token 'non-terminal)
458 (setq ebnf-iso-lex
459 (ebnf-iso-normalize
460 (ebnf-trim-right
461 (ebnf-buffer-substring ebnf-iso-non-terminal-chars))))
462 (and ebnf-no-meta-identifier
463 (error "Exception sequence should not contain a meta identifier"))
464 'non-terminal)
465 ;; begin optional, begin list or begin group
466 ((eq token 'left-parenthesis)
467 (forward-char)
468 (cond ((= (following-char) ?/)
469 (forward-char)
470 'begin-optional)
471 ((= (following-char) ?:)
472 (forward-char)
473 'begin-zero-or-more)
474 (t
475 'begin-group)
476 ))
477 ;; end optional or alternative
478 ((eq token 'slash)
479 (forward-char)
480 (if (/= (following-char) ?\))
481 'alternative
482 (forward-char)
483 'end-optional))
484 ;; end list
485 ((eq token 'colon)
486 (forward-char)
487 (if (/= (following-char) ?\))
488 'character
489 (forward-char)
490 'end-zero-or-more))
491 ;; begin group
492 ((eq token 'begin-parenthesis)
493 'begin-group)
494 ;; miscellaneous
495 (t
496 (forward-char)
497 token)
498 ))))
499
500
501 ;; replace the range "\177-\237" (see `ebnf-range-regexp').
502 (defconst ebnf-iso-comment-chars
503 (ebnf-range-regexp "^*(\000-\010\016-\037" ?\177 ?\237))
504
505
506 (defun ebnf-iso-skip-comment ()
507 (forward-char)
508 (cond
509 ;; open EPS file
510 ((and ebnf-eps-executing (= (following-char) ?\[))
511 (ebnf-eps-add-context (ebnf-iso-eps-filename)))
512 ;; close EPS file
513 ((and ebnf-eps-executing (= (following-char) ?\]))
514 (ebnf-eps-remove-context (ebnf-iso-eps-filename)))
515 ;; any other action in comment
516 (t
517 (setq ebnf-action (aref ebnf-comment-table (following-char))))
518 )
519 (let ((pair 1))
520 (while (> pair 0)
521 (skip-chars-forward ebnf-iso-comment-chars ebnf-limit)
522 (cond ((>= (point) ebnf-limit)
523 (error "Missing end of comment: `*)'"))
524 ((= (following-char) ?*)
525 (skip-chars-forward "*" ebnf-limit)
526 (when (= (following-char) ?\))
527 ;; end of comment
528 (forward-char)
529 (setq pair (1- pair))))
530 ((= (following-char) ?\()
531 (skip-chars-forward "(" ebnf-limit)
532 (when (= (following-char) ?*)
533 ;; beginning of comment
534 (forward-char)
535 (setq pair (1+ pair))))
536 (t
537 (error "Illegal character"))
538 ))))
539
540
541 (defun ebnf-iso-eps-filename ()
542 (forward-char)
543 (buffer-substring-no-properties
544 (point)
545 (let ((chars (concat ebnf-iso-comment-chars "\n"))
546 found)
547 (while (not found)
548 (skip-chars-forward chars ebnf-limit)
549 (setq found
550 (cond ((>= (point) ebnf-limit)
551 (point))
552 ((= (following-char) ?*)
553 (skip-chars-forward "*" ebnf-limit)
554 (if (/= (following-char) ?\))
555 nil
556 (backward-char)
557 (point)))
558 ((= (following-char) ?\()
559 (forward-char)
560 (if (/= (following-char) ?*)
561 nil
562 (backward-char)
563 (point)))
564 (t
565 (point))
566 )))
567 found)))
568
569
570 (defun ebnf-iso-normalize (str)
571 (if (not ebnf-iso-normalize-p)
572 str
573 (let ((len (length str))
574 (stri 0)
575 (spaces 0))
576 ;; count exceeding spaces
577 (while (< stri len)
578 (if (/= (aref str stri) ?\ )
579 (setq stri (1+ stri))
580 (setq stri (1+ stri))
581 (while (and (< stri len) (= (aref str stri) ?\ ))
582 (setq stri (1+ stri)
583 spaces (1+ spaces)))))
584 (if (zerop spaces)
585 ;; no exceeding space
586 str
587 ;; at least one exceeding space
588 (let ((new (make-string (- len spaces) ?\ ))
589 (newi 0))
590 ;; eliminate exceeding spaces
591 (setq stri 0)
592 (while (> spaces 0)
593 (if (/= (aref str stri) ?\ )
594 (progn
595 (aset new newi (aref str stri))
596 (setq stri (1+ stri)
597 newi (1+ newi)))
598 (aset new newi (aref str stri))
599 (setq stri (1+ stri)
600 newi (1+ newi))
601 (while (and (> spaces 0) (= (aref str stri) ?\ ))
602 (setq stri (1+ stri)
603 spaces (1- spaces)))))
604 ;; remaining is normalized
605 (while (< stri len)
606 (aset new newi (aref str stri))
607 (setq stri (1+ stri)
608 newi (1+ newi)))
609 new)))))
610
611 \f
612 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
613
614
615 (provide 'ebnf-iso)
616
617
618 ;;; arch-tag: 03315eef-8f64-404a-bf9d-256d42442ee3
619 ;;; ebnf-iso.el ends here