]> code.delx.au - gnu-emacs/blob - lisp/progmodes/ebnf-bnf.el
5332e0d76f9d8a4d13dd6395eda20468ba7e4f8a
[gnu-emacs] / lisp / progmodes / ebnf-bnf.el
1 ;;; ebnf-bnf.el --- parser for EBNF
2
3 ;; Copyright (C) 1999-2011
4 ;; Free Software Foundation, Inc.
5
6 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8 ;; Keywords: wp, ebnf, PostScript
9 ;; Version: 1.10
10 ;; Package: ebnf2ps
11
12 ;; This file is part of GNU Emacs.
13
14 ;; GNU Emacs is free software: you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation, either version 3 of the License, or
17 ;; (at your option) any later version.
18
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
23
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
26
27 ;;; Commentary:
28
29 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30 ;;
31 ;;
32 ;; This is part of ebnf2ps package.
33 ;;
34 ;; This package defines a parser for EBNF.
35 ;;
36 ;; See ebnf2ps.el for documentation.
37 ;;
38 ;;
39 ;; EBNF Syntax
40 ;; -----------
41 ;;
42 ;; The current EBNF that ebnf2ps accepts has the following constructions:
43 ;;
44 ;; ; comment (until end of line)
45 ;; A non-terminal
46 ;; "C" terminal
47 ;; ?C? special
48 ;; $A default non-terminal
49 ;; $"C" default terminal
50 ;; $?C? default special
51 ;; A = B. production (A is the header and B the body)
52 ;; C D sequence (C occurs before D)
53 ;; C | D alternative (C or D occurs)
54 ;; A - B exception (A excluding B, B without any non-terminal)
55 ;; n * A repetition (A repeats at least n (integer) times)
56 ;; n * n A repetition (A repeats exactly n (integer) times)
57 ;; n * m A repetition (A repeats at least n (integer) and at most
58 ;; m (integer) times)
59 ;; (C) group (expression C is grouped together)
60 ;; [C] optional (C may or may not occur)
61 ;; C+ one or more occurrences of C
62 ;; {C}+ one or more occurrences of C
63 ;; {C}* zero or more occurrences of C
64 ;; {C} zero or more occurrences of C
65 ;; C / D equivalent to: C {D C}*
66 ;; {C || D}+ equivalent to: C {D C}*
67 ;; {C || D}* equivalent to: [C {D C}*]
68 ;; {C || D} equivalent to: [C {D C}*]
69 ;;
70 ;; The EBNF syntax written using the notation above is:
71 ;;
72 ;; EBNF = {production}+.
73 ;;
74 ;; production = non_terminal "=" body ".". ;; production
75 ;;
76 ;; body = {sequence || "|"}*. ;; alternative
77 ;;
78 ;; sequence = {exception}*. ;; sequence
79 ;;
80 ;; exception = repeat [ "-" repeat]. ;; exception
81 ;;
82 ;; repeat = [ integer "*" [ integer ]] term. ;; repetition
83 ;;
84 ;; term = factor
85 ;; | [factor] "+" ;; one-or-more
86 ;; | [factor] "/" [factor] ;; one-or-more
87 ;; .
88 ;;
89 ;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
90 ;; | [ "$" ] non_terminal ;; non-terminal
91 ;; | [ "$" ] "?" special "?" ;; special
92 ;; | "(" body ")" ;; group
93 ;; | "[" body "]" ;; zero-or-one
94 ;; | "{" body [ "||" body ] "}+" ;; one-or-more
95 ;; | "{" body [ "||" body ] "}*" ;; zero-or-more
96 ;; | "{" body [ "||" body ] "}" ;; zero-or-more
97 ;; .
98 ;;
99 ;; non_terminal = "[!#%&'*-,0-:<>@-Z\\\\^-z~\\240-\\377]+".
100 ;; ;; that is, a valid non_terminal accepts decimal digits, letters (upper
101 ;; ;; and lower), 8-bit accented characters,
102 ;; ;; "!", "#", "%", "&", "'", "*", "+", ",", ":",
103 ;; ;; "<", ">", "@", "\", "^", "_", "`" and "~".
104 ;;
105 ;; terminal = "\\([^\"\\]\\|\\\\[ -~\\240-\\377]\\)+".
106 ;; ;; that is, a valid terminal accepts any printable character (including
107 ;; ;; 8-bit accented characters) except `"', as `"' is used to delimit a
108 ;; ;; terminal. Also, accepts escaped characters, that is, a character
109 ;; ;; pair starting with `\' followed by a printable character, for
110 ;; ;; example: \", \\.
111 ;;
112 ;; special = "[^?\\000-\\010\\012-\\037\\177-\\237]*".
113 ;; ;; that is, a valid special accepts any printable character (including
114 ;; ;; 8-bit accented characters) and tabs except `?', as `?' is used to
115 ;; ;; delimit a special.
116 ;;
117 ;; integer = "[0-9]+".
118 ;; ;; that is, an integer is a sequence of one or more decimal digits.
119 ;;
120 ;; comment = ";" "[^\\n\\000-\\010\\016-\\037\\177-\\237]*" "\\n".
121 ;; ;; that is, a comment starts with the character `;' and terminates at end
122 ;; ;; of line. Also, it only accepts printable characters (including 8-bit
123 ;; ;; accented characters) and tabs.
124 ;;
125 ;;
126 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
127
128 ;;; Code:
129
130
131 (require 'ebnf-otz)
132
133
(defvar ebnf-bnf-lex nil
  "Text of the token most recently read by function `ebnf-bnf-lex'.
The function sets this variable when it returns one of the tokens
`integer', `special', `terminal' or `non-terminal'; the parser then
reads the token text from here.")
136
137 \f
138 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
139 ;; Syntactic analyzer
140
141
142 ;;; EBNF = {production}+.
143
(defun ebnf-bnf-parser (start)
  "Parse the EBNF text from buffer position START up to `ebnf-limit'.
Return the list of parsed productions, most recently parsed first,
leaving out every rule for which `ebnf-add-empty-rule-list' returns
non-nil.  Signal an error if the very first token is `end-of-input'.
On normal return point is moved back to where it was on entry."
  (let ((span (1+ (- ebnf-limit start)))   ; number of positions to scan
        (offset (1- start))                ; makes progress start near 0%
        (saved-point (point))
        (productions nil)
        lookahead parsed)
    (goto-char start)
    (setq lookahead (ebnf-bnf-lex))
    (when (eq lookahead 'end-of-input)
      (error "Invalid EBNF file format"))
    (while (not (eq lookahead 'end-of-input))
      ;; Report progress as a percentage of the scanned span.
      (ebnf-message-float "Parsing...%s%%"
                          (/ (* (- (point) offset) 100.0) span))
      ;; `ebnf-production' returns (NEXT-TOKEN . RULE).
      (setq parsed (ebnf-production lookahead))
      (setq lookahead (car parsed))
      (unless (ebnf-add-empty-rule-list (cdr parsed))
        (push (cdr parsed) productions)))
    (goto-char saved-point)
    productions))
165
166
167 ;;; production = non-terminal "=" body ".".
168
(defun ebnf-production (token)
  "Parse one production whose first token is TOKEN.
TOKEN must be `non-terminal'; the header name is the text currently
held in the variable `ebnf-bnf-lex'.  The header must be followed by
`=', a body and the end-of-production token `period'.
Return (NEXT-TOKEN . PRODUCTION), where NEXT-TOKEN is the token read
right after the production and PRODUCTION is the value of
`ebnf-make-production'."
  (let ((name ebnf-bnf-lex)
        ;; Capture the pending action, then clear it for what follows.
        (pending-action ebnf-action))
    (setq ebnf-action nil)
    (unless (eq token 'non-terminal)
      (error "Invalid header production"))
    (unless (eq (ebnf-bnf-lex) 'equal)
      (error "Invalid production: missing `='"))
    (let ((parsed (ebnf-body)))         ; (TERMINATING-TOKEN . BODY)
      (unless (eq (car parsed) 'period)
        (error "Invalid production: missing `.'"))
      (ebnf-eps-add-production name)
      ;; Read the lookahead token before building the production node,
      ;; as the original code does.
      (let ((lookahead (ebnf-bnf-lex)))
        (cons lookahead
              (ebnf-make-production name (cdr parsed) pending-action))))))
185
186
187 ;;; body = {sequence || "|"}*.
188
(defun ebnf-body ()
  "Parse a body: sequences separated by the `alternative' token.
Each sequence that ends with `alternative' is collected, in reverse
order of appearance.  The collected list and the final result of
`ebnf-sequence' (a cons whose car is the terminating token) are handed
to `ebnf-token-alternative', whose value is returned."
  (let ((alternatives nil)
        (current (ebnf-sequence)))
    (while (eq (car current) 'alternative)
      (push (cdr current) alternatives)
      (setq current (ebnf-sequence)))
    (ebnf-token-alternative alternatives current)))
195
196
197 ;;; sequence = {exception}*.
198
(defun ebnf-sequence ()
  "Parse a sequence of exceptions, starting with the next token.
Elements are collected, in reverse order of appearance, until
`ebnf-exception' yields no element.
Return (TOKEN . SEQUENCE), where TOKEN is the token that ended the
sequence and SEQUENCE is the value of `ebnf-token-sequence' applied to
the collected elements."
  (let ((lookahead (ebnf-bnf-lex))
        (elements nil)
        (more t))
    (while more
      ;; `ebnf-exception' returns (NEXT-TOKEN . ELEMENT-OR-NIL).
      (let ((parsed (ebnf-exception lookahead)))
        (setq lookahead (car parsed))
        (if (cdr parsed)
            (push (cdr parsed) elements)
          (setq more nil))))
    (cons lookahead (ebnf-token-sequence elements))))
208
209
210 ;;; exception = repeat [ "-" repeat].
211
(defun ebnf-exception (token)
  "Parse an exception, that is, `repeat [ - repeat ]', starting at TOKEN.
If the first repeat is not followed by the `except' token, return the
value of `ebnf-repeat' unchanged.  Otherwise parse a second repeat,
check with `ebnf-no-non-terminal' that it contains no non-terminal,
and return the value of `ebnf-token-except'."
  (let ((left (ebnf-repeat token)))
    (cond
     ;; repeat - repeat
     ((eq (car left) 'except)
      (let ((right (ebnf-repeat (ebnf-bnf-lex))))
        (ebnf-no-non-terminal (cdr right))
        (ebnf-token-except (cdr left) right)))
     ;; repeat
     (t
      left))))
221
222
(defun ebnf-no-non-terminal (node)
  "Signal an error if NODE contains a non-terminal.
NODE is examined only when it is a vector; its kind, as given by
`ebnf-node-kind', selects which parts are checked recursively.
Return nil when no non-terminal is found."
  (when (vectorp node)
    (let ((kind (ebnf-node-kind node)))
      (cond
       ((eq kind 'ebnf-generate-non-terminal)
        (error "Exception sequence should not contain a non-terminal"))
       ;; For a repeat node the separator slot is the part checked.
       ((eq kind 'ebnf-generate-repeat)
        (ebnf-no-non-terminal (ebnf-node-separator node)))
       ((or (eq kind 'ebnf-generate-optional)
            (eq kind 'ebnf-generate-except))
        (ebnf-no-non-terminal (ebnf-node-list node)))
       ((or (eq kind 'ebnf-generate-one-or-more)
            (eq kind 'ebnf-generate-zero-or-more))
        (ebnf-no-non-terminal (ebnf-node-list node))
        (ebnf-no-non-terminal (ebnf-node-separator node)))
       ;; Alternatives and sequences hold a list of child nodes.
       ((or (eq kind 'ebnf-generate-alternative)
            (eq kind 'ebnf-generate-sequence))
        (dolist (child (ebnf-node-list node))
          (ebnf-no-non-terminal child)))))))
242
243
244 ;;; repeat = [ integer "*" [ integer ]] term.
245
(defun ebnf-repeat (token)
  "Parse a repetition, `[ integer * [ integer ]] term', starting at TOKEN.
When TOKEN is not `integer', simply return the value of `ebnf-term'.
Otherwise the integer text held in the variable `ebnf-bnf-lex' is the
repetition count; it must be followed by the `repeat' token, then by
an optional second integer, then by a term.  The result is the value
of `ebnf-token-repeat'."
  (cond
   ((eq token 'integer)
    (let ((lower ebnf-bnf-lex)
          (upper nil)
          lookahead)
      (unless (eq (ebnf-bnf-lex) 'repeat)
        (error "Missing `*'"))
      (setq lookahead (ebnf-bnf-lex))
      ;; Optional second integer.
      (when (eq lookahead 'integer)
        (setq upper ebnf-bnf-lex)
        (setq lookahead (ebnf-bnf-lex)))
      (ebnf-token-repeat lower (ebnf-term lookahead) upper)))
   (t
    (ebnf-term token))))
258
259
260 ;;; term = factor
261 ;;; | [factor] "+" ;; one-or-more
262 ;;; | [factor] "/" [factor] ;; one-or-more
263 ;;; .
264
(defun ebnf-term (token)
  "Parse a term starting at TOKEN.
A term is a factor, optionally followed by `+' (token `one-or-more')
or by `/' (token `list') and an optional separator factor.
Return (NEXT-TOKEN . NODE); NODE is nil when no factor was found."
  (let ((operand (ebnf-factor token))
        (lookahead token))
    ;; A factor consumed TOKEN, so fetch the token that follows it.
    (when operand
      (setq lookahead (ebnf-bnf-lex)))
    (cond
     ;; [factor] +
     ((eq lookahead 'one-or-more)
      (let ((next (ebnf-bnf-lex)))
        (cons next
              (when operand
                (let ((kind (ebnf-node-kind operand)))
                  (cond
                   ;; { A }+ + ==> { A }+
                   ;; { A }* + ==> { A }*
                   ((or (eq kind 'ebnf-generate-zero-or-more)
                        (eq kind 'ebnf-generate-one-or-more))
                    operand)
                   ;; [ A ] + ==> { A }*
                   ((eq kind 'ebnf-generate-optional)
                    (ebnf-make-zero-or-more (list operand)))
                   ;; A +
                   (t
                    (ebnf-make-one-or-more (list operand)))))))))
     ;; [factor] / [factor]
     ((eq lookahead 'list)
      (let* ((after-slash (ebnf-bnf-lex))
             (separator (ebnf-factor after-slash)))
        ;; A separator without a left operand gets an empty operand.
        (when (and separator (null operand))
          (setq operand (ebnf-make-empty)))
        ;; If a separator factor consumed AFTER-SLASH, read a new token;
        ;; otherwise AFTER-SLASH itself is the next token.
        (let ((next (if separator
                        (ebnf-bnf-lex)
                      after-slash)))
          (cons next
                (and operand
                     (ebnf-make-one-or-more operand separator))))))
     ;; factor
     (t
      (cons lookahead operand)))))
303
304
305 ;;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
306 ;;; | [ "$" ] non_terminal ;; non-terminal
307 ;;; | [ "$" ] "?" special "?" ;; special
308 ;;; | "(" body ")" ;; group
309 ;;; | "[" body "]" ;; zero-or-one
310 ;;; | "{" body [ "||" body ] "}+" ;; one-or-more
311 ;;; | "{" body [ "||" body ] "}*" ;; zero-or-more
312 ;;; | "{" body [ "||" body ] "}" ;; zero-or-more
313 ;;; .
314
(defun ebnf-factor (token)
  "Build the node for the factor introduced by TOKEN.
Terminals, non-terminals and specials take their text from the
variable `ebnf-bnf-lex'.  Groups, optionals and lists parse their
contents with `ebnf-body' and signal an error when the matching
closing token is missing.
Return nil when TOKEN does not start a factor."
  (cond
   ;; terminal
   ((eq token 'terminal)
    (ebnf-make-terminal ebnf-bnf-lex))
   ;; non-terminal
   ((eq token 'non-terminal)
    (ebnf-make-non-terminal ebnf-bnf-lex))
   ;; special
   ((eq token 'special)
    (ebnf-make-special ebnf-bnf-lex))
   ;; group: ( body )
   ((eq token 'begin-group)
    (let ((inner (ebnf-body)))
      (unless (eq (car inner) 'end-group)
        (error "Missing `)'"))
      (cdr inner)))
   ;; optional: [ body ]
   ((eq token 'begin-optional)
    (let ((inner (ebnf-body)))
      (unless (eq (car inner) 'end-optional)
        (error "Missing `]'"))
      (ebnf-token-optional (cdr inner))))
   ;; list: { body [ || body ] } with optional + or * suffix
   ((eq token 'begin-list)
    (let* ((inner (ebnf-body))
           (closer (car inner))
           (items (cdr inner))
           (separator nil))
      ;; { A || B }: a second body gives the separator.
      (when (eq closer 'list-separator)
        (let ((sep-body (ebnf-body)))
          (setq closer (car sep-body)
                separator (cdr sep-body))))
      (cond
       ;; { A }+
       ((eq closer 'end-one-or-more)
        (ebnf-make-one-or-more items separator))
       ;; { A }*
       ((eq closer 'end-zero-or-more)
        (ebnf-make-zero-or-more items separator))
       (t
        (error "Missing `}+', `}*' or `}'")))))
   ;; no term: `cond' falls through and yields nil
   ))
363
364 \f
365 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
366 ;; Lexical analyzer
367
368
(defconst ebnf-bnf-token-table
  (make-vector 256 'error)
  "Vector of 256 entries mapping a character code to a lexical token.
Every entry starts out as `error'; `ebnf-bnf-initialize' fills in the
other token classes.")
371
372
(defun ebnf-bnf-initialize ()
  "Initialize EBNF token table.
Fill `ebnf-bnf-token-table' with the token class of each character.
Entries for control characters are not touched here.  Later
assignments override earlier ones, so the characters given by
`ebnf-lex-comment-char' and `ebnf-lex-eop-char' take precedence over
everything else."
  (let ((table ebnf-bnf-token-table))
    ;; Half-open code ranges [FROM, TO) sharing one token class:
    ;; printable characters, digits, printable characters again and
    ;; European 8-bit accented characters.
    (dolist (range '((?\040 ?\060 non-terminal)
                     (?\060 ?\072 integer)
                     (?\072 ?\177 non-terminal)
                     (?\240 ?\400 non-terminal)))
      (let ((code (car range))
            (end (nth 1 range))
            (class (nth 2 range)))
        (while (< code end)
          (aset table code class)
          (setq code (1+ code)))))
    ;; Override space characters:
    ;; [VT] vertical tab, [NL] linefeed, [CR] carriage return,
    ;; [HT] horizontal tab and [SP] space.
    (dolist (blank '(?\013 ?\n ?\r ?\t ?\s))
      (aset table blank 'space))
    ;; Override form feed character:
    (aset table ?\f 'form-feed)
    ;; Override other lexical characters:
    (dolist (entry '((?\" . terminal)
                     (?\? . special)
                     (?\( . begin-group)
                     (?\) . end-group)
                     (?*  . repeat)
                     (?-  . except)
                     (?=  . equal)
                     (?\[ . begin-optional)
                     (?\] . end-optional)
                     (?\{ . begin-list)
                     (?|  . alternative)
                     (?\} . end-list)
                     (?/  . list)
                     (?+  . one-or-more)
                     (?$  . default)))
      (aset table (car entry) (cdr entry)))
    ;; Override comment character:
    (aset table ebnf-lex-comment-char 'comment)
    ;; Override end of production character:
    (aset table ebnf-lex-eop-char 'period)))
422
423
;; The 8-bit range "\240-\377" is not written in the string below; its
;; bounds are passed to `ebnf-range-regexp' instead (see that function).
(defconst ebnf-bnf-non-terminal-chars
  (ebnf-range-regexp "!#%&'*-,0-:<>@-Z\\\\^-z~" ?\240 ?\377)
  "Character set `ebnf-bnf-lex' passes to `ebnf-buffer-substring'
when it reads the text of a non-terminal.")
427
428
(defun ebnf-bnf-lex ()
  "Lexical analyzer for EBNF.

Return a lexical token.

See documentation for variable `ebnf-bnf-lex'.

Spaces, comments and form feeds before the token are skipped; a form
feed sets `ebnf-action' to `form-feed'.  Return `end-of-input' when
point reaches `ebnf-limit'.  For the tokens `integer', `special',
`terminal' and `non-terminal' the token text is stored in the variable
`ebnf-bnf-lex'.  `ebnf-default-p' is set to t when the token was
prefixed by `$' and to nil otherwise.  Signal an error on a character
that cannot start a token."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; skip spaces and comments
      (while (if (> (following-char) 255)
                 ;; the token table only covers character codes 0..255
                 (progn
                   (setq token 'error)
                   nil)
               (setq token (aref ebnf-bnf-token-table (following-char)))
               (cond
                ((eq token 'space)
                 (skip-chars-forward " \013\n\r\t" ebnf-limit)
                 (< (point) ebnf-limit))
                ((eq token 'comment)
                 (ebnf-bnf-skip-comment))
                ((eq token 'form-feed)
                 (forward-char)
                 (setq ebnf-action 'form-feed))
                (t nil)
                )))
      (setq ebnf-default-p nil)
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Invalid character"))
       ;; default
       ((eq token 'default)
        (forward-char)
        ;; Guard the table lookup: a character above 255 after `$'
        ;; would otherwise signal `args-out-of-range' from `aref'
        ;; instead of the intended error below.
        (if (and (<= (following-char) 255)
                 (memq (aref ebnf-bnf-token-table (following-char))
                       '(terminal non-terminal special)))
            (prog1
                (ebnf-bnf-lex)
              (setq ebnf-default-p t))
          (error "Invalid `default' element")))
       ;; integer
       ((eq token 'integer)
        (setq ebnf-bnf-lex (ebnf-buffer-substring "0-9"))
        'integer)
       ;; special: ?special?
       ((eq token 'special)
        (setq ebnf-bnf-lex (concat (and ebnf-special-show-delimiter "?")
                                   (ebnf-string " ->@-~" ?\? "special")
                                   (and ebnf-special-show-delimiter "?")))
        'special)
       ;; terminal: "string"
       ((eq token 'terminal)
        (setq ebnf-bnf-lex (ebnf-unescape-string (ebnf-get-string)))
        'terminal)
       ;; non-terminal or terminal
       ((eq token 'non-terminal)
        (setq ebnf-bnf-lex (ebnf-buffer-substring ebnf-bnf-non-terminal-chars))
        ;; A name matched in full by `ebnf-terminal-regexp' is
        ;; reported as a terminal.
        (let ((case-fold-search ebnf-case-fold-search)
              match)
          (if (and ebnf-terminal-regexp
                   (setq match (string-match ebnf-terminal-regexp
                                             ebnf-bnf-lex))
                   (zerop match)
                   (= (match-end 0) (length ebnf-bnf-lex)))
              'terminal
            'non-terminal)))
       ;; end of list: }+, }*, }
       ((eq token 'end-list)
        (forward-char)
        (cond
         ((= (following-char) ?+)
          (forward-char)
          'end-one-or-more)
         ((= (following-char) ?*)
          (forward-char)
          'end-zero-or-more)
         (t
          'end-zero-or-more)
         ))
       ;; alternative: |, ||
       ((eq token 'alternative)
        (forward-char)
        (if (/= (following-char) ?|)
            'alternative
          (forward-char)
          'list-separator))
       ;; miscellaneous: {, (, ), [, ], ., =, /, +, -, *
       (t
        (forward-char)
        token)
       ))))
523
524
;; The 8-bit range "\177-\237" is not written in the string below; its
;; bounds are passed to `ebnf-range-regexp' instead (see that function).
(defconst ebnf-bnf-comment-chars
  (ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237)
  "Character set used to skip over the text of a comment.
It is passed to `skip-chars-forward' by `ebnf-bnf-skip-comment' and to
`ebnf-buffer-substring' by `ebnf-bnf-eps-filename'.")
528
529
(defun ebnf-bnf-skip-comment ()
  "Skip the comment starting at point and handle any action it carries.
Point must be on the comment character.  While `ebnf-eps-executing' is
non-nil, a comment whose next character is `[', `]', `H' or `F' opens
an EPS context, closes an EPS context, or supplies an EPS header or
footer comment, respectively.  Any other comment sets `ebnf-action'
from `ebnf-comment-table' and is skipped.
Return t if the comment ended with a newline, nil if `ebnf-limit' was
reached; signal an error on any other character."
  (forward-char)
  (let ((marker (following-char)))
    (if (and ebnf-eps-executing
             (memq marker '(?\[ ?\] ?H ?F)))
        ;; The rest of the line is the argument of the EPS directive.
        (let ((argument (ebnf-bnf-eps-filename)))
          (cond
           ;; open EPS file
           ((= marker ?\[)
            (ebnf-eps-add-context argument))
           ;; close EPS file
           ((= marker ?\])
            (ebnf-eps-remove-context argument))
           ;; EPS header
           ((= marker ?H)
            (ebnf-eps-header-comment argument))
           ;; EPS footer
           (t
            (ebnf-eps-footer-comment argument))))
      ;; any other action in comment
      ;; NOTE(review): `ebnf-comment-table' is defined outside this
      ;; file; this lookup assumes MARKER is a valid index -- confirm.
      (setq ebnf-action (aref ebnf-comment-table marker))
      (skip-chars-forward ebnf-bnf-comment-chars ebnf-limit)))
  ;; check for a valid end of comment
  (cond
   ((>= (point) ebnf-limit)
    nil)
   ((/= (following-char) ?\n)
    (error "Invalid character"))
   (t
    (forward-char)
    t)))
559
560
(defun ebnf-bnf-eps-filename ()
  "Return the text that follows an EPS directive character in a comment.
Move point one character forward, past the directive character, and
return the value of `ebnf-buffer-substring' called with
`ebnf-bnf-comment-chars'."
  (forward-char 1)
  (let ((text (ebnf-buffer-substring ebnf-bnf-comment-chars)))
    text))
564
565
(defun ebnf-unescape-string (str)
  "Return STR with each backslash escape replaced by the escaped character.
A backslash followed by another character is dropped and the following
character is kept as is.  A lone backslash in the last position is not
an escape and is kept.  When STR contains no escape, STR itself is
returned; otherwise a new string is returned."
  (let* ((total (length str))
         (last-index (1- total))
         (escapes 0)
         (src 0))
    ;; First pass: count the escapes.  The last character is never
    ;; looked at, since a backslash there has nothing to escape.
    (while (< src last-index)
      (if (/= (aref str src) ?\\)
          (setq src (1+ src))
        (setq escapes (1+ escapes)
              src (+ src 2))))
    (if (= escapes 0)
        ;; no escapes
        str
      ;; Second pass: copy the characters, dropping one backslash for
      ;; each counted escape.  Once all escapes are used up, the
      ;; remaining characters are copied verbatim.
      (let ((result (make-string (- total escapes) ?\ ))
            (dst 0))
        (setq src 0)
        (while (< src total)
          (when (and (> escapes 0)
                     (= (aref str src) ?\\))
            (setq src (1+ src)
                  escapes (1- escapes)))
          (aset result dst (aref str src))
          (setq dst (1+ dst)
                src (1+ src)))
        result))))
600
601 \f
602 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
603
604
605 (provide 'ebnf-bnf)
606
607
608 ;;; ebnf-bnf.el ends here