1 ;;; ebnf-abn.el --- parser for ABNF (Augmented BNF)
3 ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 ;; Free Software Foundation, Inc.
6 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8 ;; Keywords: wp, ebnf, PostScript
12 ;; This file is part of GNU Emacs.
14 ;; GNU Emacs is free software: you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation, either version 3 of the License, or
17 ;; (at your option) any later version.
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
29 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32 ;; This is part of ebnf2ps package.
34 ;; This package defines a parser for ABNF (Augmented BNF).
36 ;; See ebnf2ps.el for documentation.
43 ;; `http://www.ietf.org/rfc/rfc2234.txt'
45 ;; `http://www.faqs.org/rfcs/rfc2234.html'
47 ;; `http://www.rnp.br/ietf/rfc/rfc2234.txt'
48 ;; ("Augmented BNF for Syntax Specifications: ABNF").
51 ;; rulelist = 1*( rule / (*c-wsp c-nl) )
53 ;; rule = rulename defined-as elements c-nl
54 ;; ; continues if next line starts with white space
56 ;; rulename = ALPHA *(ALPHA / DIGIT / "-")
58 ;; defined-as = *c-wsp ("=" / "=/") *c-wsp
59 ;; ; basic rules definition and incremental
62 ;; elements = alternation *c-wsp
64 ;; c-wsp = WSP / (c-nl WSP)
66 ;; c-nl = comment / CRLF
67 ;; ; comment or newline
69 ;; comment = ";" *(WSP / VCHAR) CRLF
71 ;; alternation = concatenation
72 ;; *(*c-wsp "/" *c-wsp concatenation)
74 ;; concatenation = repetition *(1*c-wsp repetition)
76 ;; repetition = [repeat] element
78 ;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
80 ;; element = rulename / group / option /
81 ;; char-val / num-val / prose-val
83 ;; group = "(" *c-wsp alternation *c-wsp ")"
85 ;; option = "[" *c-wsp alternation *c-wsp "]"
87 ;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
88 ;; ; quoted string of SP and VCHAR without DQUOTE
90 ;; num-val = "%" (bin-val / dec-val / hex-val)
92 ;; bin-val = "b" 1*BIT
93 ;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
94 ;; ; series of concatenated bit values
95 ;; ; or single ONEOF range
97 ;; dec-val = "d" 1*DIGIT
98 ;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
100 ;; hex-val = "x" 1*HEXDIG
101 ;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
103 ;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
104 ;; ; bracketed string of SP and VCHAR without
106 ;; ; prose description, to be used as last resort
108 ;; ; Core rules -- the coding depends on the system, here is used 7-bit ASCII
110 ;; ALPHA = %x41-5A / %x61-7A
116 ;; ; any 7-bit US-ASCII character, excluding NUL
122 ;; ; Internet standard newline
124 ;; CTL = %x00-1F / %x7F
131 ;; ; " (Double Quote)
133 ;; HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
141 ;; LWSP = *(WSP / CRLF WSP)
142 ;; ; linear white space (past newline)
151 ;; ; visible (printing) characters
159 ;; 1. Rule names and terminal strings are case INSENSITIVE.
160 ;; So, the following rule names are all equal:
161 ;; Rule-name, rule-Name, rule-name, RULE-NAME
162 ;; Also, the following strings are equal:
163 ;; "abc", "ABC", "aBc", "Abc", "aBC", etc.
165 ;; 2. To have a case SENSITIVE string, use the character notation.
166 ;; For example, to specify the lowercase string "abc", use:
169 ;; 3. There are no implicit spaces between elements, for example, the
176 ;; mumble = foo bar foo
178 ;; Are equivalent to the following rule:
180 ;; mumble = %x61.62.61
182 ;; If spaces are needed, they should be explicitly specified, like:
184 ;; spaces = 1*(%x20 / %x09) ; one or more spaces or tabs
186 ;; mumble = foo spaces bar spaces foo
188 ;; 4. A line starting with a space or tab is considered a continuation line.
189 ;; For example, the rule:
199 ;; Differences Between ABNF And ebnf2ps ABNF
200 ;; -----------------------------------------
202 ;; Besides the characters that ABNF accepts, ebnf2ps ABNF also accepts the
203 ;; underscore (_) for rule names and European 8-bit accented characters (from
204 ;; \240 to \377) for rule names, strings and comments.
207 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Text of the most recently scanned token.  The lexer function of the same
;; name stores into this variable (the digits of an integer, or a prose value
;; re-wrapped in "<" and ">"), and the parser functions read it to obtain rule
;; names, repetition bounds and terminal/non-terminal text.  Variable and
;; function share one symbol; Emacs Lisp keeps the two namespaces separate.
215 (defvar ebnf-abn-lex nil
216 "Value returned by `ebnf-abn-lex' function.")
219 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
220 ;; Syntactic analyzer
223 ;;; rulelist = 1*( rule / (*c-wsp c-nl) )
;; Parser entry point (see the `rulelist' production above).  Reads tokens with
;; `ebnf-abn-lex' and parses one rule per loop iteration until `end-of-input'.
;; START is only used here to size the region (`ebnf-limit' - START + 1) for
;; the percentage computed inside the loop.
;; NOTE(review): the embedded line numbers jump (226, 228-229, 231, 238-239,
;; 242-243 and 246+ are absent), so the binding of `bias', the consumer of the
;; percentage and the function's return form are not visible -- confirm
;; against the full file.
225 (defun ebnf-abn-parser (start)
227 (let ((total (+ (- ebnf-limit start) 1))
230 rule-list token rule)
232 (setq token (ebnf-abn-lex))
;; An input with no tokens at all is rejected outright.
233 (and (eq token 'end-of-input)
234 (error "Invalid ABNF file format"))
;; A single leading `end-of-rule' token is skipped before the first rule.
235 (and (eq token 'end-of-rule)
236 (setq token (ebnf-abn-lex)))
237 (while (not (eq token 'end-of-input))
;; Percentage of the region consumed so far (float arithmetic via 100.0).
240 (/ (* (- (point) bias) 100.0) total))
241 (setq token (ebnf-abn-rule token)
;; The rule is consed onto `rule-list' only when
;; `ebnf-add-empty-rule-list' returns nil for it.
244 (or (ebnf-add-empty-rule-list rule)
245 (setq rule-list (cons rule rule-list))))
250 ;;; rule = rulename defined-as elements c-nl
251 ;;; ; continues if next line starts with white space
253 ;;; rulename = ALPHA *(ALPHA / DIGIT / "-")
255 ;;; defined-as = *c-wsp ("=" / "=/") *c-wsp
256 ;;; ; basic rules definition and incremental
259 ;;; elements = alternation *c-wsp
261 ;;; c-wsp = WSP / (c-nl WSP)
263 ;;; c-nl = comment / CRLF
264 ;;; ; comment or newline
266 ;;; comment = ";" *(WSP / VCHAR) CRLF
;; Parse one rule: NAME ("=" / "=/") ALTERNATION END.
;; TOKEN is the token already read by the caller; it must be `non-terminal',
;; and the rule name is taken from the variable `ebnf-abn-lex' before the next
;; call to the lexer overwrites it.
;; Signals an error for a bad rule name, a missing `=' / `=/', or a rule that
;; is not followed by `end-of-rule' or `end-of-input'.
;; NOTE(review): `elements' and `action' are bound on lines missing from this
;; listing (271-272), and line 286 is absent too -- confirm the exact return
;; shape against the full file.
269 (defun ebnf-abn-rule (token)
270 (let ((name ebnf-abn-lex)
273 (setq ebnf-action nil)
274 (or (eq token 'non-terminal)
275 (error "Invalid rule name"))
276 (setq token (ebnf-abn-lex))
277 (or (memq token '(equal incremental-alternative))
278 (error "Invalid rule: missing `=' or `=/'"))
;; An incremental rule is recorded under the name with " =/" appended.
279 (and (eq token 'incremental-alternative)
280 (setq name (concat name " =/")))
;; `ebnf-abn-alternation' result is used as a cons: its car is the token
;; that stopped the alternation, its cdr the parsed body.
281 (setq elements (ebnf-abn-alternation))
282 (or (memq (car elements) '(end-of-rule end-of-input))
283 (error "Invalid rule: there is no end of rule"))
284 (setq elements (cdr elements))
285 (ebnf-eps-add-production name)
287 (ebnf-make-production name elements action))))
290 ;;; alternation = concatenation
291 ;;; *(*c-wsp "/" *c-wsp concatenation)
;; Parse an alternation.  Each iteration lexes a token, parses one
;; concatenation starting at it, and pushes the concatenation's cdr onto
;; `body' (so `body' is built in reverse order).  The concatenation that ends
;; the loop is passed, together with `body', to `ebnf-token-alternative'.
;; NOTE(review): the symbol the loop compares against is on a line missing
;; from this listing (298); presumably `alternative' -- confirm.
294 (defun ebnf-abn-alternation ()
295 (let (body concatenation)
296 (while (eq (car (setq concatenation
297 (ebnf-abn-concatenation (ebnf-abn-lex))))
299 (setq body (cons (cdr concatenation) body)))
300 (ebnf-token-alternative body concatenation)))
303 ;;; concatenation = repetition *(1*c-wsp repetition)
;; Parse a concatenation starting at TOKEN.  Repetitions are parsed one after
;; another with `ebnf-abn-repetition' and pushed onto `seq'; the accumulated
;; list is handed to `ebnf-token-sequence'.  Signals "Empty element" when the
;; check on the first repetition fails.
;; NOTE(review): the binding of `seq' and the second halves of the `setq'
;; forms on lines 309 and 313 are missing from this listing (308, 310,
;; 314-315, 317) -- confirm the loop's exit condition against the full file.
306 (defun ebnf-abn-concatenation (token)
307 (let ((term (ebnf-abn-repetition token))
309 (or (setq token (car term)
311 (error "Empty element"))
312 (setq seq (cons term seq))
313 (while (setq term (ebnf-abn-repetition token)
316 (setq seq (cons term seq)))
318 (ebnf-token-sequence seq))))
321 ;;; repetition = [repeat] element
323 ;;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
;; Parse an optional repeat prefix followed by an element, starting at TOKEN.
;; Every visible exit builds a cons whose car is a token: the token following
;; the element, or TOKEN itself with a nil cdr when no element was recognised.
;; `lower' and `upper' hold the repeat bounds as strings taken from the
;; variable `ebnf-abn-lex'.
;; NOTE(review): the `let' that binds `lower'/`upper' and several `cond'
;; heads are on lines missing from this listing (327, 333-334, 337-339, 344,
;; 346-347, 350-351, 354, 358-359, 362, 365) -- confirm details against the
;; full file.
326 (defun ebnf-abn-repetition (token)
328 ;; INTEGER [ "*" [ INTEGER ] ]
329 (when (eq token 'integer)
330 (setq lower ebnf-abn-lex
331 token (ebnf-abn-lex))
332 (or (eq token 'repeat)
335 (when (eq token 'repeat)
336 ;; only * ==> lower & upper are empty string
;; An integer right after `*' becomes the upper bound.
340 (when (eq (setq token (ebnf-abn-lex)) 'integer)
341 (setq upper ebnf-abn-lex
342 token (ebnf-abn-lex))))
343 (let ((element (ebnf-abn-element token)))
345 ;; there is a repetition
348 (error "Missing element repetition"))
349 (setq token (ebnf-abn-lex))
;; lower "1" with no upper bound ==> one-or-more
352 ((and (string= lower "1") (null upper))
353 (cons token (ebnf-make-one-or-more element)))
;; lower "0" with no upper bound, or both bounds empty ==> zero-or-more
355 ((or (and (string= lower "0") (null upper))
356 (and (string= lower "") (string= upper "")))
357 (cons token (ebnf-make-zero-or-more element)))
;; any other bounds are passed through to `ebnf-token-repeat'
360 (ebnf-token-repeat lower (cons token element) upper))))
361 ;; there is an element
363 (cons (ebnf-abn-lex) element))
364 ;; something that caller has to deal
366 (cons token nil))))))
369 ;;; element = rulename / group / option /
370 ;;; char-val / num-val / prose-val
372 ;;; group = "(" *c-wsp alternation *c-wsp ")"
374 ;;; option = "[" *c-wsp alternation *c-wsp "]"
376 ;;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
377 ;;; ; quoted string of SP and VCHAR without DQUOTE
379 ;;; num-val = "%" (bin-val / dec-val / hex-val)
381 ;;; bin-val = "b" 1*BIT
382 ;;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
383 ;;; ; series of concatenated bit values
384 ;;; ; or single ONEOF range
386 ;;; dec-val = "d" 1*DIGIT
387 ;;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
389 ;;; hex-val = "x" 1*HEXDIG
390 ;;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
392 ;;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
393 ;;; ; bracketed string of SP and VCHAR without
395 ;;; ; prose description, to be used as last resort
;; Build the syntax node for a single element, dispatching on TOKEN:
;;   `terminal'       -> `ebnf-make-terminal' on the lexed text
;;   `non-terminal'   -> `ebnf-make-non-terminal' on the lexed text
;;   `begin-group'    -> nested alternation that must stop at `end-group'
;;   `begin-optional' -> nested alternation that must stop at `end-optional',
;;                       wrapped by `ebnf-token-optional'
;; Signals "Missing `)'" / "Missing `]'" when the closing token is absent.
;; NOTE(review): the `cond' head, the value returned for a group, and any
;; default clause are on lines missing from this listing (399-400, 411-412,
;; 418+) -- confirm against the full file.
398 (defun ebnf-abn-element (token)
401 ((eq token 'terminal)
402 (ebnf-make-terminal ebnf-abn-lex))
404 ((eq token 'non-terminal)
405 (ebnf-make-non-terminal ebnf-abn-lex))
407 ((eq token 'begin-group)
408 (let ((body (ebnf-abn-alternation)))
409 (or (eq (car body) 'end-group)
410 (error "Missing `)'"))
413 ((eq token 'begin-optional)
414 (let ((body (ebnf-abn-alternation)))
415 (or (eq (car body) 'end-optional)
416 (error "Missing `]'"))
417 (ebnf-token-optional (cdr body))))
424 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 256-entry table indexed by character code.  Every slot starts out as
;; `error'; `ebnf-abn-initialize' overwrites the slots of the characters the
;; lexer accepts, and `ebnf-abn-lex' looks tokens up here with `aref'.
428 (defconst ebnf-abn-token-table (make-vector 256 'error)
429 "Vector used to map characters to a lexical token.")
;; Fill `ebnf-abn-token-table' in place.  Character ranges are filled by
;; `while' loops first; individual characters are then overridden one by one,
;; so the later `aset' calls win over the range fills.
;; NOTE(review): the binding of `char' and the starting value of each loop
;; are on lines missing from this listing (435-436, 441, 446, 451); only the
;; exclusive upper bounds are visible -- confirm the start values.
432 (defun ebnf-abn-initialize ()
433 "Initialize EBNF token table."
434 ;; control character & control 8-bit character are set to `error'
;; ?\072 is ":", i.e. one past "9".
437 (while (< char ?\072)
438 (aset ebnf-abn-token-table char 'integer)
439 (setq char (1+ char)))
440 ;; printable character: A-Z
;; ?\133 is "[", i.e. one past "Z".
442 (while (< char ?\133)
443 (aset ebnf-abn-token-table char 'non-terminal)
444 (setq char (1+ char)))
445 ;; printable character: a-z
;; ?\173 is "{", i.e. one past "z".
447 (while (< char ?\173)
448 (aset ebnf-abn-token-table char 'non-terminal)
449 (setq char (1+ char)))
450 ;; European 8-bit accented characters:
;; ?\400 is 256, i.e. one past the last table slot.
452 (while (< char ?\400)
453 (aset ebnf-abn-token-table char 'non-terminal)
454 (setq char (1+ char)))
455 ;; Override end of line characters:
456 (aset ebnf-abn-token-table ?\n 'end-of-rule) ; [NL] linefeed
457 (aset ebnf-abn-token-table ?\r 'end-of-rule) ; [CR] carriage return
458 ;; Override space characters:
459 (aset ebnf-abn-token-table ?\013 'space) ; [VT] vertical tab
460 (aset ebnf-abn-token-table ?\t 'space) ; [HT] horizontal tab
461 (aset ebnf-abn-token-table ?\ 'space) ; [SP] space
462 ;; Override form feed character:
463 (aset ebnf-abn-token-table ?\f 'form-feed) ; [FF] form feed
464 ;; Override other lexical characters:
;; "<" opens a prose value, which the lexer handles in its `non-terminal'
;; branch; "%" and the double quote both start a `terminal'.
465 (aset ebnf-abn-token-table ?< 'non-terminal)
466 (aset ebnf-abn-token-table ?% 'terminal)
467 (aset ebnf-abn-token-table ?\" 'terminal)
468 (aset ebnf-abn-token-table ?\( 'begin-group)
469 (aset ebnf-abn-token-table ?\) 'end-group)
470 (aset ebnf-abn-token-table ?* 'repeat)
471 (aset ebnf-abn-token-table ?= 'equal)
472 (aset ebnf-abn-token-table ?\[ 'begin-optional)
473 (aset ebnf-abn-token-table ?\] 'end-optional)
474 (aset ebnf-abn-token-table ?/ 'alternative)
475 ;; Override comment character:
476 (aset ebnf-abn-token-table ?\; 'comment)))
479 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
;; Character sets for rule names, each built by `ebnf-range-regexp' from a
;; base set plus the range \240-\377.  The first (hyphen, underscore, digits,
;; letters) is what `ebnf-abn-lex' passes to `ebnf-buffer-substring' to read a
;; name; the second (letters only) is what it checks with `looking-at' at the
;; start of a prose value.
480 (defconst ebnf-abn-non-terminal-chars
481 (ebnf-range-regexp "-_0-9A-Za-z" ?\240 ?\377))
482 (defconst ebnf-abn-non-terminal-letter-chars
483 (ebnf-range-regexp "A-Za-z" ?\240 ?\377))
;; Lexer.  Classifies the character at point through `ebnf-abn-token-table',
;; skips blanks, comments, form feeds and line ends, and for tokens that carry
;; text stores that text in the variable `ebnf-abn-lex'.
;; NOTE(review): many interior lines and the tail of this function are missing
;; from this listing (493-494, 497-499, 501-502, 505, 508, 512-515, 517-519,
;; 521, 523-525, 527, 530, 532, 534, 538-539, 542, 544, 548-551, 553-554,
;; 557+); the comments below cover only the visible forms.
486 (defun ebnf-abn-lex ()
487 "Lexical analyzer for ABNF.
489 Return a lexical token.
491 See documentation for variable `ebnf-abn-lex'."
492 (if (>= (point) ebnf-limit)
495 ;; skip spaces and comments
;; Character codes above 255 cannot index the 256-slot table, so they are
;; tested for before the `aref' below.
496 (while (if (> (following-char) 255)
500 (setq token (aref ebnf-abn-token-table (following-char)))
503 (skip-chars-forward " \013\t" ebnf-limit)
504 (< (point) ebnf-limit))
506 (ebnf-abn-skip-comment))
;; A form feed is recorded as the pending action in `ebnf-action'.
507 ((eq token 'form-feed)
509 (setq ebnf-action 'form-feed))
510 ((eq token 'end-of-rule)
511 (ebnf-abn-skip-end-of-rule))
516 ((>= (point) ebnf-limit)
520 (error "Invalid character"))
522 ((eq token 'end-of-rule)
;; The digits of an integer token become the token text.
526 (setq ebnf-abn-lex (ebnf-buffer-substring "0-9"))
528 ;; terminal: "string" or %[bdx]NNN((.NNN)+|-NNN)?
529 ((eq token 'terminal)
531 (if (= (following-char) ?\")
533 (ebnf-abn-character)))
535 ;; non-terminal: NAME or <NAME>
536 ((eq token 'non-terminal)
537 (let ((prose-p (= (following-char) ?<)))
;; What follows must match `ebnf-abn-non-terminal-letter-chars'.
540 (or (looking-at ebnf-abn-non-terminal-letter-chars)
541 (error "Invalid prose value")))
543 (ebnf-buffer-substring ebnf-abn-non-terminal-chars))
;; A ">" must follow, and the stored text is re-wrapped as "<NAME>".
545 (or (= (following-char) ?>)
546 (error "Invalid prose value"))
547 (setq ebnf-abn-lex (concat "<" ebnf-abn-lex ">"))))
;; A "/" at this point selects `incremental-alternative'.
552 (if (/= (following-char) ?/)
555 'incremental-alternative))
556 ;; miscellaneous: (, ), [, ], /, *
;; Skip line ends, the blanks after them, and any comment that follows, while
;; tracking in `eor-p' whether a real end of rule was seen.  One visible
;; condition for that is more than one CR/LF character skipped in a row.
;; NOTE(review): the binding of `eor-p', the enclosing loop, the second
;; operand of the `or' and the return value are on lines missing from this
;; listing (564-565, 568-569, 571, 574+) -- confirm against the full file.
563 (defun ebnf-abn-skip-end-of-rule ()
566 ;; end of rule ==> 2 or more consecutive end of lines
567 (setq eor-p (or (> (skip-chars-forward "\r\n" ebnf-limit) 1)
570 (skip-chars-forward " \013\t" ebnf-limit)
572 (and (= (following-char) ?\;)
573 (ebnf-abn-skip-comment))))
577 ;; replace the range "\177-\237" (see `ebnf-range-regexp').
;; Character set used to scan comment text, built by `ebnf-range-regexp' from
;; a negated base set (newline and the control characters \000-\010 and
;; \016-\037) plus the range \177-\237.  Used by `ebnf-abn-skip-comment' with
;; `skip-chars-forward' and by `ebnf-abn-eps-filename' with
;; `ebnf-buffer-substring'.
578 (defconst ebnf-abn-comment-chars
579 (ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237))
;; Skip a comment, acting on the character at point first.  While
;; `ebnf-eps-executing' is non-nil, "[", "]", "H" and "F" call the matching
;; `ebnf-eps-*' function with the text read by `ebnf-abn-eps-filename'.
;; Otherwise the character is looked up in `ebnf-comment-table' to set
;; `ebnf-action' and the comment text is skipped.  Afterwards point must be
;; at `ebnf-limit' or on a newline, else "Invalid character" is signalled.
;; NOTE(review): the `cond' head, any advance past ";", and the values
;; returned by the final `cond' are on lines missing from this listing
;; (583-585, 588, 591, 594, 598, 601, 604, 606-607, 609+).
582 (defun ebnf-abn-skip-comment ()
586 ((and ebnf-eps-executing (= (following-char) ?\[))
587 (ebnf-eps-add-context (ebnf-abn-eps-filename)))
589 ((and ebnf-eps-executing (= (following-char) ?\]))
590 (ebnf-eps-remove-context (ebnf-abn-eps-filename)))
592 ((and ebnf-eps-executing (= (following-char) ?H))
593 (ebnf-eps-header-comment (ebnf-abn-eps-filename)))
595 ((and ebnf-eps-executing (= (following-char) ?F))
596 (ebnf-eps-footer-comment (ebnf-abn-eps-filename)))
597 ;; any other action in comment
599 (setq ebnf-action (aref ebnf-comment-table (following-char)))
600 (skip-chars-forward ebnf-abn-comment-chars ebnf-limit))
602 ;; check for a valid end of comment
603 (cond ((>= (point) ebnf-limit)
605 ((= (following-char) ?\n)
608 (error "Invalid character"))
;; Return the text at point made of comment characters, as extracted by
;; `ebnf-buffer-substring'; the callers pass it to the `ebnf-eps-*' functions.
;; NOTE(review): line 613 is missing from this listing; presumably it moves
;; point past the marker character first -- confirm.
612 (defun ebnf-abn-eps-filename ()
614 (ebnf-buffer-substring ebnf-abn-comment-chars))
617 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
;; Characters allowed inside a quoted string: space through "!" and "#"
;; through "~" (the double quote is excluded), plus the range \240-\377 added
;; by `ebnf-range-regexp'.  Used by `ebnf-abn-string' with
;; `skip-chars-forward'.
618 (defconst ebnf-abn-string-chars
619 (ebnf-range-regexp " -!#-~" ?\240 ?\377))
;; Read a quoted string and return its text without text properties.  The
;; characters in `ebnf-abn-string-chars' are skipped, after which a closing
;; double quote must be at point; otherwise "Missing `\"'" is signalled.
;; NOTE(review): the forms that step over the quotes and supply the two
;; buffer positions are on lines missing from this listing (624-627, 631+),
;; so whether the quotes are part of the returned text cannot be told here.
622 (defun ebnf-abn-string ()
623 (buffer-substring-no-properties
628 (skip-chars-forward ebnf-abn-string-chars ebnf-limit)
629 (or (= (following-char) ?\")
630 (error "Missing `\"'"))
;; Read a numeric terminal value and return its text without text properties.
;; The base letter at point (case-insensitive) selects the digit set: b/B ->
;; "01", d/D -> "0-9", x/X -> "0-9A-Fa-f"; any other letter is an error.  At
;; least one digit must follow.  After that, "-" introduces one more digit
;; run (a range); otherwise each "." introduces a further digit run.
;; NOTE(review): the forms that advance point past "%", the base letter, "-"
;; and "." and that supply the buffer positions are on lines missing from
;; this listing (639-641, 647, 651-652, 656, 659+).
636 (defun ebnf-abn-character ()
637 ;; %[bdx]NNN((-NNN)|(.NNN)+)?
638 (buffer-substring-no-properties
642 (let* ((char (following-char))
643 (chars (cond ((or (= char ?B) (= char ?b)) "01")
644 ((or (= char ?D) (= char ?d)) "0-9")
645 ((or (= char ?X) (= char ?x)) "0-9A-Fa-f")
646 (t (error "Invalid terminal value")))))
;; `skip-chars-forward' returns the distance moved; 0 means no digit.
648 (or (> (skip-chars-forward chars ebnf-limit) 0)
649 (error "Invalid terminal value"))
650 (if (= (following-char) ?-)
653 (or (> (skip-chars-forward chars ebnf-limit) 0)
654 (error "Invalid terminal value range")))
655 (while (= (following-char) ?.)
657 (or (> (skip-chars-forward chars ebnf-limit) 0)
658 (error "Invalid terminal value")))))
662 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
667 ;;; ebnf-abn.el ends here