1 ;;; ebnf-abn.el --- parser for ABNF (Augmented BNF)
3 ;; Copyright (C) 2004 Free Software Foundation, Inc.
5 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
6 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Time-stamp: <2004/02/28 17:40:41 vinicius>
8 ;; Keywords: wp, ebnf, PostScript
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 2, or (at your option)
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26 ;; Boston, MA 02111-1307, USA.
30 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33 ;; This is part of ebnf2ps package.
35 ;; This package defines a parser for ABNF (Augmented BNF).
37 ;; See ebnf2ps.el for documentation.
44 ;; `http://www.ietf.org/rfc/rfc2234.txt'
46 ;; `http://www.faqs.org/rfcs/rfc2234.html'
48 ;; `http://www.rnp.br/ietf/rfc/rfc2234.txt'
49 ;; ("Augmented BNF for Syntax Specifications: ABNF").
52 ;; rulelist = 1*( rule / (*c-wsp c-nl) )
54 ;; rule = rulename defined-as elements c-nl
55 ;; ; continues if next line starts with white space
57 ;; rulename = ALPHA *(ALPHA / DIGIT / "-")
59 ;; defined-as = *c-wsp ("=" / "=/") *c-wsp
60 ;; ; basic rules definition and incremental
63 ;; elements = alternation *c-wsp
65 ;; c-wsp = WSP / (c-nl WSP)
67 ;; c-nl = comment / CRLF
68 ;; ; comment or newline
70 ;; comment = ";" *(WSP / VCHAR) CRLF
72 ;; alternation = concatenation
73 ;; *(*c-wsp "/" *c-wsp concatenation)
75 ;; concatenation = repetition *(1*c-wsp repetition)
77 ;; repetition = [repeat] element
79 ;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
81 ;; element = rulename / group / option /
82 ;; char-val / num-val / prose-val
84 ;; group = "(" *c-wsp alternation *c-wsp ")"
86 ;; option = "[" *c-wsp alternation *c-wsp "]"
88 ;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
89 ;; ; quoted string of SP and VCHAR without DQUOTE
91 ;; num-val = "%" (bin-val / dec-val / hex-val)
93 ;; bin-val = "b" 1*BIT
94 ;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
95 ;; ; series of concatenated bit values
96 ;; ; or single ONEOF range
98 ;; dec-val = "d" 1*DIGIT
99 ;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
101 ;; hex-val = "x" 1*HEXDIG
102 ;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
104 ;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
105 ;; ; bracketed string of SP and VCHAR without
107 ;; ; prose description, to be used as last resort
109 ;; ; Core rules -- the coding depends on the system, here is used 7-bit ASCII
111 ;; ALPHA = %x41-5A / %x61-7A
117 ;; ; any 7-bit US-ASCII character, excluding NUL
123 ;; ; Internet standard newline
125 ;; CTL = %x00-1F / %x7F
132 ;; ; " (Double Quote)
134 ;; HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
142 ;; LWSP = *(WSP / CRLF WSP)
143 ;; ; linear white space (past newline)
152 ;; ; visible (printing) characters
160 ;; 1. Rule names and terminal strings are case INSENSITIVE.
161 ;; So, the following rule names are all equal:
162 ;; Rule-name, rule-Name, rule-name, RULE-NAME
163 ;; Also, the following strings are equal:
164 ;; "abc", "ABC", "aBc", "Abc", "aBC", etc.
166 ;; 2. To have a case SENSITIVE string, use the character notation.
167 ;; For example, to specify the lowercase string "abc", use:
170 ;; 3. There are no implicit spaces between elements, for example, the
177 ;; mumble = foo bar foo
179 ;; Are equivalent to the following rule:
181 ;; mumble = %x61.62.61
183 ;; If spaces are needed, they should be explicitly specified, like:
185 ;; spaces = 1*(%x20 / %x09) ; one or more spaces or tabs
187 ;; mumble = foo spaces bar spaces foo
189 ;; 4. A line starting with a space or tab is considered a continuation line.
190 ;; For example, the rule:
200 ;; Differences Between ABNF And ebnf2ps ABNF
201 ;; -----------------------------------------
203 ;; In addition to the characters that ABNF accepts, ebnf2ps ABNF also accepts
204 ;; the underscore (_) in rule names and European 8-bit accented characters
205 ;; (from \240 to \377) in rule names, strings and comments.
208 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Holds the text of the token most recently scanned by the function
;; `ebnf-abn-lex': the lexer assigns it with `setq', and the parser reads it
;; to obtain rule names, terminal/non-terminal text and repetition bounds.
;; The function itself returns a token symbol (e.g. `end-of-input').
216 (defvar ebnf-abn-lex nil
217 "Value returned by `ebnf-abn-lex' function.")
220 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
221 ;; Syntactic analyzer
224 ;;; rulelist = 1*( rule / (*c-wsp c-nl) )
226 (defun ebnf-abn-parser (start)
228 (let ((total (+ (- ebnf-limit start) 1))
231 rule-list token rule)
233 (setq token (ebnf-abn-lex))
234 (and (eq token 'end-of-input)
235 (error "Invalid ABNF file format"))
236 (while (not (eq token 'end-of-input))
239 (/ (* (- (point) bias) 100.0) total))
240 (setq token (ebnf-abn-rule token)
243 (or (ebnf-add-empty-rule-list rule)
244 (setq rule-list (cons rule rule-list))))
249 ;;; rule = rulename defined-as elements c-nl
250 ;;; ; continues if next line starts with white space
252 ;;; rulename = ALPHA *(ALPHA / DIGIT / "-")
254 ;;; defined-as = *c-wsp ("=" / "=/") *c-wsp
255 ;;; ; basic rules definition and incremental
258 ;;; elements = alternation *c-wsp
260 ;;; c-wsp = WSP / (c-nl WSP)
262 ;;; c-nl = comment / CRLF
263 ;;; ; comment or newline
265 ;;; comment = ";" *(WSP / VCHAR) CRLF
268 (defun ebnf-abn-rule (token)
269 (let ((name ebnf-abn-lex)
272 (setq ebnf-action nil)
273 (or (eq token 'non-terminal)
274 (error "Invalid rule name"))
275 (setq token (ebnf-abn-lex))
276 (or (memq token '(equal incremental-alternative))
277 (error "Invalid rule: missing `=' or `=/'"))
278 (and (eq token 'incremental-alternative)
279 (setq name (concat name " =/")))
280 (setq elements (ebnf-abn-alternation))
281 (or (memq (car elements) '(end-of-rule end-of-input))
282 (error "Invalid rule: there is no end of rule"))
283 (setq elements (cdr elements))
284 (ebnf-eps-add-production name)
286 (ebnf-make-production name elements action))))
289 ;;; alternation = concatenation
290 ;;; *(*c-wsp "/" *c-wsp concatenation)
293 (defun ebnf-abn-alternation ()
294 (let (body concatenation)
295 (while (eq (car (setq concatenation
296 (ebnf-abn-concatenation (ebnf-abn-lex))))
298 (setq body (cons (cdr concatenation) body)))
299 (ebnf-token-alternative body concatenation)))
302 ;;; concatenation = repetition *(1*c-wsp repetition)
305 (defun ebnf-abn-concatenation (token)
306 (let ((term (ebnf-abn-repetition token))
308 (or (setq token (car term)
310 (error "Empty element"))
311 (setq seq (cons term seq))
312 (while (setq term (ebnf-abn-repetition token)
315 (setq seq (cons term seq)))
317 (if (= (length seq) 1)
318 ;; sequence with only one element
321 (ebnf-make-sequence (nreverse seq))))))
324 ;;; repetition = [repeat] element
326 ;;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
329 (defun ebnf-abn-repetition (token)
331 ;; INTEGER [ "*" [ INTEGER ] ]
332 (when (eq token 'integer)
333 (setq lower ebnf-abn-lex
334 token (ebnf-abn-lex))
335 (or (eq token 'repeat)
338 (when (eq token 'repeat)
339 ;; only * ==> lower & upper are empty string
343 (when (eq (setq token (ebnf-abn-lex)) 'integer)
344 (setq upper ebnf-abn-lex
345 token (ebnf-abn-lex))))
346 (let ((element (ebnf-abn-element token)))
348 ;; there is a repetition
351 (error "Missing element repetition"))
352 (setq token (ebnf-abn-lex))
355 ((and (string= lower "1") (null upper))
356 (cons token (ebnf-make-one-or-more element)))
358 ((or (and (string= lower "0") (null upper))
359 (and (string= lower "") (string= upper "")))
360 (cons token (ebnf-make-zero-or-more element)))
363 (ebnf-token-repeat lower (cons token element) upper))))
364 ;; there is an element
366 (cons (ebnf-abn-lex) element))
367 ;; something that the caller has to deal with
369 (cons token nil))))))
372 ;;; element = rulename / group / option /
373 ;;; char-val / num-val / prose-val
375 ;;; group = "(" *c-wsp alternation *c-wsp ")"
377 ;;; option = "[" *c-wsp alternation *c-wsp "]"
379 ;;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
380 ;;; ; quoted string of SP and VCHAR without DQUOTE
382 ;;; num-val = "%" (bin-val / dec-val / hex-val)
384 ;;; bin-val = "b" 1*BIT
385 ;;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
386 ;;; ; series of concatenated bit values
387 ;;; ; or single ONEOF range
389 ;;; dec-val = "d" 1*DIGIT
390 ;;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
392 ;;; hex-val = "x" 1*HEXDIG
393 ;;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
395 ;;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
396 ;;; ; bracketed string of SP and VCHAR without
398 ;;; ; prose description, to be used as last resort
401 (defun ebnf-abn-element (token)
404 ((eq token 'terminal)
405 (ebnf-make-terminal ebnf-abn-lex))
407 ((eq token 'non-terminal)
408 (ebnf-make-non-terminal ebnf-abn-lex))
410 ((eq token 'begin-group)
411 (let ((body (ebnf-abn-alternation)))
412 (or (eq (car body) 'end-group)
413 (error "Missing `)'"))
416 ((eq token 'begin-optional)
417 (let ((body (ebnf-abn-alternation)))
418 (or (eq (car body) 'end-optional)
419 (error "Missing `]'"))
420 (ebnf-token-optional (cdr body))))
427 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 256-slot vector indexed by character code.  Every slot starts out as
;; `error'; `ebnf-abn-initialize' then overwrites the slots of the characters
;; it recognizes with their token symbols, and `ebnf-abn-lex' looks up
;; `following-char' in it.
431 (defconst ebnf-abn-token-table (make-vector 256 'error)
432 "Vector used to map characters to a lexical token.")
435 (defun ebnf-abn-initialize ()
436 "Initialize EBNF token table."
437 ;; control character & control 8-bit character are set to `error'
440 (while (< char ?\072)
441 (aset ebnf-abn-token-table char 'integer)
442 (setq char (1+ char)))
443 ;; printable character: A-Z
445 (while (< char ?\133)
446 (aset ebnf-abn-token-table char 'non-terminal)
447 (setq char (1+ char)))
448 ;; printable character: a-z
450 (while (< char ?\173)
451 (aset ebnf-abn-token-table char 'non-terminal)
452 (setq char (1+ char)))
453 ;; European 8-bit accented characters:
455 (while (< char ?\400)
456 (aset ebnf-abn-token-table char 'non-terminal)
457 (setq char (1+ char)))
458 ;; Override end of line characters:
459 (aset ebnf-abn-token-table ?\n 'end-of-rule) ; [NL] linefeed
460 (aset ebnf-abn-token-table ?\r 'end-of-rule) ; [CR] carriage return
461 ;; Override space characters:
462 (aset ebnf-abn-token-table ?\013 'space) ; [VT] vertical tab
463 (aset ebnf-abn-token-table ?\t 'space) ; [HT] horizontal tab
464 (aset ebnf-abn-token-table ?\ 'space) ; [SP] space
465 ;; Override form feed character:
466 (aset ebnf-abn-token-table ?\f 'form-feed) ; [FF] form feed
467 ;; Override other lexical characters:
468 (aset ebnf-abn-token-table ?< 'non-terminal)
469 (aset ebnf-abn-token-table ?% 'terminal)
470 (aset ebnf-abn-token-table ?\" 'terminal)
471 (aset ebnf-abn-token-table ?\( 'begin-group)
472 (aset ebnf-abn-token-table ?\) 'end-group)
473 (aset ebnf-abn-token-table ?* 'repeat)
474 (aset ebnf-abn-token-table ?= 'equal)
475 (aset ebnf-abn-token-table ?\[ 'begin-optional)
476 (aset ebnf-abn-token-table ?\] 'end-optional)
477 (aset ebnf-abn-token-table ?/ 'alternative)
478 ;; Override comment character:
479 (aset ebnf-abn-token-table ?\; 'comment)))
482 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
;; Character set handed to `ebnf-buffer-substring' by `ebnf-abn-lex' when it
;; collects a non-terminal name: hyphen, underscore, digits and ASCII letters.
;; The \240-\377 bounds are passed to `ebnf-range-regexp' (defined elsewhere);
;; presumably it adds that 8-bit range to the set -- TODO confirm.
483 (defconst ebnf-abn-non-terminal-chars
484 (ebnf-range-regexp "-_0-9A-Za-z" ?\240 ?\377))
;; Letters-only variant (ASCII letters, with the \240-\377 bounds passed to
;; `ebnf-range-regexp'); `ebnf-abn-lex' uses it to check the character that
;; follows the opening "<" of a prose value.
;; NOTE(review): `ebnf-abn-lex' passes this value to `looking-at', which
;; expects a regexp, while the base string here is a bare character range
;; ("A-Za-z") with no brackets -- confirm what `ebnf-range-regexp' returns
;; and that the prose-value check matches as intended.
485 (defconst ebnf-abn-non-terminal-letter-chars
486 (ebnf-range-regexp "A-Za-z" ?\240 ?\377))
489 (defun ebnf-abn-lex ()
490 "Lexical analyser for ABNF.
492 Return a lexical token.
494 See documentation for variable `ebnf-abn-lex'."
495 (if (>= (point) ebnf-limit)
498 ;; skip spaces and comments
499 (while (if (> (following-char) 255)
503 (setq token (aref ebnf-abn-token-table (following-char)))
506 (skip-chars-forward " \013\t" ebnf-limit)
507 (< (point) ebnf-limit))
509 (ebnf-abn-skip-comment))
510 ((eq token 'form-feed)
512 (setq ebnf-action 'form-feed))
513 ((eq token 'end-of-rule)
514 (ebnf-abn-skip-end-of-rule))
519 ((>= (point) ebnf-limit)
523 (error "Illegal character"))
525 ((eq token 'end-of-rule)
529 (setq ebnf-abn-lex (ebnf-buffer-substring "0-9"))
531 ;; terminal: "string" or %[bdx]NNN((.NNN)+|-NNN)?
532 ((eq token 'terminal)
534 (if (= (following-char) ?\")
536 (ebnf-abn-character)))
538 ;; non-terminal: NAME or <NAME>
539 ((eq token 'non-terminal)
540 (let ((prose-p (= (following-char) ?<)))
543 (or (looking-at ebnf-abn-non-terminal-letter-chars)
544 (error "Invalid prose value")))
546 (ebnf-buffer-substring ebnf-abn-non-terminal-chars))
548 (or (= (following-char) ?>)
549 (error "Invalid prose value"))
550 (setq ebnf-abn-lex (concat "<" ebnf-abn-lex ">"))))
555 (if (/= (following-char) ?/)
558 'incremental-alternative))
559 ;; miscellaneous: (, ), [, ], /, *
566 (defun ebnf-abn-skip-end-of-rule ()
569 ;; end of rule ==> 2 or more consecutive end of lines
570 (setq eor-p (or (> (skip-chars-forward "\r\n" ebnf-limit) 1)
573 (skip-chars-forward " \013\t" ebnf-limit)
575 (and (= (following-char) ?\;)
576 (ebnf-abn-skip-comment))))
580 ;; replace the range "\177-\237" (see `ebnf-range-regexp').
;; Negated character set (note the leading "^") used with
;; `skip-chars-forward' and `ebnf-buffer-substring' to move over comment
;; text: it accepts everything except newline and the control characters
;; \000-\010 and \016-\037.  The \177-\237 bounds are passed to
;; `ebnf-range-regexp'; presumably that range is excluded too -- TODO confirm.
581 (defconst ebnf-abn-comment-chars
582 (ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237))
585 (defun ebnf-abn-skip-comment ()
589 ((and ebnf-eps-executing (= (following-char) ?\[))
590 (ebnf-eps-add-context (ebnf-abn-eps-filename)))
592 ((and ebnf-eps-executing (= (following-char) ?\]))
593 (ebnf-eps-remove-context (ebnf-abn-eps-filename)))
594 ;; any other action in comment
596 (setq ebnf-action (aref ebnf-comment-table (following-char)))
597 (skip-chars-forward ebnf-abn-comment-chars ebnf-limit))
599 ;; check for a valid end of comment
600 (cond ((>= (point) ebnf-limit)
602 ((= (following-char) ?\n)
605 (error "Illegal character"))
609 (defun ebnf-abn-eps-filename ()
611 (ebnf-buffer-substring ebnf-abn-comment-chars))
614 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
;; Character set skipped by `ebnf-abn-string' while scanning the inside of a
;; quoted string: space through "!" and "#" through "~", i.e. printable ASCII
;; without the double quote, matching the char-val rule
;; (%x20-21 / %x23-7E).  The \240-\377 bounds are passed to
;; `ebnf-range-regexp' (defined elsewhere).
615 (defconst ebnf-abn-string-chars
616 (ebnf-range-regexp " -!#-~" ?\240 ?\377))
619 (defun ebnf-abn-string ()
620 (buffer-substring-no-properties
625 (skip-chars-forward ebnf-abn-string-chars ebnf-limit)
626 (or (= (following-char) ?\")
627 (error "Missing `\"'"))
633 (defun ebnf-abn-character ()
634 ;; %[bdx]NNN((-NNN)|(.NNN)+)?
635 (buffer-substring-no-properties
639 (let* ((char (following-char))
640 (chars (cond ((or (= char ?B) (= char ?b)) "01")
641 ((or (= char ?D) (= char ?d)) "0-9")
642 ((or (= char ?X) (= char ?x)) "0-9A-Fa-f")
643 (t (error "Invalid terminal value")))))
645 (or (> (skip-chars-forward chars ebnf-limit) 0)
646 (error "Invalid terminal value"))
647 (if (= (following-char) ?-)
650 (or (> (skip-chars-forward chars ebnf-limit) 0)
651 (error "Invalid terminal value range")))
652 (while (= (following-char) ?.)
654 (or (> (skip-chars-forward chars ebnf-limit) 0)
655 (error "Invalid terminal value")))))
659 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
664 ;;; arch-tag: 8d1b3c4d-4226-4393-b9ae-b7ccf07cf779
665 ;;; ebnf-abn.el ends here