1 ;;; ebnf-iso.el --- parser for ISO EBNF
3 ;; Copyright (C) 1999-2012 Free Software Foundation, Inc.
5 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
6 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Keywords: wp, ebnf, PostScript
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software: you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation, either version 3 of the License, or
16 ;; (at your option) any later version.
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
28 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31 ;; This is part of ebnf2ps package.
33 ;; This package defines a parser for ISO EBNF.
35 ;; See ebnf2ps.el for documentation.
42 ;; `http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html'
43 ;; ("International Standard of the ISO EBNF Notation").
46 ;; ISO EBNF = syntax rule, {syntax rule};
48 ;; syntax rule = meta identifier, '=', definition list, ';';
50 ;; definition list = single definition, {'|', single definition};
52 ;; single definition = term, {',', term};
54 ;; term = factor, ['-', exception];
56 ;; exception = factor (* without <meta identifier> *);
58 ;; factor = [integer, '*'], primary;
60 ;; primary = optional sequence | repeated sequence | special sequence
61 ;; | grouped sequence | meta identifier | terminal string
66 ;; optional sequence = '[', definition list, ']';
68 ;; repeated sequence = '{', definition list, '}';
70 ;; grouped sequence = '(', definition list, ')';
72 ;; terminal string = "'", character - "'", {character - "'"}, "'"
73 ;; | '"', character - '"', {character - '"'}, '"';
75 ;; special sequence = '?', {character - '?'}, '?';
77 ;; meta identifier = letter, { letter | decimal digit | ' ' };
79 ;; integer = decimal digit, {decimal digit};
81 ;; comment = '(*', {comment symbol}, '*)';
83 ;; comment symbol = comment (* <== NESTED COMMENT *)
84 ;; | terminal string | special sequence | character;
86 ;; letter = ? A-Z a-z ?;
88 ;; decimal digit = ? 0-9 ?;
90 ;; character = letter | decimal digit
91 ;; | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{'
92 ;; | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_'
93 ;; | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~';
96 ;; There is also the following alternative representation:
98 ;; STANDARD ALTERNATIVE
107 ;; Differences Between ISO EBNF And ebnf2ps ISO EBNF
108 ;; -------------------------------------------------
110 ;; ISO EBNF accepts the characters given by the <character> production above,
111 ;; plus HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM FEED
112 ;; (^L); any other character is invalid. However, ebnf2ps also accepts the
113 ;; European 8-bit accented characters (from \240 to \377) and underscore (_).
117 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Note: `ebnf-iso-lex' names both this variable and the lexer function
;; defined later in this file.  The function stores the text of the token it
;; has just read in this variable (e.g. the digits of an integer or the
;; contents of a quoted string); the parser functions read it back from here.
125 (defvar ebnf-iso-lex nil
126 "Value returned by `ebnf-iso-lex' function.")
;; `ebnf-iso-term' binds this to t while it parses the exception part of a
;; term; while it is non-nil the lexer signals an error when it reads a meta
;; identifier.
129 (defvar ebnf-no-meta-identifier nil
130 "Used by `ebnf-iso-term' and `ebnf-iso-lex' functions.")
133 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
134 ;; Syntactic analyzer
137 ;;; ISO EBNF = syntax rule, {syntax rule};
;; Top-level parser: reads the first token, then parses one syntax rule per
;; loop iteration until the lexer reports `end-of-input', collecting the rules
;; in `syntax-list'.
;; NOTE(review): several lines of this function are not visible in this view
;; (for example the binding of `bias', the rest of the `setq' that splits the
;; result of `ebnf-iso-syntax-rule', and the value finally returned), so the
;; comments below describe only what is shown -- confirm against the full file.
139 (defun ebnf-iso-parser (start)
;; `total' = number of characters from START through `ebnf-limit'; it is the
;; denominator of the percentage computed inside the loop.
141 (let ((total (+ (- ebnf-limit start) 1))
144 syntax-list token rule)
;; Prime the loop with the first token; an input that yields no token at all
;; is rejected.
146 (setq token (ebnf-iso-lex))
147 (and (eq token 'end-of-input)
148 (error "Invalid ISO EBNF file format"))
149 (while (not (eq token 'end-of-input))
;; Percentage of the input consumed so far (100.0 forces floating point).
152 (/ (* (- (point) bias) 100.0) total))
153 (setq token (ebnf-iso-syntax-rule token)
;; Keep the rule unless `ebnf-add-empty-rule-list' returns non-nil for it.
;; Rules are consed onto the front, so `syntax-list' is built in reverse
;; order of appearance.
156 (or (ebnf-add-empty-rule-list rule)
157 (setq syntax-list (cons rule syntax-list))))
162 ;;; syntax rule = meta identifier, '=', definition list, ';';
;; Parse one syntax rule.  TOKEN is the token the caller has already read; it
;; must be `non-terminal', and its text (captured in `header') names the
;; production.  An error is signaled when the meta identifier, the `=' or the
;; rule terminator is missing.
;; NOTE(review): the bindings of `body' and `action' and the form enclosing
;; the final `ebnf-make-production' call are not visible in this view, so the
;; exact return shape should be confirmed against the full file.
164 (defun ebnf-iso-syntax-rule (token)
;; Capture the identifier text now: the next call to the lexer may overwrite
;; the variable `ebnf-iso-lex'.
165 (let ((header ebnf-iso-lex)
;; Reset `ebnf-action' to nil before the rule body is parsed.
168 (setq ebnf-action nil)
169 (or (eq token 'non-terminal)
170 (error "Invalid meta identifier syntax rule"))
171 (or (eq (ebnf-iso-lex) 'equal)
172 (error "Invalid syntax rule: missing `='"))
;; The definition list comes back as a pair: its car is the token that ended
;; the list (which must be `period' here) and its cdr is the parsed body.
173 (setq body (ebnf-iso-definition-list))
174 (or (eq (car body) 'period)
175 (error "Invalid syntax rule: missing `;' or `.'"))
176 (setq body (cdr body))
177 (ebnf-eps-add-production header)
179 (ebnf-make-production header body action))))
182 ;;; definition list = single definition, {'|', single definition};
;; Parse a definition list, i.e. a series of single definitions.  Each call
;; to `ebnf-iso-single-definition' returns a pair whose car is the token that
;; followed the definition; while that token matches the loop condition, the
;; cdr (the parsed sequence) is pushed onto `body'.
;; NOTE(review): the `let' bindings and the token compared in the loop
;; condition are not visible in this view -- presumably `alternative', given
;; the production in the comment above; confirm against the full file.
184 (defun ebnf-iso-definition-list ()
186 (while (eq (car (setq sequence (ebnf-iso-single-definition)))
188 (setq sequence (cdr sequence)
189 body (cons sequence body)))
;; Hand the accumulated alternatives plus the last pair read (still carrying
;; its terminating token) to `ebnf-token-alternative'.
190 (ebnf-token-alternative body sequence)))
193 ;;; single definition = term, {',', term};
;; Parse a single definition: terms are read one after another with
;; `ebnf-iso-term' and pushed onto `seq' for as long as the token following
;; each term is `catenate'.
;; NOTE(review): this definition is truncated in this view (the code that
;; assigns `token' and the arguments of the final `ebnf-token-sequence' call
;; are missing), so only the visible lines are described.
195 (defun ebnf-iso-single-definition ()
196 (let (token seq term)
;; Each iteration reads a fresh token from the lexer and parses one term
;; starting at it.
197 (while (and (setq term (ebnf-iso-term (ebnf-iso-lex))
200 (eq token 'catenate))
;; Terms are consed onto the front, so `seq' is in reverse order here.
201 (setq seq (cons term seq)))
203 (ebnf-token-sequence (if term
208 ;;; term = factor, ['-', exception];
210 ;;; exception = factor (* without <meta identifier> *);
(defun ebnf-iso-term (token)
  "Parse a term: a factor optionally followed by `-' and an exception.
TOKEN is the token that starts the factor.

`ebnf-iso-factor' returns a pair whose car is the token that follows the
factor.  When that token is not `except' the pair is returned unchanged.
Otherwise the exception factor is parsed with `ebnf-no-meta-identifier'
bound to t, so the lexer rejects meta identifiers inside it, and the
result of `ebnf-token-except' is returned."
  (let ((factor (ebnf-iso-factor token)))
    (if (not (eq (car factor) 'except))
        ;; factor
        factor
      ;; factor - exception
      (let ((ebnf-no-meta-identifier t))
        (ebnf-token-except (cdr factor) (ebnf-iso-factor (ebnf-iso-lex)))))))
222 ;;; factor = [integer, '*'], primary;
(defun ebnf-iso-factor (token)
  "Parse a factor starting at TOKEN: an optional `INTEGER *' and a primary.
Without a leading integer this is just `ebnf-iso-primary' on TOKEN.
With one, the integer text is taken from the variable `ebnf-iso-lex',
the next token must be `repeat' (otherwise an error is signaled), and
the primary that follows is wrapped with `ebnf-token-repeat'."
  (cond
   ((not (eq token 'integer))
    (ebnf-iso-primary token))
   (t
    ;; Save the integer text before the lexer is called again.
    (let ((count ebnf-iso-lex))
      (unless (eq (ebnf-iso-lex) 'repeat)
        (error "Missing `*'"))
      (ebnf-token-repeat count (ebnf-iso-primary (ebnf-iso-lex)))))))
233 ;;; primary = optional sequence | repeated sequence | special sequence
234 ;;; | grouped sequence | meta identifier | terminal string
239 ;;; optional sequence = '[', definition list, ']';
241 ;;; repeated sequence = '{', definition list, '}';
243 ;;; grouped sequence = '(', definition list, ')';
245 ;;; terminal string = "'", character - "'", {character - "'"}, "'"
246 ;;; | '"', character - '"', {character - '"'}, '"';
248 ;;; special sequence = '?', {character - '?'}, '?';
250 ;;; meta identifier = letter, {letter | decimal digit | ' '};
;; Parse a primary, dispatching on TOKEN (the token already read by the
;; caller).  The text of simple tokens is taken from the variable
;; `ebnf-iso-lex'; bracketed constructs parse a nested definition list and
;; then check that it was ended by the matching closing token.
;; NOTE(review): the head of the conditional and several clauses are not
;; visible in this view (e.g. the test guarding `ebnf-make-special', the tail
;; of the group clause and the binding of `repeat'), so only the visible lines
;; are described.
252 (defun ebnf-iso-primary (token)
256 ((eq token 'terminal)
257 (ebnf-make-terminal ebnf-iso-lex))
259 ((eq token 'non-terminal)
260 (ebnf-make-non-terminal ebnf-iso-lex))
263 (ebnf-make-special ebnf-iso-lex))
;; Grouped sequence: the car of the returned pair is the token that ended the
;; nested list and must be `end-group'.
265 ((eq token 'begin-group)
266 (let ((body (ebnf-iso-definition-list)))
267 (or (eq (car body) 'end-group)
268 (error "Missing `)'"))
;; Optional sequence: must be ended by `end-optional'; the message names both
;; the standard and the alternative closing spelling.
271 ((eq token 'begin-optional)
272 (let ((body (ebnf-iso-definition-list)))
273 (or (eq (car body) 'end-optional)
274 (error "Missing `]' or `/)'"))
275 (ebnf-token-optional (cdr body))))
;; Repeated sequence: must be ended by `end-zero-or-more'.
277 ((eq token 'begin-zero-or-more)
278 (let* ((body (ebnf-iso-definition-list))
280 (or (eq (car body) 'end-zero-or-more)
281 (error "Missing `}' or `:)'"))
282 (ebnf-make-zero-or-more repeat)))
293 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defconst ebnf-iso-token-table
  ;; control character & 8-bit character are set to `error'
  (let ((table (make-vector 256 'error))
        ;; Filling starts at SPC (octal 040); every code below it keeps the
        ;; default `error' unless overridden further down.
        (char ?\040))
    ;; printable character
    (while (< char ?\060)
      (aset table char 'character)
      (setq char (1+ char)))
    ;; digits:
    (while (< char ?\072)
      (aset table char 'integer)
      (setq char (1+ char)))
    (while (< char ?\101)
      (aset table char 'character)
      (setq char (1+ char)))
    ;; upper case letters:
    (while (< char ?\133)
      (aset table char 'non-terminal)
      (setq char (1+ char)))
    (while (< char ?\141)
      (aset table char 'character)
      (setq char (1+ char)))
    ;; lower case letters:
    (while (< char ?\173)
      (aset table char 'non-terminal)
      (setq char (1+ char)))
    (while (< char ?\177)
      (aset table char 'character)
      (setq char (1+ char)))
    ;; European 8-bit accented characters.  Codes \177-\237 are skipped on
    ;; purpose and stay `error'.
    (setq char ?\240)
    (while (< char ?\400)
      (aset table char 'non-terminal)
      (setq char (1+ char)))
    ;; Override space characters:
    (aset table ?\013 'space)           ; [VT] vertical tab
    (aset table ?\n   'space)           ; [NL] linefeed
    (aset table ?\r   'space)           ; [CR] carriage return
    (aset table ?\t   'space)           ; [HT] horizontal tab
    (aset table ?\s   'space)           ; [SP] space
    ;; Override form feed character:
    (aset table ?\f   'form-feed)       ; [FF] form feed
    ;; Override other lexical characters:
    (aset table ?_    'non-terminal)
    (aset table ?\"   'double-terminal)
    (aset table ?\'   'single-terminal)
    (aset table ?\?   'special)
    (aset table ?*    'repeat)
    (aset table ?,    'catenate)
    (aset table ?-    'except)
    (aset table ?=    'equal)
    (aset table ?\)   'end-group)
    ;; The vector itself is the value of the constant.
    table)
  "Vector used to map characters to a lexical token.")
(defun ebnf-iso-initialize ()
  "Initialize ISO EBNF token table.
Destructively updates `ebnf-iso-token-table' according to
`ebnf-iso-alternative-p'.

When it is non-nil (alternative representation), `(' becomes
`left-parenthesis', `/' `slash', `!' `alternative', `:' `colon' and
`.' `period', while `[', `]', `{', `}', `|' and `;' become plain
`character's.

Otherwise (standard representation), `(' becomes `begin-parenthesis',
`[' `]' `{' `}' become the optional and zero-or-more delimiters, `|'
becomes `alternative' and `;' `period', while `/', `!', `:' and `.'
become plain `character's."
  (if ebnf-iso-alternative-p
      ;; Override alternative lexical characters.  `if' takes exactly one
      ;; then-form, so the group must be wrapped in `progn'.
      (progn
        (aset ebnf-iso-token-table ?\( 'left-parenthesis)
        (aset ebnf-iso-token-table ?\[ 'character)
        (aset ebnf-iso-token-table ?\] 'character)
        (aset ebnf-iso-token-table ?\{ 'character)
        (aset ebnf-iso-token-table ?\} 'character)
        (aset ebnf-iso-token-table ?|  'character)
        (aset ebnf-iso-token-table ?\; 'character)
        (aset ebnf-iso-token-table ?/  'slash)
        (aset ebnf-iso-token-table ?!  'alternative)
        (aset ebnf-iso-token-table ?:  'colon)
        (aset ebnf-iso-token-table ?.  'period))
    ;; Override standard lexical characters:
    (aset ebnf-iso-token-table ?\( 'begin-parenthesis)
    (aset ebnf-iso-token-table ?\[ 'begin-optional)
    (aset ebnf-iso-token-table ?\] 'end-optional)
    (aset ebnf-iso-token-table ?\{ 'begin-zero-or-more)
    (aset ebnf-iso-token-table ?\} 'end-zero-or-more)
    (aset ebnf-iso-token-table ?|  'alternative)
    (aset ebnf-iso-token-table ?\; 'period)
    (aset ebnf-iso-token-table ?/  'character)
    (aset ebnf-iso-token-table ?!  'character)
    (aset ebnf-iso-token-table ?:  'character)
    (aset ebnf-iso-token-table ?.  'character)))
383 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
;; Character set handed to `ebnf-buffer-substring' by the lexer when it reads
;; a meta identifier: space, digits, ASCII letters and underscore, combined
;; with the codes \240-\377 through `ebnf-range-regexp'.
;; NOTE(review): `ebnf-range-regexp' is defined outside this file; how it
;; encodes the range is not visible here.
384 (defconst ebnf-iso-non-terminal-chars
385 (ebnf-range-regexp " 0-9A-Za-z_" ?\240 ?\377))
;; Lexer.  Classifies the character at point through `ebnf-iso-token-table',
;; skips white space and comments, and for tokens that carry text stores that
;; text in the variable `ebnf-iso-lex'.
;; NOTE(review): many lines of this function are not visible in this view
;; (including most clause heads and the values returned by each clause), so
;; the comments below describe only the visible lines -- confirm against the
;; full file.
388 (defun ebnf-iso-lex ()
389 "Lexical analyzer for ISO EBNF.
391 Return a lexical token.
393 See documentation for variable `ebnf-iso-lex'."
;; Nothing left to read once point has reached `ebnf-limit'.
394 (if (>= (point) ebnf-limit)
397 ;; skip spaces and comments
;; The table has 256 entries, so characters above 255 are tested for
;; separately before it is indexed.
398 (while (if (> (following-char) 255)
402 (setq token (aref ebnf-iso-token-table (following-char)))
;; White space: skip the whole run and keep looping while input remains.
405 (skip-chars-forward " \013\n\r\t" ebnf-limit)
406 (< (point) ebnf-limit))
;; An opening parenthesis (either representation) may start a comment; the
;; character after it is compared with `*' to decide.
407 ((or (eq token 'begin-parenthesis)
408 (eq token 'left-parenthesis))
410 (if (/= (following-char) ?*)
414 (ebnf-iso-skip-comment)
;; A form feed is recorded as the pending action.
416 ((eq token 'form-feed)
418 (setq ebnf-action 'form-feed))
423 ((>= (point) ebnf-limit)
427 (error "Invalid character"))
;; Integer: the token text is the run of digits at point.
430 (setq ebnf-iso-lex (ebnf-buffer-substring "0-9"))
432 ;; special: ?special?
;; The surrounding `?' delimiters are kept in the token text only when
;; `ebnf-special-show-delimiter' is non-nil.
434 (setq ebnf-iso-lex (concat (and ebnf-special-show-delimiter "?")
435 (ebnf-string " ->@-~" ?\? "special")
436 (and ebnf-special-show-delimiter "?")))
438 ;; terminal: "string"
;; The character sets passed to `ebnf-string' here and below are the printable
;; ASCII range minus the respective closing delimiter.
439 ((eq token 'double-terminal)
440 (setq ebnf-iso-lex (ebnf-string " !#-~" ?\" "terminal"))
442 ;; terminal: 'string'
443 ((eq token 'single-terminal)
444 (setq ebnf-iso-lex (ebnf-string " -&(-~" ?\' "terminal"))
447 ((eq token 'non-terminal)
451 (ebnf-buffer-substring ebnf-iso-non-terminal-chars))))
;; Meta identifiers are rejected while `ebnf-no-meta-identifier' is non-nil
;; (`ebnf-iso-term' binds it around the exception part of a term).
452 (and ebnf-no-meta-identifier
453 (error "Exception sequence should not contain a meta identifier"))
455 ;; begin optional, begin list or begin group
;; Alternative representation: the character after `(' is examined to tell
;; `(/' and `(:' apart from a plain `('.
456 ((eq token 'left-parenthesis)
458 (cond ((= (following-char) ?/)
461 ((= (following-char) ?:)
467 ;; end optional or alternative
470 (if (/= (following-char) ?\))
477 (if (/= (following-char) ?\))
482 ((eq token 'begin-parenthesis)
491 ;; replace the range "\177-\237" (see `ebnf-range-regexp').
;; Character set passed to `skip-chars-forward' while a comment is scanned.
;; The leading `^' negates it: scanning stops at `*', `(', the control codes
;; \000-\010 and \016-\037, and the codes \177-\237 combined in through
;; `ebnf-range-regexp'.
;; NOTE(review): `ebnf-range-regexp' is defined outside this file; how it
;; encodes the range is not visible here.
492 (defconst ebnf-iso-comment-chars
493 (ebnf-range-regexp "^*(\000-\010\016-\037" ?\177 ?\237))
;; Skip a (possibly nested) comment.  The first character of the comment may
;; select an action; the scan then moves forward, with `pair' tracking the
;; nesting depth of `(*' ... `*)' pairs.
;; NOTE(review): several lines are not visible in this view (the start of the
;; function, the loop head that tests `pair', and the movement past the
;; comment delimiters), so only the visible lines are described.
496 (defun ebnf-iso-skip-comment ()
;; While `ebnf-eps-executing' is non-nil, a comment starting with `[', `]',
;; `H' or `F' carries text read by `ebnf-iso-eps-filename', which is passed to
;; the matching ebnf-eps function.
500 ((and ebnf-eps-executing (= (following-char) ?\[))
501 (ebnf-eps-add-context (ebnf-iso-eps-filename)))
503 ((and ebnf-eps-executing (= (following-char) ?\]))
504 (ebnf-eps-remove-context (ebnf-iso-eps-filename)))
506 ((and ebnf-eps-executing (= (following-char) ?H))
507 (ebnf-eps-header-comment (ebnf-iso-eps-filename)))
509 ((and ebnf-eps-executing (= (following-char) ?F))
510 (ebnf-eps-footer-comment (ebnf-iso-eps-filename)))
511 ;; any other action in comment
;; Otherwise the first character is looked up in `ebnf-comment-table' and the
;; result becomes the pending action.
513 (setq ebnf-action (aref ebnf-comment-table (following-char))))
;; Jump to the next character that needs attention: `*', `(' or a character
;; excluded by `ebnf-iso-comment-chars'.
517 (skip-chars-forward ebnf-iso-comment-chars ebnf-limit)
518 (cond ((>= (point) ebnf-limit)
519 (error "Missing end of comment: `*)'"))
;; A run of `*' followed by `)' closes one nesting level.
520 ((= (following-char) ?*)
521 (skip-chars-forward "*" ebnf-limit)
522 (when (= (following-char) ?\))
525 (setq pair (1- pair))))
;; A run of `(' followed by `*' opens a nested comment.
526 ((= (following-char) ?\()
527 (skip-chars-forward "(" ebnf-limit)
528 (when (= (following-char) ?*)
529 ;; beginning of comment
531 (setq pair (1+ pair))))
;; Anything else that stopped the scan is not allowed inside a comment.
533 (error "Invalid character"))
;; Return text taken from the buffer, without text properties, for the EPS
;; comment actions handled in `ebnf-iso-skip-comment'.  The scan uses
;; `ebnf-iso-comment-chars' with newline appended.
;; NOTE(review): most of this function is not visible in this view (the start
;; and end positions of the substring and every branch body), so the exact
;; stopping rule should be confirmed against the full file.
537 (defun ebnf-iso-eps-filename ()
539 (buffer-substring-no-properties
;; `ebnf-iso-comment-chars' is a negated set (it starts with `^'), so adding
;; "\n" makes the scan stop at a newline as well.
541 (let ((chars (concat ebnf-iso-comment-chars "\n"))
544 (skip-chars-forward chars ebnf-limit)
546 (cond ((>= (point) ebnf-limit)
;; A run of `*' is tested for a following `)', i.e. the end of the comment.
548 ((= (following-char) ?*)
549 (skip-chars-forward "*" ebnf-limit)
550 (if (/= (following-char) ?\))
;; A `(' is tested for a following `*', i.e. the start of a nested comment.
554 ((= (following-char) ?\()
556 (if (/= (following-char) ?*)
;; Collapse runs of spaces in STR when `ebnf-iso-normalize-p' is non-nil.
;; A first pass counts the surplus spaces (every space after the first in a
;; run); if there are any, a new string shorter by that count is filled with
;; the remaining characters.
;; NOTE(review): many lines are not visible in this view (the non-normalizing
;; branch, the loop heads, the index updates and the return values), so only
;; the visible lines are described.
566 (defun ebnf-iso-normalize (str)
567 (if (not ebnf-iso-normalize-p)
569 (let ((len (length str))
572 ;; count exceeding spaces
574 (if (/= (aref str stri) ?\ )
575 (setq stri (1+ stri))
;; At a space: step over it, then count each further space in the same run.
576 (setq stri (1+ stri))
577 (while (and (< stri len) (= (aref str stri) ?\ ))
579 spaces (1+ spaces)))))
581 ;; no exceeding space
583 ;; at least one exceeding space
;; The result has exactly `spaces' fewer characters than STR.
584 (let ((new (make-string (- len spaces) ?\ ))
586 ;; eliminate exceeding spaces
589 (if (/= (aref str stri) ?\ )
591 (aset new newi (aref str stri))
594 (aset new newi (aref str stri))
;; `spaces' is counted back down as surplus spaces are dropped.
597 (while (and (> spaces 0) (= (aref str stri) ?\ ))
599 spaces (1- spaces)))))
600 ;; remaining is normalized
602 (aset new newi (aref str stri))
608 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
614 ;;; ebnf-iso.el ends here