1 ;;; ebnf-iso.el --- parser for ISO EBNF
3 ;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
4 ;; Free Software Foundation, Inc.
6 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8 ;; Time-stamp: <2004/02/22 14:24:55 vinicius>
9 ;; Keywords: wp, ebnf, PostScript
12 ;; This file is part of GNU Emacs.
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 2, or (at your option)
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27 ;; Boston, MA 02111-1307, USA.
31 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
34 ;; This is part of ebnf2ps package.
36 ;; This package defines a parser for ISO EBNF.
38 ;; See ebnf2ps.el for documentation.
45 ;; `http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html'
46 ;; ("International Standard of the ISO EBNF Notation").
49 ;; ISO EBNF = syntax rule, {syntax rule};
51 ;; syntax rule = meta identifier, '=', definition list, ';';
53 ;; definition list = single definition, {'|', single definition};
55 ;; single definition = term, {',', term};
57 ;; term = factor, ['-', exception];
59 ;; exception = factor (* without <meta identifier> *);
61 ;; factor = [integer, '*'], primary;
63 ;; primary = optional sequence | repeated sequence | special sequence
64 ;; | grouped sequence | meta identifier | terminal string
69 ;; optional sequence = '[', definition list, ']';
71 ;; repeated sequence = '{', definition list, '}';
73 ;; grouped sequence = '(', definition list, ')';
75 ;; terminal string = "'", character - "'", {character - "'"}, "'"
76 ;; | '"', character - '"', {character - '"'}, '"';
78 ;; special sequence = '?', {character - '?'}, '?';
80 ;; meta identifier = letter, { letter | decimal digit | ' ' };
82 ;; integer = decimal digit, {decimal digit};
84 ;; comment = '(*', {comment symbol}, '*)';
86 ;; comment symbol = comment (* <== NESTED COMMENT *)
87 ;; | terminal string | special sequence | character;
89 ;; letter = ? A-Z a-z ?;
91 ;; decimal digit = ? 0-9 ?;
93 ;; character = letter | decimal digit
94 ;; | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{'
95 ;; | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_'
96 ;; | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~';
99 ;; There is also the following alternative representation:
101 ;; STANDARD ALTERNATIVE
110 ;; Differences Between ISO EBNF And ebnf2ps ISO EBNF
111 ;; -------------------------------------------------
113 ;; ISO EBNF accepts the characters given by the <character> production above,
114 ;; HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM FEED
115 ;; (^L); any other character is illegal. However, ebnf2ps also accepts the
116 ;; European 8-bit accented characters (from \240 to \377) and underscore.
119 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defvar ebnf-iso-lex nil
  "Text of the token most recently read by the function `ebnf-iso-lex'.
The lexer stores the token's string here and returns the token kind;
the parser functions read this variable to build their nodes.")
(defvar ebnf-no-meta-identifier nil
  "Non-nil means a meta identifier is not allowed in the input being read.
`ebnf-iso-term' binds this to t while it parses the exception part of
a term, and `ebnf-iso-lex' signals an error if it reads a meta
identifier while the value is non-nil.")
135 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
136 ;; Syntactic analyzer
139 ;;; ISO EBNF = syntax rule, {syntax rule};
;; Entry point of the syntactic analyzer: parse the grammar text that begins
;; at buffer position START, one syntax rule per loop iteration.
;; Signals "Invalid ISO EBNF file format" when the very first token is
;; already `end-of-input', i.e. the input contains no rule at all.
141 (defun ebnf-iso-parser (start)
;; TOTAL is the number of characters from START through `ebnf-limit'.
143 (let ((total (+ (- ebnf-limit start) 1))
146 syntax-list token rule)
148 (setq token (ebnf-iso-lex))
149 (and (eq token 'end-of-input)
150 (error "Invalid ISO EBNF file format"))
;; Consume syntax rules until the lexer reports the end of the input.
151 (while (not (eq token 'end-of-input))
;; Progress as a percentage: (point) minus BIAS, relative to TOTAL.
154 (/ (* (- (point) bias) 100.0) total))
155 (setq token (ebnf-iso-syntax-rule token)
;; RULE is consed onto SYNTAX-LIST only when `ebnf-add-empty-rule-list'
;; returns nil for it, so the list is built in reverse order here.
158 (or (ebnf-add-empty-rule-list rule)
159 (setq syntax-list (cons rule syntax-list))))
164 ;;; syntax rule = meta identifier, '=', definition list, ';';
;; Parse one syntax rule whose first token is TOKEN.
;; TOKEN must be `non-terminal' (the meta identifier) and the next token
;; must be `equal'; otherwise an error is signaled.
166 (defun ebnf-iso-syntax-rule (token)
;; The text of the meta identifier was left in the variable `ebnf-iso-lex'
;; by the lexer; it becomes the production header.
167 (let ((header ebnf-iso-lex)
170 (setq ebnf-action nil)
171 (or (eq token 'non-terminal)
172 (error "Invalid meta identifier syntax rule"))
173 (or (eq (ebnf-iso-lex) 'equal)
174 (error "Invalid syntax rule: missing `='"))
;; The definition list comes back as (TOKEN . BODY), where TOKEN is the
;; token that ended the list; a rule must end with `period'.
175 (setq body (ebnf-iso-definition-list))
176 (or (eq (car body) 'period)
177 (error "Invalid syntax rule: missing `;' or `.'"))
178 (setq body (cdr body))
179 (ebnf-eps-add-production header)
181 (ebnf-make-production header body action))))
184 ;;; definition list = single definition, {'|', single definition};
;; Parse a definition list by calling `ebnf-iso-single-definition'
;; repeatedly.  Each call returns (TOKEN . SEQUENCE); while the loop
;; condition on TOKEN holds, SEQUENCE is pushed onto BODY.  The last
;; (TOKEN . SEQUENCE) pair and the accumulated BODY are handed to
;; `ebnf-token-alternative'.
186 (defun ebnf-iso-definition-list ()
188 (while (eq (car (setq sequence (ebnf-iso-single-definition)))
;; Drop the token and keep only the parsed sequence.
190 (setq sequence (cdr sequence)
191 body (cons sequence body)))
192 (ebnf-token-alternative body sequence)))
195 ;;; single definition = term, {',', term};
;; Parse a single definition, i.e. terms joined by the `catenate' token
;; (the `,' character in `ebnf-iso-token-table').
197 (defun ebnf-iso-single-definition ()
198 (let (token seq term)
;; Read terms for as long as each one is followed by `catenate'; SEQ
;; collects them in reverse order.
199 (while (and (setq term (ebnf-iso-term (ebnf-iso-lex))
202 (eq token 'catenate))
203 (setq seq (cons term seq)))
209 ;; sequence with only one element
210 ((and (null term) (= (length seq) 1))
;; General case: add the last term and restore the source order.
214 (ebnf-make-sequence (nreverse (cons term seq))))
218 ;;; term = factor, ['-', exception];
220 ;;; exception = factor (* without <meta identifier> *);
;; Parse a term whose first token is TOKEN.
;; `ebnf-iso-factor' returns (NEXT-TOKEN . FACTOR).  When NEXT-TOKEN is
;; `except' (the `-' character), a second factor is parsed as the
;; exception and both are combined by `ebnf-token-except'.
222 (defun ebnf-iso-term (token)
223 (let ((factor (ebnf-iso-factor token)))
224 (if (not (eq (car factor) 'except))
227 ;; factor - exception
;; While this binding is in effect `ebnf-iso-lex' signals an error on any
;; meta identifier, because an exception must not contain one.
228 (let ((ebnf-no-meta-identifier t))
229 (ebnf-token-except (cdr factor) (ebnf-iso-factor (ebnf-iso-lex)))))))
232 ;;; factor = [integer, '*'], primary;
(defun ebnf-iso-factor (token)
  "Parse an ISO EBNF factor whose first lexical token is TOKEN.

A factor is a primary optionally preceded by `INTEGER *'.  When TOKEN
is `integer', the repetition count is the token text held in the
variable `ebnf-iso-lex'; the next token must be `repeat', otherwise an
error is signaled.  The value is whatever `ebnf-iso-primary' returns,
passed through `ebnf-token-repeat' in the repeated case."
  (cond
   ;; No repetition prefix: the factor is just the primary.
   ((not (eq token 'integer))
    (ebnf-iso-primary token))
   (t
    ;; Save the count now; the next lexer call may overwrite `ebnf-iso-lex'.
    (let ((count ebnf-iso-lex))
      (unless (eq (ebnf-iso-lex) 'repeat)
        (error "Missing `*'"))
      (ebnf-token-repeat count (ebnf-iso-primary (ebnf-iso-lex)))))))
243 ;;; primary = optional sequence | repeated sequence | special sequence
244 ;;; | grouped sequence | meta identifier | terminal string
249 ;;; optional sequence = '[', definition list, ']';
251 ;;; repeated sequence = '{', definition list, '}';
253 ;;; grouped sequence = '(', definition list, ')';
255 ;;; terminal string = "'", character - "'", {character - "'"}, "'"
256 ;;; | '"', character - '"', {character - '"'}, '"';
258 ;;; special sequence = '?', {character - '?'}, '?';
260 ;;; meta identifier = letter, { letter | decimal digit | ' ' };
;; Parse a primary, dispatching on the kind of TOKEN.
;; Leaf tokens build their node from the token text held in the variable
;; `ebnf-iso-lex'.  Bracketed forms parse a nested definition list, which
;; comes back as (CLOSING-TOKEN . BODY), and signal an error when
;; CLOSING-TOKEN is not the matching closer.
262 (defun ebnf-iso-primary (token)
;; terminal string
266 ((eq token 'terminal)
267 (ebnf-make-terminal ebnf-iso-lex))
;; meta identifier
269 ((eq token 'non-terminal)
270 (ebnf-make-non-terminal ebnf-iso-lex))
273 (ebnf-make-special ebnf-iso-lex))
;; grouped sequence
275 ((eq token 'begin-group)
276 (let ((body (ebnf-iso-definition-list)))
277 (or (eq (car body) 'end-group)
278 (error "Missing `)'"))
;; optional sequence
281 ((eq token 'begin-optional)
282 (let ((body (ebnf-iso-definition-list)))
283 (or (eq (car body) 'end-optional)
284 (error "Missing `]' or `/)'"))
285 (ebnf-token-optional (cdr body))))
;; repeated sequence
287 ((eq token 'begin-zero-or-more)
288 (let* ((body (ebnf-iso-definition-list))
290 (or (eq (car body) 'end-zero-or-more)
291 (error "Missing `}' or `:)'"))
292 (ebnf-make-zero-or-more repeat)))
303 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Table of 256 entries indexed by character code; `ebnf-iso-lex' looks up
;; the character at point here to obtain its lexical token kind.
;; The table is filled range by range and then single characters are
;; overridden.  The entries for `(', `[', `]', `{', `}', `|', `;', `/',
;; `!', `:' and `.' are set by `ebnf-iso-initialize'.
307 (defconst ebnf-iso-token-table
308 ;; control character & 8-bit character are set to `error'
309 (let ((table (make-vector 256 'error))
311 ;; printable character
;; codes below ?\060 (`0')
312 (while (< char ?\060)
313 (aset table char 'character)
314 (setq char (1+ char)))
;; decimal digits: ?\060 (`0') up to, but excluding, ?\072 (`:')
316 (while (< char ?\072)
317 (aset table char 'integer)
318 (setq char (1+ char)))
;; codes between the digits and ?\101 (`A')
319 (while (< char ?\101)
320 (aset table char 'character)
321 (setq char (1+ char)))
322 ;; upper case letters:
323 (while (< char ?\133)
324 (aset table char 'non-terminal)
325 (setq char (1+ char)))
;; codes between `Z' and ?\141 (`a')
326 (while (< char ?\141)
327 (aset table char 'character)
328 (setq char (1+ char)))
329 ;; lower case letters:
330 (while (< char ?\173)
331 (aset table char 'non-terminal)
332 (setq char (1+ char)))
;; codes from ?\173 (`{') up to, but excluding, ?\177 (DEL)
333 (while (< char ?\177)
334 (aset table char 'character)
335 (setq char (1+ char)))
336 ;; European 8-bit accented characters:
;; filled up to ?\377 inclusive (the loop stops at ?\400, i.e. 256)
338 (while (< char ?\400)
339 (aset table char 'non-terminal)
340 (setq char (1+ char)))
341 ;; Override space characters:
342 (aset table ?\013 'space) ; [VT] vertical tab
343 (aset table ?\n 'space) ; [NL] linefeed
344 (aset table ?\r 'space) ; [CR] carriage return
345 (aset table ?\t 'space) ; [HT] horizontal tab
346 (aset table ?\ 'space) ; [SP] space
347 ;; Override form feed character:
348 (aset table ?\f 'form-feed) ; [FF] form feed
349 ;; Override other lexical characters:
;; Underscore is treated like a letter.
350 (aset table ?_ 'non-terminal)
351 (aset table ?\" 'double-terminal)
352 (aset table ?\' 'single-terminal)
353 (aset table ?\? 'special)
354 (aset table ?* 'repeat)
355 (aset table ?, 'catenate)
356 (aset table ?- 'except)
357 (aset table ?= 'equal)
358 (aset table ?\) 'end-group)
360 "Vector used to map characters to a lexical token.")
;; Adapt `ebnf-iso-token-table' to the representation selected by
;; `ebnf-iso-alternative-p'.  The table is modified in place with `aset',
;; even though it is declared with `defconst'.
363 (defun ebnf-iso-initialize ()
364 "Initialize ISO EBNF token table."
365 (if ebnf-iso-alternative-p
366 ;; Override alternative lexical characters:
;; Brackets, braces, `|' and `;' become plain characters, while `(', `/',
;; `!', `:' and `.' get their own token kinds.
368 (aset ebnf-iso-token-table ?\( 'left-parenthesis)
369 (aset ebnf-iso-token-table ?\[ 'character)
370 (aset ebnf-iso-token-table ?\] 'character)
371 (aset ebnf-iso-token-table ?\{ 'character)
372 (aset ebnf-iso-token-table ?\} 'character)
373 (aset ebnf-iso-token-table ?| 'character)
374 (aset ebnf-iso-token-table ?\; 'character)
375 (aset ebnf-iso-token-table ?/ 'slash)
376 (aset ebnf-iso-token-table ?! 'alternative)
377 (aset ebnf-iso-token-table ?: 'colon)
378 (aset ebnf-iso-token-table ?. 'period))
379 ;; Override standard lexical characters:
;; The reverse assignment: brackets, braces, `|' and `;' are tokens, while
;; `/', `!', `:' and `.' are plain characters.
380 (aset ebnf-iso-token-table ?\( 'begin-parenthesis)
381 (aset ebnf-iso-token-table ?\[ 'begin-optional)
382 (aset ebnf-iso-token-table ?\] 'end-optional)
383 (aset ebnf-iso-token-table ?\{ 'begin-zero-or-more)
384 (aset ebnf-iso-token-table ?\} 'end-zero-or-more)
385 (aset ebnf-iso-token-table ?| 'alternative)
386 (aset ebnf-iso-token-table ?\; 'period)
387 (aset ebnf-iso-token-table ?/ 'character)
388 (aset ebnf-iso-token-table ?! 'character)
389 (aset ebnf-iso-token-table ?: 'character)
390 (aset ebnf-iso-token-table ?. 'character)))
;; The call below stands in for the literal range "\240-\377"
;; (see `ebnf-range-regexp').
(defconst ebnf-iso-non-terminal-chars
  (ebnf-range-regexp " 0-9A-Za-z_" ?\240 ?\377)
  "Character set that `ebnf-iso-lex' scans to read a meta identifier.
It lists space, decimal digits, letters and underscore, and is
completed by `ebnf-range-regexp' for the range 240 to 377 (octal).")
;; Lexer: classify the character at point through `ebnf-iso-token-table'.
;; For tokens that carry text (integer, special sequence, terminal string,
;; meta identifier) that text is stored in the variable `ebnf-iso-lex'.
;; Scanning never goes beyond `ebnf-limit'.
398 (defun ebnf-iso-lex ()
399 "Lexical analyser for ISO EBNF.
401 Return a lexical token.
403 See documentation for variable `ebnf-iso-lex'."
;; Nothing is left to scan once point has reached `ebnf-limit'.
404 (if (>= (point) ebnf-limit)
407 ;; skip spaces and comments
;; Character codes above 255 have no slot in the 256-entry token table, so
;; they are tested for before the table is indexed.
408 (while (if (> (following-char) 255)
412 (setq token (aref ebnf-iso-token-table (following-char)))
415 (skip-chars-forward " \013\n\r\t" ebnf-limit)
416 (< (point) ebnf-limit))
;; An opening parenthesis (either representation) may start a comment;
;; the character that follows it is compared with `*' to decide.
417 ((or (eq token 'begin-parenthesis)
418 (eq token 'left-parenthesis))
420 (if (/= (following-char) ?*)
424 (ebnf-iso-skip-comment)
;; A form feed is recorded as the pending action.
426 ((eq token 'form-feed)
428 (setq ebnf-action 'form-feed))
433 ((>= (point) ebnf-limit)
437 (error "Illegal character"))
;; integer: the token text is the run of decimal digits
440 (setq ebnf-iso-lex (ebnf-buffer-substring "0-9"))
442 ;; special: ?special?
;; The `?' delimiters are kept in the token text only when
;; `ebnf-special-show-delimiter' is non-nil.
444 (setq ebnf-iso-lex (concat (and ebnf-special-show-delimiter "?")
445 (ebnf-string " ->@-~" ?\? "special")
446 (and ebnf-special-show-delimiter "?")))
448 ;; terminal: "string"
449 ((eq token 'double-terminal)
450 (setq ebnf-iso-lex (ebnf-string " !#-~" ?\" "terminal"))
452 ;; terminal: 'string'
453 ((eq token 'single-terminal)
454 (setq ebnf-iso-lex (ebnf-string " -&(-~" ?\' "terminal"))
;; meta identifier
457 ((eq token 'non-terminal)
461 (ebnf-buffer-substring ebnf-iso-non-terminal-chars))))
;; `ebnf-iso-term' binds `ebnf-no-meta-identifier' to t while it reads the
;; exception part of a term.
462 (and ebnf-no-meta-identifier
463 (error "Exception sequence should not contain a meta identifier"))
465 ;; begin optional, begin list or begin group
;; Alternative representation: the character after `(' selects the kind.
466 ((eq token 'left-parenthesis)
468 (cond ((= (following-char) ?/)
471 ((= (following-char) ?:)
477 ;; end optional or alternative
480 (if (/= (following-char) ?\))
487 (if (/= (following-char) ?\))
492 ((eq token 'begin-parenthesis)
;; The call below stands in for the literal range "\177-\237"
;; (see `ebnf-range-regexp').
(defconst ebnf-iso-comment-chars
  (ebnf-range-regexp "^*(\000-\010\016-\037" ?\177 ?\237)
  "Character set given to `skip-chars-forward' while inside a comment.
The set is negated by its leading `^': the characters listed after it
are an asterisk, an opening parenthesis and the control characters
0 to 8 and 14 to 31 (decimal).  `ebnf-range-regexp' completes the set
for the range 177 to 237 (octal).")
;; Skip over a comment, keeping track of nested comments.
;; PAIR is incremented for every nested `(*' and decremented for every
;; `*)'.  Reaching `ebnf-limit' first signals "Missing end of comment".
506 (defun ebnf-iso-skip-comment ()
;; The character at point may select an action.  `[' and `]' add or remove
;; an EPS context, but only while `ebnf-eps-executing' is non-nil.
510 ((and ebnf-eps-executing (= (following-char) ?\[))
511 (ebnf-eps-add-context (ebnf-iso-eps-filename)))
513 ((and ebnf-eps-executing (= (following-char) ?\]))
514 (ebnf-eps-remove-context (ebnf-iso-eps-filename)))
515 ;; any other action in comment
517 (setq ebnf-action (aref ebnf-comment-table (following-char))))
;; Skip with the set in `ebnf-iso-comment-chars', then inspect the
;; character on which the skip stopped.
521 (skip-chars-forward ebnf-iso-comment-chars ebnf-limit)
522 (cond ((>= (point) ebnf-limit)
523 (error "Missing end of comment: `*)'"))
;; A run of `*' closes a comment only when `)' follows it.
524 ((= (following-char) ?*)
525 (skip-chars-forward "*" ebnf-limit)
526 (when (= (following-char) ?\))
529 (setq pair (1- pair))))
;; A run of `(' opens a nested comment only when `*' follows it.
530 ((= (following-char) ?\()
531 (skip-chars-forward "(" ebnf-limit)
532 (when (= (following-char) ?*)
533 ;; beginning of comment
535 (setq pair (1+ pair))))
537 (error "Illegal character"))
;; Return a piece of buffer text, without text properties, that
;; `ebnf-iso-skip-comment' passes to `ebnf-eps-add-context' and
;; `ebnf-eps-remove-context'.
541 (defun ebnf-iso-eps-filename ()
543 (buffer-substring-no-properties
;; CHARS is the comment skip set with a newline appended.  That set starts
;; with `^' (negation), so presumably the newline becomes one more
;; character at which the skip stops -- TODO confirm against
;; `ebnf-range-regexp'.
545 (let ((chars (concat ebnf-iso-comment-chars "\n"))
548 (skip-chars-forward chars ebnf-limit)
;; Inspect the character on which the skip stopped.
550 (cond ((>= (point) ebnf-limit)
;; a run of `*': test whether `)' follows it
552 ((= (following-char) ?*)
553 (skip-chars-forward "*" ebnf-limit)
554 (if (/= (following-char) ?\))
;; `(': test whether `*' follows
558 ((= (following-char) ?\()
560 (if (/= (following-char) ?*)
;; Collapse runs of consecutive spaces in STR, keeping only the first space
;; of each run.  This is done only when `ebnf-iso-normalize-p' is non-nil.
;; Two passes: the first counts the surplus spaces in SPACES, the second
;; copies STR into a new string of length LEN - SPACES without them.
570 (defun ebnf-iso-normalize (str)
571 (if (not ebnf-iso-normalize-p)
573 (let ((len (length str))
576 ;; count excess spaces
578 (if (/= (aref str stri) ?\ )
579 (setq stri (1+ stri))
;; The first space of a run is stepped over; only the spaces that follow
;; it are counted as surplus.
580 (setq stri (1+ stri))
581 (while (and (< stri len) (= (aref str stri) ?\ ))
583 spaces (1+ spaces)))))
585 ;; no excess space
587 ;; at least one excess space
588 (let ((new (make-string (- len spaces) ?\ ))
590 ;; eliminate excess spaces
593 (if (/= (aref str stri) ?\ )
595 (aset new newi (aref str stri))
;; Copy the first space of the run, then skip the surplus ones.
598 (aset new newi (aref str stri))
601 (while (and (> spaces 0) (= (aref str stri) ?\ ))
603 spaces (1- spaces)))))
604 ;; remaining is normalized
606 (aset new newi (aref str stri))
612 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
618 ;;; arch-tag: 03315eef-8f64-404a-bf9d-256d42442ee3
619 ;;; ebnf-iso.el ends here