]> code.delx.au - gnu-emacs/blob - lisp/progmodes/ebnf-iso.el
(gud-tooltip-dereference): Add missing optional argument.
[gnu-emacs] / lisp / progmodes / ebnf-iso.el
1 ;;; ebnf-iso.el --- parser for ISO EBNF
2
3 ;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
4 ;; Free Software Foundation, Inc.
5
6 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8 ;; Time-stamp: <2004/04/03 16:48:52 vinicius>
9 ;; Keywords: wp, ebnf, PostScript
10 ;; Version: 1.8
11
12 ;; This file is part of GNU Emacs.
13
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 2, or (at your option)
17 ;; any later version.
18
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
23
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27 ;; Boston, MA 02110-1301, USA.
28
29 ;;; Commentary:
30
31 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32 ;;
33 ;;
34 ;; This is part of ebnf2ps package.
35 ;;
36 ;; This package defines a parser for ISO EBNF.
37 ;;
38 ;; See ebnf2ps.el for documentation.
39 ;;
40 ;;
41 ;; ISO EBNF Syntax
42 ;; ---------------
43 ;;
44 ;; See the URL:
45 ;; `http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html'
46 ;; ("International Standard of the ISO EBNF Notation").
47 ;;
48 ;;
49 ;; ISO EBNF = syntax rule, {syntax rule};
50 ;;
51 ;; syntax rule = meta identifier, '=', definition list, ';';
52 ;;
53 ;; definition list = single definition, {'|', single definition};
54 ;;
55 ;; single definition = term, {',', term};
56 ;;
57 ;; term = factor, ['-', exception];
58 ;;
59 ;; exception = factor (* without <meta identifier> *);
60 ;;
61 ;; factor = [integer, '*'], primary;
62 ;;
63 ;; primary = optional sequence | repeated sequence | special sequence
64 ;; | grouped sequence | meta identifier | terminal string
65 ;; | empty;
66 ;;
67 ;; empty = ;
68 ;;
69 ;; optional sequence = '[', definition list, ']';
70 ;;
71 ;; repeated sequence = '{', definition list, '}';
72 ;;
73 ;; grouped sequence = '(', definition list, ')';
74 ;;
75 ;; terminal string = "'", character - "'", {character - "'"}, "'"
76 ;; | '"', character - '"', {character - '"'}, '"';
77 ;;
78 ;; special sequence = '?', {character - '?'}, '?';
79 ;;
80 ;; meta identifier = letter, { letter | decimal digit | ' ' };
81 ;;
82 ;; integer = decimal digit, {decimal digit};
83 ;;
84 ;; comment = '(*', {comment symbol}, '*)';
85 ;;
86 ;; comment symbol = comment (* <== NESTED COMMENT *)
87 ;; | terminal string | special sequence | character;
88 ;;
89 ;; letter = ? A-Z a-z ?;
90 ;;
91 ;; decimal digit = ? 0-9 ?;
92 ;;
93 ;; character = letter | decimal digit
94 ;; | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{'
95 ;; | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_'
96 ;; | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~';
97 ;;
98 ;;
99 ;; There is also the following alternative representation:
100 ;;
101 ;; STANDARD ALTERNATIVE
102 ;; | ==> / or !
103 ;; [ ==> (/
104 ;; ] ==> /)
105 ;; { ==> (:
106 ;; } ==> :)
107 ;; ; ==> .
108 ;;
109 ;;
110 ;; Differences Between ISO EBNF And ebnf2ps ISO EBNF
111 ;; -------------------------------------------------
112 ;;
113 ;; ISO EBNF accepts the characters given by <character> production above,
114 ;; HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM FEED
115 ;; (^L), any other characters are invalid. But ebnf2ps accepts also the
116 ;; european 8-bit accentuated characters (from \240 to \377) and underscore
117 ;; (_).
118 ;;
119 ;;
120 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
121
122 ;;; Code:
123
124
125 (require 'ebnf-otz)
126
127
128 (defvar ebnf-iso-lex nil
129 "Value returned by `ebnf-iso-lex' function.")
130
131
132 (defvar ebnf-no-meta-identifier nil
133 "Used by `ebnf-iso-term' and `ebnf-iso-lex' functions.")
134
135 \f
136 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
137 ;; Syntactic analyzer
138
139
140 ;;; ISO EBNF = syntax rule, {syntax rule};
141
142 (defun ebnf-iso-parser (start)
143 "ISO EBNF parser."
144 (let ((total (+ (- ebnf-limit start) 1))
145 (bias (1- start))
146 (origin (point))
147 syntax-list token rule)
148 (goto-char start)
149 (setq token (ebnf-iso-lex))
150 (and (eq token 'end-of-input)
151 (error "Invalid ISO EBNF file format"))
152 (while (not (eq token 'end-of-input))
153 (ebnf-message-float
154 "Parsing...%s%%"
155 (/ (* (- (point) bias) 100.0) total))
156 (setq token (ebnf-iso-syntax-rule token)
157 rule (cdr token)
158 token (car token))
159 (or (ebnf-add-empty-rule-list rule)
160 (setq syntax-list (cons rule syntax-list))))
161 (goto-char origin)
162 syntax-list))
163
164
165 ;;; syntax rule = meta identifier, '=', definition list, ';';
166
167 (defun ebnf-iso-syntax-rule (token)
168 (let ((header ebnf-iso-lex)
169 (action ebnf-action)
170 body)
171 (setq ebnf-action nil)
172 (or (eq token 'non-terminal)
173 (error "Invalid meta identifier syntax rule"))
174 (or (eq (ebnf-iso-lex) 'equal)
175 (error "Invalid syntax rule: missing `='"))
176 (setq body (ebnf-iso-definition-list))
177 (or (eq (car body) 'period)
178 (error "Invalid syntax rule: missing `;' or `.'"))
179 (setq body (cdr body))
180 (ebnf-eps-add-production header)
181 (cons (ebnf-iso-lex)
182 (ebnf-make-production header body action))))
183
184
185 ;;; definition list = single definition, {'|', single definition};
186
187 (defun ebnf-iso-definition-list ()
188 (let (body sequence)
189 (while (eq (car (setq sequence (ebnf-iso-single-definition)))
190 'alternative)
191 (setq sequence (cdr sequence)
192 body (cons sequence body)))
193 (ebnf-token-alternative body sequence)))
194
195
196 ;;; single definition = term, {',', term};
197
198 (defun ebnf-iso-single-definition ()
199 (let (token seq term)
200 (while (and (setq term (ebnf-iso-term (ebnf-iso-lex))
201 token (car term)
202 term (cdr term))
203 (eq token 'catenate))
204 (setq seq (cons term seq)))
205 (cons token
206 (ebnf-token-sequence (if term
207 (cons term seq)
208 seq)))))
209
210
211 ;;; term = factor, ['-', exception];
212 ;;;
213 ;;; exception = factor (* without <meta identifier> *);
214
215 (defun ebnf-iso-term (token)
216 (let ((factor (ebnf-iso-factor token)))
217 (if (not (eq (car factor) 'except))
218 ;; factor
219 factor
220 ;; factor - exception
221 (let ((ebnf-no-meta-identifier t))
222 (ebnf-token-except (cdr factor) (ebnf-iso-factor (ebnf-iso-lex)))))))
223
224
225 ;;; factor = [integer, '*'], primary;
226
227 (defun ebnf-iso-factor (token)
228 (if (eq token 'integer)
229 (let ((times ebnf-iso-lex))
230 (or (eq (ebnf-iso-lex) 'repeat)
231 (error "Missing `*'"))
232 (ebnf-token-repeat times (ebnf-iso-primary (ebnf-iso-lex))))
233 (ebnf-iso-primary token)))
234
235
236 ;;; primary = optional sequence | repeated sequence | special sequence
237 ;;; | grouped sequence | meta identifier | terminal string
238 ;;; | empty;
239 ;;;
240 ;;; empty = ;
241 ;;;
242 ;;; optional sequence = '[', definition list, ']';
243 ;;;
244 ;;; repeated sequence = '{', definition list, '}';
245 ;;;
246 ;;; grouped sequence = '(', definition list, ')';
247 ;;;
248 ;;; terminal string = "'", character - "'", {character - "'"}, "'"
249 ;;; | '"', character - '"', {character - '"'}, '"';
250 ;;;
251 ;;; special sequence = '?', {character - '?'}, '?';
252 ;;;
253 ;;; meta identifier = letter, {letter | decimal digit};
254
255 (defun ebnf-iso-primary (token)
256 (let ((primary
257 (cond
258 ;; terminal string
259 ((eq token 'terminal)
260 (ebnf-make-terminal ebnf-iso-lex))
261 ;; meta identifier
262 ((eq token 'non-terminal)
263 (ebnf-make-non-terminal ebnf-iso-lex))
264 ;; special sequence
265 ((eq token 'special)
266 (ebnf-make-special ebnf-iso-lex))
267 ;; grouped sequence
268 ((eq token 'begin-group)
269 (let ((body (ebnf-iso-definition-list)))
270 (or (eq (car body) 'end-group)
271 (error "Missing `)'"))
272 (cdr body)))
273 ;; optional sequence
274 ((eq token 'begin-optional)
275 (let ((body (ebnf-iso-definition-list)))
276 (or (eq (car body) 'end-optional)
277 (error "Missing `]' or `/)'"))
278 (ebnf-token-optional (cdr body))))
279 ;; repeated sequence
280 ((eq token 'begin-zero-or-more)
281 (let* ((body (ebnf-iso-definition-list))
282 (repeat (cdr body)))
283 (or (eq (car body) 'end-zero-or-more)
284 (error "Missing `}' or `:)'"))
285 (ebnf-make-zero-or-more repeat)))
286 ;; empty
287 (t
288 nil)
289 )))
290 (cons (if primary
291 (ebnf-iso-lex)
292 token)
293 primary)))
294
295 \f
296 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
297 ;; Lexical analyzer
298
299
300 (defconst ebnf-iso-token-table
301 ;; control character & 8-bit character are set to `error'
302 (let ((table (make-vector 256 'error))
303 (char ?\040))
304 ;; printable character
305 (while (< char ?\060)
306 (aset table char 'character)
307 (setq char (1+ char)))
308 ;; digits:
309 (while (< char ?\072)
310 (aset table char 'integer)
311 (setq char (1+ char)))
312 (while (< char ?\101)
313 (aset table char 'character)
314 (setq char (1+ char)))
315 ;; upper case letters:
316 (while (< char ?\133)
317 (aset table char 'non-terminal)
318 (setq char (1+ char)))
319 (while (< char ?\141)
320 (aset table char 'character)
321 (setq char (1+ char)))
322 ;; lower case letters:
323 (while (< char ?\173)
324 (aset table char 'non-terminal)
325 (setq char (1+ char)))
326 (while (< char ?\177)
327 (aset table char 'character)
328 (setq char (1+ char)))
329 ;; European 8-bit accentuated characters:
330 (setq char ?\240)
331 (while (< char ?\400)
332 (aset table char 'non-terminal)
333 (setq char (1+ char)))
334 ;; Override space characters:
335 (aset table ?\013 'space) ; [VT] vertical tab
336 (aset table ?\n 'space) ; [NL] linefeed
337 (aset table ?\r 'space) ; [CR] carriage return
338 (aset table ?\t 'space) ; [HT] horizontal tab
339 (aset table ?\ 'space) ; [SP] space
340 ;; Override form feed character:
341 (aset table ?\f 'form-feed) ; [FF] form feed
342 ;; Override other lexical characters:
343 (aset table ?_ 'non-terminal)
344 (aset table ?\" 'double-terminal)
345 (aset table ?\' 'single-terminal)
346 (aset table ?\? 'special)
347 (aset table ?* 'repeat)
348 (aset table ?, 'catenate)
349 (aset table ?- 'except)
350 (aset table ?= 'equal)
351 (aset table ?\) 'end-group)
352 table)
353 "Vector used to map characters to a lexical token.")
354
355
356 (defun ebnf-iso-initialize ()
357 "Initialize ISO EBNF token table."
358 (if ebnf-iso-alternative-p
359 ;; Override alternative lexical characters:
360 (progn
361 (aset ebnf-iso-token-table ?\( 'left-parenthesis)
362 (aset ebnf-iso-token-table ?\[ 'character)
363 (aset ebnf-iso-token-table ?\] 'character)
364 (aset ebnf-iso-token-table ?\{ 'character)
365 (aset ebnf-iso-token-table ?\} 'character)
366 (aset ebnf-iso-token-table ?| 'character)
367 (aset ebnf-iso-token-table ?\; 'character)
368 (aset ebnf-iso-token-table ?/ 'slash)
369 (aset ebnf-iso-token-table ?! 'alternative)
370 (aset ebnf-iso-token-table ?: 'colon)
371 (aset ebnf-iso-token-table ?. 'period))
372 ;; Override standard lexical characters:
373 (aset ebnf-iso-token-table ?\( 'begin-parenthesis)
374 (aset ebnf-iso-token-table ?\[ 'begin-optional)
375 (aset ebnf-iso-token-table ?\] 'end-optional)
376 (aset ebnf-iso-token-table ?\{ 'begin-zero-or-more)
377 (aset ebnf-iso-token-table ?\} 'end-zero-or-more)
378 (aset ebnf-iso-token-table ?| 'alternative)
379 (aset ebnf-iso-token-table ?\; 'period)
380 (aset ebnf-iso-token-table ?/ 'character)
381 (aset ebnf-iso-token-table ?! 'character)
382 (aset ebnf-iso-token-table ?: 'character)
383 (aset ebnf-iso-token-table ?. 'character)))
384
385
386 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
387 (defconst ebnf-iso-non-terminal-chars
388 (ebnf-range-regexp " 0-9A-Za-z_" ?\240 ?\377))
389
390
391 (defun ebnf-iso-lex ()
392 "Lexical analyzer for ISO EBNF.
393
394 Return a lexical token.
395
396 See documentation for variable `ebnf-iso-lex'."
397 (if (>= (point) ebnf-limit)
398 'end-of-input
399 (let (token)
400 ;; skip spaces and comments
401 (while (if (> (following-char) 255)
402 (progn
403 (setq token 'error)
404 nil)
405 (setq token (aref ebnf-iso-token-table (following-char)))
406 (cond
407 ((eq token 'space)
408 (skip-chars-forward " \013\n\r\t" ebnf-limit)
409 (< (point) ebnf-limit))
410 ((or (eq token 'begin-parenthesis)
411 (eq token 'left-parenthesis))
412 (forward-char)
413 (if (/= (following-char) ?*)
414 ;; no comment
415 nil
416 ;; comment
417 (ebnf-iso-skip-comment)
418 t))
419 ((eq token 'form-feed)
420 (forward-char)
421 (setq ebnf-action 'form-feed))
422 (t nil)
423 )))
424 (cond
425 ;; end of input
426 ((>= (point) ebnf-limit)
427 'end-of-input)
428 ;; error
429 ((eq token 'error)
430 (error "Invalid character"))
431 ;; integer
432 ((eq token 'integer)
433 (setq ebnf-iso-lex (ebnf-buffer-substring "0-9"))
434 'integer)
435 ;; special: ?special?
436 ((eq token 'special)
437 (setq ebnf-iso-lex (concat (and ebnf-special-show-delimiter "?")
438 (ebnf-string " ->@-~" ?\? "special")
439 (and ebnf-special-show-delimiter "?")))
440 'special)
441 ;; terminal: "string"
442 ((eq token 'double-terminal)
443 (setq ebnf-iso-lex (ebnf-string " !#-~" ?\" "terminal"))
444 'terminal)
445 ;; terminal: 'string'
446 ((eq token 'single-terminal)
447 (setq ebnf-iso-lex (ebnf-string " -&(-~" ?\' "terminal"))
448 'terminal)
449 ;; non-terminal
450 ((eq token 'non-terminal)
451 (setq ebnf-iso-lex
452 (ebnf-iso-normalize
453 (ebnf-trim-right
454 (ebnf-buffer-substring ebnf-iso-non-terminal-chars))))
455 (and ebnf-no-meta-identifier
456 (error "Exception sequence should not contain a meta identifier"))
457 'non-terminal)
458 ;; begin optional, begin list or begin group
459 ((eq token 'left-parenthesis)
460 (forward-char)
461 (cond ((= (following-char) ?/)
462 (forward-char)
463 'begin-optional)
464 ((= (following-char) ?:)
465 (forward-char)
466 'begin-zero-or-more)
467 (t
468 'begin-group)
469 ))
470 ;; end optional or alternative
471 ((eq token 'slash)
472 (forward-char)
473 (if (/= (following-char) ?\))
474 'alternative
475 (forward-char)
476 'end-optional))
477 ;; end list
478 ((eq token 'colon)
479 (forward-char)
480 (if (/= (following-char) ?\))
481 'character
482 (forward-char)
483 'end-zero-or-more))
484 ;; begin group
485 ((eq token 'begin-parenthesis)
486 'begin-group)
487 ;; miscellaneous
488 (t
489 (forward-char)
490 token)
491 ))))
492
493
494 ;; replace the range "\177-\237" (see `ebnf-range-regexp').
495 (defconst ebnf-iso-comment-chars
496 (ebnf-range-regexp "^*(\000-\010\016-\037" ?\177 ?\237))
497
498
499 (defun ebnf-iso-skip-comment ()
500 (forward-char)
501 (cond
502 ;; open EPS file
503 ((and ebnf-eps-executing (= (following-char) ?\[))
504 (ebnf-eps-add-context (ebnf-iso-eps-filename)))
505 ;; close EPS file
506 ((and ebnf-eps-executing (= (following-char) ?\]))
507 (ebnf-eps-remove-context (ebnf-iso-eps-filename)))
508 ;; any other action in comment
509 (t
510 (setq ebnf-action (aref ebnf-comment-table (following-char))))
511 )
512 (let ((pair 1))
513 (while (> pair 0)
514 (skip-chars-forward ebnf-iso-comment-chars ebnf-limit)
515 (cond ((>= (point) ebnf-limit)
516 (error "Missing end of comment: `*)'"))
517 ((= (following-char) ?*)
518 (skip-chars-forward "*" ebnf-limit)
519 (when (= (following-char) ?\))
520 ;; end of comment
521 (forward-char)
522 (setq pair (1- pair))))
523 ((= (following-char) ?\()
524 (skip-chars-forward "(" ebnf-limit)
525 (when (= (following-char) ?*)
526 ;; beginning of comment
527 (forward-char)
528 (setq pair (1+ pair))))
529 (t
530 (error "Invalid character"))
531 ))))
532
533
534 (defun ebnf-iso-eps-filename ()
535 (forward-char)
536 (buffer-substring-no-properties
537 (point)
538 (let ((chars (concat ebnf-iso-comment-chars "\n"))
539 found)
540 (while (not found)
541 (skip-chars-forward chars ebnf-limit)
542 (setq found
543 (cond ((>= (point) ebnf-limit)
544 (point))
545 ((= (following-char) ?*)
546 (skip-chars-forward "*" ebnf-limit)
547 (if (/= (following-char) ?\))
548 nil
549 (backward-char)
550 (point)))
551 ((= (following-char) ?\()
552 (forward-char)
553 (if (/= (following-char) ?*)
554 nil
555 (backward-char)
556 (point)))
557 (t
558 (point))
559 )))
560 found)))
561
562
563 (defun ebnf-iso-normalize (str)
564 (if (not ebnf-iso-normalize-p)
565 str
566 (let ((len (length str))
567 (stri 0)
568 (spaces 0))
569 ;; count exceeding spaces
570 (while (< stri len)
571 (if (/= (aref str stri) ?\ )
572 (setq stri (1+ stri))
573 (setq stri (1+ stri))
574 (while (and (< stri len) (= (aref str stri) ?\ ))
575 (setq stri (1+ stri)
576 spaces (1+ spaces)))))
577 (if (zerop spaces)
578 ;; no exceeding space
579 str
580 ;; at least one exceeding space
581 (let ((new (make-string (- len spaces) ?\ ))
582 (newi 0))
583 ;; eliminate exceeding spaces
584 (setq stri 0)
585 (while (> spaces 0)
586 (if (/= (aref str stri) ?\ )
587 (progn
588 (aset new newi (aref str stri))
589 (setq stri (1+ stri)
590 newi (1+ newi)))
591 (aset new newi (aref str stri))
592 (setq stri (1+ stri)
593 newi (1+ newi))
594 (while (and (> spaces 0) (= (aref str stri) ?\ ))
595 (setq stri (1+ stri)
596 spaces (1- spaces)))))
597 ;; remaining is normalized
598 (while (< stri len)
599 (aset new newi (aref str stri))
600 (setq stri (1+ stri)
601 newi (1+ newi)))
602 new)))))
603
604 \f
605 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
606
607
608 (provide 'ebnf-iso)
609
610
611 ;;; arch-tag: 03315eef-8f64-404a-bf9d-256d42442ee3
612 ;;; ebnf-iso.el ends here