1 ;;; Grammar compiler for the wisent LALR parser, integrating Wisi OpenToken output. -*- lexical-binding:t -*-
3 ;; Copyright (C) 2012, 2013, 2015 Free Software Foundation, Inc.
5 ;; Author: Stephen Leake <stephen_leake@member.fsf.org>
7 ;; This file is part of GNU Emacs.
9 ;; GNU Emacs is free software: you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation, either version 3 of the License, or
12 ;; (at your option) any later version.
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
22 ;;; History: first experimental version Jan 2013
26 ;; Semantic (info "(semantic)Top") provides an LALR(1) parser
27 ;; wisent-parse. The grammar used is defined by the functions
28 ;; semantic-grammar-create-package, which reads a bison-like source
29 ;; file and produces corresponding elisp source, and
30 ;; wisent-compile-grammar, which generates a parser table.
32 ;; However, the algorithm used in wisent-compile-grammar cannot cope
33 ;; with the grammar for the Ada language, because it is not
34 ;; LALR(1). So we provide a generalized LALR parser, which spawns
35 ;; parallel LALR parsers at each conflict. Instead of also rewriting
36 ;; the entire semantic grammar compiler, we use the OpenToken LALR
37 ;; parser generator, which is easier to modify (it is written in Ada,
40 ;; The Ada function Wisi.Generate reads the bison-like input and
41 ;; produces corresponding elisp source code, similar to that
42 ;; produced by semantic-grammar-create-package.
44 ;; wisi-compile-grammar (provided here) generate the automaton
45 ;; structure required by wisi-parse, using functions from
50 (require 'semantic/wisent/comp)
52 (defun wisi-compose-action (value symbol-array nonterms)
53 (let ((symbol (intern-soft (format "%s:%d" (car value) (cdr value)) symbol-array))
54 (prod (car (nth (cdr value) (cdr (assoc (car value) nonterms))))))
56 (list (car value) symbol (length prod))
57 (error "%s not in symbol-array" symbol))))
59 (defun wisi-replace-actions (action symbol-array nonterms)
60 "Replace semantic action symbol names in ACTION with list as defined in `wisi-compile-grammar'.
61 ACTION is the alist for one state from the grammar; NONTERMS is from the grammar.
62 Return the new alist."
63 ;; result is (nonterm index action-symbol token-count)
66 (setq item (pop action))
69 (memq (cdr item) '(error accept))
74 (let ((value (cdr item)))
76 ((symbolp (car value))
78 (push (cons (car item)
79 (wisi-compose-action value symbol-array nonterms))
82 ((integerp (car value))
83 ;; shift/reduce conflict
84 (push (cons (car item)
86 (wisi-compose-action (cadr value) symbol-array nonterms)))
89 ((integerp (cadr value))
90 ;; reduce/shift conflict
91 (push (cons (car item)
92 (list (wisi-compose-action (car value) symbol-array nonterms)
96 (t ;; reduce/reduce conflict
97 (push (cons (car item)
98 (list (wisi-compose-action (car value) symbol-array nonterms)
99 (wisi-compose-action (cadr value) symbol-array nonterms)))
104 (error "unexpected '%s'; expected 'error, 'accept, numberp, stringp, listp" (cdr item)))
109 (defun wisi-semantic-action (r rcode tags rlhs)
110 "Define an Elisp function for semantic action at rule R.
111 On entry RCODE[R] contains a vector [BODY N (NTERM I)] where BODY
112 is the body of the semantic action, N is the number of tokens in
113 the production, NTERM is the nonterminal the semantic action
114 belongs to, and I is the index of the production and associated
115 semantic action in the NTERM rule. Returns the semantic action
116 symbol, which is interned in RCODE[0].
118 The semantic action function accepts one argument, the list of
119 tokens to be reduced. It returns nil; it is called for the user
121 ;; based on comp.el wisent-semantic-action
122 (let* ((actn (aref rcode r))
123 (n (aref actn 1)) ; number of tokens in production
124 (name (apply 'format "%s:%d" (aref actn 2)))
126 (action-symbol (intern name (aref rcode 0))))
129 `(lambda (wisi-tokens)
130 (let* (($nterm ',(aref tags (aref rlhs r)))
131 ($1 nil));; wisent-parse-nonterminals defines a default body of $1 for empty actions
135 (list (car (aref actn 2)) action-symbol n)))
137 (defun wisi-compile-grammar (grammar)
138 ;; FIXME: This docstring is full of ambiguities making it unclear whether
139 ;; we're talking for example about data that includes the symbol `nonterm' as
140 ;; opposed to data that includes some non terminal object we denote
141 ;; with the meta-variable "nonterm".
142 ;; The convention in Elisp's docstrings is to use all-caps for metavariables
143 ;; (and `...' quoting as opposed to the '... quoting used below in a few
145 "Compile the LALR(1) GRAMMAR; return the automaton for wisi-parse.
146 GRAMMAR is a list TERMINALS NONTERMS ACTIONS GOTOS, where:
148 TERMINALS is a list of terminal token symbols.
150 NONTERMS is a list of productions; each production is a
151 list (nonterm (tokens action) ...) where `action' is any lisp form.
153 ACTIONS is an array indexed by parser state, of alists indexed by
154 terminal tokens. The value of each item in the alists is one of:
160 integer - shift; gives new state
162 '(nonterm . index) - reduce by nonterm production index.
164 '(integer (nonterm . index)) - a shift/reduce conflict
165 '((nonterm . index) integer) - a reduce/shift conflict
166 '((nonterm . index) (nonterm . index)) - a reduce/reduce conflict
168 The first item in the alist must have the key 'default (not a
169 terminal token); it is used when no other item matches the
172 GOTOS is an array indexed by parser state, of alists giving the
173 new state after a reduce for each nonterminal legal in that
176 The automaton is an array with 3 elements:
178 parser-actions is a copy of the input ACTIONS, with reduction
179 actions replaced by a list (NONTERM ACTION-SYMBOL TOKEN-COUNT),
180 where NONTERM is a symbol from NONTERMS, and is the
181 non-terminal to reduce to, TOKEN-COUNT is the number of tokens in
182 the reduction, ACTION-SYMBOL is nil if there is no user action,
183 or a symbol from semantic-actions (below).
185 gotos is a copy of GOTOS.
187 semantic-actions is an obarray containing functions that
188 implement the user action for each nonterminal; the function
189 names have the format nonterm:index."
190 (defvar nrules) (defvar ptable) (defvar rcode) (defvar rlhs) (defvar tags)
191 (defvar token-list) (defvar var-list)
192 (let (nrules ptable rcode rlhs tags token-list var-list)
193 (wisent-parse-grammar;; set global vars used by wisent-semantic-action
195 (nth 0 grammar);; TOKENS
197 (nth 1 grammar));; NONTERMS
200 (aset rcode 0 (make-vector 13 0));; obarray for semantic actions
202 ;; create semantic action functions, interned in rcode[0]
205 (wisi-semantic-action i rcode tags rlhs)
209 ;; replace semantic actions in ACTIONS with symbols from symbol-array
210 (let ((nactions (length (nth 2 grammar)))
211 (actions (nth 2 grammar))
212 (symbol-array (aref rcode 0))
214 (while (< i nactions)
216 (wisi-replace-actions (aref actions i) symbol-array (nth 1 grammar)))
224 (provide 'wisi-compile)
226 ;;; wisi-compile.el ends here