]> code.delx.au - gnu-emacs-elpa/blob - packages/wisi/wisi-compile.el
Merge commit '3e5c11a13981a1ff613cb4442ad644285c44e481' from gnorb
[gnu-emacs-elpa] / packages / wisi / wisi-compile.el
1 ;;; Grammar compiler for the wisent LALR parser, integrating Wisi OpenToken output.
2 ;;
3 ;; Copyright (C) 2012, 2013, 2015 Free Software Foundation, Inc.
4 ;;
5 ;; Author: Stephen Leake <stephen_leake@member.fsf.org>
6 ;;
7 ;; This file is part of GNU Emacs.
8 ;;
9 ;; GNU Emacs is free software: you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation, either version 3 of the License, or
12 ;; (at your option) any later version.
13 ;;
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
18 ;;
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
21 ;;
22 ;;; History: first experimental version Jan 2013
23 ;;
24 ;;; Context
25 ;;
26 ;; Semantic (info "(semantic)Top") provides an LALR(1) parser
27 ;; wisent-parse. The grammar used is defined by the functions
28 ;; semantic-grammar-create-package, which reads a bison-like source
29 ;; file and produces corresponding elisp source, and
30 ;; wisent-compile-grammar, which generates a parser table.
31 ;;
32 ;; However, the algorithm used in wisent-compile-grammar cannot cope
33 ;; with the grammar for the Ada language, because it is not
34 ;; LALR(1). So we provide a generalized LALR parser, which spawns
35 ;; parallel LALR parsers at each conflict. Instead of also rewriting
36 ;; the entire semantic grammar compiler, we use the OpenToken LALR
37 ;; parser generator, which is easier to modify (it is written in Ada,
38 ;; not Lisp).
39 ;;
40 ;; The Ada function Wisi.Generate reads the bison-like input and
41 ;; produces corresponding elisp source code, similar to that
42 ;; produced by semantic-grammar-create-package.
43 ;;
44 ;; wisi-compile-grammar (provided here) generate the automaton
45 ;; structure required by wisi-parse, using functions from
46 ;; wisent/comp.el
47 ;;
48 ;;;;
49
50 (require 'semantic/wisent/comp)
51
52 (defun wisi-compose-action (value symbol-array nonterms)
53 (let ((symbol (intern-soft (format "%s:%d" (car value) (cdr value)) symbol-array))
54 (prod (car (nth (cdr value) (cdr (assoc (car value) nonterms))))))
55 (if symbol
56 (list (car value) symbol (length prod))
57 (error "%s not in symbol-array" symbol))))
58
59 (defun wisi-replace-actions (action symbol-array nonterms)
60 "Replace semantic action symbol names in ACTION with list as defined in `wisi-compile-grammar'.
61 ACTION is the alist for one state from the grammar; NONTERMS is from the grammar.
62 Return the new alist."
63 ;; result is (nonterm index action-symbol token-count)
64 (let (result item)
65 (while action
66 (setq item (pop action))
67 (cond
68 ((or
69 (memq (cdr item) '(error accept))
70 (numberp (cdr item)))
71 (push item result))
72
73 ((listp (cdr item))
74 (let ((value (cdr item)))
75 (cond
76 ((symbolp (car value))
77 ;; reduction
78 (push (cons (car item)
79 (wisi-compose-action value symbol-array nonterms))
80 result))
81
82 ((integerp (car value))
83 ;; shift/reduce conflict
84 (push (cons (car item)
85 (list (car value)
86 (wisi-compose-action (cadr value) symbol-array nonterms)))
87 result))
88
89 ((integerp (cadr value))
90 ;; reduce/shift conflict
91 (push (cons (car item)
92 (list (wisi-compose-action (car value) symbol-array nonterms)
93 (cadr value)))
94 result))
95
96 (t ;; reduce/reduce conflict
97 (push (cons (car item)
98 (list (wisi-compose-action (car value) symbol-array nonterms)
99 (wisi-compose-action (cadr value) symbol-array nonterms)))
100 result))
101 )))
102
103 (t
104 (error "unexpected '%s'; expected 'error, 'accept, numberp, stringp, listp" (cdr item)))
105 ));; while/cond
106
107 (reverse result)))
108
109 (defun wisi-semantic-action (r rcode tags rlhs)
110 "Define an Elisp function for semantic action at rule R.
111 On entry RCODE[R] contains a vector [BODY N (NTERM I)] where BODY
112 is the body of the semantic action, N is the number of tokens in
113 the production, NTERM is the nonterminal the semantic action
114 belongs to, and I is the index of the production and associated
115 semantic action in the NTERM rule. Returns the semantic action
116 symbol, which is interned in RCODE[0].
117
118 The semantic action function accepts one argument, the list of
119 tokens to be reduced. It returns nil; it is called for the user
120 side-effects only."
121 ;; based on comp.el wisent-semantic-action
122 (let* ((actn (aref rcode r))
123 (n (aref actn 1)) ; number of tokens in production
124 (name (apply 'format "%s:%d" (aref actn 2)))
125 (form (aref actn 0))
126 (action-symbol (intern name (aref rcode 0))))
127
128 (fset action-symbol
129 `(lambda (wisi-tokens)
130 (let* (($nterm ',(aref tags (aref rlhs r)))
131 ($1 nil));; wisent-parse-nonterminals defines a default body of $1 for empty actions
132 ,form
133 nil)))
134
135 (list (car (aref actn 2)) action-symbol n)))
136
137 (defun wisi-compile-grammar (grammar)
138 "Compile the LALR(1) GRAMMAR; return the automaton for wisi-parse.
139 GRAMMAR is a list TERMINALS NONTERMS ACTIONS GOTOS, where:
140
141 TERMINALS is a list of terminal token symbols.
142
143 NONTERMS is a list of productions; each production is a
144 list (nonterm (tokens action) ...) where `action' is any lisp form.
145
146 ACTIONS is an array indexed by parser state, of alists indexed by
147 terminal tokens. The value of each item in the alists is one of:
148
149 'error
150
151 'accept
152
153 integer - shift; gives new state
154
155 '(nonterm . index) - reduce by nonterm production index.
156
157 '(integer (nonterm . index)) - a shift/reduce conflict
158 '((nonterm . index) integer) - a reduce/shift conflict
159 '((nonterm . index) (nonterm . index)) - a reduce/reduce conflict
160
161 The first item in the alist must have the key 'default (not a
162 terminal token); it is used when no other item matches the
163 current token.
164
165 GOTOS is an array indexed by parser state, of alists giving the
166 new state after a reduce for each nonterminal legal in that
167 state.
168
169 The automaton is an array with 3 elements:
170
171 parser-actions is a copy of the input ACTIONS, with reduction
172 actions replaced by a list (nonterm action-symbol token-count),
173 where `nonterm' is a symbol from NONTERMS, and is the
174 non-terminal to reduce to, token-count is the number of tokens in
175 the reduction, action-symbol is nil if there is no user action,
176 or a symbol from semantic-actions (below).
177
178 gotos is a copy of GOTOS.
179
180 semantic-actions is an obarray containing functions that
181 implement the user action for each nonterminal; the function
182 names have the format nonterm:index."
183 (let (nrules ptable rcode rlhs tags token-list var-list)
184 (wisent-parse-grammar;; set global vars used by wisent-semantic-action
185 (cons
186 (nth 0 grammar);; TOKENS
187 (cons nil ;; ASSOCS
188 (nth 1 grammar));; NONTERMS
189 ))
190
191 (aset rcode 0 (make-vector 13 0));; obarray for semantic actions
192
193 ;; create semantic action functions, interned in rcode[0]
194 (let* ((i 1))
195 (while (<= i nrules)
196 (wisi-semantic-action i rcode tags rlhs)
197 (setq i (1+ i)))
198 )
199
200 ;; replace semantic actions in ACTIONS with symbols from symbol-array
201 (let ((nactions (length (nth 2 grammar)))
202 (actions (nth 2 grammar))
203 (symbol-array (aref rcode 0))
204 (i 0))
205 (while (< i nactions)
206 (aset actions i
207 (wisi-replace-actions (aref actions i) symbol-array (nth 1 grammar)))
208 (setq i (1+ i)))
209 (vector
210 actions
211 (nth 3 grammar)
212 symbol-array)
213 )))
214
215 (provide 'wisi-compile)
216
217 ;;;; end of file