]> code.delx.au - gnu-emacs/blob - lisp/cedet/semantic/wisent/comp.el
Add 2012 to FSF copyright years for Emacs files
[gnu-emacs] / lisp / cedet / semantic / wisent / comp.el
1 ;;; semantic/wisent/comp.el --- GNU Bison for Emacs - Grammar compiler
2
3 ;; Copyright (C) 1984, 1986, 1989, 1992, 1995, 2000-2007, 2009-2012
4 ;; Free Software Foundation, Inc.
5
6 ;; Author: David Ponce <david@dponce.com>
7 ;; Maintainer: David Ponce <david@dponce.com>
8 ;; Created: 30 January 2002
9 ;; Keywords: syntax
10
11 ;; This file is part of GNU Emacs.
12
13 ;; GNU Emacs is free software: you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation, either version 3 of the License, or
16 ;; (at your option) any later version.
17
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
22
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25
26 ;;; Commentary:
27 ;;
28 ;; Grammar compiler that produces Wisent's LALR automatons.
29 ;;
30 ;; Wisent (the European Bison ;-) is an Elisp implementation of the
31 ;; GNU Compiler Compiler Bison. The Elisp code is a port of the C
32 ;; code of GNU Bison 1.28 & 1.31.
33 ;;
34 ;; For more details on the basic concepts for understanding Wisent,
35 ;; read the Bison manual ;)
36 ;;
37 ;; For more details on Wisent itself read the Wisent manual.
38
39 ;;; History:
40 ;;
41
42 ;;; Code:
43 (require 'semantic/wisent)
44 \f
45 ;;;; -------------------
46 ;;;; Misc. useful things
47 ;;;; -------------------
48
49 ;; As much as possible I would like to keep the name of global
50 ;; variables used in Bison without polluting too much the Elisp global
51 ;; name space. Elisp dynamic binding allows that ;-)
52
53 ;; Here are simple macros to easily define and use set of variables
54 ;; bound locally, without all these "reference to free variable"
55 ;; compiler warnings!
56
57 (defmacro wisent-context-name (name)
58 "Return the context name from NAME."
59 `(if (and ,name (symbolp ,name))
60 (intern (format "wisent-context-%s" ,name))
61 (error "Invalid context name: %S" ,name)))
62
63 (defmacro wisent-context-bindings (name)
64 "Return the variables in context NAME."
65 `(symbol-value (wisent-context-name ,name)))
66
67 (defmacro wisent-defcontext (name &rest vars)
68 "Define a context NAME that will bind variables VARS."
69 (let* ((context (wisent-context-name name))
70 (bindings (mapcar #'(lambda (v) (list 'defvar v)) vars)))
71 `(eval-when-compile
72 ,@bindings
73 (defvar ,context ',vars))))
74 (put 'wisent-defcontext 'lisp-indent-function 1)
75
76 (defmacro wisent-with-context (name &rest body)
77 "Bind variables in context NAME then eval BODY."
78 `(let* ,(wisent-context-bindings name)
79 ,@body))
80 (put 'wisent-with-context 'lisp-indent-function 1)
81
82 ;; A naive implementation of data structures! But it suffice here ;-)
83
84 (defmacro wisent-struct (name &rest fields)
85 "Define a simple data structure called NAME.
86 Which contains data stored in FIELDS. FIELDS is a list of symbols
87 which are field names or pairs (FIELD INITIAL-VALUE) where
88 INITIAL-VALUE is a constant used as the initial value of FIELD when
89 the data structure is created. INITIAL-VALUE defaults to nil.
90
91 This defines a `make-NAME' constructor, get-able `NAME-FIELD' and
92 set-able `set-NAME-FIELD' accessors."
93 (let ((size (length fields))
94 (i 0)
95 accors field sufx fun ivals)
96 (while (< i size)
97 (setq field (car fields)
98 fields (cdr fields))
99 (if (consp field)
100 (setq ivals (cons (cadr field) ivals)
101 field (car field))
102 (setq ivals (cons nil ivals)))
103 (setq sufx (format "%s-%s" name field)
104 fun (intern (format "%s" sufx))
105 accors (cons `(defmacro ,fun (s)
106 (list 'aref s ,i))
107 accors)
108 fun (intern (format "set-%s" sufx))
109 accors (cons `(defmacro ,fun (s v)
110 (list 'aset s ,i v))
111 accors)
112 i (1+ i)))
113 `(progn
114 (defmacro ,(intern (format "make-%s" name)) ()
115 (cons 'vector ',(nreverse ivals)))
116 ,@accors)))
117 (put 'wisent-struct 'lisp-indent-function 1)
118
119 ;; Other utilities
120
121 (defsubst wisent-pad-string (s n &optional left)
122 "Fill string S with spaces.
123 Return a new string of at least N characters. Insert spaces on right.
124 If optional LEFT is non-nil insert spaces on left."
125 (let ((i (length s)))
126 (if (< i n)
127 (if left
128 (concat (make-string (- n i) ?\ ) s)
129 (concat s (make-string (- n i) ?\ )))
130 s)))
131 \f
132 ;;;; ------------------------
133 ;;;; Environment dependencies
134 ;;;; ------------------------
135
136 (defconst wisent-BITS-PER-WORD
137 (let ((i 1))
138 (while (not (zerop (lsh 1 i)))
139 (setq i (1+ i)))
140 i))
141
142 (defsubst wisent-WORDSIZE (n)
143 "(N + BITS-PER-WORD - 1) / BITS-PER-WORD."
144 (/ (1- (+ n wisent-BITS-PER-WORD)) wisent-BITS-PER-WORD))
145
146 (defsubst wisent-SETBIT (x i)
147 "X[I/BITS-PER-WORD] |= 1 << (I % BITS-PER-WORD)."
148 (let ((k (/ i wisent-BITS-PER-WORD)))
149 (aset x k (logior (aref x k)
150 (lsh 1 (% i wisent-BITS-PER-WORD))))))
151
152 (defsubst wisent-RESETBIT (x i)
153 "X[I/BITS-PER-WORD] &= ~(1 << (I % BITS-PER-WORD))."
154 (let ((k (/ i wisent-BITS-PER-WORD)))
155 (aset x k (logand (aref x k)
156 (lognot (lsh 1 (% i wisent-BITS-PER-WORD)))))))
157
158 (defsubst wisent-BITISSET (x i)
159 "(X[I/BITS-PER-WORD] & (1 << (I % BITS-PER-WORD))) != 0."
160 (not (zerop (logand (aref x (/ i wisent-BITS-PER-WORD))
161 (lsh 1 (% i wisent-BITS-PER-WORD))))))
162
163 (defsubst wisent-noninteractive ()
164 "Return non-nil if running without interactive terminal."
165 (if (featurep 'xemacs)
166 (noninteractive)
167 noninteractive))
168
169 (defvar wisent-debug-flag nil
170 "Non-nil means enable some debug stuff.")
171 \f
172 ;;;; --------------
173 ;;;; Logging/Output
174 ;;;; --------------
175 (defconst wisent-log-buffer-name "*wisent-log*"
176 "Name of the log buffer.")
177
178 (defvar wisent-new-log-flag nil
179 "Non-nil means to start a new report.")
180
181 (defvar wisent-verbose-flag nil
182 "*Non-nil means to report verbose information on generated parser.")
183
184 (defun wisent-toggle-verbose-flag ()
185 "Toggle whether to report verbose information on generated parser."
186 (interactive)
187 (setq wisent-verbose-flag (not wisent-verbose-flag))
188 (when (called-interactively-p 'interactive)
189 (message "Verbose report %sabled"
190 (if wisent-verbose-flag "en" "dis"))))
191
192 (defmacro wisent-log-buffer ()
193 "Return the log buffer.
194 Its name is defined in constant `wisent-log-buffer-name'."
195 `(get-buffer-create wisent-log-buffer-name))
196
197 (defmacro wisent-clear-log ()
198 "Delete the entire contents of the log buffer."
199 `(with-current-buffer (wisent-log-buffer)
200 (erase-buffer)))
201
202 (defvar byte-compile-current-file)
203
204 (defun wisent-source ()
205 "Return the current source file name or nil."
206 (let ((source (or (and (boundp 'byte-compile-current-file)
207 byte-compile-current-file)
208 load-file-name (buffer-file-name))))
209 (if source
210 (file-relative-name source))))
211
212 (defun wisent-new-log ()
213 "Start a new entry into the log buffer."
214 (setq wisent-new-log-flag nil)
215 (let ((text (format "\n\n*** Wisent %s - %s\n\n"
216 (or (wisent-source) (buffer-name))
217 (format-time-string "%Y-%m-%d %R"))))
218 (with-current-buffer (wisent-log-buffer)
219 (goto-char (point-max))
220 (insert text))))
221
222 (defsubst wisent-log (&rest args)
223 "Insert text into the log buffer.
224 `format' is applied to ARGS and the result string is inserted into the
225 log buffer returned by the function `wisent-log-buffer'."
226 (and wisent-new-log-flag (wisent-new-log))
227 (with-current-buffer (wisent-log-buffer)
228 (insert (apply 'format args))))
229
230 (defconst wisent-log-file "wisent.output"
231 "The log file.
232 Used when running without interactive terminal.")
233
234 (defun wisent-append-to-log-file ()
235 "Append contents of logging buffer to `wisent-log-file'."
236 (if (get-buffer wisent-log-buffer-name)
237 (condition-case err
238 (with-current-buffer (wisent-log-buffer)
239 (widen)
240 (if (> (point-max) (point-min))
241 (write-region (point-min) (point-max)
242 wisent-log-file t)))
243 (error
244 (message "*** %s" (error-message-string err))))))
245 \f
246 ;;;; -----------------------------------
247 ;;;; Representation of the grammar rules
248 ;;;; -----------------------------------
249
250 ;; ntokens is the number of tokens, and nvars is the number of
251 ;; variables (nonterminals). nsyms is the total number, ntokens +
252 ;; nvars.
253
254 ;; Each symbol (either token or variable) receives a symbol number.
255 ;; Numbers 0 to ntokens-1 are for tokens, and ntokens to nsyms-1 are
256 ;; for variables. Symbol number zero is the end-of-input token. This
257 ;; token is counted in ntokens.
258
259 ;; The rules receive rule numbers 1 to nrules in the order they are
260 ;; written. Actions and guards are accessed via the rule number.
261
262 ;; The rules themselves are described by three arrays: rrhs, rlhs and
263 ;; ritem. rlhs[R] is the symbol number of the left hand side of rule
264 ;; R. The right hand side is stored as symbol numbers in a portion of
265 ;; ritem. rrhs[R] contains the index in ritem of the beginning of the
266 ;; portion for rule R.
267
268 ;; The length of the portion is one greater than the number of symbols
269 ;; in the rule's right hand side. The last element in the portion
270 ;; contains minus R, which identifies it as the end of a portion and
271 ;; says which rule it is for.
272
273 ;; The portions of ritem come in order of increasing rule number and
274 ;; are followed by an element which is nil to mark the end. nitems is
275 ;; the total length of ritem, not counting the final nil. Each
276 ;; element of ritem is called an "item" and its index in ritem is an
277 ;; item number.
278
279 ;; Item numbers are used in the finite state machine to represent
280 ;; places that parsing can get to.
281
282 ;; The vector rprec contains for each rule, the item number of the
283 ;; symbol giving its precedence level to this rule. The precedence
284 ;; level and associativity of each symbol is recorded in respectively
285 ;; the properties 'wisent--prec and 'wisent--assoc.
286
287 ;; Precedence levels are assigned in increasing order starting with 1
288 ;; so that numerically higher precedence values mean tighter binding
289 ;; as they ought to. nil as a symbol or rule's precedence means none
290 ;; is assigned.
291
292 (defcustom wisent-state-table-size 1009
293 "The size of the state table."
294 :type 'integer
295 :group 'wisent)
296
297 ;; These variables only exist locally in the function
298 ;; `wisent-compile-grammar' and are shared by all other nested
299 ;; callees.
300 (wisent-defcontext compile-grammar
301 F LA LAruleno accessing-symbol conflicts consistent default-prec
302 derives err-table fderives final-state first-reduction first-shift
303 first-state firsts from-state goto-map includes itemset nitemset
304 kernel-base kernel-end kernel-items last-reduction last-shift
305 last-state lookaheads lookaheadset lookback maxrhs ngotos nitems
306 nrules nshifts nstates nsyms ntokens nullable nvars rassoc redset
307 reduction-table ritem rlhs rprec rrc-count rrc-total rrhs ruseful
308 rcode ruleset rulesetsize shift-symbol shift-table shiftset
309 src-count src-total start-table state-table tags this-state to-state
310 tokensetsize ;; nb of words req. to hold a bit for each rule
311 varsetsize ;; nb of words req. to hold a bit for each variable
312 error-token-number start-symbol token-list var-list
313 N P V V1 nuseless-nonterminals nuseless-productions
314 ptable ;; symbols & characters properties
315 )
316
317 (defmacro wisent-ISTOKEN (s)
318 "Return non-nil if item number S defines a token (terminal).
319 That is if S < `ntokens'."
320 `(< ,s ntokens))
321
322 (defmacro wisent-ISVAR(s)
323 "Return non-nil if item number S defines a nonterminal.
324 That is if S >= `ntokens'."
325 `(>= ,s ntokens))
326
327 (defsubst wisent-tag (s)
328 "Return printable form of item number S."
329 (wisent-item-to-string (aref tags s)))
330
331 ;; Symbol and character properties
332
333 (defsubst wisent-put (object propname value)
334 "Store OBJECT's PROPNAME property with value VALUE.
335 Use `eq' to locate OBJECT."
336 (let ((entry (assq object ptable)))
337 (or entry (setq entry (list object) ptable (cons entry ptable)))
338 (setcdr entry (plist-put (cdr entry) propname value))))
339
340 (defsubst wisent-get (object propname)
341 "Return the value of OBJECT's PROPNAME property.
342 Use `eq' to locate OBJECT."
343 (plist-get (cdr (assq object ptable)) propname))
344
345 (defsubst wisent-item-number (x)
346 "Return the item number of symbol X."
347 (wisent-get x 'wisent--item-no))
348
349 (defsubst wisent-set-item-number (x n)
350 "Set the item number of symbol X to N."
351 (wisent-put x 'wisent--item-no n))
352
353 (defsubst wisent-assoc (x)
354 "Return the associativity of symbol X."
355 (wisent-get x 'wisent--assoc))
356
357 (defsubst wisent-set-assoc (x a)
358 "Set the associativity of symbol X to A."
359 (wisent-put x 'wisent--assoc a))
360
361 (defsubst wisent-prec (x)
362 "Return the precedence level of symbol X."
363 (wisent-get x 'wisent--prec))
364
365 (defsubst wisent-set-prec (x p)
366 "Set the precedence level of symbol X to P."
367 (wisent-put x 'wisent--prec p))
368 \f
369 ;;;; ----------------------------------------------------------
370 ;;;; Type definitions for nondeterministic finite state machine
371 ;;;; ----------------------------------------------------------
372
373 ;; These type definitions are used to represent a nondeterministic
374 ;; finite state machine that parses the specified grammar. This
375 ;; information is generated by the function `wisent-generate-states'.
376
377 ;; Each state of the machine is described by a set of items --
378 ;; particular positions in particular rules -- that are the possible
379 ;; places where parsing could continue when the machine is in this
380 ;; state. These symbols at these items are the allowable inputs that
381 ;; can follow now.
382
383 ;; A core represents one state. States are numbered in the number
384 ;; field. When `wisent-generate-states' is finished, the starting
385 ;; state is state 0 and `nstates' is the number of states. (A
386 ;; transition to a state whose state number is `nstates' indicates
387 ;; termination.) All the cores are chained together and `first-state'
388 ;; points to the first one (state 0).
389
390 ;; For each state there is a particular symbol which must have been
391 ;; the last thing accepted to reach that state. It is the
392 ;; accessing-symbol of the core.
393
394 ;; Each core contains a vector of `nitems' items which are the indices
395 ;; in the `ritems' vector of the items that are selected in this
396 ;; state.
397
398 ;; The link field is used for chaining buckets that hash states by
399 ;; their itemsets. This is for recognizing equivalent states and
400 ;; combining them when the states are generated.
401
402 ;; The two types of transitions are shifts (push the lookahead token
403 ;; and read another) and reductions (combine the last n things on the
404 ;; stack via a rule, replace them with the symbol that the rule
405 ;; derives, and leave the lookahead token alone). When the states are
406 ;; generated, these transitions are represented in two other lists.
407
408 ;; Each shifts structure describes the possible shift transitions out
409 ;; of one state, the state whose number is in the number field. The
410 ;; shifts structures are linked through next and first-shift points to
411 ;; them. Each contains a vector of numbers of the states that shift
412 ;; transitions can go to. The accessing-symbol fields of those
413 ;; states' cores say what kind of input leads to them.
414
415 ;; A shift to state zero should be ignored. Conflict resolution
416 ;; deletes shifts by changing them to zero.
417
418 ;; Each reductions structure describes the possible reductions at the
419 ;; state whose number is in the number field. The data is a list of
420 ;; nreds rules, represented by their rule numbers. `first-reduction'
421 ;; points to the list of these structures.
422
423 ;; Conflict resolution can decide that certain tokens in certain
424 ;; states should explicitly be errors (for implementing %nonassoc).
425 ;; For each state, the tokens that are errors for this reason are
426 ;; recorded in an errs structure, which has the state number in its
427 ;; number field. The rest of the errs structure is full of token
428 ;; numbers.
429
430 ;; There is at least one shift transition present in state zero. It
431 ;; leads to a next-to-final state whose accessing-symbol is the
432 ;; grammar's start symbol. The next-to-final state has one shift to
433 ;; the final state, whose accessing-symbol is zero (end of input).
434 ;; The final state has one shift, which goes to the termination state
435 ;; (whose number is `nstates'-1).
436 ;; The reason for the extra state at the end is to placate the
437 ;; parser's strategy of making all decisions one token ahead of its
438 ;; actions.
439
440 (wisent-struct core
441 next ; -> core
442 link ; -> core
443 (number 0)
444 (accessing-symbol 0)
445 (nitems 0)
446 (items [0]))
447
448 (wisent-struct shifts
449 next ; -> shifts
450 (number 0)
451 (nshifts 0)
452 (shifts [0]))
453
454 (wisent-struct reductions
455 next ; -> reductions
456 (number 0)
457 (nreds 0)
458 (rules [0]))
459
460 (wisent-struct errs
461 (nerrs 0)
462 (errs [0]))
463 \f
464 ;;;; --------------------------------------------------------
465 ;;;; Find unreachable terminals, nonterminals and productions
466 ;;;; --------------------------------------------------------
467
468 (defun wisent-bits-equal (L R n)
469 "Visit L and R and return non-nil if their first N elements are `='.
470 L and R must be vectors of integers."
471 (let* ((i (1- n))
472 (iseq t))
473 (while (and iseq (natnump i))
474 (setq iseq (= (aref L i) (aref R i))
475 i (1- i)))
476 iseq))
477
478 (defun wisent-nbits (i)
479 "Return number of bits set in integer I."
480 (let ((count 0))
481 (while (not (zerop i))
482 ;; i ^= (i & ((unsigned) (-(int) i)))
483 (setq i (logxor i (logand i (- i)))
484 count (1+ count)))
485 count))
486
487 (defun wisent-bits-size (S n)
488 "In vector S count the total of bits set in first N elements.
489 S must be a vector of integers."
490 (let* ((i (1- n))
491 (count 0))
492 (while (natnump i)
493 (setq count (+ count (wisent-nbits (aref S i)))
494 i (1- i)))
495 count))
496
497 (defun wisent-useful-production (i N0)
498 "Return non-nil if production I is in useful set N0."
499 (let* ((useful t)
500 (r (aref rrhs i))
501 n)
502 (while (and useful (> (setq n (aref ritem r)) 0))
503 (if (wisent-ISVAR n)
504 (setq useful (wisent-BITISSET N0 (- n ntokens))))
505 (setq r (1+ r)))
506 useful))
507
508 (defun wisent-useless-nonterminals ()
509 "Find out which nonterminals are used."
510 (let (Np Ns i n break)
511 ;; N is set as built. Np is set being built this iteration. P is
512 ;; set of all productions which have a RHS all in N.
513 (setq n (wisent-WORDSIZE nvars)
514 Np (make-vector n 0))
515
516 ;; The set being computed is a set of nonterminals which can
517 ;; derive the empty string or strings consisting of all
518 ;; terminals. At each iteration a nonterminal is added to the set
519 ;; if there is a production with that nonterminal as its LHS for
520 ;; which all the nonterminals in its RHS are already in the set.
521 ;; Iterate until the set being computed remains unchanged. Any
522 ;; nonterminals not in the set at that point are useless in that
523 ;; they will never be used in deriving a sentence of the language.
524
525 ;; This iteration doesn't use any special traversal over the
526 ;; productions. A set is kept of all productions for which all
527 ;; the nonterminals in the RHS are in useful. Only productions
528 ;; not in this set are scanned on each iteration. At the end,
529 ;; this set is saved to be used when finding useful productions:
530 ;; only productions in this set will appear in the final grammar.
531
532 (while (not break)
533 (setq i (1- n))
534 (while (natnump i)
535 ;; Np[i] = N[i]
536 (aset Np i (aref N i))
537 (setq i (1- i)))
538
539 (setq i 1)
540 (while (<= i nrules)
541 (if (not (wisent-BITISSET P i))
542 (when (wisent-useful-production i N)
543 (wisent-SETBIT Np (- (aref rlhs i) ntokens))
544 (wisent-SETBIT P i)))
545 (setq i (1+ i)))
546 (if (wisent-bits-equal N Np n)
547 (setq break t)
548 (setq Ns Np
549 Np N
550 N Ns)))
551 (setq N Np)))
552
553 (defun wisent-inaccessable-symbols ()
554 "Find out which productions are reachable and which symbols are used."
555 ;; Starting with an empty set of productions and a set of symbols
556 ;; which only has the start symbol in it, iterate over all
557 ;; productions until the set of productions remains unchanged for an
558 ;; iteration. For each production which has a LHS in the set of
559 ;; reachable symbols, add the production to the set of reachable
560 ;; productions, and add all of the nonterminals in the RHS of the
561 ;; production to the set of reachable symbols.
562
563 ;; Consider only the (partially) reduced grammar which has only
564 ;; nonterminals in N and productions in P.
565
566 ;; The result is the set P of productions in the reduced grammar,
567 ;; and the set V of symbols in the reduced grammar.
568
569 ;; Although this algorithm also computes the set of terminals which
570 ;; are reachable, no terminal will be deleted from the grammar. Some
571 ;; terminals might not be in the grammar but might be generated by
572 ;; semantic routines, and so the user might want them available with
573 ;; specified numbers. (Is this true?) However, the non reachable
574 ;; terminals are printed (if running in verbose mode) so that the
575 ;; user can know.
576 (let (Vp Vs Pp i tt r n m break)
577 (setq n (wisent-WORDSIZE nsyms)
578 m (wisent-WORDSIZE (1+ nrules))
579 Vp (make-vector n 0)
580 Pp (make-vector m 0))
581
582 ;; If the start symbol isn't useful, then nothing will be useful.
583 (when (wisent-BITISSET N (- start-symbol ntokens))
584 (wisent-SETBIT V start-symbol)
585 (while (not break)
586 (setq i (1- n))
587 (while (natnump i)
588 (aset Vp i (aref V i))
589 (setq i (1- i)))
590 (setq i 1)
591 (while (<= i nrules)
592 (when (and (not (wisent-BITISSET Pp i))
593 (wisent-BITISSET P i)
594 (wisent-BITISSET V (aref rlhs i)))
595 (setq r (aref rrhs i))
596 (while (natnump (setq tt (aref ritem r)))
597 (if (or (wisent-ISTOKEN tt)
598 (wisent-BITISSET N (- tt ntokens)))
599 (wisent-SETBIT Vp tt))
600 (setq r (1+ r)))
601 (wisent-SETBIT Pp i))
602 (setq i (1+ i)))
603 (if (wisent-bits-equal V Vp n)
604 (setq break t)
605 (setq Vs Vp
606 Vp V
607 V Vs))))
608 (setq V Vp)
609
610 ;; Tokens 0, 1 are internal to Wisent. Consider them useful.
611 (wisent-SETBIT V 0) ;; end-of-input token
612 (wisent-SETBIT V 1) ;; error token
613 (setq P Pp)
614
615 (setq nuseless-productions (- nrules (wisent-bits-size P m))
616 nuseless-nonterminals nvars
617 i ntokens)
618 (while (< i nsyms)
619 (if (wisent-BITISSET V i)
620 (setq nuseless-nonterminals (1- nuseless-nonterminals)))
621 (setq i (1+ i)))
622
623 ;; A token that was used in %prec should not be warned about.
624 (setq i 1)
625 (while (<= i nrules)
626 (if (aref rprec i)
627 (wisent-SETBIT V1 (aref rprec i)))
628 (setq i (1+ i)))
629 ))
630
631 (defun wisent-reduce-grammar-tables ()
632 "Disable useless productions."
633 (if (> nuseless-productions 0)
634 (let ((pn 1))
635 (while (<= pn nrules)
636 (aset ruseful pn (wisent-BITISSET P pn))
637 (setq pn (1+ pn))))))
638
639 (defun wisent-nonterminals-reduce ()
640 "Remove useless nonterminals."
641 (let (i n r item nontermmap tags-sorted)
642 ;; Map the nonterminals to their new index: useful first, useless
643 ;; afterwards. Kept for later report.
644 (setq nontermmap (make-vector nvars 0)
645 n ntokens
646 i ntokens)
647 (while (< i nsyms)
648 (when (wisent-BITISSET V i)
649 (aset nontermmap (- i ntokens) n)
650 (setq n (1+ n)))
651 (setq i (1+ i)))
652 (setq i ntokens)
653 (while (< i nsyms)
654 (unless (wisent-BITISSET V i)
655 (aset nontermmap (- i ntokens) n)
656 (setq n (1+ n)))
657 (setq i (1+ i)))
658 ;; Shuffle elements of tables indexed by symbol number
659 (setq tags-sorted (make-vector nvars nil)
660 i ntokens)
661 (while (< i nsyms)
662 (setq n (aref nontermmap (- i ntokens)))
663 (aset tags-sorted (- n ntokens) (aref tags i))
664 (setq i (1+ i)))
665 (setq i ntokens)
666 (while (< i nsyms)
667 (aset tags i (aref tags-sorted (- i ntokens)))
668 (setq i (1+ i)))
669 ;; Replace all symbol numbers in valid data structures.
670 (setq i 1)
671 (while (<= i nrules)
672 (aset rlhs i (aref nontermmap (- (aref rlhs i) ntokens)))
673 (setq i (1+ i)))
674 (setq r 0)
675 (while (setq item (aref ritem r))
676 (if (wisent-ISVAR item)
677 (aset ritem r (aref nontermmap (- item ntokens))))
678 (setq r (1+ r)))
679 (setq start-symbol (aref nontermmap (- start-symbol ntokens))
680 nsyms (- nsyms nuseless-nonterminals)
681 nvars (- nvars nuseless-nonterminals))
682 ))
683
684 (defun wisent-total-useless ()
685 "Report number of useless nonterminals and productions."
686 (let* ((src (wisent-source))
687 (src (if src (concat " in " src) ""))
688 (msg (format "Grammar%s contains" src)))
689 (if (> nuseless-nonterminals 0)
690 (setq msg (format "%s %d useless nonterminal%s"
691 msg nuseless-nonterminals
692 (if (> nuseless-nonterminals 0) "s" ""))))
693 (if (and (> nuseless-nonterminals 0) (> nuseless-productions 0))
694 (setq msg (format "%s and" msg)))
695 (if (> nuseless-productions 0)
696 (setq msg (format "%s %d useless rule%s"
697 msg nuseless-productions
698 (if (> nuseless-productions 0) "s" ""))))
699 (message msg)))
700
701 (defun wisent-reduce-grammar ()
702 "Find unreachable terminals, nonterminals and productions."
703 ;; Allocate the global sets used to compute the reduced grammar
704 (setq N (make-vector (wisent-WORDSIZE nvars) 0)
705 P (make-vector (wisent-WORDSIZE (1+ nrules)) 0)
706 V (make-vector (wisent-WORDSIZE nsyms) 0)
707 V1 (make-vector (wisent-WORDSIZE nsyms) 0)
708 nuseless-nonterminals 0
709 nuseless-productions 0)
710
711 (wisent-useless-nonterminals)
712 (wisent-inaccessable-symbols)
713
714 (when (> (+ nuseless-nonterminals nuseless-productions) 0)
715 (wisent-total-useless)
716 (or (wisent-BITISSET N (- start-symbol ntokens))
717 (error "Start symbol `%s' does not derive any sentence"
718 (wisent-tag start-symbol)))
719 (wisent-reduce-grammar-tables)
720 (if (> nuseless-nonterminals 0)
721 (wisent-nonterminals-reduce))))
722
723 (defun wisent-print-useless ()
724 "Output the detailed results of the reductions."
725 (let (i b r)
726 (when (> nuseless-nonterminals 0)
727 ;; Useless nonterminals have been moved after useful ones.
728 (wisent-log "\n\nUseless nonterminals:\n\n")
729 (setq i 0)
730 (while (< i nuseless-nonterminals)
731 (wisent-log " %s\n" (wisent-tag (+ nsyms i)))
732 (setq i (1+ i))))
733 (setq b nil
734 i 0)
735 (while (< i ntokens)
736 (unless (or (wisent-BITISSET V i) (wisent-BITISSET V1 i))
737 (or b
738 (wisent-log "\n\nTerminals which are not used:\n\n"))
739 (setq b t)
740 (wisent-log " %s\n" (wisent-tag i)))
741 (setq i (1+ i)))
742 (when (> nuseless-productions 0)
743 (wisent-log "\n\nUseless rules:\n\n")
744 (setq i 1)
745 (while (<= i nrules)
746 (unless (aref ruseful i)
747 (wisent-log "#%s " (wisent-pad-string (format "%d" i) 4))
748 (wisent-log "%s:" (wisent-tag (aref rlhs i)))
749 (setq r (aref rrhs i))
750 (while (natnump (aref ritem r))
751 (wisent-log " %s" (wisent-tag (aref ritem r)))
752 (setq r (1+ r)))
753 (wisent-log ";\n"))
754 (setq i (1+ i))))
755 (if (or b (> nuseless-nonterminals 0) (> nuseless-productions 0))
756 (wisent-log "\n\n"))
757 ))
758 \f
759 ;;;; -----------------------------
760 ;;;; Match rules with nonterminals
761 ;;;; -----------------------------
762
763 (defun wisent-set-derives ()
764 "Find, for each variable (nonterminal), which rules can derive it.
765 It sets up the value of DERIVES so that DERIVES[i - NTOKENS] points to
766 a list of rule numbers, terminated with -1."
767 (let (i lhs p q dset delts)
768 (setq dset (make-vector nvars nil)
769 delts (make-vector (1+ nrules) 0))
770 (setq p 0 ;; p = delts
771 i nrules)
772 (while (> i 0)
773 (when (aref ruseful i)
774 (setq lhs (aref rlhs i))
775 ;; p->next = dset[lhs];
776 ;; p->value = i;
777 (aset delts p (cons i (aref dset (- lhs ntokens)))) ;; (value . next)
778 (aset dset (- lhs ntokens) p) ;; dset[lhs] = p
779 (setq p (1+ p)) ;; p++
780 )
781 (setq i (1- i)))
782
783 (setq derives (make-vector nvars nil)
784 i ntokens)
785
786 (while (< i nsyms)
787 (setq q nil
788 p (aref dset (- i ntokens))) ;; p = dset[i]
789
790 (while p
791 (setq p (aref delts p)
792 q (cons (car p) q) ;;q++ = p->value
793 p (cdr p))) ;; p = p->next
794 (setq q (nreverse (cons -1 q))) ;; *q++ = -1
795 (aset derives (- i ntokens) q) ;; derives[i] = q
796 (setq i (1+ i)))
797 ))
798 \f
799 ;;;; --------------------------------------------------------
800 ;;;; Find which nonterminals can expand into the null string.
801 ;;;; --------------------------------------------------------
802
803 (defun wisent-print-nullable ()
804 "Print NULLABLE."
805 (let (i)
806 (wisent-log "NULLABLE\n")
807 (setq i ntokens)
808 (while (< i nsyms)
809 (wisent-log "\t%s: %s\n" (wisent-tag i)
810 (if (aref nullable (- i ntokens))
811 "yes" : "no"))
812 (setq i (1+ i)))
813 (wisent-log "\n\n")))
814
815 (defun wisent-set-nullable ()
816 "Set up NULLABLE.
817 A vector saying which nonterminals can expand into the null string.
818 NULLABLE[i - NTOKENS] is nil if symbol I can do so."
819 (let (ruleno s1 s2 p r squeue rcount rsets relts item any-tokens)
820 (setq squeue (make-vector nvars 0)
821 rcount (make-vector (1+ nrules) 0)
822 rsets (make-vector nvars nil) ;; - ntokens
823 relts (make-vector (+ nitems nvars 1) nil)
824 nullable (make-vector nvars nil)) ;; - ntokens
825 (setq s1 0 s2 0 ;; s1 = s2 = squeue
826 p 0 ;; p = relts
827 ruleno 1)
828 (while (<= ruleno nrules)
829 (when (aref ruseful ruleno)
830 (if (> (aref ritem (aref rrhs ruleno)) 0)
831 (progn
832 ;; This rule has a non empty RHS.
833 (setq any-tokens nil
834 r (aref rrhs ruleno))
835 (while (> (aref ritem r) 0)
836 (if (wisent-ISTOKEN (aref ritem r))
837 (setq any-tokens t))
838 (setq r (1+ r)))
839
840 ;; This rule has only nonterminals: schedule it for the
841 ;; second pass.
842 (unless any-tokens
843 (setq r (aref rrhs ruleno))
844 (while (> (setq item (aref ritem r)) 0)
845 (aset rcount ruleno (1+ (aref rcount ruleno)))
846 ;; p->next = rsets[item];
847 ;; p->value = ruleno;
848 (aset relts p (cons ruleno (aref rsets (- item ntokens))))
849 ;; rsets[item] = p;
850 (aset rsets (- item ntokens) p)
851 (setq p (1+ p)
852 r (1+ r)))))
853 ;; This rule has an empty RHS.
854 ;; assert (ritem[rrhs[ruleno]] == -ruleno)
855 (when (and (aref ruseful ruleno)
856 (setq item (aref rlhs ruleno))
857 (not (aref nullable (- item ntokens))))
858 (aset nullable (- item ntokens) t)
859 (aset squeue s2 item)
860 (setq s2 (1+ s2)))
861 )
862 )
863 (setq ruleno (1+ ruleno)))
864
865 (while (< s1 s2)
866 ;; p = rsets[*s1++]
867 (setq p (aref rsets (- (aref squeue s1) ntokens))
868 s1 (1+ s1))
869 (while p
870 (setq p (aref relts p)
871 ruleno (car p)
872 p (cdr p)) ;; p = p->next
873 ;; if (--rcount[ruleno] == 0)
874 (when (zerop (aset rcount ruleno (1- (aref rcount ruleno))))
875 (setq item (aref rlhs ruleno))
876 (aset nullable (- item ntokens) t)
877 (aset squeue s2 item)
878 (setq s2 (1+ s2)))))
879
880 (if wisent-debug-flag
881 (wisent-print-nullable))
882 ))
883 \f
884 ;;;; -----------
885 ;;;; Subroutines
886 ;;;; -----------
887
888 (defun wisent-print-fderives ()
889 "Print FDERIVES."
890 (let (i j rp)
891 (wisent-log "\n\n\nFDERIVES\n")
892 (setq i ntokens)
893 (while (< i nsyms)
894 (wisent-log "\n\n%s derives\n\n" (wisent-tag i))
895 (setq rp (aref fderives (- i ntokens))
896 j 0)
897 (while (<= j nrules)
898 (if (wisent-BITISSET rp j)
899 (wisent-log " %d\n" j))
900 (setq j (1+ j)))
901 (setq i (1+ i)))))
902
903 (defun wisent-set-fderives ()
904 "Set up FDERIVES.
905 An NVARS by NRULES matrix of bits indicating which rules can help
906 derive the beginning of the data for each nonterminal. For example,
907 if symbol 5 can be derived as the sequence of symbols 8 3 20, and one
908 of the rules for deriving symbol 8 is rule 4, then the
909 \[5 - NTOKENS, 4] bit in FDERIVES is set."
910 (let (i j k)
911 (setq fderives (make-vector nvars nil))
912 (setq i 0)
913 (while (< i nvars)
914 (aset fderives i (make-vector rulesetsize 0))
915 (setq i (1+ i)))
916
917 (wisent-set-firsts)
918
919 (setq i ntokens)
920 (while (< i nsyms)
921 (setq j ntokens)
922 (while (< j nsyms)
923 ;; if (BITISSET (FIRSTS (i), j - ntokens))
924 (when (wisent-BITISSET (aref firsts (- i ntokens)) (- j ntokens))
925 (setq k (aref derives (- j ntokens)))
926 (while (> (car k) 0) ;; derives[j][k] > 0
927 ;; SETBIT (FDERIVES (i), derives[j][k]);
928 (wisent-SETBIT (aref fderives (- i ntokens)) (car k))
929 (setq k (cdr k))))
930 (setq j (1+ j)))
931 (setq i (1+ i)))
932
933 (if wisent-debug-flag
934 (wisent-print-fderives))
935 ))
936
937 (defun wisent-print-firsts ()
938 "Print FIRSTS."
939 (let (i j v)
940 (wisent-log "\n\n\nFIRSTS\n\n")
941 (setq i ntokens)
942 (while (< i nsyms)
943 (wisent-log "\n\n%s firsts\n\n" (wisent-tag i))
944 (setq v (aref firsts (- i ntokens))
945 j 0)
946 (while (< j nvars)
947 (if (wisent-BITISSET v j)
948 (wisent-log "\t\t%d (%s)\n"
949 (+ j ntokens) (wisent-tag (+ j ntokens))))
950 (setq j (1+ j)))
951 (setq i (1+ i)))))
952
953 (defun wisent-TC (R n)
954 "Transitive closure.
955 Given R an N by N matrix of bits, modify its contents to be the
956 transitive closure of what was given."
957 (let (i j k)
958 ;; R (J, I) && R (I, K) => R (J, K).
959 ;; I *must* be the outer loop.
960 (setq i 0)
961 (while (< i n)
962 (setq j 0)
963 (while (< j n)
964 (when (wisent-BITISSET (aref R j) i)
965 (setq k 0)
966 (while (< k n)
967 (if (wisent-BITISSET (aref R i) k)
968 (wisent-SETBIT (aref R j) k))
969 (setq k (1+ k))))
970 (setq j (1+ j)))
971 (setq i (1+ i)))))
972
973 (defun wisent-RTC (R n)
974 "Reflexive Transitive Closure.
975 Same as `wisent-TC' and then set all the bits on the diagonal of R, an
976 N by N matrix of bits."
977 (let (i)
978 (wisent-TC R n)
979 (setq i 0)
980 (while (< i n)
981 (wisent-SETBIT (aref R i) i)
982 (setq i (1+ i)))))
983
984 (defun wisent-set-firsts ()
985 "Set up FIRSTS.
986 An NVARS by NVARS bit matrix indicating which items can represent the
987 beginning of the input corresponding to which other items. For
988 example, if some rule expands symbol 5 into the sequence of symbols 8
989 3 20, the symbol 8 can be the beginning of the data for symbol 5, so
990 the bit [8 - NTOKENS, 5 - NTOKENS] in FIRSTS is set."
991 (let (row symbol sp rowsize i)
992 (setq rowsize (wisent-WORDSIZE nvars)
993 varsetsize rowsize
994 firsts (make-vector nvars nil)
995 i 0)
996 (while (< i nvars)
997 (aset firsts i (make-vector rowsize 0))
998 (setq i (1+ i)))
999
1000 (setq row 0 ;; row = firsts
1001 i ntokens)
1002 (while (< i nsyms)
1003 (setq sp (aref derives (- i ntokens)))
1004 (while (>= (car sp) 0)
1005 (setq symbol (aref ritem (aref rrhs (car sp)))
1006 sp (cdr sp))
1007 (when (wisent-ISVAR symbol)
1008 (setq symbol (- symbol ntokens))
1009 (wisent-SETBIT (aref firsts row) symbol)
1010 ))
1011 (setq row (1+ row)
1012 i (1+ i)))
1013
1014 (wisent-RTC firsts nvars)
1015
1016 (if wisent-debug-flag
1017 (wisent-print-firsts))
1018 ))
1019
1020 (defun wisent-initialize-closure (n)
1021 "Allocate the ITEMSET and RULESET vectors.
1022 And precompute useful data so that `wisent-closure' can be called.
1023 N is the number of elements to allocate for ITEMSET."
1024 (setq itemset (make-vector n 0)
1025 rulesetsize (wisent-WORDSIZE (1+ nrules))
1026 ruleset (make-vector rulesetsize 0))
1027
1028 (wisent-set-fderives))
1029
1030 (defun wisent-print-closure ()
1031 "Print ITEMSET."
1032 (let (i)
1033 (wisent-log "\n\nclosure n = %d\n\n" nitemset)
1034 (setq i 0) ;; isp = itemset
1035 (while (< i nitemset)
1036 (wisent-log " %d\n" (aref itemset i))
1037 (setq i (1+ i)))))
1038
1039 (defun wisent-closure (core n)
1040 "Set up RULESET and ITEMSET for the transitions out of CORE state.
1041 Given a vector of item numbers items, of length N, set up RULESET and
1042 ITEMSET to indicate what rules could be run and which items could be
1043 accepted when those items are the active ones.
1044
1045 RULESET contains a bit for each rule. `wisent-closure' sets the bits
1046 for all rules which could potentially describe the next input to be
1047 read.
1048
1049 ITEMSET is a vector of item numbers; NITEMSET is the number of items
1050 in ITEMSET. `wisent-closure' places there the indices of all items
1051 which represent units of input that could arrive next."
1052 (let (c r v symbol ruleno itemno)
1053 (if (zerop n)
1054 (progn
1055 (setq r 0
1056 v (aref fderives (- start-symbol ntokens)))
1057 (while (< r rulesetsize)
1058 ;; ruleset[r] = FDERIVES (start-symbol)[r];
1059 (aset ruleset r (aref v r))
1060 (setq r (1+ r)))
1061 )
1062 (fillarray ruleset 0)
1063 (setq c 0)
1064 (while (< c n)
1065 (setq symbol (aref ritem (aref core c)))
1066 (when (wisent-ISVAR symbol)
1067 (setq r 0
1068 v (aref fderives (- symbol ntokens)))
1069 (while (< r rulesetsize)
1070 ;; ruleset[r] |= FDERIVES (ritem[core[c]])[r];
1071 (aset ruleset r (logior (aref ruleset r) (aref v r)))
1072 (setq r (1+ r))))
1073 (setq c (1+ c)))
1074 )
1075 (setq nitemset 0
1076 c 0
1077 ruleno 0
1078 r (* rulesetsize wisent-BITS-PER-WORD))
1079 (while (< ruleno r)
1080 (when (wisent-BITISSET ruleset ruleno)
1081 (setq itemno (aref rrhs ruleno))
1082 (while (and (< c n) (< (aref core c) itemno))
1083 (aset itemset nitemset (aref core c))
1084 (setq nitemset (1+ nitemset)
1085 c (1+ c)))
1086 (aset itemset nitemset itemno)
1087 (setq nitemset (1+ nitemset)))
1088 (setq ruleno (1+ ruleno)))
1089
1090 (while (< c n)
1091 (aset itemset nitemset (aref core c))
1092 (setq nitemset (1+ nitemset)
1093 c (1+ c)))
1094
1095 (if wisent-debug-flag
1096 (wisent-print-closure))
1097 ))
1098 \f
1099 ;;;; --------------------------------------------------
1100 ;;;; Generate the nondeterministic finite state machine
1101 ;;;; --------------------------------------------------
1102
1103 (defun wisent-allocate-itemsets ()
1104 "Allocate storage for itemsets."
1105 (let (symbol i count symbol-count)
1106 ;; Count the number of occurrences of all the symbols in RITEMS.
1107 ;; Note that useless productions (hence useless nonterminals) are
1108 ;; browsed too, hence we need to allocate room for _all_ the
1109 ;; symbols.
1110 (setq count 0
1111 symbol-count (make-vector (+ nsyms nuseless-nonterminals) 0)
1112 i 0)
1113 (while (setq symbol (aref ritem i))
1114 (when (> symbol 0)
1115 (setq count (1+ count))
1116 (aset symbol-count symbol (1+ (aref symbol-count symbol))))
1117 (setq i (1+ i)))
1118 ;; See comments before `wisent-new-itemsets'. All the vectors of
1119 ;; items live inside kernel-items. The number of active items
1120 ;; after some symbol cannot be more than the number of times that
1121 ;; symbol appears as an item, which is symbol-count[symbol]. We
1122 ;; allocate that much space for each symbol.
1123 (setq kernel-base (make-vector nsyms nil)
1124 kernel-items (make-vector count 0)
1125 count 0
1126 i 0)
1127 (while (< i nsyms)
1128 (aset kernel-base i count)
1129 (setq count (+ count (aref symbol-count i))
1130 i (1+ i)))
1131 (setq shift-symbol symbol-count
1132 kernel-end (make-vector nsyms nil))
1133 ))
1134
1135 (defun wisent-allocate-storage ()
1136 "Allocate storage for the state machine."
1137 (wisent-allocate-itemsets)
1138 (setq shiftset (make-vector nsyms 0)
1139 redset (make-vector (1+ nrules) 0)
1140 state-table (make-vector wisent-state-table-size nil)))
1141
1142 (defun wisent-new-itemsets ()
1143 "Find which symbols can be shifted in the current state.
1144 And for each one record which items would be active after that shift.
1145 Uses the contents of ITEMSET. SHIFT-SYMBOL is set to a vector of the
1146 symbols that can be shifted. For each symbol in the grammar,
1147 KERNEL-BASE[symbol] points to a vector of item numbers activated if
1148 that symbol is shifted, and KERNEL-END[symbol] points after the end of
1149 that vector."
1150 (let (i shiftcount isp ksp symbol)
1151 (fillarray kernel-end nil)
1152 (setq shiftcount 0
1153 isp 0)
1154 (while (< isp nitemset)
1155 (setq i (aref itemset isp)
1156 isp (1+ isp)
1157 symbol (aref ritem i))
1158 (when (> symbol 0)
1159 (setq ksp (aref kernel-end symbol))
1160 (when (not ksp)
1161 ;; shift-symbol[shiftcount++] = symbol;
1162 (aset shift-symbol shiftcount symbol)
1163 (setq shiftcount (1+ shiftcount)
1164 ksp (aref kernel-base symbol)))
1165 ;; *ksp++ = i + 1;
1166 (aset kernel-items ksp (1+ i))
1167 (setq ksp (1+ ksp))
1168 (aset kernel-end symbol ksp)))
1169 (setq nshifts shiftcount)))
1170
1171 (defun wisent-new-state (symbol)
1172 "Create a new state for those items, if necessary.
1173 SYMBOL is the core accessing-symbol.
1174 Subroutine of `wisent-get-state'."
1175 (let (n p isp1 isp2 iend items)
1176 (setq isp1 (aref kernel-base symbol)
1177 iend (aref kernel-end symbol)
1178 n (- iend isp1)
1179 p (make-core)
1180 items (make-vector n 0))
1181 (set-core-accessing-symbol p symbol)
1182 (set-core-number p nstates)
1183 (set-core-nitems p n)
1184 (set-core-items p items)
1185 (setq isp2 0) ;; isp2 = p->items
1186 (while (< isp1 iend)
1187 ;; *isp2++ = *isp1++;
1188 (aset items isp2 (aref kernel-items isp1))
1189 (setq isp1 (1+ isp1)
1190 isp2 (1+ isp2)))
1191 (set-core-next last-state p)
1192 (setq last-state p
1193 nstates (1+ nstates))
1194 p))
1195
1196 (defun wisent-get-state (symbol)
1197 "Find the state we would get to by shifting SYMBOL.
1198 Return the state number for the state we would get to (from the
1199 current state) by shifting SYMBOL. Create a new state if no
1200 equivalent one exists already. Used by `wisent-append-states'."
1201 (let (key isp1 isp2 iend sp sp2 found n)
1202 (setq isp1 (aref kernel-base symbol)
1203 iend (aref kernel-end symbol)
1204 n (- iend isp1)
1205 key 0)
1206 ;; Add up the target state's active item numbers to get a hash key
1207 (while (< isp1 iend)
1208 (setq key (+ key (aref kernel-items isp1))
1209 isp1 (1+ isp1)))
1210 (setq key (% key wisent-state-table-size)
1211 sp (aref state-table key))
1212 (if sp
1213 (progn
1214 (setq found nil)
1215 (while (not found)
1216 (when (= (core-nitems sp) n)
1217 (setq found t
1218 isp1 (aref kernel-base symbol)
1219 ;; isp2 = sp->items;
1220 sp2 (core-items sp)
1221 isp2 0)
1222
1223 (while (and found (< isp1 iend))
1224 ;; if (*isp1++ != *isp2++)
1225 (if (not (= (aref kernel-items isp1)
1226 (aref sp2 isp2)))
1227 (setq found nil))
1228 (setq isp1 (1+ isp1)
1229 isp2 (1+ isp2))))
1230 (if (not found)
1231 (if (core-link sp)
1232 (setq sp (core-link sp))
1233 ;; sp = sp->link = new-state(symbol)
1234 (setq sp (set-core-link sp (wisent-new-state symbol))
1235 found t)))))
1236 ;; bucket is empty
1237 ;; state-table[key] = sp = new-state(symbol)
1238 (setq sp (wisent-new-state symbol))
1239 (aset state-table key sp))
1240 ;; return (sp->number);
1241 (core-number sp)))
1242
1243 (defun wisent-append-states ()
1244 "Find or create the core structures for states.
1245 Use the information computed by `wisent-new-itemsets' to find the
1246 state numbers reached by each shift transition from the current state.
1247 SHIFTSET is set up as a vector of state numbers of those states."
1248 (let (i j symbol)
1249 ;; First sort shift-symbol into increasing order
1250 (setq i 1)
1251 (while (< i nshifts)
1252 (setq symbol (aref shift-symbol i)
1253 j i)
1254 (while (and (> j 0) (> (aref shift-symbol (1- j)) symbol))
1255 (aset shift-symbol j (aref shift-symbol (1- j)))
1256 (setq j (1- j)))
1257 (aset shift-symbol j symbol)
1258 (setq i (1+ i)))
1259 (setq i 0)
1260 (while (< i nshifts)
1261 (setq symbol (aref shift-symbol i))
1262 (aset shiftset i (wisent-get-state symbol))
1263 (setq i (1+ i)))
1264 ))
1265
1266 (defun wisent-initialize-states ()
1267 "Initialize states."
1268 (let ((p (make-core)))
1269 (setq first-state p
1270 last-state p
1271 this-state p
1272 nstates 1)))
1273
1274 (defun wisent-save-shifts ()
1275 "Save the NSHIFTS of SHIFTSET into the current linked list."
1276 (let (p i shifts)
1277 (setq p (make-shifts)
1278 shifts (make-vector nshifts 0)
1279 i 0)
1280 (set-shifts-number p (core-number this-state))
1281 (set-shifts-nshifts p nshifts)
1282 (set-shifts-shifts p shifts)
1283 (while (< i nshifts)
1284 ;; (p->shifts)[i] = shiftset[i];
1285 (aset shifts i (aref shiftset i))
1286 (setq i (1+ i)))
1287
1288 (if last-shift
1289 (set-shifts-next last-shift p)
1290 (setq first-shift p))
1291 (setq last-shift p)))
1292
1293 (defun wisent-insert-start-shift ()
1294 "Create the next-to-final state.
1295 That is the state to which a shift has already been made in the
1296 initial state. Subroutine of `wisent-augment-automaton'."
1297 (let (statep sp)
1298 (setq statep (make-core))
1299 (set-core-number statep nstates)
1300 (set-core-accessing-symbol statep start-symbol)
1301 (set-core-next last-state statep)
1302 (setq last-state statep)
1303 ;; Make a shift from this state to (what will be) the final state.
1304 (setq sp (make-shifts))
1305 (set-shifts-number sp nstates)
1306 (setq nstates (1+ nstates))
1307 (set-shifts-nshifts sp 1)
1308 (set-shifts-shifts sp (vector nstates))
1309 (set-shifts-next last-shift sp)
1310 (setq last-shift sp)))
1311
1312 (defun wisent-augment-automaton ()
1313 "Set up initial and final states as parser wants them.
1314 Make sure that the initial state has a shift that accepts the
1315 grammar's start symbol and goes to the next-to-final state, which has
1316 a shift going to the final state, which has a shift to the termination
1317 state. Create such states and shifts if they don't happen to exist
1318 already."
1319 (let (i k statep sp sp2 sp1 shifts)
1320 (setq sp first-shift)
1321 (if sp
1322 (progn
1323 (if (zerop (shifts-number sp))
1324 (progn
1325 (setq k (shifts-nshifts sp)
1326 statep (core-next first-state))
1327 ;; The states reached by shifts from first-state are
1328 ;; numbered 1...K. Look for one reached by
1329 ;; START-SYMBOL.
1330 (while (and (< (core-accessing-symbol statep) start-symbol)
1331 (< (core-number statep) k))
1332 (setq statep (core-next statep)))
1333 (if (= (core-accessing-symbol statep) start-symbol)
1334 (progn
1335 ;; We already have a next-to-final state. Make
1336 ;; sure it has a shift to what will be the final
1337 ;; state.
1338 (setq k (core-number statep))
1339 (while (and sp (< (shifts-number sp) k))
1340 (setq sp1 sp
1341 sp (shifts-next sp)))
1342 (if (and sp (= (shifts-number sp) k))
1343 (progn
1344 (setq i (shifts-nshifts sp)
1345 sp2 (make-shifts)
1346 shifts (make-vector (1+ i) 0))
1347 (set-shifts-number sp2 k)
1348 (set-shifts-nshifts sp2 (1+ i))
1349 (set-shifts-shifts sp2 shifts)
1350 (aset shifts 0 nstates)
1351 (while (> i 0)
1352 ;; sp2->shifts[i] = sp->shifts[i - 1];
1353 (aset shifts i (aref (shifts-shifts sp) (1- i)))
1354 (setq i (1- i)))
1355 ;; Patch sp2 into the chain of shifts in
1356 ;; place of sp, following sp1.
1357 (set-shifts-next sp2 (shifts-next sp))
1358 (set-shifts-next sp1 sp2)
1359 (if (eq sp last-shift)
1360 (setq last-shift sp2))
1361 )
1362 (setq sp2 (make-shifts))
1363 (set-shifts-number sp2 k)
1364 (set-shifts-nshifts sp2 1)
1365 (set-shifts-shifts sp2 (vector nstates))
1366 ;; Patch sp2 into the chain of shifts between
1367 ;; sp1 and sp.
1368 (set-shifts-next sp2 sp)
1369 (set-shifts-next sp1 sp2)
1370 (if (not sp)
1371 (setq last-shift sp2))
1372 )
1373 )
1374 ;; There is no next-to-final state as yet.
1375 ;; Add one more shift in FIRST-SHIFT, going to the
1376 ;; next-to-final state (yet to be made).
1377 (setq sp first-shift
1378 sp2 (make-shifts)
1379 i (shifts-nshifts sp)
1380 shifts (make-vector (1+ i) 0))
1381 (set-shifts-nshifts sp2 (1+ i))
1382 (set-shifts-shifts sp2 shifts)
1383 ;; Stick this shift into the vector at the proper place.
1384 (setq statep (core-next first-state)
1385 k 0
1386 i 0)
1387 (while (< i (shifts-nshifts sp))
1388 (when (and (> (core-accessing-symbol statep) start-symbol)
1389 (= i k))
1390 (aset shifts k nstates)
1391 (setq k (1+ k)))
1392 (aset shifts k (aref (shifts-shifts sp) i))
1393 (setq statep (core-next statep))
1394 (setq i (1+ i)
1395 k (1+ k)))
1396 (when (= i k)
1397 (aset shifts k nstates)
1398 (setq k (1+ k)))
1399 ;; Patch sp2 into the chain of shifts in place of
1400 ;; sp, at the beginning.
1401 (set-shifts-next sp2 (shifts-next sp))
1402 (setq first-shift sp2)
1403 (if (eq last-shift sp)
1404 (setq last-shift sp2))
1405 ;; Create the next-to-final state, with shift to
1406 ;; what will be the final state.
1407 (wisent-insert-start-shift)))
1408 ;; The initial state didn't even have any shifts. Give it
1409 ;; one shift, to the next-to-final state.
1410 (setq sp (make-shifts))
1411 (set-shifts-nshifts sp 1)
1412 (set-shifts-shifts sp (vector nstates))
1413 ;; Patch sp into the chain of shifts at the beginning.
1414 (set-shifts-next sp first-shift)
1415 (setq first-shift sp)
1416 ;; Create the next-to-final state, with shift to what will
1417 ;; be the final state.
1418 (wisent-insert-start-shift)))
1419 ;; There are no shifts for any state. Make one shift, from the
1420 ;; initial state to the next-to-final state.
1421 (setq sp (make-shifts))
1422 (set-shifts-nshifts sp 1)
1423 (set-shifts-shifts sp (vector nstates))
1424 ;; Initialize the chain of shifts with sp.
1425 (setq first-shift sp
1426 last-shift sp)
1427 ;; Create the next-to-final state, with shift to what will be
1428 ;; the final state.
1429 (wisent-insert-start-shift))
1430 ;; Make the final state--the one that follows a shift from the
1431 ;; next-to-final state. The symbol for that shift is 0
1432 ;; (end-of-file).
1433 (setq statep (make-core))
1434 (set-core-number statep nstates)
1435 (set-core-next last-state statep)
1436 (setq last-state statep)
1437 ;; Make the shift from the final state to the termination state.
1438 (setq sp (make-shifts))
1439 (set-shifts-number sp nstates)
1440 (setq nstates (1+ nstates))
1441 (set-shifts-nshifts sp 1)
1442 (set-shifts-shifts sp (vector nstates))
1443 (set-shifts-next last-shift sp)
1444 (setq last-shift sp)
1445 ;; Note that the variable FINAL-STATE refers to what we sometimes
1446 ;; call the termination state.
1447 (setq final-state nstates)
1448 ;; Make the termination state.
1449 (setq statep (make-core))
1450 (set-core-number statep nstates)
1451 (setq nstates (1+ nstates))
1452 (set-core-next last-state statep)
1453 (setq last-state statep)))
1454
1455 (defun wisent-save-reductions ()
1456 "Make a reductions structure.
1457 Find which rules can be used for reduction transitions from the
1458 current state and make a reductions structure for the state to record
1459 their rule numbers."
1460 (let (i item count p rules)
1461 ;; Find and count the active items that represent ends of rules.
1462 (setq count 0
1463 i 0)
1464 (while (< i nitemset)
1465 (setq item (aref ritem (aref itemset i)))
1466 (when (< item 0)
1467 (aset redset count (- item))
1468 (setq count (1+ count)))
1469 (setq i (1+ i)))
1470 ;; Make a reductions structure and copy the data into it.
1471 (when (> count 0)
1472 (setq p (make-reductions)
1473 rules (make-vector count 0))
1474 (set-reductions-number p (core-number this-state))
1475 (set-reductions-nreds p count)
1476 (set-reductions-rules p rules)
1477 (setq i 0)
1478 (while (< i count)
1479 ;; (p->rules)[i] = redset[i]
1480 (aset rules i (aref redset i))
1481 (setq i (1+ i)))
1482 (if last-reduction
1483 (set-reductions-next last-reduction p)
1484 (setq first-reduction p))
1485 (setq last-reduction p))))
1486
1487 (defun wisent-generate-states ()
1488 "Compute the nondeterministic finite state machine from the grammar."
1489 (wisent-allocate-storage)
1490 (wisent-initialize-closure nitems)
1491 (wisent-initialize-states)
1492 (while this-state
1493 ;; Set up RULESET and ITEMSET for the transitions out of this
1494 ;; state. RULESET gets a 1 bit for each rule that could reduce
1495 ;; now. ITEMSET gets a vector of all the items that could be
1496 ;; accepted next.
1497 (wisent-closure (core-items this-state) (core-nitems this-state))
1498 ;; Record the reductions allowed out of this state.
1499 (wisent-save-reductions)
1500 ;; Find the itemsets of the states that shifts can reach.
1501 (wisent-new-itemsets)
1502 ;; Find or create the core structures for those states.
1503 (wisent-append-states)
1504 ;; Create the shifts structures for the shifts to those states,
1505 ;; now that the state numbers transitioning to are known.
1506 (if (> nshifts 0)
1507 (wisent-save-shifts))
1508 ;; States are queued when they are created; process them all.
1509 (setq this-state (core-next this-state)))
1510 ;; Set up initial and final states as parser wants them.
1511 (wisent-augment-automaton))
1512 \f
1513 ;;;; ---------------------------
1514 ;;;; Compute look-ahead criteria
1515 ;;;; ---------------------------
1516
1517 ;; Compute how to make the finite state machine deterministic; find
1518 ;; which rules need lookahead in each state, and which lookahead
1519 ;; tokens they accept.
1520
1521 ;; `wisent-lalr', the entry point, builds these data structures:
1522
1523 ;; GOTO-MAP, FROM-STATE and TO-STATE record each shift transition
1524 ;; which accepts a variable (a nonterminal). NGOTOS is the number of
1525 ;; such transitions.
1526 ;; FROM-STATE[t] is the state number which a transition leads from and
1527 ;; TO-STATE[t] is the state number it leads to.
1528 ;; All the transitions that accept a particular variable are grouped
1529 ;; together and GOTO-MAP[i - NTOKENS] is the index in FROM-STATE and
1530 ;; TO-STATE of the first of them.
1531
1532 ;; CONSISTENT[s] is non-nil if no lookahead is needed to decide what
1533 ;; to do in state s.
1534
1535 ;; LARULENO is a vector which records the rules that need lookahead in
1536 ;; various states. The elements of LARULENO that apply to state s are
1537 ;; those from LOOKAHEADS[s] through LOOKAHEADS[s+1]-1. Each element
1538 ;; of LARULENO is a rule number.
1539
1540 ;; If LR is the length of LARULENO, then a number from 0 to LR-1 can
1541 ;; specify both a rule and a state where the rule might be applied.
1542 ;; LA is a LR by NTOKENS matrix of bits.
1543 ;; LA[l, i] is 1 if the rule LARULENO[l] is applicable in the
1544 ;; appropriate state when the next token is symbol i.
1545 ;; If LA[l, i] and LA[l, j] are both 1 for i != j, it is a conflict.
1546
1547 (wisent-defcontext digraph
1548 INDEX R VERTICES
1549 infinity top)
1550
1551 (defun wisent-traverse (i)
1552 "Traverse I."
1553 (let (j k height Ri Fi break)
1554 (setq top (1+ top)
1555 height top)
1556 (aset VERTICES top i) ;; VERTICES[++top] = i
1557 (aset INDEX i top) ;; INDEX[i] = height = top
1558
1559 (setq Ri (aref R i))
1560 (when Ri
1561 (setq j 0)
1562 (while (>= (aref Ri j) 0)
1563 (if (zerop (aref INDEX (aref Ri j)))
1564 (wisent-traverse (aref Ri j)))
1565 ;; if (INDEX[i] > INDEX[R[i][j]])
1566 (if (> (aref INDEX i) (aref INDEX (aref Ri j)))
1567 ;; INDEX[i] = INDEX[R[i][j]];
1568 (aset INDEX i (aref INDEX (aref Ri j))))
1569 (setq Fi (aref F i)
1570 k 0)
1571 (while (< k tokensetsize)
1572 ;; F (i)[k] |= F (R[i][j])[k];
1573 (aset Fi k (logior (aref Fi k)
1574 (aref (aref F (aref Ri j)) k)))
1575 (setq k (1+ k)))
1576 (setq j (1+ j))))
1577
1578 (when (= (aref INDEX i) height)
1579 (setq break nil)
1580 (while (not break)
1581 (setq j (aref VERTICES top) ;; j = VERTICES[top--]
1582 top (1- top))
1583 (aset INDEX j infinity)
1584 (if (= i j)
1585 (setq break t)
1586 (setq k 0)
1587 (while (< k tokensetsize)
1588 ;; F (j)[k] = F (i)[k];
1589 (aset (aref F j) k (aref (aref F i) k))
1590 (setq k (1+ k))))))
1591 ))
1592
1593 (defun wisent-digraph (relation)
1594 "Digraph RELATION."
1595 (wisent-with-context digraph
1596 (setq infinity (+ ngotos 2)
1597 INDEX (make-vector (1+ ngotos) 0)
1598 VERTICES (make-vector (1+ ngotos) 0)
1599 top 0
1600 R relation)
1601 (let ((i 0))
1602 (while (< i ngotos)
1603 (if (and (= (aref INDEX i) 0) (aref R i))
1604 (wisent-traverse i))
1605 (setq i (1+ i))))))
1606
1607 (defun wisent-set-state-table ()
1608 "Build state table."
1609 (let (sp)
1610 (setq state-table (make-vector nstates nil)
1611 sp first-state)
1612 (while sp
1613 (aset state-table (core-number sp) sp)
1614 (setq sp (core-next sp)))))
1615
1616 (defun wisent-set-accessing-symbol ()
1617 "Build accessing symbol table."
1618 (let (sp)
1619 (setq accessing-symbol (make-vector nstates 0)
1620 sp first-state)
1621 (while sp
1622 (aset accessing-symbol (core-number sp) (core-accessing-symbol sp))
1623 (setq sp (core-next sp)))))
1624
1625 (defun wisent-set-shift-table ()
1626 "Build shift table."
1627 (let (sp)
1628 (setq shift-table (make-vector nstates nil)
1629 sp first-shift)
1630 (while sp
1631 (aset shift-table (shifts-number sp) sp)
1632 (setq sp (shifts-next sp)))))
1633
1634 (defun wisent-set-reduction-table ()
1635 "Build reduction table."
1636 (let (rp)
1637 (setq reduction-table (make-vector nstates nil)
1638 rp first-reduction)
1639 (while rp
1640 (aset reduction-table (reductions-number rp) rp)
1641 (setq rp (reductions-next rp)))))
1642
1643 (defun wisent-set-maxrhs ()
1644 "Setup MAXRHS length."
1645 (let (i len max)
1646 (setq len 0
1647 max 0
1648 i 0)
1649 (while (aref ritem i)
1650 (if (> (aref ritem i) 0)
1651 (setq len (1+ len))
1652 (if (> len max)
1653 (setq max len))
1654 (setq len 0))
1655 (setq i (1+ i)))
1656 (setq maxrhs max)))
1657
1658 (defun wisent-initialize-LA ()
1659 "Set up LA."
1660 (let (i j k count rp sp np v)
1661 (setq consistent (make-vector nstates nil)
1662 lookaheads (make-vector (1+ nstates) 0)
1663 count 0
1664 i 0)
1665 (while (< i nstates)
1666 (aset lookaheads i count)
1667 (setq rp (aref reduction-table i)
1668 sp (aref shift-table i))
1669 ;; if (rp &&
1670 ;; (rp->nreds > 1
1671 ;; || (sp && ! ISVAR(accessing-symbol[sp->shifts[0]]))))
1672 (if (and rp
1673 (or (> (reductions-nreds rp) 1)
1674 (and sp
1675 (not (wisent-ISVAR
1676 (aref accessing-symbol
1677 (aref (shifts-shifts sp) 0)))))))
1678 (setq count (+ count (reductions-nreds rp)))
1679 (aset consistent i t))
1680
1681 (when sp
1682 (setq k 0
1683 j (shifts-nshifts sp)
1684 v (shifts-shifts sp))
1685 (while (< k j)
1686 (when (= (aref accessing-symbol (aref v k))
1687 error-token-number)
1688 (aset consistent i nil)
1689 (setq k j)) ;; break
1690 (setq k (1+ k))))
1691 (setq i (1+ i)))
1692
1693 (aset lookaheads nstates count)
1694
1695 (if (zerop count)
1696 (progn
1697 (setq LA (make-vector 1 nil)
1698 LAruleno (make-vector 1 0)
1699 lookback (make-vector 1 nil)))
1700 (setq LA (make-vector count nil)
1701 LAruleno (make-vector count 0)
1702 lookback (make-vector count nil)))
1703 (setq i 0 j (length LA))
1704 (while (< i j)
1705 (aset LA i (make-vector tokensetsize 0))
1706 (setq i (1+ i)))
1707
1708 (setq np 0
1709 i 0)
1710 (while (< i nstates)
1711 (when (not (aref consistent i))
1712 (setq rp (aref reduction-table i))
1713 (when rp
1714 (setq j 0
1715 k (reductions-nreds rp)
1716 v (reductions-rules rp))
1717 (while (< j k)
1718 (aset LAruleno np (aref v j))
1719 (setq np (1+ np)
1720 j (1+ j)))))
1721 (setq i (1+ i)))))
1722
1723 (defun wisent-set-goto-map ()
1724 "Set up GOTO-MAP."
1725 (let (sp i j symbol k temp-map state1 state2 v)
1726 (setq goto-map (make-vector (1+ nvars) 0)
1727 temp-map (make-vector (1+ nvars) 0))
1728
1729 (setq ngotos 0
1730 sp first-shift)
1731 (while sp
1732 (setq i (1- (shifts-nshifts sp))
1733 v (shifts-shifts sp))
1734 (while (>= i 0)
1735 (setq symbol (aref accessing-symbol (aref v i)))
1736 (if (wisent-ISTOKEN symbol)
1737 (setq i 0) ;; break
1738 (setq ngotos (1+ ngotos))
1739 ;; goto-map[symbol]++;
1740 (aset goto-map (- symbol ntokens)
1741 (1+ (aref goto-map (- symbol ntokens)))))
1742 (setq i (1- i)))
1743 (setq sp (shifts-next sp)))
1744
1745 (setq k 0
1746 i ntokens
1747 j 0)
1748 (while (< i nsyms)
1749 (aset temp-map j k)
1750 (setq k (+ k (aref goto-map j))
1751 i (1+ i)
1752 j (1+ j)))
1753 (setq i ntokens
1754 j 0)
1755 (while (< i nsyms)
1756 (aset goto-map j (aref temp-map j))
1757 (setq i (1+ i)
1758 j (1+ j)))
1759 ;; goto-map[nsyms] = ngotos;
1760 ;; temp-map[nsyms] = ngotos;
1761 (aset goto-map j ngotos)
1762 (aset temp-map j ngotos)
1763
1764 (setq from-state (make-vector ngotos 0)
1765 to-state (make-vector ngotos 0)
1766 sp first-shift)
1767 (while sp
1768 (setq state1 (shifts-number sp)
1769 v (shifts-shifts sp)
1770 i (1- (shifts-nshifts sp)))
1771 (while (>= i 0)
1772 (setq state2 (aref v i)
1773 symbol (aref accessing-symbol state2))
1774 (if (wisent-ISTOKEN symbol)
1775 (setq i 0) ;; break
1776 ;; k = temp-map[symbol]++;
1777 (setq k (aref temp-map (- symbol ntokens)))
1778 (aset temp-map (- symbol ntokens) (1+ k))
1779 (aset from-state k state1)
1780 (aset to-state k state2))
1781 (setq i (1- i)))
1782 (setq sp (shifts-next sp)))
1783 ))
1784
1785 (defun wisent-map-goto (state symbol)
1786 "Map a STATE/SYMBOL pair into its numeric representation."
1787 (let (high low middle s result)
1788 ;; low = goto-map[symbol];
1789 ;; high = goto-map[symbol + 1] - 1;
1790 (setq low (aref goto-map (- symbol ntokens))
1791 high (1- (aref goto-map (- (1+ symbol) ntokens))))
1792 (while (and (not result) (<= low high))
1793 (setq middle (/ (+ low high) 2)
1794 s (aref from-state middle))
1795 (cond
1796 ((= s state)
1797 (setq result middle))
1798 ((< s state)
1799 (setq low (1+ middle)))
1800 (t
1801 (setq high (1- middle)))))
1802 (or result
1803 (error "Internal error in `wisent-map-goto'"))
1804 ))
1805
1806 (defun wisent-initialize-F ()
1807 "Set up F."
1808 (let (i j k sp edge rowp rp reads nedges stateno symbol v break)
1809 (setq F (make-vector ngotos nil)
1810 i 0)
1811 (while (< i ngotos)
1812 (aset F i (make-vector tokensetsize 0))
1813 (setq i (1+ i)))
1814
1815 (setq reads (make-vector ngotos nil)
1816 edge (make-vector (1+ ngotos) 0)
1817 nedges 0
1818 rowp 0 ;; rowp = F
1819 i 0)
1820 (while (< i ngotos)
1821 (setq stateno (aref to-state i)
1822 sp (aref shift-table stateno))
1823 (when sp
1824 (setq k (shifts-nshifts sp)
1825 v (shifts-shifts sp)
1826 j 0
1827 break nil)
1828 (while (and (not break) (< j k))
1829 ;; symbol = accessing-symbol[sp->shifts[j]];
1830 (setq symbol (aref accessing-symbol (aref v j)))
1831 (if (wisent-ISVAR symbol)
1832 (setq break t) ;; break
1833 (wisent-SETBIT (aref F rowp) symbol)
1834 (setq j (1+ j))))
1835
1836 (while (< j k)
1837 ;; symbol = accessing-symbol[sp->shifts[j]];
1838 (setq symbol (aref accessing-symbol (aref v j)))
1839 (when (aref nullable (- symbol ntokens))
1840 (aset edge nedges (wisent-map-goto stateno symbol))
1841 (setq nedges (1+ nedges)))
1842 (setq j (1+ j)))
1843
1844 (when (> nedges 0)
1845 ;; reads[i] = rp = NEW2(nedges + 1, short);
1846 (setq rp (make-vector (1+ nedges) 0)
1847 j 0)
1848 (aset reads i rp)
1849 (while (< j nedges)
1850 ;; rp[j] = edge[j];
1851 (aset rp j (aref edge j))
1852 (setq j (1+ j)))
1853 (aset rp nedges -1)
1854 (setq nedges 0)))
1855 (setq rowp (1+ rowp))
1856 (setq i (1+ i)))
1857 (wisent-digraph reads)
1858 ))
1859
1860 (defun wisent-add-lookback-edge (stateno ruleno gotono)
1861 "Add a lookback edge.
1862 STATENO, RULENO, GOTONO are self-explanatory."
1863 (let (i k found)
1864 (setq i (aref lookaheads stateno)
1865 k (aref lookaheads (1+ stateno))
1866 found nil)
1867 (while (and (not found) (< i k))
1868 (if (= (aref LAruleno i) ruleno)
1869 (setq found t)
1870 (setq i (1+ i))))
1871
1872 (or found
1873 (error "Internal error in `wisent-add-lookback-edge'"))
1874
1875 ;; value . next
1876 ;; lookback[i] = (gotono . lookback[i])
1877 (aset lookback i (cons gotono (aref lookback i)))))
1878
1879 (defun wisent-transpose (R-arg n)
1880 "Return the transpose of R-ARG, of size N.
1881 Destroy R-ARG, as it is replaced with the result. R-ARG[I] is nil or
1882 a -1 terminated list of numbers. RESULT[NUM] is nil or the -1
1883 terminated list of the I such as NUM is in R-ARG[I]."
1884 (let (i j new-R end-R nedges v sp)
1885 (setq new-R (make-vector n nil)
1886 end-R (make-vector n nil)
1887 nedges (make-vector n 0))
1888
1889 ;; Count.
1890 (setq i 0)
1891 (while (< i n)
1892 (setq v (aref R-arg i))
1893 (when v
1894 (setq j 0)
1895 (while (>= (aref v j) 0)
1896 (aset nedges (aref v j) (1+ (aref nedges (aref v j))))
1897 (setq j (1+ j))))
1898 (setq i (1+ i)))
1899
1900 ;; Allocate.
1901 (setq i 0)
1902 (while (< i n)
1903 (when (> (aref nedges i) 0)
1904 (setq sp (make-vector (1+ (aref nedges i)) 0))
1905 (aset sp (aref nedges i) -1)
1906 (aset new-R i sp)
1907 (aset end-R i 0))
1908 (setq i (1+ i)))
1909
1910 ;; Store.
1911 (setq i 0)
1912 (while (< i n)
1913 (setq v (aref R-arg i))
1914 (when v
1915 (setq j 0)
1916 (while (>= (aref v j) 0)
1917 (aset (aref new-R (aref v j)) (aref end-R (aref v j)) i)
1918 (aset end-R (aref v j) (1+ (aref end-R (aref v j))))
1919 (setq j (1+ j))))
1920 (setq i (1+ i)))
1921
1922 new-R))
1923
1924 (defun wisent-build-relations ()
1925 "Build relations."
1926 (let (i j k rulep rp sp length nedges done state1 stateno
1927 symbol1 symbol2 edge states v)
1928 (setq includes (make-vector ngotos nil)
1929 edge (make-vector (1+ ngotos) 0)
1930 states (make-vector (1+ maxrhs) 0)
1931 i 0)
1932
1933 (while (< i ngotos)
1934 (setq nedges 0
1935 state1 (aref from-state i)
1936 symbol1 (aref accessing-symbol (aref to-state i))
1937 rulep (aref derives (- symbol1 ntokens)))
1938
1939 (while (> (car rulep) 0)
1940 (aset states 0 state1)
1941 (setq length 1
1942 stateno state1
1943 rp (aref rrhs (car rulep))) ;; rp = ritem + rrhs[*rulep]
1944 (while (> (aref ritem rp) 0) ;; *rp > 0
1945 (setq symbol2 (aref ritem rp)
1946 sp (aref shift-table stateno)
1947 k (shifts-nshifts sp)
1948 v (shifts-shifts sp)
1949 j 0)
1950 (while (< j k)
1951 (setq stateno (aref v j))
1952 (if (= (aref accessing-symbol stateno) symbol2)
1953 (setq j k) ;; break
1954 (setq j (1+ j))))
1955 ;; states[length++] = stateno;
1956 (aset states length stateno)
1957 (setq length (1+ length))
1958 (setq rp (1+ rp)))
1959
1960 (if (not (aref consistent stateno))
1961 (wisent-add-lookback-edge stateno (car rulep) i))
1962
1963 (setq length (1- length)
1964 done nil)
1965 (while (not done)
1966 (setq done t
1967 rp (1- rp))
1968 (when (and (>= rp 0) (wisent-ISVAR (aref ritem rp)))
1969 ;; stateno = states[--length];
1970 (setq length (1- length)
1971 stateno (aref states length))
1972 (aset edge nedges (wisent-map-goto stateno (aref ritem rp)))
1973 (setq nedges (1+ nedges))
1974 (if (aref nullable (- (aref ritem rp) ntokens))
1975 (setq done nil))))
1976 (setq rulep (cdr rulep)))
1977
1978 (when (> nedges 0)
1979 (setq v (make-vector (1+ nedges) 0)
1980 j 0)
1981 (aset includes i v)
1982 (while (< j nedges)
1983 (aset v j (aref edge j))
1984 (setq j (1+ j)))
1985 (aset v nedges -1))
1986 (setq i (1+ i)))
1987
1988 (setq includes (wisent-transpose includes ngotos))
1989 ))
1990
1991 (defun wisent-compute-FOLLOWS ()
1992 "Compute follows."
1993 (wisent-digraph includes))
1994
1995 (defun wisent-compute-lookaheads ()
1996 "Compute lookaheads."
1997 (let (i j n v1 v2 sp)
1998 (setq n (aref lookaheads nstates)
1999 i 0)
2000 (while (< i n)
2001 (setq sp (aref lookback i))
2002 (while sp
2003 (setq v1 (aref LA i)
2004 v2 (aref F (car sp))
2005 j 0)
2006 (while (< j tokensetsize)
2007 ;; LA (i)[j] |= F (sp->value)[j]
2008 (aset v1 j (logior (aref v1 j) (aref v2 j)))
2009 (setq j (1+ j)))
2010 (setq sp (cdr sp)))
2011 (setq i (1+ i)))))
2012
2013 (defun wisent-lalr ()
2014 "Make the nondeterministic finite state machine deterministic."
2015 (setq tokensetsize (wisent-WORDSIZE ntokens))
2016 (wisent-set-state-table)
2017 (wisent-set-accessing-symbol)
2018 (wisent-set-shift-table)
2019 (wisent-set-reduction-table)
2020 (wisent-set-maxrhs)
2021 (wisent-initialize-LA)
2022 (wisent-set-goto-map)
2023 (wisent-initialize-F)
2024 (wisent-build-relations)
2025 (wisent-compute-FOLLOWS)
2026 (wisent-compute-lookaheads))
2027 \f
2028 ;;;; -----------------------------------------------
2029 ;;;; Find and resolve or report look-ahead conflicts
2030 ;;;; -----------------------------------------------
2031
2032 (defsubst wisent-log-resolution (state LAno token resolution)
2033 "Log a shift-reduce conflict resolution.
2034 In specified STATE between rule pointed by lookahead number LANO and
2035 TOKEN, resolved as RESOLUTION."
2036 (if (or wisent-verbose-flag wisent-debug-flag)
2037 (wisent-log
2038 "Conflict in state %d between rule %d and token %s resolved as %s.\n"
2039 state (aref LAruleno LAno) (wisent-tag token) resolution)))
2040
2041 (defun wisent-flush-shift (state token)
2042 "Turn off the shift recorded in the specified STATE for TOKEN.
2043 Used when we resolve a shift-reduce conflict in favor of the reduction."
2044 (let (shiftp i k v)
2045 (when (setq shiftp (aref shift-table state))
2046 (setq k (shifts-nshifts shiftp)
2047 v (shifts-shifts shiftp)
2048 i 0)
2049 (while (< i k)
2050 (if (and (not (zerop (aref v i)))
2051 (= token (aref accessing-symbol (aref v i))))
2052 (aset v i 0))
2053 (setq i (1+ i))))))
2054
2055 (defun wisent-resolve-sr-conflict (state lookaheadnum)
2056 "Attempt to resolve shift-reduce conflict for one rule.
2057 Resolve by means of precedence declarations. The conflict occurred in
2058 specified STATE for the rule pointed by the lookahead symbol
2059 LOOKAHEADNUM. It has already been checked that the rule has a
2060 precedence. A conflict is resolved by modifying the shift or reduce
2061 tables so that there is no longer a conflict."
2062 (let (i redprec errp errs nerrs token sprec sassoc)
2063 ;; Find the rule to reduce by to get precedence of reduction
2064 (setq token (aref tags (aref rprec (aref LAruleno lookaheadnum)))
2065 redprec (wisent-prec token)
2066 errp (make-errs)
2067 errs (make-vector ntokens 0)
2068 nerrs 0
2069 i 0)
2070 (set-errs-errs errp errs)
2071 (while (< i ntokens)
2072 (setq token (aref tags i))
2073 (when (and (wisent-BITISSET (aref LA lookaheadnum) i)
2074 (wisent-BITISSET lookaheadset i)
2075 (setq sprec (wisent-prec token)))
2076 ;; Shift-reduce conflict occurs for token number I and it has
2077 ;; a precedence. The precedence of shifting is that of token
2078 ;; I.
2079 (cond
2080 ((< sprec redprec)
2081 (wisent-log-resolution state lookaheadnum i "reduce")
2082 ;; Flush the shift for this token
2083 (wisent-RESETBIT lookaheadset i)
2084 (wisent-flush-shift state i)
2085 )
2086 ((> sprec redprec)
2087 (wisent-log-resolution state lookaheadnum i "shift")
2088 ;; Flush the reduce for this token
2089 (wisent-RESETBIT (aref LA lookaheadnum) i)
2090 )
2091 (t
2092 ;; Matching precedence levels.
2093 ;; For left association, keep only the reduction.
2094 ;; For right association, keep only the shift.
2095 ;; For nonassociation, keep neither.
2096 (setq sassoc (wisent-assoc token))
2097 (cond
2098 ((eq sassoc 'right)
2099 (wisent-log-resolution state lookaheadnum i "shift"))
2100 ((eq sassoc 'left)
2101 (wisent-log-resolution state lookaheadnum i "reduce"))
2102 ((eq sassoc 'nonassoc)
2103 (wisent-log-resolution state lookaheadnum i "an error"))
2104 )
2105 (when (not (eq sassoc 'right))
2106 ;; Flush the shift for this token
2107 (wisent-RESETBIT lookaheadset i)
2108 (wisent-flush-shift state i))
2109 (when (not (eq sassoc 'left))
2110 ;; Flush the reduce for this token
2111 (wisent-RESETBIT (aref LA lookaheadnum) i))
2112 (when (eq sassoc 'nonassoc)
2113 ;; Record an explicit error for this token
2114 (aset errs nerrs i)
2115 (setq nerrs (1+ nerrs)))
2116 )))
2117 (setq i (1+ i)))
2118 (when (> nerrs 0)
2119 (set-errs-nerrs errp nerrs)
2120 (aset err-table state errp))
2121 ))
2122
2123 (defun wisent-set-conflicts (state)
2124 "Find and attempt to resolve conflicts in specified STATE."
2125 (let (i j k v shiftp symbol)
2126 (unless (aref consistent state)
2127 (fillarray lookaheadset 0)
2128
2129 (when (setq shiftp (aref shift-table state))
2130 (setq k (shifts-nshifts shiftp)
2131 v (shifts-shifts shiftp)
2132 i 0)
2133 (while (and (< i k)
2134 (wisent-ISTOKEN
2135 (setq symbol (aref accessing-symbol (aref v i)))))
2136 (or (zerop (aref v i))
2137 (wisent-SETBIT lookaheadset symbol))
2138 (setq i (1+ i))))
2139
2140 ;; Loop over all rules which require lookahead in this state
2141 ;; first check for shift-reduce conflict, and try to resolve
2142 ;; using precedence
2143 (setq i (aref lookaheads state)
2144 k (aref lookaheads (1+ state)))
2145 (while (< i k)
2146 (when (aref rprec (aref LAruleno i))
2147 (setq v (aref LA i)
2148 j 0)
2149 (while (< j tokensetsize)
2150 (if (zerop (logand (aref v j) (aref lookaheadset j)))
2151 (setq j (1+ j))
2152 ;; if (LA (i)[j] & lookaheadset[j])
2153 (wisent-resolve-sr-conflict state i)
2154 (setq j tokensetsize)))) ;; break
2155 (setq i (1+ i)))
2156
2157 ;; Loop over all rules which require lookahead in this state
2158 ;; Check for conflicts not resolved above.
2159 (setq i (aref lookaheads state))
2160 (while (< i k)
2161 (setq v (aref LA i)
2162 j 0)
2163 (while (< j tokensetsize)
2164 ;; if (LA (i)[j] & lookaheadset[j])
2165 (if (not (zerop (logand (aref v j) (aref lookaheadset j))))
2166 (aset conflicts state t))
2167 (setq j (1+ j)))
2168 (setq j 0)
2169 (while (< j tokensetsize)
2170 ;; lookaheadset[j] |= LA (i)[j];
2171 (aset lookaheadset j (logior (aref lookaheadset j)
2172 (aref v j)))
2173 (setq j (1+ j)))
2174 (setq i (1+ i)))
2175 )))
2176
2177 (defun wisent-resolve-conflicts ()
2178 "Find and resolve conflicts."
2179 (let (i)
2180 (setq conflicts (make-vector nstates nil)
2181 shiftset (make-vector tokensetsize 0)
2182 lookaheadset (make-vector tokensetsize 0)
2183 err-table (make-vector nstates nil)
2184 i 0)
2185 (while (< i nstates)
2186 (wisent-set-conflicts i)
2187 (setq i (1+ i)))))
2188
2189 (defun wisent-count-sr-conflicts (state)
2190 "Count the number of shift/reduce conflicts in specified STATE."
2191 (let (i j k shiftp symbol v)
2192 (setq src-count 0
2193 shiftp (aref shift-table state))
2194 (when shiftp
2195 (fillarray shiftset 0)
2196 (fillarray lookaheadset 0)
2197 (setq k (shifts-nshifts shiftp)
2198 v (shifts-shifts shiftp)
2199 i 0)
2200 (while (< i k)
2201 (when (not (zerop (aref v i)))
2202 (setq symbol (aref accessing-symbol (aref v i)))
2203 (if (wisent-ISVAR symbol)
2204 (setq i k) ;; break
2205 (wisent-SETBIT shiftset symbol)))
2206 (setq i (1+ i)))
2207
2208 (setq k (aref lookaheads (1+ state))
2209 i (aref lookaheads state))
2210 (while (< i k)
2211 (setq v (aref LA i)
2212 j 0)
2213 (while (< j tokensetsize)
2214 ;; lookaheadset[j] |= LA (i)[j]
2215 (aset lookaheadset j (logior (aref lookaheadset j)
2216 (aref v j)))
2217 (setq j (1+ j)))
2218 (setq i (1+ i)))
2219
2220 (setq k 0)
2221 (while (< k tokensetsize)
2222 ;; lookaheadset[k] &= shiftset[k];
2223 (aset lookaheadset k (logand (aref lookaheadset k)
2224 (aref shiftset k)))
2225 (setq k (1+ k)))
2226
2227 (setq i 0)
2228 (while (< i ntokens)
2229 (if (wisent-BITISSET lookaheadset i)
2230 (setq src-count (1+ src-count)))
2231 (setq i (1+ i))))
2232 src-count))
2233
2234 (defun wisent-count-rr-conflicts (state)
2235 "Count the number of reduce/reduce conflicts in specified STATE."
2236 (let (i j count n m)
2237 (setq rrc-count 0
2238 m (aref lookaheads state)
2239 n (aref lookaheads (1+ state)))
2240 (when (>= (- n m) 2)
2241 (setq i 0)
2242 (while (< i ntokens)
2243 (setq count 0
2244 j m)
2245 (while (< j n)
2246 (if (wisent-BITISSET (aref LA j) i)
2247 (setq count (1+ count)))
2248 (setq j (1+ j)))
2249
2250 (if (>= count 2)
2251 (setq rrc-count (1+ rrc-count)))
2252 (setq i (1+ i))))
2253 rrc-count))
2254
2255 (defvar wisent-expected-conflicts nil
2256 "*If non-nil suppress the warning about shift/reduce conflicts.
2257 It is a decimal integer N that says there should be no warning if
2258 there are N shift/reduce conflicts and no reduce/reduce conflicts. A
2259 warning is given if there are either more or fewer conflicts, or if
2260 there are any reduce/reduce conflicts.")
2261
2262 (defun wisent-total-conflicts ()
2263 "Report the total number of conflicts."
2264 (unless (and (zerop rrc-total)
2265 (or (zerop src-total)
2266 (= src-total (or wisent-expected-conflicts 0))))
2267 (let* ((src (wisent-source))
2268 (src (if src (concat " in " src) ""))
2269 (msg (format "Grammar%s contains" src)))
2270 (if (> src-total 0)
2271 (setq msg (format "%s %d shift/reduce conflict%s"
2272 msg src-total (if (> src-total 1)
2273 "s" ""))))
2274 (if (and (> src-total 0) (> rrc-total 0))
2275 (setq msg (format "%s and" msg)))
2276 (if (> rrc-total 0)
2277 (setq msg (format "%s %d reduce/reduce conflict%s"
2278 msg rrc-total (if (> rrc-total 1)
2279 "s" ""))))
2280 (message msg))))
2281
2282 (defun wisent-print-conflicts ()
2283 "Report conflicts."
2284 (let (i)
2285 (setq src-total 0
2286 rrc-total 0
2287 i 0)
2288 (while (< i nstates)
2289 (when (aref conflicts i)
2290 (wisent-count-sr-conflicts i)
2291 (wisent-count-rr-conflicts i)
2292 (setq src-total (+ src-total src-count)
2293 rrc-total (+ rrc-total rrc-count))
2294 (when (or wisent-verbose-flag wisent-debug-flag)
2295 (wisent-log "State %d contains" i)
2296 (if (> src-count 0)
2297 (wisent-log " %d shift/reduce conflict%s"
2298 src-count (if (> src-count 1) "s" "")))
2299
2300 (if (and (> src-count 0) (> rrc-count 0))
2301 (wisent-log " and"))
2302
2303 (if (> rrc-count 0)
2304 (wisent-log " %d reduce/reduce conflict%s"
2305 rrc-count (if (> rrc-count 1) "s" "")))
2306
2307 (wisent-log ".\n")))
2308 (setq i (1+ i)))
2309 (wisent-total-conflicts)))
2310 \f
2311 ;;;; --------------------------------------
2312 ;;;; Report information on generated parser
2313 ;;;; --------------------------------------
2314 (defun wisent-print-grammar ()
2315 "Print grammar."
2316 (let (i j r break left-count right-count)
2317
2318 (wisent-log "\n\nGrammar\n\n Number, Rule\n")
2319 (setq i 1)
2320 (while (<= i nrules)
2321 ;; Don't print rules disabled in `wisent-reduce-grammar-tables'.
2322 (when (aref ruseful i)
2323 (wisent-log " %s %s ->"
2324 (wisent-pad-string (number-to-string i) 6)
2325 (wisent-tag (aref rlhs i)))
2326 (setq r (aref rrhs i))
2327 (if (> (aref ritem r) 0)
2328 (while (> (aref ritem r) 0)
2329 (wisent-log " %s" (wisent-tag (aref ritem r)))
2330 (setq r (1+ r)))
2331 (wisent-log " /* empty */"))
2332 (wisent-log "\n"))
2333 (setq i (1+ i)))
2334
2335 (wisent-log "\n\nTerminals, with rules where they appear\n\n")
2336 (wisent-log "%s (-1)\n" (wisent-tag 0))
2337 (setq i 1)
2338 (while (< i ntokens)
2339 (wisent-log "%s (%d)" (wisent-tag i) i)
2340 (setq j 1)
2341 (while (<= j nrules)
2342 (setq r (aref rrhs j)
2343 break nil)
2344 (while (and (not break) (> (aref ritem r) 0))
2345 (if (setq break (= (aref ritem r) i))
2346 (wisent-log " %d" j)
2347 (setq r (1+ r))))
2348 (setq j (1+ j)))
2349 (wisent-log "\n")
2350 (setq i (1+ i)))
2351
2352 (wisent-log "\n\nNonterminals, with rules where they appear\n\n")
2353 (setq i ntokens)
2354 (while (< i nsyms)
2355 (setq left-count 0
2356 right-count 0
2357 j 1)
2358 (while (<= j nrules)
2359 (if (= (aref rlhs j) i)
2360 (setq left-count (1+ left-count)))
2361 (setq r (aref rrhs j)
2362 break nil)
2363 (while (and (not break) (> (aref ritem r) 0))
2364 (if (= (aref ritem r) i)
2365 (setq right-count (1+ right-count)
2366 break t)
2367 (setq r (1+ r))))
2368 (setq j (1+ j)))
2369 (wisent-log "%s (%d)\n " (wisent-tag i) i)
2370 (when (> left-count 0)
2371 (wisent-log " on left:")
2372 (setq j 1)
2373 (while (<= j nrules)
2374 (if (= (aref rlhs j) i)
2375 (wisent-log " %d" j))
2376 (setq j (1+ j))))
2377 (when (> right-count 0)
2378 (if (> left-count 0)
2379 (wisent-log ","))
2380 (wisent-log " on right:")
2381 (setq j 1)
2382 (while (<= j nrules)
2383 (setq r (aref rrhs j)
2384 break nil)
2385 (while (and (not break) (> (aref ritem r) 0))
2386 (if (setq break (= (aref ritem r) i))
2387 (wisent-log " %d" j)
2388 (setq r (1+ r))))
2389 (setq j (1+ j))))
2390 (wisent-log "\n")
2391 (setq i (1+ i)))
2392 ))
2393
2394 (defun wisent-print-reductions (state)
2395 "Print reductions on STATE."
2396 (let (i j k v symbol m n defaulted
2397 default-LA default-rule cmax count shiftp errp nodefault)
2398 (setq nodefault nil
2399 i 0)
2400 (fillarray shiftset 0)
2401
2402 (setq shiftp (aref shift-table state))
2403 (when shiftp
2404 (setq k (shifts-nshifts shiftp)
2405 v (shifts-shifts shiftp)
2406 i 0)
2407 (while (< i k)
2408 (when (not (zerop (aref v i)))
2409 (setq symbol (aref accessing-symbol (aref v i)))
2410 (if (wisent-ISVAR symbol)
2411 (setq i k) ;; break
2412 ;; If this state has a shift for the error token, don't
2413 ;; use a default rule.
2414 (if (= symbol error-token-number)
2415 (setq nodefault t))
2416 (wisent-SETBIT shiftset symbol)))
2417 (setq i (1+ i))))
2418
2419 (setq errp (aref err-table state))
2420 (when errp
2421 (setq k (errs-nerrs errp)
2422 v (errs-errs errp)
2423 i 0)
2424 (while (< i k)
2425 (if (not (zerop (setq symbol (aref v i))))
2426 (wisent-SETBIT shiftset symbol))
2427 (setq i (1+ i))))
2428
2429 (setq m (aref lookaheads state)
2430 n (aref lookaheads (1+ state)))
2431
2432 (cond
2433 ((and (= (- n m) 1) (not nodefault))
2434 (setq default-rule (aref LAruleno m)
2435 v (aref LA m)
2436 k 0)
2437 (while (< k tokensetsize)
2438 (aset lookaheadset k (logand (aref v k)
2439 (aref shiftset k)))
2440 (setq k (1+ k)))
2441
2442 (setq i 0)
2443 (while (< i ntokens)
2444 (if (wisent-BITISSET lookaheadset i)
2445 (wisent-log " %s\t[reduce using rule %d (%s)]\n"
2446 (wisent-tag i) default-rule
2447 (wisent-tag (aref rlhs default-rule))))
2448 (setq i (1+ i)))
2449 (wisent-log " $default\treduce using rule %d (%s)\n\n"
2450 default-rule
2451 (wisent-tag (aref rlhs default-rule)))
2452 )
2453 ((>= (- n m) 1)
2454 (setq cmax 0
2455 default-LA -1
2456 default-rule 0)
2457 (when (not nodefault)
2458 (setq i m)
2459 (while (< i n)
2460 (setq v (aref LA i)
2461 count 0
2462 k 0)
2463 (while (< k tokensetsize)
2464 ;; lookaheadset[k] = LA (i)[k] & ~shiftset[k]
2465 (aset lookaheadset k
2466 (logand (aref v k)
2467 (lognot (aref shiftset k))))
2468 (setq k (1+ k)))
2469 (setq j 0)
2470 (while (< j ntokens)
2471 (if (wisent-BITISSET lookaheadset j)
2472 (setq count (1+ count)))
2473 (setq j (1+ j)))
2474 (if (> count cmax)
2475 (setq cmax count
2476 default-LA i
2477 default-rule (aref LAruleno i)))
2478 (setq k 0)
2479 (while (< k tokensetsize)
2480 (aset shiftset k (logior (aref shiftset k)
2481 (aref lookaheadset k)))
2482 (setq k (1+ k)))
2483 (setq i (1+ i))))
2484
2485 (fillarray shiftset 0)
2486
2487 (when shiftp
2488 (setq k (shifts-nshifts shiftp)
2489 v (shifts-shifts shiftp)
2490 i 0)
2491 (while (< i k)
2492 (when (not (zerop (aref v i)))
2493 (setq symbol (aref accessing-symbol (aref v i)))
2494 (if (wisent-ISVAR symbol)
2495 (setq i k) ;; break
2496 (wisent-SETBIT shiftset symbol)))
2497 (setq i (1+ i))))
2498
2499 (setq i 0)
2500 (while (< i ntokens)
2501 (setq defaulted nil
2502 count (if (wisent-BITISSET shiftset i) 1 0)
2503 j m)
2504 (while (< j n)
2505 (when (wisent-BITISSET (aref LA j) i)
2506 (if (zerop count)
2507 (progn
2508 (if (not (= j default-LA))
2509 (wisent-log
2510 " %s\treduce using rule %d (%s)\n"
2511 (wisent-tag i) (aref LAruleno j)
2512 (wisent-tag (aref rlhs (aref LAruleno j))))
2513 (setq defaulted t))
2514 (setq count (1+ count)))
2515 (if defaulted
2516 (wisent-log
2517 " %s\treduce using rule %d (%s)\n"
2518 (wisent-tag i) (aref LAruleno default-LA)
2519 (wisent-tag (aref rlhs (aref LAruleno default-LA)))))
2520 (setq defaulted nil)
2521 (wisent-log
2522 " %s\t[reduce using rule %d (%s)]\n"
2523 (wisent-tag i) (aref LAruleno j)
2524 (wisent-tag (aref rlhs (aref LAruleno j))))))
2525 (setq j (1+ j)))
2526 (setq i (1+ i)))
2527
2528 (if (>= default-LA 0)
2529 (wisent-log
2530 " $default\treduce using rule %d (%s)\n"
2531 default-rule
2532 (wisent-tag (aref rlhs default-rule))))
2533 ))))
2534
2535 (defun wisent-print-actions (state)
2536 "Print actions on STATE."
2537 (let (i j k v state1 symbol shiftp errp redp rule nerrs break)
2538 (setq shiftp (aref shift-table state)
2539 redp (aref reduction-table state)
2540 errp (aref err-table state))
2541 (if (and (not shiftp) (not redp))
2542 (if (= final-state state)
2543 (wisent-log " $default\taccept\n")
2544 (wisent-log " NO ACTIONS\n"))
2545 (if (not shiftp)
2546 (setq i 0
2547 k 0)
2548 (setq k (shifts-nshifts shiftp)
2549 v (shifts-shifts shiftp)
2550 i 0
2551 break nil)
2552 (while (and (not break) (< i k))
2553 (if (zerop (setq state1 (aref v i)))
2554 (setq i (1+ i))
2555 (setq symbol (aref accessing-symbol state1))
2556 ;; The following line used to be turned off.
2557 (if (wisent-ISVAR symbol)
2558 (setq break t) ;; break
2559 (wisent-log " %s\tshift, and go to state %d\n"
2560 (wisent-tag symbol) state1)
2561 (setq i (1+ i)))))
2562 (if (> i 0)
2563 (wisent-log "\n")))
2564
2565 (when errp
2566 (setq nerrs (errs-nerrs errp)
2567 v (errs-errs errp)
2568 j 0)
2569 (while (< j nerrs)
2570 (if (aref v j)
2571 (wisent-log " %s\terror (nonassociative)\n"
2572 (wisent-tag (aref v j))))
2573 (setq j (1+ j)))
2574 (if (> j 0)
2575 (wisent-log "\n")))
2576
2577 (cond
2578 ((and (aref consistent state) redp)
2579 (setq rule (aref (reductions-rules redp) 0)
2580 symbol (aref rlhs rule))
2581 (wisent-log " $default\treduce using rule %d (%s)\n\n"
2582 rule (wisent-tag symbol))
2583 )
2584 (redp
2585 (wisent-print-reductions state)
2586 ))
2587
2588 (when (< i k)
2589 (setq v (shifts-shifts shiftp))
2590 (while (< i k)
2591 (when (setq state1 (aref v i))
2592 (setq symbol (aref accessing-symbol state1))
2593 (wisent-log " %s\tgo to state %d\n"
2594 (wisent-tag symbol) state1))
2595 (setq i (1+ i)))
2596 (wisent-log "\n"))
2597 )))
2598
2599 (defun wisent-print-core (state)
2600 "Print STATE core."
2601 (let (i k rule statep sp sp1)
2602 (setq statep (aref state-table state)
2603 k (core-nitems statep))
2604 (when (> k 0)
2605 (setq i 0)
2606 (while (< i k)
2607 ;; sp1 = sp = ritem + statep->items[i];
2608 (setq sp1 (aref (core-items statep) i)
2609 sp sp1)
2610 (while (> (aref ritem sp) 0)
2611 (setq sp (1+ sp)))
2612
2613 (setq rule (- (aref ritem sp)))
2614 (wisent-log " %s -> " (wisent-tag (aref rlhs rule)))
2615
2616 (setq sp (aref rrhs rule))
2617 (while (< sp sp1)
2618 (wisent-log "%s " (wisent-tag (aref ritem sp)))
2619 (setq sp (1+ sp)))
2620 (wisent-log ".")
2621 (while (> (aref ritem sp) 0)
2622 (wisent-log " %s" (wisent-tag (aref ritem sp)))
2623 (setq sp (1+ sp)))
2624 (wisent-log " (rule %d)\n" rule)
2625 (setq i (1+ i)))
2626 (wisent-log "\n"))))
2627
2628 (defun wisent-print-state (state)
2629 "Print information on STATE."
2630 (wisent-log "\n\nstate %d\n\n" state)
2631 (wisent-print-core state)
2632 (wisent-print-actions state))
2633
2634 (defun wisent-print-states ()
2635 "Print information on states."
2636 (let ((i 0))
2637 (while (< i nstates)
2638 (wisent-print-state i)
2639 (setq i (1+ i)))))
2640
2641 (defun wisent-print-results ()
2642 "Print information on generated parser.
2643 Report detailed information if `wisent-verbose-flag' or
2644 `wisent-debug-flag' are non-nil."
2645 (when (or wisent-verbose-flag wisent-debug-flag)
2646 (wisent-print-useless))
2647 (wisent-print-conflicts)
2648 (when (or wisent-verbose-flag wisent-debug-flag)
2649 (wisent-print-grammar)
2650 (wisent-print-states))
2651 ;; Append output to log file when running in batch mode
2652 (when (wisent-noninteractive)
2653 (wisent-append-to-log-file)
2654 (wisent-clear-log)))
2655 \f
2656 ;;;; ---------------------------------
2657 ;;;; Build the generated parser tables
2658 ;;;; ---------------------------------
2659
2660 (defun wisent-action-row (state actrow)
2661 "Figure out the actions for the specified STATE.
2662 Decide what to do for each type of token if seen as the lookahead
2663 token in specified state. The value returned is used as the default
2664 action for the state. In addition, ACTROW is filled with what to do
2665 for each kind of token, index by symbol number, with nil meaning do
2666 the default action. The value 'error, means this situation is an
2667 error. The parser recognizes this value specially.
2668
2669 This is where conflicts are resolved. The loop over lookahead rules
2670 considered lower-numbered rules last, and the last rule considered
2671 that likes a token gets to handle it."
2672 (let (i j k m n v default-rule nreds rule max count
2673 shift-state symbol redp shiftp errp nodefault)
2674
2675 (fillarray actrow nil)
2676
2677 (setq default-rule 0
2678 nodefault nil ;; nil inhibit having any default reduction
2679 nreds 0
2680 m 0
2681 n 0
2682 redp (aref reduction-table state))
2683
2684 (when redp
2685 (setq nreds (reductions-nreds redp))
2686 (when (>= nreds 1)
2687 ;; loop over all the rules available here which require
2688 ;; lookahead
2689 (setq m (aref lookaheads state)
2690 n (aref lookaheads (1+ state))
2691 i (1- n))
2692 (while (>= i m)
2693 ;; and find each token which the rule finds acceptable to
2694 ;; come next
2695 (setq j 0)
2696 (while (< j ntokens)
2697 ;; and record this rule as the rule to use if that token
2698 ;; follows.
2699 (if (wisent-BITISSET (aref LA i) j)
2700 (aset actrow j (- (aref LAruleno i)))
2701 )
2702 (setq j (1+ j)))
2703 (setq i (1- i)))))
2704
2705 ;; Now see which tokens are allowed for shifts in this state. For
2706 ;; them, record the shift as the thing to do. So shift is
2707 ;; preferred to reduce.
2708 (setq shiftp (aref shift-table state))
2709 (when shiftp
2710 (setq k (shifts-nshifts shiftp)
2711 v (shifts-shifts shiftp)
2712 i 0)
2713 (while (< i k)
2714 (setq shift-state (aref v i))
2715 (if (zerop shift-state)
2716 nil ;; continue
2717 (setq symbol (aref accessing-symbol shift-state))
2718 (if (wisent-ISVAR symbol)
2719 (setq i k) ;; break
2720 (aset actrow symbol shift-state)
2721 ;; Do not use any default reduction if there is a shift
2722 ;; for error
2723 (if (= symbol error-token-number)
2724 (setq nodefault t))))
2725 (setq i (1+ i))))
2726
2727 ;; See which tokens are an explicit error in this state (due to
2728 ;; %nonassoc). For them, record error as the action.
2729 (setq errp (aref err-table state))
2730 (when errp
2731 (setq k (errs-nerrs errp)
2732 v (errs-errs errp)
2733 i 0)
2734 (while (< i k)
2735 (aset actrow (aref v i) wisent-error-tag)
2736 (setq i (1+ i))))
2737
2738 ;; Now find the most common reduction and make it the default
2739 ;; action for this state.
2740 (when (and (>= nreds 1) (not nodefault))
2741 (if (aref consistent state)
2742 (setq default-rule (- (aref (reductions-rules redp) 0)))
2743 (setq max 0
2744 i m)
2745 (while (< i n)
2746 (setq count 0
2747 rule (- (aref LAruleno i))
2748 j 0)
2749 (while (< j ntokens)
2750 (if (and (numberp (aref actrow j))
2751 (= (aref actrow j) rule))
2752 (setq count (1+ count)))
2753 (setq j (1+ j)))
2754 (if (> count max)
2755 (setq max count
2756 default-rule rule))
2757 (setq i (1+ i)))
2758 ;; actions which match the default are replaced with zero,
2759 ;; which means "use the default"
2760 (when (> max 0)
2761 (setq j 0)
2762 (while (< j ntokens)
2763 (if (and (numberp (aref actrow j))
2764 (= (aref actrow j) default-rule))
2765 (aset actrow j nil))
2766 (setq j (1+ j)))
2767 )))
2768
2769 ;; If have no default rule, if this is the final state the default
2770 ;; is accept else it is an error. So replace any action which
2771 ;; says "error" with "use default".
2772 (when (zerop default-rule)
2773 (if (= final-state state)
2774 (setq default-rule wisent-accept-tag)
2775 (setq j 0)
2776 (while (< j ntokens)
2777 (if (eq (aref actrow j) wisent-error-tag)
2778 (aset actrow j nil))
2779 (setq j (1+ j)))
2780 (setq default-rule wisent-error-tag)))
2781 default-rule))
2782
2783 (defconst wisent-default-tag 'default
2784 "Tag used in an action table to indicate a default action.")
2785
2786 ;; These variables only exist locally in the function
2787 ;; `wisent-state-actions' and are shared by all other nested callees.
2788 (wisent-defcontext semantic-actions
2789 ;; Uninterned symbols used in code generation.
2790 stack sp gotos state
2791 ;; Name of the current semantic action
2792 NAME)
2793
2794 (defun wisent-state-actions ()
2795 "Figure out the actions for every state.
2796 Return the action table."
2797 ;; Store the semantic action obarray in (unused) RCODE[0].
2798 (aset rcode 0 (make-vector 13 0))
2799 (let (i j action-table actrow action)
2800 (setq action-table (make-vector nstates nil)
2801 actrow (make-vector ntokens nil)
2802 i 0)
2803 (wisent-with-context semantic-actions
2804 (setq stack (make-symbol "stack")
2805 sp (make-symbol "sp")
2806 gotos (make-symbol "gotos")
2807 state (make-symbol "state"))
2808 (while (< i nstates)
2809 (setq action (wisent-action-row i actrow))
2810 ;; Translate a reduction into semantic action
2811 (and (integerp action) (< action 0)
2812 (setq action (wisent-semantic-action (- action))))
2813 (aset action-table i (list (cons wisent-default-tag action)))
2814 (setq j 0)
2815 (while (< j ntokens)
2816 (when (setq action (aref actrow j))
2817 ;; Translate a reduction into semantic action
2818 (and (integerp action) (< action 0)
2819 (setq action (wisent-semantic-action (- action))))
2820 (aset action-table i (cons (cons (aref tags j) action)
2821 (aref action-table i)))
2822 )
2823 (setq j (1+ j)))
2824 (aset action-table i (nreverse (aref action-table i)))
2825 (setq i (1+ i)))
2826 action-table)))
2827
2828 (defun wisent-goto-actions ()
2829 "Figure out what to do after reducing with each rule.
2830 Depending on the saved state from before the beginning of parsing the
2831 data that matched this rule. Return the goto table."
2832 (let (i j m n symbol state goto-table)
2833 (setq goto-table (make-vector nstates nil)
2834 i ntokens)
2835 (while (< i nsyms)
2836 (setq symbol (- i ntokens)
2837 m (aref goto-map symbol)
2838 n (aref goto-map (1+ symbol))
2839 j m)
2840 (while (< j n)
2841 (setq state (aref from-state j))
2842 (aset goto-table state
2843 (cons (cons (aref tags i) (aref to-state j))
2844 (aref goto-table state)))
2845 (setq j (1+ j)))
2846 (setq i (1+ i)))
2847 goto-table))
2848
2849 (defsubst wisent-quote-p (sym)
2850 "Return non-nil if SYM is bound to the `quote' function."
2851 (condition-case nil
2852 (eq (indirect-function sym)
2853 (indirect-function 'quote))
2854 (error nil)))
2855
2856 (defsubst wisent-backquote-p (sym)
2857 "Return non-nil if SYM is bound to the `backquote' function."
2858 (condition-case nil
2859 (eq (indirect-function sym)
2860 (indirect-function 'backquote))
2861 (error nil)))
2862
2863 (defun wisent-check-$N (x m)
2864 "Return non-nil if X is a valid $N or $regionN symbol.
2865 That is if X is a $N or $regionN symbol with N >= 1 and N <= M.
2866 Also warn if X is a $N or $regionN symbol with N < 1 or N > M."
2867 (when (symbolp x)
2868 (let* ((n (symbol-name x))
2869 (i (and (string-match "\\`\\$\\(region\\)?\\([0-9]+\\)\\'" n)
2870 (string-to-number (match-string 2 n)))))
2871 (when i
2872 (if (and (>= i 1) (<= i m))
2873 t
2874 (message
2875 "*** In %s, %s might be a free variable (rule has %s)"
2876 NAME x (format (cond ((< m 1) "no component")
2877 ((= m 1) "%d component")
2878 ("%d components"))
2879 m))
2880 nil)))))
2881
2882 (defun wisent-semantic-action-expand-body (body n &optional found)
2883 "Parse BODY of semantic action.
2884 N is the maximum number of $N variables that can be referenced in
2885 BODY. Warn on references out of permitted range.
2886 Optional argument FOUND is the accumulated list of '$N' references
2887 encountered so far.
2888 Return a cons (FOUND . XBODY), where FOUND is the list of $N
2889 references found in BODY, and XBODY is BODY expression with
2890 `backquote' forms expanded."
2891 (if (not (listp body))
2892 ;; BODY is an atom, no expansion needed
2893 (progn
2894 (if (wisent-check-$N body n)
2895 ;; Accumulate $i symbol
2896 (add-to-list 'found body))
2897 (cons found body))
2898 ;; BODY is a list, expand inside it
2899 (let (xbody sexpr)
2900 ;; If backquote expand it first
2901 (if (wisent-backquote-p (car body))
2902 (setq body (macroexpand body)))
2903 (while body
2904 (setq sexpr (car body)
2905 body (cdr body))
2906 (cond
2907 ;; Function call excepted quote expression
2908 ((and (consp sexpr)
2909 (not (wisent-quote-p (car sexpr))))
2910 (setq sexpr (wisent-semantic-action-expand-body sexpr n found)
2911 found (car sexpr)
2912 sexpr (cdr sexpr)))
2913 ;; $i symbol
2914 ((wisent-check-$N sexpr n)
2915 ;; Accumulate $i symbol
2916 (add-to-list 'found sexpr))
2917 )
2918 ;; Accumulate expanded forms
2919 (setq xbody (nconc xbody (list sexpr))))
2920 (cons found xbody))))
2921
2922 (defun wisent-semantic-action (r)
2923 "Set up the Elisp function for semantic action at rule R.
2924 On entry RCODE[R] contains a vector [BODY N (NTERM I)] where BODY is the
2925 body of the semantic action, N is the maximum number of values
2926 available in the parser's stack, NTERM is the nonterminal the semantic
2927 action belongs to, and I is the index of the semantic action inside
2928 NTERM definition. Return the semantic action symbol.
2929 The semantic action function accepts three arguments:
2930
2931 - the state/value stack
2932 - the top-of-stack index
2933 - the goto table
2934
2935 And returns the updated top-of-stack index."
2936 (if (not (aref ruseful r))
2937 (aset rcode r nil)
2938 (let* ((actn (aref rcode r))
2939 (n (aref actn 1)) ; nb of val avail. in stack
2940 (NAME (apply 'format "%s:%d" (aref actn 2)))
2941 (form (wisent-semantic-action-expand-body (aref actn 0) n))
2942 ($l (car form)) ; list of $vars used in body
2943 (form (cdr form)) ; expanded form of body
2944 (nt (aref rlhs r)) ; nonterminal item no.
2945 (bl nil) ; `let*' binding list
2946 $v i j)
2947
2948 ;; Compute $N and $regionN bindings
2949 (setq i n)
2950 (while (> i 0)
2951 (setq j (1+ (* 2 (- n i))))
2952 ;; Only bind $regionI if used in action
2953 (setq $v (intern (format "$region%d" i)))
2954 (if (memq $v $l)
2955 (setq bl (cons `(,$v (cdr (aref ,stack (- ,sp ,j)))) bl)))
2956 ;; Only bind $I if used in action
2957 (setq $v (intern (format "$%d" i)))
2958 (if (memq $v $l)
2959 (setq bl (cons `(,$v (car (aref ,stack (- ,sp ,j)))) bl)))
2960 (setq i (1- i)))
2961
2962 ;; Compute J, the length of rule's RHS. It will give the
2963 ;; current parser state at STACK[SP - 2*J], and where to push
2964 ;; the new semantic value and the next state, respectively at:
2965 ;; STACK[SP - 2*J + 1] and STACK[SP - 2*J + 2]. Generally N,
2966 ;; the maximum number of values available in the stack, is equal
2967 ;; to J. But, for mid-rule actions, N is the number of rule
2968 ;; elements before the action and J is always 0 (empty rule).
2969 (setq i (aref rrhs r)
2970 j 0)
2971 (while (> (aref ritem i) 0)
2972 (setq j (1+ j)
2973 i (1+ i)))
2974
2975 ;; Create the semantic action symbol.
2976 (setq actn (intern NAME (aref rcode 0)))
2977
2978 ;; Store source code in function cell of the semantic action
2979 ;; symbol. It will be byte-compiled at automaton's compilation
2980 ;; time. Using a byte-compiled automaton can significantly
2981 ;; speed up parsing!
2982 (fset actn
2983 `(lambda (,stack ,sp ,gotos)
2984 (let* (,@bl
2985 ($region
2986 ,(cond
2987 ((= n 1)
2988 (if (assq '$region1 bl)
2989 '$region1
2990 `(cdr (aref ,stack (1- ,sp)))))
2991 ((> n 1)
2992 `(wisent-production-bounds
2993 ,stack (- ,sp ,(1- (* 2 n))) (1- ,sp)))))
2994 ($action ,NAME)
2995 ($nterm ',(aref tags nt))
2996 ,@(and (> j 0) `((,sp (- ,sp ,(* j 2)))))
2997 (,state (cdr (assq $nterm
2998 (aref ,gotos
2999 (aref ,stack ,sp))))))
3000 (setq ,sp (+ ,sp 2))
3001 ;; push semantic value
3002 (aset ,stack (1- ,sp) (cons ,form $region))
3003 ;; push next state
3004 (aset ,stack ,sp ,state)
3005 ;; return new top of stack
3006 ,sp)))
3007
3008 ;; Return the semantic action symbol
3009 actn)))
3010 \f
3011 ;;;; ----------------------------
3012 ;;;; Build parser LALR automaton.
3013 ;;;; ----------------------------
3014
3015 (defun wisent-parser-automaton ()
3016 "Compute and return LALR(1) automaton from GRAMMAR.
3017 GRAMMAR is in internal format. GRAM/ACTS are grammar rules
3018 in internal format. STARTS defines the start symbols."
3019 ;; Check for useless stuff
3020 (wisent-reduce-grammar)
3021
3022 (wisent-set-derives)
3023 (wisent-set-nullable)
3024 ;; convert to nondeterministic finite state machine.
3025 (wisent-generate-states)
3026 ;; make it deterministic.
3027 (wisent-lalr)
3028 ;; Find and record any conflicts: places where one token of
3029 ;; lookahead is not enough to disambiguate the parsing. Also
3030 ;; resolve s/r conflicts based on precedence declarations.
3031 (wisent-resolve-conflicts)
3032 (wisent-print-results)
3033
3034 (vector (wisent-state-actions) ; action table
3035 (wisent-goto-actions) ; goto table
3036 start-table ; start symbols
3037 (aref rcode 0) ; sem. action symbol obarray
3038 )
3039 )
3040 \f
3041 ;;;; -------------------
3042 ;;;; Parse input grammar
3043 ;;;; -------------------
3044
3045 (defconst wisent-reserved-symbols (list wisent-error-term)
3046 "The list of reserved symbols.
3047 Also all symbols starting with a character defined in
3048 `wisent-reserved-capitals' are reserved for internal use.")
3049
3050 (defconst wisent-reserved-capitals '(?\$ ?\@)
3051 "The list of reserved capital letters.
3052 All symbol starting with one of these letters are reserved for
3053 internal use.")
3054
3055 (defconst wisent-starts-nonterm '$STARTS
3056 "Main start symbol.
3057 It gives the rules for start symbols.")
3058
3059 (defvar wisent-single-start-flag nil
3060 "Non-nil means allows only one start symbol like in Bison.
3061 That is don't add extra start rules to the grammar. This is
3062 useful to compare the Wisent's generated automaton with the Bison's
3063 one.")
3064
3065 (defsubst wisent-ISVALID-VAR (x)
3066 "Return non-nil if X is a character or an allowed symbol."
3067 (and x (symbolp x)
3068 (not (memq (aref (symbol-name x) 0) wisent-reserved-capitals))
3069 (not (memq x wisent-reserved-symbols))))
3070
3071 (defsubst wisent-ISVALID-TOKEN (x)
3072 "Return non-nil if X is a character or an allowed symbol."
3073 (or (wisent-char-p x)
3074 (wisent-ISVALID-VAR x)))
3075
3076 (defun wisent-push-token (symbol &optional nocheck)
3077 "Push a new SYMBOL in the list of tokens.
3078 Bypass checking if NOCHECK is non-nil."
3079 ;; Check
3080 (or nocheck (wisent-ISVALID-TOKEN symbol)
3081 (error "Invalid terminal symbol: %S" symbol))
3082 (if (memq symbol token-list)
3083 (message "*** duplicate terminal `%s' ignored" symbol)
3084 ;; Set up properties
3085 (wisent-set-prec symbol nil)
3086 (wisent-set-assoc symbol nil)
3087 (wisent-set-item-number symbol ntokens)
3088 ;; Add
3089 (setq ntokens (1+ ntokens)
3090 token-list (cons symbol token-list))))
3091
3092 (defun wisent-push-var (symbol &optional nocheck)
3093 "Push a new SYMBOL in the list of nonterminals.
3094 Bypass checking if NOCHECK is non-nil."
3095 ;; Check
3096 (unless nocheck
3097 (or (wisent-ISVALID-VAR symbol)
3098 (error "Invalid nonterminal symbol: %S" symbol))
3099 (if (memq symbol var-list)
3100 (error "Nonterminal `%s' already defined" symbol)))
3101 ;; Set up properties
3102 (wisent-set-item-number symbol nvars)
3103 ;; Add
3104 (setq nvars (1+ nvars)
3105 var-list (cons symbol var-list)))
3106
3107 (defun wisent-parse-nonterminals (defs)
3108 "Parse nonterminal definitions in DEFS.
3109 Fill in each element of the global arrays RPREC, RCODE, RUSEFUL with
3110 respectively rule precedence level, semantic action code and
3111 usefulness flag. Return a list of rules of the form (LHS . RHS) where
3112 LHS and RHS are respectively the Left Hand Side and Right Hand Side of
3113 the rule."
3114 (setq rprec nil
3115 rcode nil
3116 nitems 0
3117 nrules 0)
3118 (let (def nonterm rlist rule rules rhs rest item items
3119 rhl plevel semact @n @count iactn)
3120 (setq @count 0)
3121 (while defs
3122 (setq def (car defs)
3123 defs (cdr defs)
3124 nonterm (car def)
3125 rlist (cdr def)
3126 iactn 0)
3127 (or (consp rlist)
3128 (error "Invalid nonterminal definition syntax: %S" def))
3129 (while rlist
3130 (setq rule (car rlist)
3131 rlist (cdr rlist)
3132 items (car rule)
3133 rest (cdr rule)
3134 rhl 0
3135 rhs nil)
3136
3137 ;; Check & count items
3138 (setq nitems (1+ nitems)) ;; LHS item
3139 (while items
3140 (setq item (car items)
3141 items (cdr items)
3142 nitems (1+ nitems)) ;; RHS items
3143 (if (listp item)
3144 ;; Mid-rule action
3145 (progn
3146 (setq @count (1+ @count)
3147 @n (intern (format "@%d" @count)))
3148 (wisent-push-var @n t)
3149 ;; Push a new empty rule with the mid-rule action
3150 (setq semact (vector item rhl (list nonterm iactn))
3151 iactn (1+ iactn)
3152 plevel nil
3153 rcode (cons semact rcode)
3154 rprec (cons plevel rprec)
3155 item @n ;; Replace action by @N nonterminal
3156 rules (cons (list item) rules)
3157 nitems (1+ nitems)
3158 nrules (1+ nrules)))
3159 ;; Check terminal or nonterminal symbol
3160 (cond
3161 ((or (memq item token-list) (memq item var-list)))
3162 ;; Create new literal character token
3163 ((wisent-char-p item) (wisent-push-token item t))
3164 ((error "Symbol `%s' is used, but is not defined as a token and has no rules"
3165 item))))
3166 (setq rhl (1+ rhl)
3167 rhs (cons item rhs)))
3168
3169 ;; Check & collect rule precedence level
3170 (setq plevel (when (vectorp (car rest))
3171 (setq item (car rest)
3172 rest (cdr rest))
3173 (if (and (= (length item) 1)
3174 (memq (aref item 0) token-list)
3175 (wisent-prec (aref item 0)))
3176 (wisent-item-number (aref item 0))
3177 (error "Invalid rule precedence level syntax: %S" item)))
3178 rprec (cons plevel rprec))
3179
3180 ;; Check & collect semantic action body
3181 (setq semact (vector
3182 (if rest
3183 (if (cdr rest)
3184 (error "Invalid semantic action syntax: %S" rest)
3185 (car rest))
3186 ;; Give a default semantic action body: nil
3187 ;; for an empty rule or $1, the value of the
3188 ;; first symbol in the rule, otherwise.
3189 (if (> rhl 0) '$1 '()))
3190 rhl
3191 (list nonterm iactn))
3192 iactn (1+ iactn)
3193 rcode (cons semact rcode))
3194 (setq rules (cons (cons nonterm (nreverse rhs)) rules)
3195 nrules (1+ nrules))))
3196
3197 (setq ruseful (make-vector (1+ nrules) t)
3198 rprec (vconcat (cons nil (nreverse rprec)))
3199 rcode (vconcat (cons nil (nreverse rcode))))
3200 (nreverse rules)
3201 ))
3202
3203 (defun wisent-parse-grammar (grammar &optional start-list)
3204 "Parse GRAMMAR and build a suitable internal representation.
3205 Optional argument START-LIST defines the start symbols.
3206 GRAMMAR is a list of form: (TOKENS ASSOCS . NONTERMS)
3207
3208 TOKENS is a list of terminal symbols (tokens).
3209
3210 ASSOCS is nil or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements
3211 describing the associativity of TOKENS. ASSOC-TYPE must be one of the
3212 `default-prec' `nonassoc', `left' or `right' symbols. When ASSOC-TYPE
3213 is `default-prec', ASSOC-VALUE must be nil or t (the default).
3214 Otherwise it is a list of tokens which must have been previously
3215 declared in TOKENS.
3216
3217 NONTERMS is the list of non terminal definitions (see function
3218 `wisent-parse-nonterminals')."
3219 (or (and (consp grammar) (> (length grammar) 2))
3220 (error "Bad input grammar"))
3221
3222 (let (i r rhs pre dpre lst start-var assoc rules item
3223 token var def tokens defs ep-token ep-var ep-def)
3224
3225 ;; Built-in tokens
3226 (setq ntokens 0 nvars 0)
3227 (wisent-push-token wisent-eoi-term t)
3228 (wisent-push-token wisent-error-term t)
3229
3230 ;; Check/collect terminals
3231 (setq lst (car grammar))
3232 (while lst
3233 (wisent-push-token (car lst))
3234 (setq lst (cdr lst)))
3235
3236 ;; Check/Set up tokens precedence & associativity
3237 (setq lst (nth 1 grammar)
3238 pre 0
3239 defs nil
3240 dpre nil
3241 default-prec t)
3242 (while lst
3243 (setq def (car lst)
3244 assoc (car def)
3245 tokens (cdr def)
3246 lst (cdr lst))
3247 (if (eq assoc 'default-prec)
3248 (progn
3249 (or (null (cdr tokens))
3250 (memq (car tokens) '(t nil))
3251 (error "Invalid default-prec value: %S" tokens))
3252 (setq default-prec (car tokens))
3253 (if dpre
3254 (message "*** redefining default-prec to %s"
3255 default-prec))
3256 (setq dpre t))
3257 (or (memq assoc '(left right nonassoc))
3258 (error "Invalid associativity syntax: %S" assoc))
3259 (setq pre (1+ pre))
3260 (while tokens
3261 (setq token (car tokens)
3262 tokens (cdr tokens))
3263 (if (memq token defs)
3264 (message "*** redefining precedence of `%s'" token))
3265 (or (memq token token-list)
3266 ;; Define token not previously declared.
3267 (wisent-push-token token))
3268 (setq defs (cons token defs))
3269 ;; Record the precedence and associativity of the terminal.
3270 (wisent-set-prec token pre)
3271 (wisent-set-assoc token assoc))))
3272
3273 ;; Check/Collect nonterminals
3274 (setq lst (nthcdr 2 grammar)
3275 defs nil)
3276 (while lst
3277 (setq def (car lst)
3278 lst (cdr lst))
3279 (or (consp def)
3280 (error "Invalid nonterminal definition: %S" def))
3281 (if (memq (car def) token-list)
3282 (error "Nonterminal `%s' already defined as token" (car def)))
3283 (wisent-push-var (car def))
3284 (setq defs (cons def defs)))
3285 (or defs
3286 (error "No input grammar"))
3287 (setq defs (nreverse defs))
3288
3289 ;; Set up the start symbol.
3290 (setq start-table nil)
3291 (cond
3292
3293 ;; 1. START-LIST is nil, the start symbol is the first
3294 ;; nonterminal defined in the grammar (Bison like).
3295 ((null start-list)
3296 (setq start-var (caar defs)))
3297
3298 ;; 2. START-LIST contains only one element, it is the start
3299 ;; symbol (Bison like).
3300 ((or wisent-single-start-flag (null (cdr start-list)))
3301 (setq start-var (car start-list))
3302 (or (assq start-var defs)
3303 (error "Start symbol `%s' has no rule" start-var)))
3304
3305 ;; 3. START-LIST contains more than one element. All defines
3306 ;; potential start symbols. One of them (the first one by
3307 ;; default) will be given at parse time to be the parser goal.
3308 ;; If `wisent-single-start-flag' is non-nil that feature is
3309 ;; disabled and the first nonterminal in START-LIST defines
3310 ;; the start symbol, like in case 2 above.
3311 ((not wisent-single-start-flag)
3312
3313 ;; START-LIST is a list of nonterminals '(nt0 ... ntN).
3314 ;; Build and push ad hoc start rules in the grammar:
3315
3316 ;; ($STARTS ((nt0) $1) ((nt1) $1) ... ((ntN) $1))
3317 ;; ($nt1 (($$nt1 nt1) $2))
3318 ;; ...
3319 ;; ($ntN (($$ntN ntN) $2))
3320
3321 ;; Where internal symbols $ntI and $$ntI are respectively
3322 ;; nonterminals and terminals.
3323
3324 ;; The internal start symbol $STARTS is used to build the
3325 ;; LALR(1) automaton. The true default start symbol used by the
3326 ;; parser is the first nonterminal in START-LIST (nt0).
3327 (setq start-var wisent-starts-nonterm
3328 lst (nreverse start-list))
3329 (while lst
3330 (setq var (car lst)
3331 lst (cdr lst))
3332 (or (memq var var-list)
3333 (error "Start symbol `%s' has no rule" var))
3334 (unless (assq var start-table) ;; Ignore duplicates
3335 ;; For each nt start symbol
3336 (setq ep-var (intern (format "$%s" var))
3337 ep-token (intern (format "$$%s" var)))
3338 (wisent-push-token ep-token t)
3339 (wisent-push-var ep-var t)
3340 (setq
3341 ;; Add entry (nt . $$nt) to start-table
3342 start-table (cons (cons var ep-token) start-table)
3343 ;; Add rule ($nt (($$nt nt) $2))
3344 defs (cons (list ep-var (list (list ep-token var) '$2)) defs)
3345 ;; Add start rule (($nt) $1)
3346 ep-def (cons (list (list ep-var) '$1) ep-def))
3347 ))
3348 (wisent-push-var start-var t)
3349 (setq defs (cons (cons start-var ep-def) defs))))
3350
3351 ;; Set up rules main data structure & RPREC, RCODE, RUSEFUL
3352 (setq rules (wisent-parse-nonterminals defs))
3353
3354 ;; Set up the terminal & nonterminal lists.
3355 (setq nsyms (+ ntokens nvars)
3356 token-list (nreverse token-list)
3357 lst var-list
3358 var-list nil)
3359 (while lst
3360 (setq var (car lst)
3361 lst (cdr lst)
3362 var-list (cons var var-list))
3363 (wisent-set-item-number ;; adjust nonterminal item number to
3364 var (+ ntokens (wisent-item-number var)))) ;; I += NTOKENS
3365
3366 ;; Store special item numbers
3367 (setq error-token-number (wisent-item-number wisent-error-term)
3368 start-symbol (wisent-item-number start-var))
3369
3370 ;; Keep symbols in the TAGS vector so that TAGS[I] is the symbol
3371 ;; associated to item number I.
3372 (setq tags (vconcat token-list var-list))
3373 ;; Set up RLHS RRHS & RITEM data structures from list of rules
3374 ;; (LHS . RHS) received from `wisent-parse-nonterminals'.
3375 (setq rlhs (make-vector (1+ nrules) nil)
3376 rrhs (make-vector (1+ nrules) nil)
3377 ritem (make-vector (1+ nitems) nil)
3378 i 0
3379 r 1)
3380 (while rules
3381 (aset rlhs r (wisent-item-number (caar rules)))
3382 (aset rrhs r i)
3383 (setq rhs (cdar rules)
3384 pre nil)
3385 (while rhs
3386 (setq item (wisent-item-number (car rhs)))
3387 ;; Get default precedence level of rule, that is the
3388 ;; precedence of the last terminal in it.
3389 (and (wisent-ISTOKEN item)
3390 default-prec
3391 (setq pre item))
3392
3393 (aset ritem i item)
3394 (setq i (1+ i)
3395 rhs (cdr rhs)))
3396 ;; Setup the precedence level of the rule, that is the one
3397 ;; specified by %prec or the default one.
3398 (and (not (aref rprec r)) ;; Already set by %prec
3399 pre
3400 (wisent-prec (aref tags pre))
3401 (aset rprec r pre))
3402 (aset ritem i (- r))
3403 (setq i (1+ i)
3404 r (1+ r))
3405 (setq rules (cdr rules)))
3406 ))
3407 \f
3408 ;;;; ---------------------
3409 ;;;; Compile input grammar
3410 ;;;; ---------------------
3411
3412 (defun wisent-compile-grammar (grammar &optional start-list)
3413 "Compile the LALR(1) GRAMMAR.
3414
3415 GRAMMAR is a list (TOKENS ASSOCS . NONTERMS) where:
3416
3417 - TOKENS is a list of terminal symbols (tokens).
3418
3419 - ASSOCS is nil, or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements
3420 describing the associativity of TOKENS. ASSOC-TYPE must be one of
3421 the `default-prec' `nonassoc', `left' or `right' symbols. When
3422 ASSOC-TYPE is `default-prec', ASSOC-VALUE must be nil or t (the
3423 default). Otherwise it is a list of tokens which must have been
3424 previously declared in TOKENS.
3425
3426 - NONTERMS is a list of nonterminal definitions.
3427
3428 Optional argument START-LIST specify the possible grammar start
3429 symbols. This is a list of nonterminals which must have been
3430 previously declared in GRAMMAR's NONTERMS form. By default, the start
3431 symbol is the first nonterminal defined. When START-LIST contains
3432 only one element, it is the start symbol. Otherwise, all elements are
3433 possible start symbols, unless `wisent-single-start-flag' is non-nil.
3434 In that case, the first element is the start symbol, and others are
3435 ignored.
3436
3437 Return an automaton as a vector: [ACTIONS GOTOS STARTS FUNCTIONS]
3438 where:
3439
3440 - ACTIONS is a state/token matrix telling the parser what to do at
3441 every state based on the current lookahead token. That is shift,
3442 reduce, accept or error.
3443
3444 - GOTOS is a state/nonterminal matrix telling the parser the next
3445 state to go to after reducing with each rule.
3446
3447 - STARTS is an alist which maps the allowed start nonterminal symbols
3448 to tokens that will be first shifted into the parser stack.
3449
3450 - FUNCTIONS is an obarray of semantic action symbols. Each symbol's
3451 function definition is the semantic action lambda expression."
3452 (if (wisent-automaton-p grammar)
3453 grammar ;; Grammar already compiled just return it
3454 (wisent-with-context compile-grammar
3455 (let* ((gc-cons-threshold 1000000))
3456 (garbage-collect)
3457 (setq wisent-new-log-flag t)
3458 ;; Parse input grammar
3459 (wisent-parse-grammar grammar start-list)
3460 ;; Generate the LALR(1) automaton
3461 (wisent-parser-automaton)))))
3462 \f
3463 ;;;; --------------------------
3464 ;;;; Byte compile input grammar
3465 ;;;; --------------------------
3466
3467 (require 'bytecomp)
3468
3469 (defun wisent-byte-compile-grammar (form)
3470 "Byte compile the `wisent-compile-grammar' FORM.
3471 Automatically called by the Emacs Lisp byte compiler as a
3472 `byte-compile' handler."
3473 ;; Eval the `wisent-compile-grammar' form to obtain an LALR
3474 ;; automaton internal data structure. Then, because the internal
3475 ;; data structure contains an obarray, convert it to a lisp form so
3476 ;; it can be byte-compiled.
3477 (byte-compile-form
3478 ;; FIXME: we macroexpand here since `byte-compile-form' expects
3479 ;; macroexpanded code, but that's just a workaround: for lexical-binding
3480 ;; the lisp form should have to pass through closure-conversion and
3481 ;; `wisent-byte-compile-grammar' is called much too late for that.
3482 ;; Why isn't this `wisent-automaton-lisp-form' performed at
3483 ;; macroexpansion time? --Stef
3484 (macroexpand-all
3485 (wisent-automaton-lisp-form (eval form)))))
3486
3487 ;; FIXME: We shouldn't use a `byte-compile' handler. Maybe using a hash-table
3488 ;; instead of an obarray would work around the problem that obarrays
3489 ;; aren't printable. Then (put 'wisent-compile-grammar 'side-effect-free t).
3490 (put 'wisent-compile-grammar 'byte-compile 'wisent-byte-compile-grammar)
3491
3492 (defun wisent-automaton-lisp-form (automaton)
3493 "Return a Lisp form that produces AUTOMATON.
3494 See also `wisent-compile-grammar' for more details on AUTOMATON."
3495 (or (wisent-automaton-p automaton)
3496 (signal 'wrong-type-argument
3497 (list 'wisent-automaton-p automaton)))
3498 (let ((obn (make-symbol "ob")) ; Generated obarray name
3499 (obv (aref automaton 3)) ; Semantic actions obarray
3500 )
3501 `(let ((,obn (make-vector 13 0)))
3502 ;; Generate code to initialize the semantic actions obarray,
3503 ;; in local variable OBN.
3504 ,@(let (obcode)
3505 (mapatoms
3506 #'(lambda (s)
3507 (setq obcode
3508 (cons `(fset (intern ,(symbol-name s) ,obn)
3509 #',(symbol-function s))
3510 obcode)))
3511 obv)
3512 obcode)
3513 ;; Generate code to create the automaton.
3514 (vector
3515 ;; In code generated to initialize the action table, take
3516 ;; care of symbols that are interned in the semantic actions
3517 ;; obarray.
3518 (vector
3519 ,@(mapcar
3520 #'(lambda (state) ;; for each state
3521 `(list
3522 ,@(mapcar
3523 #'(lambda (tr) ;; for each transition
3524 (let ((k (car tr)) ; token
3525 (a (cdr tr))) ; action
3526 (if (and (symbolp a)
3527 (intern-soft (symbol-name a) obv))
3528 `(cons ,(if (symbolp k) `(quote ,k) k)
3529 (intern-soft ,(symbol-name a) ,obn))
3530 `(quote ,tr))))
3531 state)))
3532 (aref automaton 0)))
3533 ;; The code of the goto table is unchanged.
3534 ,(aref automaton 1)
3535 ;; The code of the alist of start symbols is unchanged.
3536 ',(aref automaton 2)
3537 ;; The semantic actions obarray is in the local variable OBN.
3538 ,obn))))
3539
3540 (provide 'semantic/wisent/comp)
3541
3542 ;;; semantic/wisent/comp.el ends here