]> code.delx.au - gnu-emacs/blob - lisp/cedet/semantic/wisent/comp.el
585c11a05d39984cba8fbdbb56c8c2e1377640b6
[gnu-emacs] / lisp / cedet / semantic / wisent / comp.el
1 ;;; semantic/wisent/comp.el --- GNU Bison for Emacs - Grammar compiler
2
3 ;; Copyright (C) 1984, 1986, 1989, 1992, 1995, 2000-2007, 2009-2015 Free
4 ;; Software Foundation, Inc.
5
6 ;; Author: David Ponce <david@dponce.com>
7 ;; Maintainer: David Ponce <david@dponce.com>
8 ;; Created: 30 January 2002
9 ;; Keywords: syntax
10
11 ;; This file is part of GNU Emacs.
12
13 ;; GNU Emacs is free software: you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation, either version 3 of the License, or
16 ;; (at your option) any later version.
17
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
22
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25
26 ;;; Commentary:
27 ;;
28 ;; Grammar compiler that produces Wisent's LALR automatons.
29 ;;
30 ;; Wisent (the European Bison ;-) is an Elisp implementation of the
31 ;; GNU Compiler Compiler Bison. The Elisp code is a port of the C
32 ;; code of GNU Bison 1.28 & 1.31.
33 ;;
34 ;; For more details on the basic concepts for understanding Wisent,
35 ;; read the Bison manual ;)
36 ;;
37 ;; For more details on Wisent itself read the Wisent manual.
38
39 ;;; History:
40 ;;
41
42 ;;; Code:
43 (require 'semantic/wisent)
44 (eval-when-compile (require 'cl))
45 \f
46 ;;;; -------------------
47 ;;;; Misc. useful things
48 ;;;; -------------------
49
50 ;; As much as possible I would like to keep the name of global
51 ;; variables used in Bison without polluting too much the Elisp global
52 ;; name space. Elisp dynamic binding allows that ;-)
53
54 ;; Here are simple macros to easily define and use set of variables
55 ;; bound locally, without all these "reference to free variable"
56 ;; compiler warnings!
57
58 (defmacro wisent-context-name (name)
59 "Return the context name from NAME."
60 `(if (and ,name (symbolp ,name))
61 (intern (format "wisent-context-%s" ,name))
62 (error "Invalid context name: %S" ,name)))
63
64 (defmacro wisent-context-bindings (name)
65 "Return the variables in context NAME."
66 `(symbol-value (wisent-context-name ,name)))
67
68 (defmacro wisent-defcontext (name &rest vars)
69 "Define a context NAME that will bind variables VARS."
70 (declare (indent 1))
71 (let* ((context (wisent-context-name name))
72 (declarations (mapcar #'(lambda (v) (list 'defvar v)) vars)))
73 `(progn
74 ,@declarations
75 (eval-when-compile
76 (defvar ,context ',vars)))))
77
78 (defmacro wisent-with-context (name &rest body)
79 "Bind variables in context NAME then eval BODY."
80 (declare (indent 1))
81 (let ((bindings (wisent-context-bindings name)))
82 `(progn
83 ,@(mapcar (lambda (binding) `(defvar ,(or (car-safe binding) binding)))
84 bindings)
85 (let* ,bindings
86 ,@body))))
87
88 ;; A naive implementation of data structures! But it suffice here ;-)
89
90 (defmacro wisent-struct (name &rest fields)
91 "Define a simple data structure called NAME.
92 Which contains data stored in FIELDS. FIELDS is a list of symbols
93 which are field names or pairs (FIELD INITIAL-VALUE) where
94 INITIAL-VALUE is a constant used as the initial value of FIELD when
95 the data structure is created. INITIAL-VALUE defaults to nil.
96
97 This defines a `make-NAME' constructor, get-able `NAME-FIELD' and
98 set-able `set-NAME-FIELD' accessors."
99 (let ((size (length fields))
100 (i 0)
101 accors field sufx fun ivals)
102 (while (< i size)
103 (setq field (car fields)
104 fields (cdr fields))
105 (if (consp field)
106 (setq ivals (cons (cadr field) ivals)
107 field (car field))
108 (setq ivals (cons nil ivals)))
109 (setq sufx (format "%s-%s" name field)
110 fun (intern (format "%s" sufx))
111 accors (cons `(defmacro ,fun (s)
112 (list 'aref s ,i))
113 accors)
114 fun (intern (format "set-%s" sufx))
115 accors (cons `(defmacro ,fun (s v)
116 (list 'aset s ,i v))
117 accors)
118 i (1+ i)))
119 `(progn
120 (defmacro ,(intern (format "make-%s" name)) ()
121 (cons 'vector ',(nreverse ivals)))
122 ,@accors)))
123 (put 'wisent-struct 'lisp-indent-function 1)
124
125 ;; Other utilities
126
127 (defsubst wisent-pad-string (s n &optional left)
128 "Fill string S with spaces.
129 Return a new string of at least N characters. Insert spaces on right.
130 If optional LEFT is non-nil insert spaces on left."
131 (let ((i (length s)))
132 (if (< i n)
133 (if left
134 (concat (make-string (- n i) ?\ ) s)
135 (concat s (make-string (- n i) ?\ )))
136 s)))
137 \f
138 ;;;; ------------------------
139 ;;;; Environment dependencies
140 ;;;; ------------------------
141
142 (defconst wisent-BITS-PER-WORD
143 (let ((i 1)
144 (do-shift (if (boundp 'most-positive-fixnum)
145 (lambda (i) (lsh most-positive-fixnum (- i)))
146 (lambda (i) (lsh 1 i)))))
147 (while (not (zerop (funcall do-shift i)))
148 (setq i (1+ i)))
149 i))
150
151 (defsubst wisent-WORDSIZE (n)
152 "(N + BITS-PER-WORD - 1) / BITS-PER-WORD."
153 (/ (1- (+ n wisent-BITS-PER-WORD)) wisent-BITS-PER-WORD))
154
155 (defsubst wisent-SETBIT (x i)
156 "X[I/BITS-PER-WORD] |= 1 << (I % BITS-PER-WORD)."
157 (let ((k (/ i wisent-BITS-PER-WORD)))
158 (aset x k (logior (aref x k)
159 (lsh 1 (% i wisent-BITS-PER-WORD))))))
160
161 (defsubst wisent-RESETBIT (x i)
162 "X[I/BITS-PER-WORD] &= ~(1 << (I % BITS-PER-WORD))."
163 (let ((k (/ i wisent-BITS-PER-WORD)))
164 (aset x k (logand (aref x k)
165 (lognot (lsh 1 (% i wisent-BITS-PER-WORD)))))))
166
167 (defsubst wisent-BITISSET (x i)
168 "(X[I/BITS-PER-WORD] & (1 << (I % BITS-PER-WORD))) != 0."
169 (not (zerop (logand (aref x (/ i wisent-BITS-PER-WORD))
170 (lsh 1 (% i wisent-BITS-PER-WORD))))))
171
172 (defsubst wisent-noninteractive ()
173 "Return non-nil if running without interactive terminal."
174 (if (featurep 'xemacs)
175 (noninteractive)
176 noninteractive))
177
178 (defvar wisent-debug-flag nil
179 "Non-nil means enable some debug stuff.")
180 \f
181 ;;;; --------------
182 ;;;; Logging/Output
183 ;;;; --------------
184 (defconst wisent-log-buffer-name "*wisent-log*"
185 "Name of the log buffer.")
186
187 (defvar wisent-new-log-flag nil
188 "Non-nil means to start a new report.")
189
190 (defvar wisent-verbose-flag nil
191 "*Non-nil means to report verbose information on generated parser.")
192
193 (defun wisent-toggle-verbose-flag ()
194 "Toggle whether to report verbose information on generated parser."
195 (interactive)
196 (setq wisent-verbose-flag (not wisent-verbose-flag))
197 (when (called-interactively-p 'interactive)
198 (message "Verbose report %sabled"
199 (if wisent-verbose-flag "en" "dis"))))
200
201 (defmacro wisent-log-buffer ()
202 "Return the log buffer.
203 Its name is defined in constant `wisent-log-buffer-name'."
204 `(get-buffer-create wisent-log-buffer-name))
205
206 (defmacro wisent-clear-log ()
207 "Delete the entire contents of the log buffer."
208 `(with-current-buffer (wisent-log-buffer)
209 (erase-buffer)))
210
211 (defvar byte-compile-current-file)
212
213 (defun wisent-source ()
214 "Return the current source file name or nil."
215 (let ((source (or (and (boundp 'byte-compile-current-file)
216 byte-compile-current-file)
217 load-file-name (buffer-file-name))))
218 (if source
219 (file-relative-name source))))
220
221 (defun wisent-new-log ()
222 "Start a new entry into the log buffer."
223 (setq wisent-new-log-flag nil)
224 (let ((text (format "\n\n*** Wisent %s - %s\n\n"
225 (or (wisent-source) (buffer-name))
226 (format-time-string "%Y-%m-%d %R"))))
227 (with-current-buffer (wisent-log-buffer)
228 (goto-char (point-max))
229 (insert text))))
230
231 (defsubst wisent-log (&rest args)
232 "Insert text into the log buffer.
233 `format-message' is applied to ARGS and the result string is inserted into the
234 log buffer returned by the function `wisent-log-buffer'."
235 (and wisent-new-log-flag (wisent-new-log))
236 (with-current-buffer (wisent-log-buffer)
237 (insert (apply #'format-message args))))
238
239 (defconst wisent-log-file "wisent.output"
240 "The log file.
241 Used when running without interactive terminal.")
242
243 (defun wisent-append-to-log-file ()
244 "Append contents of logging buffer to `wisent-log-file'."
245 (if (get-buffer wisent-log-buffer-name)
246 (condition-case err
247 (with-current-buffer (wisent-log-buffer)
248 (widen)
249 (if (> (point-max) (point-min))
250 (write-region (point-min) (point-max)
251 wisent-log-file t)))
252 (error
253 (message "*** %s" (error-message-string err))))))
254 \f
255 ;;;; -----------------------------------
256 ;;;; Representation of the grammar rules
257 ;;;; -----------------------------------
258
259 ;; ntokens is the number of tokens, and nvars is the number of
260 ;; variables (nonterminals). nsyms is the total number, ntokens +
261 ;; nvars.
262
263 ;; Each symbol (either token or variable) receives a symbol number.
264 ;; Numbers 0 to ntokens-1 are for tokens, and ntokens to nsyms-1 are
265 ;; for variables. Symbol number zero is the end-of-input token. This
266 ;; token is counted in ntokens.
267
268 ;; The rules receive rule numbers 1 to nrules in the order they are
269 ;; written. Actions and guards are accessed via the rule number.
270
271 ;; The rules themselves are described by three arrays: rrhs, rlhs and
272 ;; ritem. rlhs[R] is the symbol number of the left hand side of rule
273 ;; R. The right hand side is stored as symbol numbers in a portion of
274 ;; ritem. rrhs[R] contains the index in ritem of the beginning of the
275 ;; portion for rule R.
276
277 ;; The length of the portion is one greater than the number of symbols
278 ;; in the rule's right hand side. The last element in the portion
279 ;; contains minus R, which identifies it as the end of a portion and
280 ;; says which rule it is for.
281
282 ;; The portions of ritem come in order of increasing rule number and
283 ;; are followed by an element which is nil to mark the end. nitems is
284 ;; the total length of ritem, not counting the final nil. Each
285 ;; element of ritem is called an "item" and its index in ritem is an
286 ;; item number.
287
288 ;; Item numbers are used in the finite state machine to represent
289 ;; places that parsing can get to.
290
291 ;; The vector rprec contains for each rule, the item number of the
292 ;; symbol giving its precedence level to this rule. The precedence
293 ;; level and associativity of each symbol is recorded in respectively
294 ;; the properties 'wisent--prec and 'wisent--assoc.
295
296 ;; Precedence levels are assigned in increasing order starting with 1
297 ;; so that numerically higher precedence values mean tighter binding
298 ;; as they ought to. nil as a symbol or rule's precedence means none
299 ;; is assigned.
300
301 (defcustom wisent-state-table-size 1009
302 "The size of the state table."
303 :type 'integer
304 :group 'wisent)
305
306 ;; These variables only exist locally in the function
307 ;; `wisent-compile-grammar' and are shared by all other nested
308 ;; callees.
309 (wisent-defcontext compile-grammar
310 F LA LAruleno accessing-symbol conflicts consistent default-prec
311 derives err-table fderives final-state first-reduction first-shift
312 first-state firsts from-state goto-map includes itemset nitemset
313 kernel-base kernel-end kernel-items last-reduction last-shift
314 last-state lookaheads lookaheadset lookback maxrhs ngotos nitems
315 nrules nshifts nstates nsyms ntokens nullable nvars rassoc redset
316 reduction-table ritem rlhs rprec rrc-count rrc-total rrhs ruseful
317 rcode ruleset rulesetsize shift-symbol shift-table shiftset
318 src-count src-total start-table state-table tags this-state to-state
319 tokensetsize ;; nb of words req. to hold a bit for each rule
320 varsetsize ;; nb of words req. to hold a bit for each variable
321 error-token-number start-symbol token-list var-list
322 N P V V1 nuseless-nonterminals nuseless-productions
323 ptable ;; symbols & characters properties
324 )
325
326 (defmacro wisent-ISTOKEN (s)
327 "Return non-nil if item number S defines a token (terminal).
328 That is if S < `ntokens'."
329 `(< ,s ntokens))
330
331 (defmacro wisent-ISVAR(s)
332 "Return non-nil if item number S defines a nonterminal.
333 That is if S >= `ntokens'."
334 `(>= ,s ntokens))
335
336 (defsubst wisent-tag (s)
337 "Return printable form of item number S."
338 (wisent-item-to-string (aref tags s)))
339
340 ;; Symbol and character properties
341
342 (defsubst wisent-put (object propname value)
343 "Store OBJECT's PROPNAME property with value VALUE.
344 Use `eq' to locate OBJECT."
345 (let ((entry (assq object ptable)))
346 (or entry (setq entry (list object) ptable (cons entry ptable)))
347 (setcdr entry (plist-put (cdr entry) propname value))))
348
349 (defsubst wisent-get (object propname)
350 "Return the value of OBJECT's PROPNAME property.
351 Use `eq' to locate OBJECT."
352 (plist-get (cdr (assq object ptable)) propname))
353
354 (defsubst wisent-item-number (x)
355 "Return the item number of symbol X."
356 (wisent-get x 'wisent--item-no))
357
358 (defsubst wisent-set-item-number (x n)
359 "Set the item number of symbol X to N."
360 (wisent-put x 'wisent--item-no n))
361
362 (defsubst wisent-assoc (x)
363 "Return the associativity of symbol X."
364 (wisent-get x 'wisent--assoc))
365
366 (defsubst wisent-set-assoc (x a)
367 "Set the associativity of symbol X to A."
368 (wisent-put x 'wisent--assoc a))
369
370 (defsubst wisent-prec (x)
371 "Return the precedence level of symbol X."
372 (wisent-get x 'wisent--prec))
373
374 (defsubst wisent-set-prec (x p)
375 "Set the precedence level of symbol X to P."
376 (wisent-put x 'wisent--prec p))
377 \f
378 ;;;; ----------------------------------------------------------
379 ;;;; Type definitions for nondeterministic finite state machine
380 ;;;; ----------------------------------------------------------
381
382 ;; These type definitions are used to represent a nondeterministic
383 ;; finite state machine that parses the specified grammar. This
384 ;; information is generated by the function `wisent-generate-states'.
385
386 ;; Each state of the machine is described by a set of items --
387 ;; particular positions in particular rules -- that are the possible
388 ;; places where parsing could continue when the machine is in this
389 ;; state. These symbols at these items are the allowable inputs that
390 ;; can follow now.
391
392 ;; A core represents one state. States are numbered in the number
393 ;; field. When `wisent-generate-states' is finished, the starting
394 ;; state is state 0 and `nstates' is the number of states. (A
395 ;; transition to a state whose state number is `nstates' indicates
396 ;; termination.) All the cores are chained together and `first-state'
397 ;; points to the first one (state 0).
398
399 ;; For each state there is a particular symbol which must have been
400 ;; the last thing accepted to reach that state. It is the
401 ;; accessing-symbol of the core.
402
403 ;; Each core contains a vector of `nitems' items which are the indices
404 ;; in the `ritems' vector of the items that are selected in this
405 ;; state.
406
407 ;; The link field is used for chaining buckets that hash states by
408 ;; their itemsets. This is for recognizing equivalent states and
409 ;; combining them when the states are generated.
410
411 ;; The two types of transitions are shifts (push the lookahead token
412 ;; and read another) and reductions (combine the last n things on the
413 ;; stack via a rule, replace them with the symbol that the rule
414 ;; derives, and leave the lookahead token alone). When the states are
415 ;; generated, these transitions are represented in two other lists.
416
417 ;; Each shifts structure describes the possible shift transitions out
418 ;; of one state, the state whose number is in the number field. The
419 ;; shifts structures are linked through next and first-shift points to
420 ;; them. Each contains a vector of numbers of the states that shift
421 ;; transitions can go to. The accessing-symbol fields of those
422 ;; states' cores say what kind of input leads to them.
423
424 ;; A shift to state zero should be ignored. Conflict resolution
425 ;; deletes shifts by changing them to zero.
426
427 ;; Each reductions structure describes the possible reductions at the
428 ;; state whose number is in the number field. The data is a list of
429 ;; nreds rules, represented by their rule numbers. `first-reduction'
430 ;; points to the list of these structures.
431
432 ;; Conflict resolution can decide that certain tokens in certain
433 ;; states should explicitly be errors (for implementing %nonassoc).
434 ;; For each state, the tokens that are errors for this reason are
435 ;; recorded in an errs structure, which has the state number in its
436 ;; number field. The rest of the errs structure is full of token
437 ;; numbers.
438
439 ;; There is at least one shift transition present in state zero. It
440 ;; leads to a next-to-final state whose accessing-symbol is the
441 ;; grammar's start symbol. The next-to-final state has one shift to
442 ;; the final state, whose accessing-symbol is zero (end of input).
443 ;; The final state has one shift, which goes to the termination state
444 ;; (whose number is `nstates'-1).
445 ;; The reason for the extra state at the end is to placate the
446 ;; parser's strategy of making all decisions one token ahead of its
447 ;; actions.
448
449 (wisent-struct core
450 next ; -> core
451 link ; -> core
452 (number 0)
453 (accessing-symbol 0)
454 (nitems 0)
455 (items [0]))
456
457 (wisent-struct shifts
458 next ; -> shifts
459 (number 0)
460 (nshifts 0)
461 (shifts [0]))
462
463 (wisent-struct reductions
464 next ; -> reductions
465 (number 0)
466 (nreds 0)
467 (rules [0]))
468
469 (wisent-struct errs
470 (nerrs 0)
471 (errs [0]))
472 \f
473 ;;;; --------------------------------------------------------
474 ;;;; Find unreachable terminals, nonterminals and productions
475 ;;;; --------------------------------------------------------
476
477 (defun wisent-bits-equal (L R n)
478 "Visit L and R and return non-nil if their first N elements are `='.
479 L and R must be vectors of integers."
480 (let* ((i (1- n))
481 (iseq t))
482 (while (and iseq (natnump i))
483 (setq iseq (= (aref L i) (aref R i))
484 i (1- i)))
485 iseq))
486
487 (defun wisent-nbits (i)
488 "Return number of bits set in integer I."
489 (let ((count 0))
490 (while (not (zerop i))
491 ;; i ^= (i & ((unsigned) (-(int) i)))
492 (setq i (logxor i (logand i (- i)))
493 count (1+ count)))
494 count))
495
496 (defun wisent-bits-size (S n)
497 "In vector S count the total of bits set in first N elements.
498 S must be a vector of integers."
499 (let* ((i (1- n))
500 (count 0))
501 (while (natnump i)
502 (setq count (+ count (wisent-nbits (aref S i)))
503 i (1- i)))
504 count))
505
506 (defun wisent-useful-production (i N0)
507 "Return non-nil if production I is in useful set N0."
508 (let* ((useful t)
509 (r (aref rrhs i))
510 n)
511 (while (and useful (> (setq n (aref ritem r)) 0))
512 (if (wisent-ISVAR n)
513 (setq useful (wisent-BITISSET N0 (- n ntokens))))
514 (setq r (1+ r)))
515 useful))
516
517 (defun wisent-useless-nonterminals ()
518 "Find out which nonterminals are used."
519 (let (Np Ns i n break)
520 ;; N is set as built. Np is set being built this iteration. P is
521 ;; set of all productions which have a RHS all in N.
522 (setq n (wisent-WORDSIZE nvars)
523 Np (make-vector n 0))
524
525 ;; The set being computed is a set of nonterminals which can
526 ;; derive the empty string or strings consisting of all
527 ;; terminals. At each iteration a nonterminal is added to the set
528 ;; if there is a production with that nonterminal as its LHS for
529 ;; which all the nonterminals in its RHS are already in the set.
530 ;; Iterate until the set being computed remains unchanged. Any
531 ;; nonterminals not in the set at that point are useless in that
532 ;; they will never be used in deriving a sentence of the language.
533
534 ;; This iteration doesn't use any special traversal over the
535 ;; productions. A set is kept of all productions for which all
536 ;; the nonterminals in the RHS are in useful. Only productions
537 ;; not in this set are scanned on each iteration. At the end,
538 ;; this set is saved to be used when finding useful productions:
539 ;; only productions in this set will appear in the final grammar.
540
541 (while (not break)
542 (setq i (1- n))
543 (while (natnump i)
544 ;; Np[i] = N[i]
545 (aset Np i (aref N i))
546 (setq i (1- i)))
547
548 (setq i 1)
549 (while (<= i nrules)
550 (if (not (wisent-BITISSET P i))
551 (when (wisent-useful-production i N)
552 (wisent-SETBIT Np (- (aref rlhs i) ntokens))
553 (wisent-SETBIT P i)))
554 (setq i (1+ i)))
555 (if (wisent-bits-equal N Np n)
556 (setq break t)
557 (setq Ns Np
558 Np N
559 N Ns)))
560 (setq N Np)))
561
562 (defun wisent-inaccessible-symbols ()
563 "Find out which productions are reachable and which symbols are used."
564 ;; Starting with an empty set of productions and a set of symbols
565 ;; which only has the start symbol in it, iterate over all
566 ;; productions until the set of productions remains unchanged for an
567 ;; iteration. For each production which has a LHS in the set of
568 ;; reachable symbols, add the production to the set of reachable
569 ;; productions, and add all of the nonterminals in the RHS of the
570 ;; production to the set of reachable symbols.
571
572 ;; Consider only the (partially) reduced grammar which has only
573 ;; nonterminals in N and productions in P.
574
575 ;; The result is the set P of productions in the reduced grammar,
576 ;; and the set V of symbols in the reduced grammar.
577
578 ;; Although this algorithm also computes the set of terminals which
579 ;; are reachable, no terminal will be deleted from the grammar. Some
580 ;; terminals might not be in the grammar but might be generated by
581 ;; semantic routines, and so the user might want them available with
582 ;; specified numbers. (Is this true?) However, the non reachable
583 ;; terminals are printed (if running in verbose mode) so that the
584 ;; user can know.
585 (let (Vp Vs Pp i tt r n m break)
586 (setq n (wisent-WORDSIZE nsyms)
587 m (wisent-WORDSIZE (1+ nrules))
588 Vp (make-vector n 0)
589 Pp (make-vector m 0))
590
591 ;; If the start symbol isn't useful, then nothing will be useful.
592 (when (wisent-BITISSET N (- start-symbol ntokens))
593 (wisent-SETBIT V start-symbol)
594 (while (not break)
595 (setq i (1- n))
596 (while (natnump i)
597 (aset Vp i (aref V i))
598 (setq i (1- i)))
599 (setq i 1)
600 (while (<= i nrules)
601 (when (and (not (wisent-BITISSET Pp i))
602 (wisent-BITISSET P i)
603 (wisent-BITISSET V (aref rlhs i)))
604 (setq r (aref rrhs i))
605 (while (natnump (setq tt (aref ritem r)))
606 (if (or (wisent-ISTOKEN tt)
607 (wisent-BITISSET N (- tt ntokens)))
608 (wisent-SETBIT Vp tt))
609 (setq r (1+ r)))
610 (wisent-SETBIT Pp i))
611 (setq i (1+ i)))
612 (if (wisent-bits-equal V Vp n)
613 (setq break t)
614 (setq Vs Vp
615 Vp V
616 V Vs))))
617 (setq V Vp)
618
619 ;; Tokens 0, 1 are internal to Wisent. Consider them useful.
620 (wisent-SETBIT V 0) ;; end-of-input token
621 (wisent-SETBIT V 1) ;; error token
622 (setq P Pp)
623
624 (setq nuseless-productions (- nrules (wisent-bits-size P m))
625 nuseless-nonterminals nvars
626 i ntokens)
627 (while (< i nsyms)
628 (if (wisent-BITISSET V i)
629 (setq nuseless-nonterminals (1- nuseless-nonterminals)))
630 (setq i (1+ i)))
631
632 ;; A token that was used in %prec should not be warned about.
633 (setq i 1)
634 (while (<= i nrules)
635 (if (aref rprec i)
636 (wisent-SETBIT V1 (aref rprec i)))
637 (setq i (1+ i)))
638 ))
639
640 (defun wisent-reduce-grammar-tables ()
641 "Disable useless productions."
642 (if (> nuseless-productions 0)
643 (let ((pn 1))
644 (while (<= pn nrules)
645 (aset ruseful pn (wisent-BITISSET P pn))
646 (setq pn (1+ pn))))))
647
648 (defun wisent-nonterminals-reduce ()
649 "Remove useless nonterminals."
650 (let (i n r item nontermmap tags-sorted)
651 ;; Map the nonterminals to their new index: useful first, useless
652 ;; afterwards. Kept for later report.
653 (setq nontermmap (make-vector nvars 0)
654 n ntokens
655 i ntokens)
656 (while (< i nsyms)
657 (when (wisent-BITISSET V i)
658 (aset nontermmap (- i ntokens) n)
659 (setq n (1+ n)))
660 (setq i (1+ i)))
661 (setq i ntokens)
662 (while (< i nsyms)
663 (unless (wisent-BITISSET V i)
664 (aset nontermmap (- i ntokens) n)
665 (setq n (1+ n)))
666 (setq i (1+ i)))
667 ;; Shuffle elements of tables indexed by symbol number
668 (setq tags-sorted (make-vector nvars nil)
669 i ntokens)
670 (while (< i nsyms)
671 (setq n (aref nontermmap (- i ntokens)))
672 (aset tags-sorted (- n ntokens) (aref tags i))
673 (setq i (1+ i)))
674 (setq i ntokens)
675 (while (< i nsyms)
676 (aset tags i (aref tags-sorted (- i ntokens)))
677 (setq i (1+ i)))
678 ;; Replace all symbol numbers in valid data structures.
679 (setq i 1)
680 (while (<= i nrules)
681 (aset rlhs i (aref nontermmap (- (aref rlhs i) ntokens)))
682 (setq i (1+ i)))
683 (setq r 0)
684 (while (setq item (aref ritem r))
685 (if (wisent-ISVAR item)
686 (aset ritem r (aref nontermmap (- item ntokens))))
687 (setq r (1+ r)))
688 (setq start-symbol (aref nontermmap (- start-symbol ntokens))
689 nsyms (- nsyms nuseless-nonterminals)
690 nvars (- nvars nuseless-nonterminals))
691 ))
692
693 (defun wisent-total-useless ()
694 "Report number of useless nonterminals and productions."
695 (let* ((src (wisent-source))
696 (src (if src (concat " in " src) ""))
697 (msg (format "Grammar%s contains" src)))
698 (if (> nuseless-nonterminals 0)
699 (setq msg (format "%s %d useless nonterminal%s"
700 msg nuseless-nonterminals
701 (if (> nuseless-nonterminals 0) "s" ""))))
702 (if (and (> nuseless-nonterminals 0) (> nuseless-productions 0))
703 (setq msg (format "%s and" msg)))
704 (if (> nuseless-productions 0)
705 (setq msg (format "%s %d useless rule%s"
706 msg nuseless-productions
707 (if (> nuseless-productions 0) "s" ""))))
708 (message msg)))
709
710 (defun wisent-reduce-grammar ()
711 "Find unreachable terminals, nonterminals and productions."
712 ;; Allocate the global sets used to compute the reduced grammar
713 (setq N (make-vector (wisent-WORDSIZE nvars) 0)
714 P (make-vector (wisent-WORDSIZE (1+ nrules)) 0)
715 V (make-vector (wisent-WORDSIZE nsyms) 0)
716 V1 (make-vector (wisent-WORDSIZE nsyms) 0)
717 nuseless-nonterminals 0
718 nuseless-productions 0)
719
720 (wisent-useless-nonterminals)
721 (wisent-inaccessible-symbols)
722
723 (when (> (+ nuseless-nonterminals nuseless-productions) 0)
724 (wisent-total-useless)
725 (or (wisent-BITISSET N (- start-symbol ntokens))
726 (error "Start symbol `%s' does not derive any sentence"
727 (wisent-tag start-symbol)))
728 (wisent-reduce-grammar-tables)
729 (if (> nuseless-nonterminals 0)
730 (wisent-nonterminals-reduce))))
731
732 (defun wisent-print-useless ()
733 "Output the detailed results of the reductions."
734 (let (i b r)
735 (when (> nuseless-nonterminals 0)
736 ;; Useless nonterminals have been moved after useful ones.
737 (wisent-log "\n\nUseless nonterminals:\n\n")
738 (setq i 0)
739 (while (< i nuseless-nonterminals)
740 (wisent-log " %s\n" (wisent-tag (+ nsyms i)))
741 (setq i (1+ i))))
742 (setq b nil
743 i 0)
744 (while (< i ntokens)
745 (unless (or (wisent-BITISSET V i) (wisent-BITISSET V1 i))
746 (or b
747 (wisent-log "\n\nTerminals which are not used:\n\n"))
748 (setq b t)
749 (wisent-log " %s\n" (wisent-tag i)))
750 (setq i (1+ i)))
751 (when (> nuseless-productions 0)
752 (wisent-log "\n\nUseless rules:\n\n")
753 (setq i 1)
754 (while (<= i nrules)
755 (unless (aref ruseful i)
756 (wisent-log "#%s " (wisent-pad-string (format "%d" i) 4))
757 (wisent-log "%s:" (wisent-tag (aref rlhs i)))
758 (setq r (aref rrhs i))
759 (while (natnump (aref ritem r))
760 (wisent-log " %s" (wisent-tag (aref ritem r)))
761 (setq r (1+ r)))
762 (wisent-log ";\n"))
763 (setq i (1+ i))))
764 (if (or b (> nuseless-nonterminals 0) (> nuseless-productions 0))
765 (wisent-log "\n\n"))
766 ))
767 \f
768 ;;;; -----------------------------
769 ;;;; Match rules with nonterminals
770 ;;;; -----------------------------
771
772 (defun wisent-set-derives ()
773 "Find, for each variable (nonterminal), which rules can derive it.
774 It sets up the value of DERIVES so that DERIVES[i - NTOKENS] points to
775 a list of rule numbers, terminated with -1."
776 (let (i lhs p q dset delts)
777 (setq dset (make-vector nvars nil)
778 delts (make-vector (1+ nrules) 0))
779 (setq p 0 ;; p = delts
780 i nrules)
781 (while (> i 0)
782 (when (aref ruseful i)
783 (setq lhs (aref rlhs i))
784 ;; p->next = dset[lhs];
785 ;; p->value = i;
786 (aset delts p (cons i (aref dset (- lhs ntokens)))) ;; (value . next)
787 (aset dset (- lhs ntokens) p) ;; dset[lhs] = p
788 (setq p (1+ p)) ;; p++
789 )
790 (setq i (1- i)))
791
792 (setq derives (make-vector nvars nil)
793 i ntokens)
794
795 (while (< i nsyms)
796 (setq q nil
797 p (aref dset (- i ntokens))) ;; p = dset[i]
798
799 (while p
800 (setq p (aref delts p)
801 q (cons (car p) q) ;;q++ = p->value
802 p (cdr p))) ;; p = p->next
803 (setq q (nreverse (cons -1 q))) ;; *q++ = -1
804 (aset derives (- i ntokens) q) ;; derives[i] = q
805 (setq i (1+ i)))
806 ))
807 \f
808 ;;;; --------------------------------------------------------
809 ;;;; Find which nonterminals can expand into the null string.
810 ;;;; --------------------------------------------------------
811
812 (defun wisent-print-nullable ()
813 "Print NULLABLE."
814 (let (i)
815 (wisent-log "NULLABLE\n")
816 (setq i ntokens)
817 (while (< i nsyms)
818 (wisent-log "\t%s: %s\n" (wisent-tag i)
819 (if (aref nullable (- i ntokens))
820 "yes" : "no"))
821 (setq i (1+ i)))
822 (wisent-log "\n\n")))
823
824 (defun wisent-set-nullable ()
825 "Set up NULLABLE.
826 A vector saying which nonterminals can expand into the null string.
827 NULLABLE[i - NTOKENS] is nil if symbol I can do so."
828 (let (ruleno s1 s2 p r squeue rcount rsets relts item any-tokens)
829 (setq squeue (make-vector nvars 0)
830 rcount (make-vector (1+ nrules) 0)
831 rsets (make-vector nvars nil) ;; - ntokens
832 relts (make-vector (+ nitems nvars 1) nil)
833 nullable (make-vector nvars nil)) ;; - ntokens
834 (setq s1 0 s2 0 ;; s1 = s2 = squeue
835 p 0 ;; p = relts
836 ruleno 1)
837 (while (<= ruleno nrules)
838 (when (aref ruseful ruleno)
839 (if (> (aref ritem (aref rrhs ruleno)) 0)
840 (progn
841 ;; This rule has a non empty RHS.
842 (setq any-tokens nil
843 r (aref rrhs ruleno))
844 (while (> (aref ritem r) 0)
845 (if (wisent-ISTOKEN (aref ritem r))
846 (setq any-tokens t))
847 (setq r (1+ r)))
848
849 ;; This rule has only nonterminals: schedule it for the
850 ;; second pass.
851 (unless any-tokens
852 (setq r (aref rrhs ruleno))
853 (while (> (setq item (aref ritem r)) 0)
854 (aset rcount ruleno (1+ (aref rcount ruleno)))
855 ;; p->next = rsets[item];
856 ;; p->value = ruleno;
857 (aset relts p (cons ruleno (aref rsets (- item ntokens))))
858 ;; rsets[item] = p;
859 (aset rsets (- item ntokens) p)
860 (setq p (1+ p)
861 r (1+ r)))))
862 ;; This rule has an empty RHS.
863 ;; assert (ritem[rrhs[ruleno]] == -ruleno)
864 (when (and (aref ruseful ruleno)
865 (setq item (aref rlhs ruleno))
866 (not (aref nullable (- item ntokens))))
867 (aset nullable (- item ntokens) t)
868 (aset squeue s2 item)
869 (setq s2 (1+ s2)))
870 )
871 )
872 (setq ruleno (1+ ruleno)))
873
874 (while (< s1 s2)
875 ;; p = rsets[*s1++]
876 (setq p (aref rsets (- (aref squeue s1) ntokens))
877 s1 (1+ s1))
878 (while p
879 (setq p (aref relts p)
880 ruleno (car p)
881 p (cdr p)) ;; p = p->next
882 ;; if (--rcount[ruleno] == 0)
883 (when (zerop (aset rcount ruleno (1- (aref rcount ruleno))))
884 (setq item (aref rlhs ruleno))
885 (aset nullable (- item ntokens) t)
886 (aset squeue s2 item)
887 (setq s2 (1+ s2)))))
888
889 (if wisent-debug-flag
890 (wisent-print-nullable))
891 ))
892 \f
893 ;;;; -----------
894 ;;;; Subroutines
895 ;;;; -----------
896
897 (defun wisent-print-fderives ()
898 "Print FDERIVES."
899 (let (i j rp)
900 (wisent-log "\n\n\nFDERIVES\n")
901 (setq i ntokens)
902 (while (< i nsyms)
903 (wisent-log "\n\n%s derives\n\n" (wisent-tag i))
904 (setq rp (aref fderives (- i ntokens))
905 j 0)
906 (while (<= j nrules)
907 (if (wisent-BITISSET rp j)
908 (wisent-log " %d\n" j))
909 (setq j (1+ j)))
910 (setq i (1+ i)))))
911
912 (defun wisent-set-fderives ()
913 "Set up FDERIVES.
914 An NVARS by NRULES matrix of bits indicating which rules can help
915 derive the beginning of the data for each nonterminal. For example,
916 if symbol 5 can be derived as the sequence of symbols 8 3 20, and one
917 of the rules for deriving symbol 8 is rule 4, then the
918 [5 - NTOKENS, 4] bit in FDERIVES is set."
919 (let (i j k)
920 (setq fderives (make-vector nvars nil))
921 (setq i 0)
922 (while (< i nvars)
923 (aset fderives i (make-vector rulesetsize 0))
924 (setq i (1+ i)))
925
926 (wisent-set-firsts)
927
928 (setq i ntokens)
929 (while (< i nsyms)
930 (setq j ntokens)
931 (while (< j nsyms)
932 ;; if (BITISSET (FIRSTS (i), j - ntokens))
933 (when (wisent-BITISSET (aref firsts (- i ntokens)) (- j ntokens))
934 (setq k (aref derives (- j ntokens)))
935 (while (> (car k) 0) ;; derives[j][k] > 0
936 ;; SETBIT (FDERIVES (i), derives[j][k]);
937 (wisent-SETBIT (aref fderives (- i ntokens)) (car k))
938 (setq k (cdr k))))
939 (setq j (1+ j)))
940 (setq i (1+ i)))
941
942 (if wisent-debug-flag
943 (wisent-print-fderives))
944 ))
945
946 (defun wisent-print-firsts ()
947 "Print FIRSTS."
948 (let (i j v)
949 (wisent-log "\n\n\nFIRSTS\n\n")
950 (setq i ntokens)
951 (while (< i nsyms)
952 (wisent-log "\n\n%s firsts\n\n" (wisent-tag i))
953 (setq v (aref firsts (- i ntokens))
954 j 0)
955 (while (< j nvars)
956 (if (wisent-BITISSET v j)
957 (wisent-log "\t\t%d (%s)\n"
958 (+ j ntokens) (wisent-tag (+ j ntokens))))
959 (setq j (1+ j)))
960 (setq i (1+ i)))))
961
962 (defun wisent-TC (R n)
963 "Transitive closure.
964 Given R an N by N matrix of bits, modify its contents to be the
965 transitive closure of what was given."
966 (let (i j k)
967 ;; R (J, I) && R (I, K) => R (J, K).
968 ;; I *must* be the outer loop.
969 (setq i 0)
970 (while (< i n)
971 (setq j 0)
972 (while (< j n)
973 (when (wisent-BITISSET (aref R j) i)
974 (setq k 0)
975 (while (< k n)
976 (if (wisent-BITISSET (aref R i) k)
977 (wisent-SETBIT (aref R j) k))
978 (setq k (1+ k))))
979 (setq j (1+ j)))
980 (setq i (1+ i)))))
981
982 (defun wisent-RTC (R n)
983 "Reflexive Transitive Closure.
984 Same as `wisent-TC' and then set all the bits on the diagonal of R, an
985 N by N matrix of bits."
986 (let (i)
987 (wisent-TC R n)
988 (setq i 0)
989 (while (< i n)
990 (wisent-SETBIT (aref R i) i)
991 (setq i (1+ i)))))
992
993 (defun wisent-set-firsts ()
994 "Set up FIRSTS.
995 An NVARS by NVARS bit matrix indicating which items can represent the
996 beginning of the input corresponding to which other items. For
997 example, if some rule expands symbol 5 into the sequence of symbols 8
998 3 20, the symbol 8 can be the beginning of the data for symbol 5, so
999 the bit [8 - NTOKENS, 5 - NTOKENS] in FIRSTS is set."
1000 (let (row symbol sp rowsize i)
1001 (setq rowsize (wisent-WORDSIZE nvars)
1002 varsetsize rowsize
1003 firsts (make-vector nvars nil)
1004 i 0)
1005 (while (< i nvars)
1006 (aset firsts i (make-vector rowsize 0))
1007 (setq i (1+ i)))
1008
1009 (setq row 0 ;; row = firsts
1010 i ntokens)
1011 (while (< i nsyms)
1012 (setq sp (aref derives (- i ntokens)))
1013 (while (>= (car sp) 0)
1014 (setq symbol (aref ritem (aref rrhs (car sp)))
1015 sp (cdr sp))
1016 (when (wisent-ISVAR symbol)
1017 (setq symbol (- symbol ntokens))
1018 (wisent-SETBIT (aref firsts row) symbol)
1019 ))
1020 (setq row (1+ row)
1021 i (1+ i)))
1022
1023 (wisent-RTC firsts nvars)
1024
1025 (if wisent-debug-flag
1026 (wisent-print-firsts))
1027 ))
1028
1029 (defun wisent-initialize-closure (n)
1030 "Allocate the ITEMSET and RULESET vectors.
1031 And precompute useful data so that `wisent-closure' can be called.
1032 N is the number of elements to allocate for ITEMSET."
1033 (setq itemset (make-vector n 0)
1034 rulesetsize (wisent-WORDSIZE (1+ nrules))
1035 ruleset (make-vector rulesetsize 0))
1036
1037 (wisent-set-fderives))
1038
1039 (defun wisent-print-closure ()
1040 "Print ITEMSET."
1041 (let (i)
1042 (wisent-log "\n\nclosure n = %d\n\n" nitemset)
1043 (setq i 0) ;; isp = itemset
1044 (while (< i nitemset)
1045 (wisent-log " %d\n" (aref itemset i))
1046 (setq i (1+ i)))))
1047
1048 (defun wisent-closure (core n)
1049 "Set up RULESET and ITEMSET for the transitions out of CORE state.
1050 Given a vector of item numbers items, of length N, set up RULESET and
1051 ITEMSET to indicate what rules could be run and which items could be
1052 accepted when those items are the active ones.
1053
1054 RULESET contains a bit for each rule. `wisent-closure' sets the bits
1055 for all rules which could potentially describe the next input to be
1056 read.
1057
1058 ITEMSET is a vector of item numbers; NITEMSET is the number of items
1059 in ITEMSET. `wisent-closure' places there the indices of all items
1060 which represent units of input that could arrive next."
1061 (let (c r v symbol ruleno itemno)
1062 (if (zerop n)
1063 (progn
1064 (setq r 0
1065 v (aref fderives (- start-symbol ntokens)))
1066 (while (< r rulesetsize)
1067 ;; ruleset[r] = FDERIVES (start-symbol)[r];
1068 (aset ruleset r (aref v r))
1069 (setq r (1+ r)))
1070 )
1071 (fillarray ruleset 0)
1072 (setq c 0)
1073 (while (< c n)
1074 (setq symbol (aref ritem (aref core c)))
1075 (when (wisent-ISVAR symbol)
1076 (setq r 0
1077 v (aref fderives (- symbol ntokens)))
1078 (while (< r rulesetsize)
1079 ;; ruleset[r] |= FDERIVES (ritem[core[c]])[r];
1080 (aset ruleset r (logior (aref ruleset r) (aref v r)))
1081 (setq r (1+ r))))
1082 (setq c (1+ c)))
1083 )
1084 (setq nitemset 0
1085 c 0
1086 ruleno 0
1087 r (* rulesetsize wisent-BITS-PER-WORD))
1088 (while (< ruleno r)
1089 (when (wisent-BITISSET ruleset ruleno)
1090 (setq itemno (aref rrhs ruleno))
1091 (while (and (< c n) (< (aref core c) itemno))
1092 (aset itemset nitemset (aref core c))
1093 (setq nitemset (1+ nitemset)
1094 c (1+ c)))
1095 (aset itemset nitemset itemno)
1096 (setq nitemset (1+ nitemset)))
1097 (setq ruleno (1+ ruleno)))
1098
1099 (while (< c n)
1100 (aset itemset nitemset (aref core c))
1101 (setq nitemset (1+ nitemset)
1102 c (1+ c)))
1103
1104 (if wisent-debug-flag
1105 (wisent-print-closure))
1106 ))
1107 \f
1108 ;;;; --------------------------------------------------
1109 ;;;; Generate the nondeterministic finite state machine
1110 ;;;; --------------------------------------------------
1111
1112 (defun wisent-allocate-itemsets ()
1113 "Allocate storage for itemsets."
1114 (let (symbol i count symbol-count)
1115 ;; Count the number of occurrences of all the symbols in RITEMS.
1116 ;; Note that useless productions (hence useless nonterminals) are
1117 ;; browsed too, hence we need to allocate room for _all_ the
1118 ;; symbols.
1119 (setq count 0
1120 symbol-count (make-vector (+ nsyms nuseless-nonterminals) 0)
1121 i 0)
1122 (while (setq symbol (aref ritem i))
1123 (when (> symbol 0)
1124 (setq count (1+ count))
1125 (aset symbol-count symbol (1+ (aref symbol-count symbol))))
1126 (setq i (1+ i)))
1127 ;; See comments before `wisent-new-itemsets'. All the vectors of
1128 ;; items live inside kernel-items. The number of active items
1129 ;; after some symbol cannot be more than the number of times that
1130 ;; symbol appears as an item, which is symbol-count[symbol]. We
1131 ;; allocate that much space for each symbol.
1132 (setq kernel-base (make-vector nsyms nil)
1133 kernel-items (make-vector count 0)
1134 count 0
1135 i 0)
1136 (while (< i nsyms)
1137 (aset kernel-base i count)
1138 (setq count (+ count (aref symbol-count i))
1139 i (1+ i)))
1140 (setq shift-symbol symbol-count
1141 kernel-end (make-vector nsyms nil))
1142 ))
1143
1144 (defun wisent-allocate-storage ()
1145 "Allocate storage for the state machine."
1146 (wisent-allocate-itemsets)
1147 (setq shiftset (make-vector nsyms 0)
1148 redset (make-vector (1+ nrules) 0)
1149 state-table (make-vector wisent-state-table-size nil)))
1150
1151 (defun wisent-new-itemsets ()
1152 "Find which symbols can be shifted in the current state.
1153 And for each one record which items would be active after that shift.
1154 Uses the contents of ITEMSET. SHIFT-SYMBOL is set to a vector of the
1155 symbols that can be shifted. For each symbol in the grammar,
1156 KERNEL-BASE[symbol] points to a vector of item numbers activated if
1157 that symbol is shifted, and KERNEL-END[symbol] points after the end of
1158 that vector."
1159 (let (i shiftcount isp ksp symbol)
1160 (fillarray kernel-end nil)
1161 (setq shiftcount 0
1162 isp 0)
1163 (while (< isp nitemset)
1164 (setq i (aref itemset isp)
1165 isp (1+ isp)
1166 symbol (aref ritem i))
1167 (when (> symbol 0)
1168 (setq ksp (aref kernel-end symbol))
1169 (when (not ksp)
1170 ;; shift-symbol[shiftcount++] = symbol;
1171 (aset shift-symbol shiftcount symbol)
1172 (setq shiftcount (1+ shiftcount)
1173 ksp (aref kernel-base symbol)))
1174 ;; *ksp++ = i + 1;
1175 (aset kernel-items ksp (1+ i))
1176 (setq ksp (1+ ksp))
1177 (aset kernel-end symbol ksp)))
1178 (setq nshifts shiftcount)))
1179
1180 (defun wisent-new-state (symbol)
1181 "Create a new state for those items, if necessary.
1182 SYMBOL is the core accessing-symbol.
1183 Subroutine of `wisent-get-state'."
1184 (let (n p isp1 isp2 iend items)
1185 (setq isp1 (aref kernel-base symbol)
1186 iend (aref kernel-end symbol)
1187 n (- iend isp1)
1188 p (make-core)
1189 items (make-vector n 0))
1190 (set-core-accessing-symbol p symbol)
1191 (set-core-number p nstates)
1192 (set-core-nitems p n)
1193 (set-core-items p items)
1194 (setq isp2 0) ;; isp2 = p->items
1195 (while (< isp1 iend)
1196 ;; *isp2++ = *isp1++;
1197 (aset items isp2 (aref kernel-items isp1))
1198 (setq isp1 (1+ isp1)
1199 isp2 (1+ isp2)))
1200 (set-core-next last-state p)
1201 (setq last-state p
1202 nstates (1+ nstates))
1203 p))
1204
1205 (defun wisent-get-state (symbol)
1206 "Find the state we would get to by shifting SYMBOL.
1207 Return the state number for the state we would get to (from the
1208 current state) by shifting SYMBOL. Create a new state if no
1209 equivalent one exists already. Used by `wisent-append-states'."
1210 (let (key isp1 isp2 iend sp sp2 found n)
1211 (setq isp1 (aref kernel-base symbol)
1212 iend (aref kernel-end symbol)
1213 n (- iend isp1)
1214 key 0)
1215 ;; Add up the target state's active item numbers to get a hash key
1216 (while (< isp1 iend)
1217 (setq key (+ key (aref kernel-items isp1))
1218 isp1 (1+ isp1)))
1219 (setq key (% key wisent-state-table-size)
1220 sp (aref state-table key))
1221 (if sp
1222 (progn
1223 (setq found nil)
1224 (while (not found)
1225 (when (= (core-nitems sp) n)
1226 (setq found t
1227 isp1 (aref kernel-base symbol)
1228 ;; isp2 = sp->items;
1229 sp2 (core-items sp)
1230 isp2 0)
1231
1232 (while (and found (< isp1 iend))
1233 ;; if (*isp1++ != *isp2++)
1234 (if (not (= (aref kernel-items isp1)
1235 (aref sp2 isp2)))
1236 (setq found nil))
1237 (setq isp1 (1+ isp1)
1238 isp2 (1+ isp2))))
1239 (if (not found)
1240 (if (core-link sp)
1241 (setq sp (core-link sp))
1242 ;; sp = sp->link = new-state(symbol)
1243 (setq sp (set-core-link sp (wisent-new-state symbol))
1244 found t)))))
1245 ;; bucket is empty
1246 ;; state-table[key] = sp = new-state(symbol)
1247 (setq sp (wisent-new-state symbol))
1248 (aset state-table key sp))
1249 ;; return (sp->number);
1250 (core-number sp)))
1251
1252 (defun wisent-append-states ()
1253 "Find or create the core structures for states.
1254 Use the information computed by `wisent-new-itemsets' to find the
1255 state numbers reached by each shift transition from the current state.
1256 SHIFTSET is set up as a vector of state numbers of those states."
1257 (let (i j symbol)
1258 ;; First sort shift-symbol into increasing order
1259 (setq i 1)
1260 (while (< i nshifts)
1261 (setq symbol (aref shift-symbol i)
1262 j i)
1263 (while (and (> j 0) (> (aref shift-symbol (1- j)) symbol))
1264 (aset shift-symbol j (aref shift-symbol (1- j)))
1265 (setq j (1- j)))
1266 (aset shift-symbol j symbol)
1267 (setq i (1+ i)))
1268 (setq i 0)
1269 (while (< i nshifts)
1270 (setq symbol (aref shift-symbol i))
1271 (aset shiftset i (wisent-get-state symbol))
1272 (setq i (1+ i)))
1273 ))
1274
1275 (defun wisent-initialize-states ()
1276 "Initialize states."
1277 (let ((p (make-core)))
1278 (setq first-state p
1279 last-state p
1280 this-state p
1281 nstates 1)))
1282
1283 (defun wisent-save-shifts ()
1284 "Save the NSHIFTS of SHIFTSET into the current linked list."
1285 (let (p i shifts)
1286 (setq p (make-shifts)
1287 shifts (make-vector nshifts 0)
1288 i 0)
1289 (set-shifts-number p (core-number this-state))
1290 (set-shifts-nshifts p nshifts)
1291 (set-shifts-shifts p shifts)
1292 (while (< i nshifts)
1293 ;; (p->shifts)[i] = shiftset[i];
1294 (aset shifts i (aref shiftset i))
1295 (setq i (1+ i)))
1296
1297 (if last-shift
1298 (set-shifts-next last-shift p)
1299 (setq first-shift p))
1300 (setq last-shift p)))
1301
1302 (defun wisent-insert-start-shift ()
1303 "Create the next-to-final state.
1304 That is the state to which a shift has already been made in the
1305 initial state. Subroutine of `wisent-augment-automaton'."
1306 (let (statep sp)
1307 (setq statep (make-core))
1308 (set-core-number statep nstates)
1309 (set-core-accessing-symbol statep start-symbol)
1310 (set-core-next last-state statep)
1311 (setq last-state statep)
1312 ;; Make a shift from this state to (what will be) the final state.
1313 (setq sp (make-shifts))
1314 (set-shifts-number sp nstates)
1315 (setq nstates (1+ nstates))
1316 (set-shifts-nshifts sp 1)
1317 (set-shifts-shifts sp (vector nstates))
1318 (set-shifts-next last-shift sp)
1319 (setq last-shift sp)))
1320
1321 (defun wisent-augment-automaton ()
1322 "Set up initial and final states as parser wants them.
1323 Make sure that the initial state has a shift that accepts the
1324 grammar's start symbol and goes to the next-to-final state, which has
1325 a shift going to the final state, which has a shift to the termination
1326 state. Create such states and shifts if they don't happen to exist
1327 already."
1328 (let (i k statep sp sp2 sp1 shifts)
1329 (setq sp first-shift)
1330 (if sp
1331 (progn
1332 (if (zerop (shifts-number sp))
1333 (progn
1334 (setq k (shifts-nshifts sp)
1335 statep (core-next first-state))
1336 ;; The states reached by shifts from first-state are
1337 ;; numbered 1...K. Look for one reached by
1338 ;; START-SYMBOL.
1339 (while (and (< (core-accessing-symbol statep) start-symbol)
1340 (< (core-number statep) k))
1341 (setq statep (core-next statep)))
1342 (if (= (core-accessing-symbol statep) start-symbol)
1343 (progn
1344 ;; We already have a next-to-final state. Make
1345 ;; sure it has a shift to what will be the final
1346 ;; state.
1347 (setq k (core-number statep))
1348 (while (and sp (< (shifts-number sp) k))
1349 (setq sp1 sp
1350 sp (shifts-next sp)))
1351 (if (and sp (= (shifts-number sp) k))
1352 (progn
1353 (setq i (shifts-nshifts sp)
1354 sp2 (make-shifts)
1355 shifts (make-vector (1+ i) 0))
1356 (set-shifts-number sp2 k)
1357 (set-shifts-nshifts sp2 (1+ i))
1358 (set-shifts-shifts sp2 shifts)
1359 (aset shifts 0 nstates)
1360 (while (> i 0)
1361 ;; sp2->shifts[i] = sp->shifts[i - 1];
1362 (aset shifts i (aref (shifts-shifts sp) (1- i)))
1363 (setq i (1- i)))
1364 ;; Patch sp2 into the chain of shifts in
1365 ;; place of sp, following sp1.
1366 (set-shifts-next sp2 (shifts-next sp))
1367 (set-shifts-next sp1 sp2)
1368 (if (eq sp last-shift)
1369 (setq last-shift sp2))
1370 )
1371 (setq sp2 (make-shifts))
1372 (set-shifts-number sp2 k)
1373 (set-shifts-nshifts sp2 1)
1374 (set-shifts-shifts sp2 (vector nstates))
1375 ;; Patch sp2 into the chain of shifts between
1376 ;; sp1 and sp.
1377 (set-shifts-next sp2 sp)
1378 (set-shifts-next sp1 sp2)
1379 (if (not sp)
1380 (setq last-shift sp2))
1381 )
1382 )
1383 ;; There is no next-to-final state as yet.
1384 ;; Add one more shift in FIRST-SHIFT, going to the
1385 ;; next-to-final state (yet to be made).
1386 (setq sp first-shift
1387 sp2 (make-shifts)
1388 i (shifts-nshifts sp)
1389 shifts (make-vector (1+ i) 0))
1390 (set-shifts-nshifts sp2 (1+ i))
1391 (set-shifts-shifts sp2 shifts)
1392 ;; Stick this shift into the vector at the proper place.
1393 (setq statep (core-next first-state)
1394 k 0
1395 i 0)
1396 (while (< i (shifts-nshifts sp))
1397 (when (and (> (core-accessing-symbol statep) start-symbol)
1398 (= i k))
1399 (aset shifts k nstates)
1400 (setq k (1+ k)))
1401 (aset shifts k (aref (shifts-shifts sp) i))
1402 (setq statep (core-next statep))
1403 (setq i (1+ i)
1404 k (1+ k)))
1405 (when (= i k)
1406 (aset shifts k nstates)
1407 (setq k (1+ k)))
1408 ;; Patch sp2 into the chain of shifts in place of
1409 ;; sp, at the beginning.
1410 (set-shifts-next sp2 (shifts-next sp))
1411 (setq first-shift sp2)
1412 (if (eq last-shift sp)
1413 (setq last-shift sp2))
1414 ;; Create the next-to-final state, with shift to
1415 ;; what will be the final state.
1416 (wisent-insert-start-shift)))
1417 ;; The initial state didn't even have any shifts. Give it
1418 ;; one shift, to the next-to-final state.
1419 (setq sp (make-shifts))
1420 (set-shifts-nshifts sp 1)
1421 (set-shifts-shifts sp (vector nstates))
1422 ;; Patch sp into the chain of shifts at the beginning.
1423 (set-shifts-next sp first-shift)
1424 (setq first-shift sp)
1425 ;; Create the next-to-final state, with shift to what will
1426 ;; be the final state.
1427 (wisent-insert-start-shift)))
1428 ;; There are no shifts for any state. Make one shift, from the
1429 ;; initial state to the next-to-final state.
1430 (setq sp (make-shifts))
1431 (set-shifts-nshifts sp 1)
1432 (set-shifts-shifts sp (vector nstates))
1433 ;; Initialize the chain of shifts with sp.
1434 (setq first-shift sp
1435 last-shift sp)
1436 ;; Create the next-to-final state, with shift to what will be
1437 ;; the final state.
1438 (wisent-insert-start-shift))
1439 ;; Make the final state--the one that follows a shift from the
1440 ;; next-to-final state. The symbol for that shift is 0
1441 ;; (end-of-file).
1442 (setq statep (make-core))
1443 (set-core-number statep nstates)
1444 (set-core-next last-state statep)
1445 (setq last-state statep)
1446 ;; Make the shift from the final state to the termination state.
1447 (setq sp (make-shifts))
1448 (set-shifts-number sp nstates)
1449 (setq nstates (1+ nstates))
1450 (set-shifts-nshifts sp 1)
1451 (set-shifts-shifts sp (vector nstates))
1452 (set-shifts-next last-shift sp)
1453 (setq last-shift sp)
1454 ;; Note that the variable FINAL-STATE refers to what we sometimes
1455 ;; call the termination state.
1456 (setq final-state nstates)
1457 ;; Make the termination state.
1458 (setq statep (make-core))
1459 (set-core-number statep nstates)
1460 (setq nstates (1+ nstates))
1461 (set-core-next last-state statep)
1462 (setq last-state statep)))
1463
1464 (defun wisent-save-reductions ()
1465 "Make a reductions structure.
1466 Find which rules can be used for reduction transitions from the
1467 current state and make a reductions structure for the state to record
1468 their rule numbers."
1469 (let (i item count p rules)
1470 ;; Find and count the active items that represent ends of rules.
1471 (setq count 0
1472 i 0)
1473 (while (< i nitemset)
1474 (setq item (aref ritem (aref itemset i)))
1475 (when (< item 0)
1476 (aset redset count (- item))
1477 (setq count (1+ count)))
1478 (setq i (1+ i)))
1479 ;; Make a reductions structure and copy the data into it.
1480 (when (> count 0)
1481 (setq p (make-reductions)
1482 rules (make-vector count 0))
1483 (set-reductions-number p (core-number this-state))
1484 (set-reductions-nreds p count)
1485 (set-reductions-rules p rules)
1486 (setq i 0)
1487 (while (< i count)
1488 ;; (p->rules)[i] = redset[i]
1489 (aset rules i (aref redset i))
1490 (setq i (1+ i)))
1491 (if last-reduction
1492 (set-reductions-next last-reduction p)
1493 (setq first-reduction p))
1494 (setq last-reduction p))))
1495
1496 (defun wisent-generate-states ()
1497 "Compute the nondeterministic finite state machine from the grammar."
1498 (wisent-allocate-storage)
1499 (wisent-initialize-closure nitems)
1500 (wisent-initialize-states)
1501 (while this-state
1502 ;; Set up RULESET and ITEMSET for the transitions out of this
1503 ;; state. RULESET gets a 1 bit for each rule that could reduce
1504 ;; now. ITEMSET gets a vector of all the items that could be
1505 ;; accepted next.
1506 (wisent-closure (core-items this-state) (core-nitems this-state))
1507 ;; Record the reductions allowed out of this state.
1508 (wisent-save-reductions)
1509 ;; Find the itemsets of the states that shifts can reach.
1510 (wisent-new-itemsets)
1511 ;; Find or create the core structures for those states.
1512 (wisent-append-states)
1513 ;; Create the shifts structures for the shifts to those states,
1514 ;; now that the state numbers transitioning to are known.
1515 (if (> nshifts 0)
1516 (wisent-save-shifts))
1517 ;; States are queued when they are created; process them all.
1518 (setq this-state (core-next this-state)))
1519 ;; Set up initial and final states as parser wants them.
1520 (wisent-augment-automaton))
1521 \f
1522 ;;;; ---------------------------
1523 ;;;; Compute look-ahead criteria
1524 ;;;; ---------------------------
1525
1526 ;; Compute how to make the finite state machine deterministic; find
1527 ;; which rules need lookahead in each state, and which lookahead
1528 ;; tokens they accept.
1529
1530 ;; `wisent-lalr', the entry point, builds these data structures:
1531
1532 ;; GOTO-MAP, FROM-STATE and TO-STATE record each shift transition
1533 ;; which accepts a variable (a nonterminal). NGOTOS is the number of
1534 ;; such transitions.
1535 ;; FROM-STATE[t] is the state number which a transition leads from and
1536 ;; TO-STATE[t] is the state number it leads to.
1537 ;; All the transitions that accept a particular variable are grouped
1538 ;; together and GOTO-MAP[i - NTOKENS] is the index in FROM-STATE and
1539 ;; TO-STATE of the first of them.
1540
1541 ;; CONSISTENT[s] is non-nil if no lookahead is needed to decide what
1542 ;; to do in state s.
1543
1544 ;; LARULENO is a vector which records the rules that need lookahead in
1545 ;; various states. The elements of LARULENO that apply to state s are
1546 ;; those from LOOKAHEADS[s] through LOOKAHEADS[s+1]-1. Each element
1547 ;; of LARULENO is a rule number.
1548
1549 ;; If LR is the length of LARULENO, then a number from 0 to LR-1 can
1550 ;; specify both a rule and a state where the rule might be applied.
1551 ;; LA is a LR by NTOKENS matrix of bits.
1552 ;; LA[l, i] is 1 if the rule LARULENO[l] is applicable in the
1553 ;; appropriate state when the next token is symbol i.
1554 ;; If LA[l, i] and LA[l, j] are both 1 for i != j, it is a conflict.
1555
1556 (wisent-defcontext digraph
1557 INDEX R VERTICES
1558 infinity top)
1559
1560 (defun wisent-traverse (i)
1561 "Traverse I."
1562 (let (j k height Ri Fi break)
1563 (setq top (1+ top)
1564 height top)
1565 (aset VERTICES top i) ;; VERTICES[++top] = i
1566 (aset INDEX i top) ;; INDEX[i] = height = top
1567
1568 (setq Ri (aref R i))
1569 (when Ri
1570 (setq j 0)
1571 (while (>= (aref Ri j) 0)
1572 (if (zerop (aref INDEX (aref Ri j)))
1573 (wisent-traverse (aref Ri j)))
1574 ;; if (INDEX[i] > INDEX[R[i][j]])
1575 (if (> (aref INDEX i) (aref INDEX (aref Ri j)))
1576 ;; INDEX[i] = INDEX[R[i][j]];
1577 (aset INDEX i (aref INDEX (aref Ri j))))
1578 (setq Fi (aref F i)
1579 k 0)
1580 (while (< k tokensetsize)
1581 ;; F (i)[k] |= F (R[i][j])[k];
1582 (aset Fi k (logior (aref Fi k)
1583 (aref (aref F (aref Ri j)) k)))
1584 (setq k (1+ k)))
1585 (setq j (1+ j))))
1586
1587 (when (= (aref INDEX i) height)
1588 (setq break nil)
1589 (while (not break)
1590 (setq j (aref VERTICES top) ;; j = VERTICES[top--]
1591 top (1- top))
1592 (aset INDEX j infinity)
1593 (if (= i j)
1594 (setq break t)
1595 (setq k 0)
1596 (while (< k tokensetsize)
1597 ;; F (j)[k] = F (i)[k];
1598 (aset (aref F j) k (aref (aref F i) k))
1599 (setq k (1+ k))))))
1600 ))
1601
1602 (defun wisent-digraph (relation)
1603 "Digraph RELATION."
1604 (wisent-with-context digraph
1605 (setq infinity (+ ngotos 2)
1606 INDEX (make-vector (1+ ngotos) 0)
1607 VERTICES (make-vector (1+ ngotos) 0)
1608 top 0
1609 R relation)
1610 (let ((i 0))
1611 (while (< i ngotos)
1612 (if (and (= (aref INDEX i) 0) (aref R i))
1613 (wisent-traverse i))
1614 (setq i (1+ i))))))
1615
1616 (defun wisent-set-state-table ()
1617 "Build state table."
1618 (let (sp)
1619 (setq state-table (make-vector nstates nil)
1620 sp first-state)
1621 (while sp
1622 (aset state-table (core-number sp) sp)
1623 (setq sp (core-next sp)))))
1624
1625 (defun wisent-set-accessing-symbol ()
1626 "Build accessing symbol table."
1627 (let (sp)
1628 (setq accessing-symbol (make-vector nstates 0)
1629 sp first-state)
1630 (while sp
1631 (aset accessing-symbol (core-number sp) (core-accessing-symbol sp))
1632 (setq sp (core-next sp)))))
1633
1634 (defun wisent-set-shift-table ()
1635 "Build shift table."
1636 (let (sp)
1637 (setq shift-table (make-vector nstates nil)
1638 sp first-shift)
1639 (while sp
1640 (aset shift-table (shifts-number sp) sp)
1641 (setq sp (shifts-next sp)))))
1642
1643 (defun wisent-set-reduction-table ()
1644 "Build reduction table."
1645 (let (rp)
1646 (setq reduction-table (make-vector nstates nil)
1647 rp first-reduction)
1648 (while rp
1649 (aset reduction-table (reductions-number rp) rp)
1650 (setq rp (reductions-next rp)))))
1651
1652 (defun wisent-set-maxrhs ()
1653 "Setup MAXRHS length."
1654 (let (i len max)
1655 (setq len 0
1656 max 0
1657 i 0)
1658 (while (aref ritem i)
1659 (if (> (aref ritem i) 0)
1660 (setq len (1+ len))
1661 (if (> len max)
1662 (setq max len))
1663 (setq len 0))
1664 (setq i (1+ i)))
1665 (setq maxrhs max)))
1666
1667 (defun wisent-initialize-LA ()
1668 "Set up LA."
1669 (let (i j k count rp sp np v)
1670 (setq consistent (make-vector nstates nil)
1671 lookaheads (make-vector (1+ nstates) 0)
1672 count 0
1673 i 0)
1674 (while (< i nstates)
1675 (aset lookaheads i count)
1676 (setq rp (aref reduction-table i)
1677 sp (aref shift-table i))
1678 ;; if (rp &&
1679 ;; (rp->nreds > 1
1680 ;; || (sp && ! ISVAR(accessing-symbol[sp->shifts[0]]))))
1681 (if (and rp
1682 (or (> (reductions-nreds rp) 1)
1683 (and sp
1684 (not (wisent-ISVAR
1685 (aref accessing-symbol
1686 (aref (shifts-shifts sp) 0)))))))
1687 (setq count (+ count (reductions-nreds rp)))
1688 (aset consistent i t))
1689
1690 (when sp
1691 (setq k 0
1692 j (shifts-nshifts sp)
1693 v (shifts-shifts sp))
1694 (while (< k j)
1695 (when (= (aref accessing-symbol (aref v k))
1696 error-token-number)
1697 (aset consistent i nil)
1698 (setq k j)) ;; break
1699 (setq k (1+ k))))
1700 (setq i (1+ i)))
1701
1702 (aset lookaheads nstates count)
1703
1704 (if (zerop count)
1705 (progn
1706 (setq LA (make-vector 1 nil)
1707 LAruleno (make-vector 1 0)
1708 lookback (make-vector 1 nil)))
1709 (setq LA (make-vector count nil)
1710 LAruleno (make-vector count 0)
1711 lookback (make-vector count nil)))
1712 (setq i 0 j (length LA))
1713 (while (< i j)
1714 (aset LA i (make-vector tokensetsize 0))
1715 (setq i (1+ i)))
1716
1717 (setq np 0
1718 i 0)
1719 (while (< i nstates)
1720 (when (not (aref consistent i))
1721 (setq rp (aref reduction-table i))
1722 (when rp
1723 (setq j 0
1724 k (reductions-nreds rp)
1725 v (reductions-rules rp))
1726 (while (< j k)
1727 (aset LAruleno np (aref v j))
1728 (setq np (1+ np)
1729 j (1+ j)))))
1730 (setq i (1+ i)))))
1731
1732 (defun wisent-set-goto-map ()
1733 "Set up GOTO-MAP."
1734 (let (sp i j symbol k temp-map state1 state2 v)
1735 (setq goto-map (make-vector (1+ nvars) 0)
1736 temp-map (make-vector (1+ nvars) 0))
1737
1738 (setq ngotos 0
1739 sp first-shift)
1740 (while sp
1741 (setq i (1- (shifts-nshifts sp))
1742 v (shifts-shifts sp))
1743 (while (>= i 0)
1744 (setq symbol (aref accessing-symbol (aref v i)))
1745 (if (wisent-ISTOKEN symbol)
1746 (setq i 0) ;; break
1747 (setq ngotos (1+ ngotos))
1748 ;; goto-map[symbol]++;
1749 (aset goto-map (- symbol ntokens)
1750 (1+ (aref goto-map (- symbol ntokens)))))
1751 (setq i (1- i)))
1752 (setq sp (shifts-next sp)))
1753
1754 (setq k 0
1755 i ntokens
1756 j 0)
1757 (while (< i nsyms)
1758 (aset temp-map j k)
1759 (setq k (+ k (aref goto-map j))
1760 i (1+ i)
1761 j (1+ j)))
1762 (setq i ntokens
1763 j 0)
1764 (while (< i nsyms)
1765 (aset goto-map j (aref temp-map j))
1766 (setq i (1+ i)
1767 j (1+ j)))
1768 ;; goto-map[nsyms] = ngotos;
1769 ;; temp-map[nsyms] = ngotos;
1770 (aset goto-map j ngotos)
1771 (aset temp-map j ngotos)
1772
1773 (setq from-state (make-vector ngotos 0)
1774 to-state (make-vector ngotos 0)
1775 sp first-shift)
1776 (while sp
1777 (setq state1 (shifts-number sp)
1778 v (shifts-shifts sp)
1779 i (1- (shifts-nshifts sp)))
1780 (while (>= i 0)
1781 (setq state2 (aref v i)
1782 symbol (aref accessing-symbol state2))
1783 (if (wisent-ISTOKEN symbol)
1784 (setq i 0) ;; break
1785 ;; k = temp-map[symbol]++;
1786 (setq k (aref temp-map (- symbol ntokens)))
1787 (aset temp-map (- symbol ntokens) (1+ k))
1788 (aset from-state k state1)
1789 (aset to-state k state2))
1790 (setq i (1- i)))
1791 (setq sp (shifts-next sp)))
1792 ))
1793
1794 (defun wisent-map-goto (state symbol)
1795 "Map a STATE/SYMBOL pair into its numeric representation."
1796 (let (high low middle s result)
1797 ;; low = goto-map[symbol];
1798 ;; high = goto-map[symbol + 1] - 1;
1799 (setq low (aref goto-map (- symbol ntokens))
1800 high (1- (aref goto-map (- (1+ symbol) ntokens))))
1801 (while (and (not result) (<= low high))
1802 (setq middle (/ (+ low high) 2)
1803 s (aref from-state middle))
1804 (cond
1805 ((= s state)
1806 (setq result middle))
1807 ((< s state)
1808 (setq low (1+ middle)))
1809 (t
1810 (setq high (1- middle)))))
1811 (or result
1812 (error "Internal error in `wisent-map-goto'"))
1813 ))
1814
1815 (defun wisent-initialize-F ()
1816 "Set up F."
1817 (let (i j k sp edge rowp rp reads nedges stateno symbol v break)
1818 (setq F (make-vector ngotos nil)
1819 i 0)
1820 (while (< i ngotos)
1821 (aset F i (make-vector tokensetsize 0))
1822 (setq i (1+ i)))
1823
1824 (setq reads (make-vector ngotos nil)
1825 edge (make-vector (1+ ngotos) 0)
1826 nedges 0
1827 rowp 0 ;; rowp = F
1828 i 0)
1829 (while (< i ngotos)
1830 (setq stateno (aref to-state i)
1831 sp (aref shift-table stateno))
1832 (when sp
1833 (setq k (shifts-nshifts sp)
1834 v (shifts-shifts sp)
1835 j 0
1836 break nil)
1837 (while (and (not break) (< j k))
1838 ;; symbol = accessing-symbol[sp->shifts[j]];
1839 (setq symbol (aref accessing-symbol (aref v j)))
1840 (if (wisent-ISVAR symbol)
1841 (setq break t) ;; break
1842 (wisent-SETBIT (aref F rowp) symbol)
1843 (setq j (1+ j))))
1844
1845 (while (< j k)
1846 ;; symbol = accessing-symbol[sp->shifts[j]];
1847 (setq symbol (aref accessing-symbol (aref v j)))
1848 (when (aref nullable (- symbol ntokens))
1849 (aset edge nedges (wisent-map-goto stateno symbol))
1850 (setq nedges (1+ nedges)))
1851 (setq j (1+ j)))
1852
1853 (when (> nedges 0)
1854 ;; reads[i] = rp = NEW2(nedges + 1, short);
1855 (setq rp (make-vector (1+ nedges) 0)
1856 j 0)
1857 (aset reads i rp)
1858 (while (< j nedges)
1859 ;; rp[j] = edge[j];
1860 (aset rp j (aref edge j))
1861 (setq j (1+ j)))
1862 (aset rp nedges -1)
1863 (setq nedges 0)))
1864 (setq rowp (1+ rowp))
1865 (setq i (1+ i)))
1866 (wisent-digraph reads)
1867 ))
1868
1869 (defun wisent-add-lookback-edge (stateno ruleno gotono)
1870 "Add a lookback edge.
1871 STATENO, RULENO, GOTONO are self-explanatory."
1872 (let (i k found)
1873 (setq i (aref lookaheads stateno)
1874 k (aref lookaheads (1+ stateno))
1875 found nil)
1876 (while (and (not found) (< i k))
1877 (if (= (aref LAruleno i) ruleno)
1878 (setq found t)
1879 (setq i (1+ i))))
1880
1881 (or found
1882 (error "Internal error in `wisent-add-lookback-edge'"))
1883
1884 ;; value . next
1885 ;; lookback[i] = (gotono . lookback[i])
1886 (aset lookback i (cons gotono (aref lookback i)))))
1887
1888 (defun wisent-transpose (R-arg n)
1889 "Return the transpose of R-ARG, of size N.
1890 Destroy R-ARG, as it is replaced with the result. R-ARG[I] is nil or
1891 a -1 terminated list of numbers. RESULT[NUM] is nil or the -1
1892 terminated list of the I such as NUM is in R-ARG[I]."
1893 (let (i j new-R end-R nedges v sp)
1894 (setq new-R (make-vector n nil)
1895 end-R (make-vector n nil)
1896 nedges (make-vector n 0))
1897
1898 ;; Count.
1899 (setq i 0)
1900 (while (< i n)
1901 (setq v (aref R-arg i))
1902 (when v
1903 (setq j 0)
1904 (while (>= (aref v j) 0)
1905 (aset nedges (aref v j) (1+ (aref nedges (aref v j))))
1906 (setq j (1+ j))))
1907 (setq i (1+ i)))
1908
1909 ;; Allocate.
1910 (setq i 0)
1911 (while (< i n)
1912 (when (> (aref nedges i) 0)
1913 (setq sp (make-vector (1+ (aref nedges i)) 0))
1914 (aset sp (aref nedges i) -1)
1915 (aset new-R i sp)
1916 (aset end-R i 0))
1917 (setq i (1+ i)))
1918
1919 ;; Store.
1920 (setq i 0)
1921 (while (< i n)
1922 (setq v (aref R-arg i))
1923 (when v
1924 (setq j 0)
1925 (while (>= (aref v j) 0)
1926 (aset (aref new-R (aref v j)) (aref end-R (aref v j)) i)
1927 (aset end-R (aref v j) (1+ (aref end-R (aref v j))))
1928 (setq j (1+ j))))
1929 (setq i (1+ i)))
1930
1931 new-R))
1932
1933 (defun wisent-build-relations ()
1934 "Build relations."
1935 (let (i j k rulep rp sp length nedges done state1 stateno
1936 symbol1 symbol2 edge states v)
1937 (setq includes (make-vector ngotos nil)
1938 edge (make-vector (1+ ngotos) 0)
1939 states (make-vector (1+ maxrhs) 0)
1940 i 0)
1941
1942 (while (< i ngotos)
1943 (setq nedges 0
1944 state1 (aref from-state i)
1945 symbol1 (aref accessing-symbol (aref to-state i))
1946 rulep (aref derives (- symbol1 ntokens)))
1947
1948 (while (> (car rulep) 0)
1949 (aset states 0 state1)
1950 (setq length 1
1951 stateno state1
1952 rp (aref rrhs (car rulep))) ;; rp = ritem + rrhs[*rulep]
1953 (while (> (aref ritem rp) 0) ;; *rp > 0
1954 (setq symbol2 (aref ritem rp)
1955 sp (aref shift-table stateno)
1956 k (shifts-nshifts sp)
1957 v (shifts-shifts sp)
1958 j 0)
1959 (while (< j k)
1960 (setq stateno (aref v j))
1961 (if (= (aref accessing-symbol stateno) symbol2)
1962 (setq j k) ;; break
1963 (setq j (1+ j))))
1964 ;; states[length++] = stateno;
1965 (aset states length stateno)
1966 (setq length (1+ length))
1967 (setq rp (1+ rp)))
1968
1969 (if (not (aref consistent stateno))
1970 (wisent-add-lookback-edge stateno (car rulep) i))
1971
1972 (setq length (1- length)
1973 done nil)
1974 (while (not done)
1975 (setq done t
1976 rp (1- rp))
1977 (when (and (>= rp 0) (wisent-ISVAR (aref ritem rp)))
1978 ;; stateno = states[--length];
1979 (setq length (1- length)
1980 stateno (aref states length))
1981 (aset edge nedges (wisent-map-goto stateno (aref ritem rp)))
1982 (setq nedges (1+ nedges))
1983 (if (aref nullable (- (aref ritem rp) ntokens))
1984 (setq done nil))))
1985 (setq rulep (cdr rulep)))
1986
1987 (when (> nedges 0)
1988 (setq v (make-vector (1+ nedges) 0)
1989 j 0)
1990 (aset includes i v)
1991 (while (< j nedges)
1992 (aset v j (aref edge j))
1993 (setq j (1+ j)))
1994 (aset v nedges -1))
1995 (setq i (1+ i)))
1996
1997 (setq includes (wisent-transpose includes ngotos))
1998 ))
1999
2000 (defun wisent-compute-FOLLOWS ()
2001 "Compute follows."
2002 (wisent-digraph includes))
2003
2004 (defun wisent-compute-lookaheads ()
2005 "Compute lookaheads."
2006 (let (i j n v1 v2 sp)
2007 (setq n (aref lookaheads nstates)
2008 i 0)
2009 (while (< i n)
2010 (setq sp (aref lookback i))
2011 (while sp
2012 (setq v1 (aref LA i)
2013 v2 (aref F (car sp))
2014 j 0)
2015 (while (< j tokensetsize)
2016 ;; LA (i)[j] |= F (sp->value)[j]
2017 (aset v1 j (logior (aref v1 j) (aref v2 j)))
2018 (setq j (1+ j)))
2019 (setq sp (cdr sp)))
2020 (setq i (1+ i)))))
2021
2022 (defun wisent-lalr ()
2023 "Make the nondeterministic finite state machine deterministic."
2024 (setq tokensetsize (wisent-WORDSIZE ntokens))
2025 (wisent-set-state-table)
2026 (wisent-set-accessing-symbol)
2027 (wisent-set-shift-table)
2028 (wisent-set-reduction-table)
2029 (wisent-set-maxrhs)
2030 (wisent-initialize-LA)
2031 (wisent-set-goto-map)
2032 (wisent-initialize-F)
2033 (wisent-build-relations)
2034 (wisent-compute-FOLLOWS)
2035 (wisent-compute-lookaheads))
2036 \f
2037 ;;;; -----------------------------------------------
2038 ;;;; Find and resolve or report look-ahead conflicts
2039 ;;;; -----------------------------------------------
2040
2041 (defsubst wisent-log-resolution (state LAno token resolution)
2042 "Log a shift-reduce conflict resolution.
2043 In specified STATE between rule pointed by lookahead number LANO and
2044 TOKEN, resolved as RESOLUTION."
2045 (if (or wisent-verbose-flag wisent-debug-flag)
2046 (wisent-log
2047 "Conflict in state %d between rule %d and token %s resolved as %s.\n"
2048 state (aref LAruleno LAno) (wisent-tag token) resolution)))
2049
2050 (defun wisent-flush-shift (state token)
2051 "Turn off the shift recorded in the specified STATE for TOKEN.
2052 Used when we resolve a shift-reduce conflict in favor of the reduction."
2053 (let (shiftp i k v)
2054 (when (setq shiftp (aref shift-table state))
2055 (setq k (shifts-nshifts shiftp)
2056 v (shifts-shifts shiftp)
2057 i 0)
2058 (while (< i k)
2059 (if (and (not (zerop (aref v i)))
2060 (= token (aref accessing-symbol (aref v i))))
2061 (aset v i 0))
2062 (setq i (1+ i))))))
2063
2064 (defun wisent-resolve-sr-conflict (state lookaheadnum)
2065 "Attempt to resolve shift-reduce conflict for one rule.
2066 Resolve by means of precedence declarations. The conflict occurred in
2067 specified STATE for the rule pointed by the lookahead symbol
2068 LOOKAHEADNUM. It has already been checked that the rule has a
2069 precedence. A conflict is resolved by modifying the shift or reduce
2070 tables so that there is no longer a conflict."
2071 (let (i redprec errp errs nerrs token sprec sassoc)
2072 ;; Find the rule to reduce by to get precedence of reduction
2073 (setq token (aref tags (aref rprec (aref LAruleno lookaheadnum)))
2074 redprec (wisent-prec token)
2075 errp (make-errs)
2076 errs (make-vector ntokens 0)
2077 nerrs 0
2078 i 0)
2079 (set-errs-errs errp errs)
2080 (while (< i ntokens)
2081 (setq token (aref tags i))
2082 (when (and (wisent-BITISSET (aref LA lookaheadnum) i)
2083 (wisent-BITISSET lookaheadset i)
2084 (setq sprec (wisent-prec token)))
2085 ;; Shift-reduce conflict occurs for token number I and it has
2086 ;; a precedence. The precedence of shifting is that of token
2087 ;; I.
2088 (cond
2089 ((< sprec redprec)
2090 (wisent-log-resolution state lookaheadnum i "reduce")
2091 ;; Flush the shift for this token
2092 (wisent-RESETBIT lookaheadset i)
2093 (wisent-flush-shift state i)
2094 )
2095 ((> sprec redprec)
2096 (wisent-log-resolution state lookaheadnum i "shift")
2097 ;; Flush the reduce for this token
2098 (wisent-RESETBIT (aref LA lookaheadnum) i)
2099 )
2100 (t
2101 ;; Matching precedence levels.
2102 ;; For left association, keep only the reduction.
2103 ;; For right association, keep only the shift.
2104 ;; For nonassociation, keep neither.
2105 (setq sassoc (wisent-assoc token))
2106 (cond
2107 ((eq sassoc 'right)
2108 (wisent-log-resolution state lookaheadnum i "shift"))
2109 ((eq sassoc 'left)
2110 (wisent-log-resolution state lookaheadnum i "reduce"))
2111 ((eq sassoc 'nonassoc)
2112 (wisent-log-resolution state lookaheadnum i "an error"))
2113 )
2114 (when (not (eq sassoc 'right))
2115 ;; Flush the shift for this token
2116 (wisent-RESETBIT lookaheadset i)
2117 (wisent-flush-shift state i))
2118 (when (not (eq sassoc 'left))
2119 ;; Flush the reduce for this token
2120 (wisent-RESETBIT (aref LA lookaheadnum) i))
2121 (when (eq sassoc 'nonassoc)
2122 ;; Record an explicit error for this token
2123 (aset errs nerrs i)
2124 (setq nerrs (1+ nerrs)))
2125 )))
2126 (setq i (1+ i)))
2127 (when (> nerrs 0)
2128 (set-errs-nerrs errp nerrs)
2129 (aset err-table state errp))
2130 ))
2131
2132 (defun wisent-set-conflicts (state)
2133 "Find and attempt to resolve conflicts in specified STATE."
2134 (let (i j k v shiftp symbol)
2135 (unless (aref consistent state)
2136 (fillarray lookaheadset 0)
2137
2138 (when (setq shiftp (aref shift-table state))
2139 (setq k (shifts-nshifts shiftp)
2140 v (shifts-shifts shiftp)
2141 i 0)
2142 (while (and (< i k)
2143 (wisent-ISTOKEN
2144 (setq symbol (aref accessing-symbol (aref v i)))))
2145 (or (zerop (aref v i))
2146 (wisent-SETBIT lookaheadset symbol))
2147 (setq i (1+ i))))
2148
2149 ;; Loop over all rules which require lookahead in this state
2150 ;; first check for shift-reduce conflict, and try to resolve
2151 ;; using precedence
2152 (setq i (aref lookaheads state)
2153 k (aref lookaheads (1+ state)))
2154 (while (< i k)
2155 (when (aref rprec (aref LAruleno i))
2156 (setq v (aref LA i)
2157 j 0)
2158 (while (< j tokensetsize)
2159 (if (zerop (logand (aref v j) (aref lookaheadset j)))
2160 (setq j (1+ j))
2161 ;; if (LA (i)[j] & lookaheadset[j])
2162 (wisent-resolve-sr-conflict state i)
2163 (setq j tokensetsize)))) ;; break
2164 (setq i (1+ i)))
2165
2166 ;; Loop over all rules which require lookahead in this state
2167 ;; Check for conflicts not resolved above.
2168 (setq i (aref lookaheads state))
2169 (while (< i k)
2170 (setq v (aref LA i)
2171 j 0)
2172 (while (< j tokensetsize)
2173 ;; if (LA (i)[j] & lookaheadset[j])
2174 (if (not (zerop (logand (aref v j) (aref lookaheadset j))))
2175 (aset conflicts state t))
2176 (setq j (1+ j)))
2177 (setq j 0)
2178 (while (< j tokensetsize)
2179 ;; lookaheadset[j] |= LA (i)[j];
2180 (aset lookaheadset j (logior (aref lookaheadset j)
2181 (aref v j)))
2182 (setq j (1+ j)))
2183 (setq i (1+ i)))
2184 )))
2185
2186 (defun wisent-resolve-conflicts ()
2187 "Find and resolve conflicts."
2188 (let (i)
2189 (setq conflicts (make-vector nstates nil)
2190 shiftset (make-vector tokensetsize 0)
2191 lookaheadset (make-vector tokensetsize 0)
2192 err-table (make-vector nstates nil)
2193 i 0)
2194 (while (< i nstates)
2195 (wisent-set-conflicts i)
2196 (setq i (1+ i)))))
2197
2198 (defun wisent-count-sr-conflicts (state)
2199 "Count the number of shift/reduce conflicts in specified STATE."
2200 (let (i j k shiftp symbol v)
2201 (setq src-count 0
2202 shiftp (aref shift-table state))
2203 (when shiftp
2204 (fillarray shiftset 0)
2205 (fillarray lookaheadset 0)
2206 (setq k (shifts-nshifts shiftp)
2207 v (shifts-shifts shiftp)
2208 i 0)
2209 (while (< i k)
2210 (when (not (zerop (aref v i)))
2211 (setq symbol (aref accessing-symbol (aref v i)))
2212 (if (wisent-ISVAR symbol)
2213 (setq i k) ;; break
2214 (wisent-SETBIT shiftset symbol)))
2215 (setq i (1+ i)))
2216
2217 (setq k (aref lookaheads (1+ state))
2218 i (aref lookaheads state))
2219 (while (< i k)
2220 (setq v (aref LA i)
2221 j 0)
2222 (while (< j tokensetsize)
2223 ;; lookaheadset[j] |= LA (i)[j]
2224 (aset lookaheadset j (logior (aref lookaheadset j)
2225 (aref v j)))
2226 (setq j (1+ j)))
2227 (setq i (1+ i)))
2228
2229 (setq k 0)
2230 (while (< k tokensetsize)
2231 ;; lookaheadset[k] &= shiftset[k];
2232 (aset lookaheadset k (logand (aref lookaheadset k)
2233 (aref shiftset k)))
2234 (setq k (1+ k)))
2235
2236 (setq i 0)
2237 (while (< i ntokens)
2238 (if (wisent-BITISSET lookaheadset i)
2239 (setq src-count (1+ src-count)))
2240 (setq i (1+ i))))
2241 src-count))
2242
2243 (defun wisent-count-rr-conflicts (state)
2244 "Count the number of reduce/reduce conflicts in specified STATE."
2245 (let (i j count n m)
2246 (setq rrc-count 0
2247 m (aref lookaheads state)
2248 n (aref lookaheads (1+ state)))
2249 (when (>= (- n m) 2)
2250 (setq i 0)
2251 (while (< i ntokens)
2252 (setq count 0
2253 j m)
2254 (while (< j n)
2255 (if (wisent-BITISSET (aref LA j) i)
2256 (setq count (1+ count)))
2257 (setq j (1+ j)))
2258
2259 (if (>= count 2)
2260 (setq rrc-count (1+ rrc-count)))
2261 (setq i (1+ i))))
2262 rrc-count))
2263
2264 (defvar wisent-expected-conflicts nil
2265 "*If non-nil suppress the warning about shift/reduce conflicts.
2266 It is a decimal integer N that says there should be no warning if
2267 there are N shift/reduce conflicts and no reduce/reduce conflicts. A
2268 warning is given if there are either more or fewer conflicts, or if
2269 there are any reduce/reduce conflicts.")
2270
2271 (defun wisent-total-conflicts ()
2272 "Report the total number of conflicts."
2273 (unless (and (zerop rrc-total)
2274 (or (zerop src-total)
2275 (= src-total (or wisent-expected-conflicts 0))))
2276 (let* ((src (wisent-source))
2277 (src (if src (concat " in " src) ""))
2278 (msg (format "Grammar%s contains" src)))
2279 (if (> src-total 0)
2280 (setq msg (format "%s %d shift/reduce conflict%s"
2281 msg src-total (if (> src-total 1)
2282 "s" ""))))
2283 (if (and (> src-total 0) (> rrc-total 0))
2284 (setq msg (format "%s and" msg)))
2285 (if (> rrc-total 0)
2286 (setq msg (format "%s %d reduce/reduce conflict%s"
2287 msg rrc-total (if (> rrc-total 1)
2288 "s" ""))))
2289 (message msg))))
2290
2291 (defun wisent-print-conflicts ()
2292 "Report conflicts."
2293 (let (i)
2294 (setq src-total 0
2295 rrc-total 0
2296 i 0)
2297 (while (< i nstates)
2298 (when (aref conflicts i)
2299 (wisent-count-sr-conflicts i)
2300 (wisent-count-rr-conflicts i)
2301 (setq src-total (+ src-total src-count)
2302 rrc-total (+ rrc-total rrc-count))
2303 (when (or wisent-verbose-flag wisent-debug-flag)
2304 (wisent-log "State %d contains" i)
2305 (if (> src-count 0)
2306 (wisent-log " %d shift/reduce conflict%s"
2307 src-count (if (> src-count 1) "s" "")))
2308
2309 (if (and (> src-count 0) (> rrc-count 0))
2310 (wisent-log " and"))
2311
2312 (if (> rrc-count 0)
2313 (wisent-log " %d reduce/reduce conflict%s"
2314 rrc-count (if (> rrc-count 1) "s" "")))
2315
2316 (wisent-log ".\n")))
2317 (setq i (1+ i)))
2318 (wisent-total-conflicts)))
2319 \f
2320 ;;;; --------------------------------------
2321 ;;;; Report information on generated parser
2322 ;;;; --------------------------------------
2323 (defun wisent-print-grammar ()
2324 "Print grammar."
2325 (let (i j r break left-count right-count)
2326
2327 (wisent-log "\n\nGrammar\n\n Number, Rule\n")
2328 (setq i 1)
2329 (while (<= i nrules)
2330 ;; Don't print rules disabled in `wisent-reduce-grammar-tables'.
2331 (when (aref ruseful i)
2332 (wisent-log " %s %s ->"
2333 (wisent-pad-string (number-to-string i) 6)
2334 (wisent-tag (aref rlhs i)))
2335 (setq r (aref rrhs i))
2336 (if (> (aref ritem r) 0)
2337 (while (> (aref ritem r) 0)
2338 (wisent-log " %s" (wisent-tag (aref ritem r)))
2339 (setq r (1+ r)))
2340 (wisent-log " /* empty */"))
2341 (wisent-log "\n"))
2342 (setq i (1+ i)))
2343
2344 (wisent-log "\n\nTerminals, with rules where they appear\n\n")
2345 (wisent-log "%s (-1)\n" (wisent-tag 0))
2346 (setq i 1)
2347 (while (< i ntokens)
2348 (wisent-log "%s (%d)" (wisent-tag i) i)
2349 (setq j 1)
2350 (while (<= j nrules)
2351 (setq r (aref rrhs j)
2352 break nil)
2353 (while (and (not break) (> (aref ritem r) 0))
2354 (if (setq break (= (aref ritem r) i))
2355 (wisent-log " %d" j)
2356 (setq r (1+ r))))
2357 (setq j (1+ j)))
2358 (wisent-log "\n")
2359 (setq i (1+ i)))
2360
2361 (wisent-log "\n\nNonterminals, with rules where they appear\n\n")
2362 (setq i ntokens)
2363 (while (< i nsyms)
2364 (setq left-count 0
2365 right-count 0
2366 j 1)
2367 (while (<= j nrules)
2368 (if (= (aref rlhs j) i)
2369 (setq left-count (1+ left-count)))
2370 (setq r (aref rrhs j)
2371 break nil)
2372 (while (and (not break) (> (aref ritem r) 0))
2373 (if (= (aref ritem r) i)
2374 (setq right-count (1+ right-count)
2375 break t)
2376 (setq r (1+ r))))
2377 (setq j (1+ j)))
2378 (wisent-log "%s (%d)\n " (wisent-tag i) i)
2379 (when (> left-count 0)
2380 (wisent-log " on left:")
2381 (setq j 1)
2382 (while (<= j nrules)
2383 (if (= (aref rlhs j) i)
2384 (wisent-log " %d" j))
2385 (setq j (1+ j))))
2386 (when (> right-count 0)
2387 (if (> left-count 0)
2388 (wisent-log ","))
2389 (wisent-log " on right:")
2390 (setq j 1)
2391 (while (<= j nrules)
2392 (setq r (aref rrhs j)
2393 break nil)
2394 (while (and (not break) (> (aref ritem r) 0))
2395 (if (setq break (= (aref ritem r) i))
2396 (wisent-log " %d" j)
2397 (setq r (1+ r))))
2398 (setq j (1+ j))))
2399 (wisent-log "\n")
2400 (setq i (1+ i)))
2401 ))
2402
2403 (defun wisent-print-reductions (state)
2404 "Print reductions on STATE."
2405 (let (i j k v symbol m n defaulted
2406 default-LA default-rule cmax count shiftp errp nodefault)
2407 (setq nodefault nil
2408 i 0)
2409 (fillarray shiftset 0)
2410
2411 (setq shiftp (aref shift-table state))
2412 (when shiftp
2413 (setq k (shifts-nshifts shiftp)
2414 v (shifts-shifts shiftp)
2415 i 0)
2416 (while (< i k)
2417 (when (not (zerop (aref v i)))
2418 (setq symbol (aref accessing-symbol (aref v i)))
2419 (if (wisent-ISVAR symbol)
2420 (setq i k) ;; break
2421 ;; If this state has a shift for the error token, don't
2422 ;; use a default rule.
2423 (if (= symbol error-token-number)
2424 (setq nodefault t))
2425 (wisent-SETBIT shiftset symbol)))
2426 (setq i (1+ i))))
2427
2428 (setq errp (aref err-table state))
2429 (when errp
2430 (setq k (errs-nerrs errp)
2431 v (errs-errs errp)
2432 i 0)
2433 (while (< i k)
2434 (if (not (zerop (setq symbol (aref v i))))
2435 (wisent-SETBIT shiftset symbol))
2436 (setq i (1+ i))))
2437
2438 (setq m (aref lookaheads state)
2439 n (aref lookaheads (1+ state)))
2440
2441 (cond
2442 ((and (= (- n m) 1) (not nodefault))
2443 (setq default-rule (aref LAruleno m)
2444 v (aref LA m)
2445 k 0)
2446 (while (< k tokensetsize)
2447 (aset lookaheadset k (logand (aref v k)
2448 (aref shiftset k)))
2449 (setq k (1+ k)))
2450
2451 (setq i 0)
2452 (while (< i ntokens)
2453 (if (wisent-BITISSET lookaheadset i)
2454 (wisent-log " %s\t[reduce using rule %d (%s)]\n"
2455 (wisent-tag i) default-rule
2456 (wisent-tag (aref rlhs default-rule))))
2457 (setq i (1+ i)))
2458 (wisent-log " $default\treduce using rule %d (%s)\n\n"
2459 default-rule
2460 (wisent-tag (aref rlhs default-rule)))
2461 )
2462 ((>= (- n m) 1)
2463 (setq cmax 0
2464 default-LA -1
2465 default-rule 0)
2466 (when (not nodefault)
2467 (setq i m)
2468 (while (< i n)
2469 (setq v (aref LA i)
2470 count 0
2471 k 0)
2472 (while (< k tokensetsize)
2473 ;; lookaheadset[k] = LA (i)[k] & ~shiftset[k]
2474 (aset lookaheadset k
2475 (logand (aref v k)
2476 (lognot (aref shiftset k))))
2477 (setq k (1+ k)))
2478 (setq j 0)
2479 (while (< j ntokens)
2480 (if (wisent-BITISSET lookaheadset j)
2481 (setq count (1+ count)))
2482 (setq j (1+ j)))
2483 (if (> count cmax)
2484 (setq cmax count
2485 default-LA i
2486 default-rule (aref LAruleno i)))
2487 (setq k 0)
2488 (while (< k tokensetsize)
2489 (aset shiftset k (logior (aref shiftset k)
2490 (aref lookaheadset k)))
2491 (setq k (1+ k)))
2492 (setq i (1+ i))))
2493
2494 (fillarray shiftset 0)
2495
2496 (when shiftp
2497 (setq k (shifts-nshifts shiftp)
2498 v (shifts-shifts shiftp)
2499 i 0)
2500 (while (< i k)
2501 (when (not (zerop (aref v i)))
2502 (setq symbol (aref accessing-symbol (aref v i)))
2503 (if (wisent-ISVAR symbol)
2504 (setq i k) ;; break
2505 (wisent-SETBIT shiftset symbol)))
2506 (setq i (1+ i))))
2507
2508 (setq i 0)
2509 (while (< i ntokens)
2510 (setq defaulted nil
2511 count (if (wisent-BITISSET shiftset i) 1 0)
2512 j m)
2513 (while (< j n)
2514 (when (wisent-BITISSET (aref LA j) i)
2515 (if (zerop count)
2516 (progn
2517 (if (not (= j default-LA))
2518 (wisent-log
2519 " %s\treduce using rule %d (%s)\n"
2520 (wisent-tag i) (aref LAruleno j)
2521 (wisent-tag (aref rlhs (aref LAruleno j))))
2522 (setq defaulted t))
2523 (setq count (1+ count)))
2524 (if defaulted
2525 (wisent-log
2526 " %s\treduce using rule %d (%s)\n"
2527 (wisent-tag i) (aref LAruleno default-LA)
2528 (wisent-tag (aref rlhs (aref LAruleno default-LA)))))
2529 (setq defaulted nil)
2530 (wisent-log
2531 " %s\t[reduce using rule %d (%s)]\n"
2532 (wisent-tag i) (aref LAruleno j)
2533 (wisent-tag (aref rlhs (aref LAruleno j))))))
2534 (setq j (1+ j)))
2535 (setq i (1+ i)))
2536
2537 (if (>= default-LA 0)
2538 (wisent-log
2539 " $default\treduce using rule %d (%s)\n"
2540 default-rule
2541 (wisent-tag (aref rlhs default-rule))))
2542 ))))
2543
2544 (defun wisent-print-actions (state)
2545 "Print actions on STATE."
2546 (let (i j k v state1 symbol shiftp errp redp rule nerrs break)
2547 (setq shiftp (aref shift-table state)
2548 redp (aref reduction-table state)
2549 errp (aref err-table state))
2550 (if (and (not shiftp) (not redp))
2551 (if (= final-state state)
2552 (wisent-log " $default\taccept\n")
2553 (wisent-log " NO ACTIONS\n"))
2554 (if (not shiftp)
2555 (setq i 0
2556 k 0)
2557 (setq k (shifts-nshifts shiftp)
2558 v (shifts-shifts shiftp)
2559 i 0
2560 break nil)
2561 (while (and (not break) (< i k))
2562 (if (zerop (setq state1 (aref v i)))
2563 (setq i (1+ i))
2564 (setq symbol (aref accessing-symbol state1))
2565 ;; The following line used to be turned off.
2566 (if (wisent-ISVAR symbol)
2567 (setq break t) ;; break
2568 (wisent-log " %s\tshift, and go to state %d\n"
2569 (wisent-tag symbol) state1)
2570 (setq i (1+ i)))))
2571 (if (> i 0)
2572 (wisent-log "\n")))
2573
2574 (when errp
2575 (setq nerrs (errs-nerrs errp)
2576 v (errs-errs errp)
2577 j 0)
2578 (while (< j nerrs)
2579 (if (aref v j)
2580 (wisent-log " %s\terror (nonassociative)\n"
2581 (wisent-tag (aref v j))))
2582 (setq j (1+ j)))
2583 (if (> j 0)
2584 (wisent-log "\n")))
2585
2586 (cond
2587 ((and (aref consistent state) redp)
2588 (setq rule (aref (reductions-rules redp) 0)
2589 symbol (aref rlhs rule))
2590 (wisent-log " $default\treduce using rule %d (%s)\n\n"
2591 rule (wisent-tag symbol))
2592 )
2593 (redp
2594 (wisent-print-reductions state)
2595 ))
2596
2597 (when (< i k)
2598 (setq v (shifts-shifts shiftp))
2599 (while (< i k)
2600 (when (setq state1 (aref v i))
2601 (setq symbol (aref accessing-symbol state1))
2602 (wisent-log " %s\tgo to state %d\n"
2603 (wisent-tag symbol) state1))
2604 (setq i (1+ i)))
2605 (wisent-log "\n"))
2606 )))
2607
2608 (defun wisent-print-core (state)
2609 "Print STATE core."
2610 (let (i k rule statep sp sp1)
2611 (setq statep (aref state-table state)
2612 k (core-nitems statep))
2613 (when (> k 0)
2614 (setq i 0)
2615 (while (< i k)
2616 ;; sp1 = sp = ritem + statep->items[i];
2617 (setq sp1 (aref (core-items statep) i)
2618 sp sp1)
2619 (while (> (aref ritem sp) 0)
2620 (setq sp (1+ sp)))
2621
2622 (setq rule (- (aref ritem sp)))
2623 (wisent-log " %s -> " (wisent-tag (aref rlhs rule)))
2624
2625 (setq sp (aref rrhs rule))
2626 (while (< sp sp1)
2627 (wisent-log "%s " (wisent-tag (aref ritem sp)))
2628 (setq sp (1+ sp)))
2629 (wisent-log ".")
2630 (while (> (aref ritem sp) 0)
2631 (wisent-log " %s" (wisent-tag (aref ritem sp)))
2632 (setq sp (1+ sp)))
2633 (wisent-log " (rule %d)\n" rule)
2634 (setq i (1+ i)))
2635 (wisent-log "\n"))))
2636
2637 (defun wisent-print-state (state)
2638 "Print information on STATE."
2639 (wisent-log "\n\nstate %d\n\n" state)
2640 (wisent-print-core state)
2641 (wisent-print-actions state))
2642
2643 (defun wisent-print-states ()
2644 "Print information on states."
2645 (let ((i 0))
2646 (while (< i nstates)
2647 (wisent-print-state i)
2648 (setq i (1+ i)))))
2649
2650 (defun wisent-print-results ()
2651 "Print information on generated parser.
2652 Report detailed information if `wisent-verbose-flag' or
2653 `wisent-debug-flag' are non-nil."
2654 (when (or wisent-verbose-flag wisent-debug-flag)
2655 (wisent-print-useless))
2656 (wisent-print-conflicts)
2657 (when (or wisent-verbose-flag wisent-debug-flag)
2658 (wisent-print-grammar)
2659 (wisent-print-states))
2660 ;; Append output to log file when running in batch mode
2661 (when (wisent-noninteractive)
2662 (wisent-append-to-log-file)
2663 (wisent-clear-log)))
2664 \f
2665 ;;;; ---------------------------------
2666 ;;;; Build the generated parser tables
2667 ;;;; ---------------------------------
2668
2669 (defun wisent-action-row (state actrow)
2670 "Figure out the actions for the specified STATE.
2671 Decide what to do for each type of token if seen as the lookahead
2672 token in specified state. The value returned is used as the default
2673 action for the state. In addition, ACTROW is filled with what to do
2674 for each kind of token, index by symbol number, with nil meaning do
2675 the default action. The value 'error, means this situation is an
2676 error. The parser recognizes this value specially.
2677
2678 This is where conflicts are resolved. The loop over lookahead rules
2679 considered lower-numbered rules last, and the last rule considered
2680 that likes a token gets to handle it."
2681 (let (i j k m n v default-rule nreds rule max count
2682 shift-state symbol redp shiftp errp nodefault)
2683
2684 (fillarray actrow nil)
2685
2686 (setq default-rule 0
2687 nodefault nil ;; nil inhibit having any default reduction
2688 nreds 0
2689 m 0
2690 n 0
2691 redp (aref reduction-table state))
2692
2693 (when redp
2694 (setq nreds (reductions-nreds redp))
2695 (when (>= nreds 1)
2696 ;; loop over all the rules available here which require
2697 ;; lookahead
2698 (setq m (aref lookaheads state)
2699 n (aref lookaheads (1+ state))
2700 i (1- n))
2701 (while (>= i m)
2702 ;; and find each token which the rule finds acceptable to
2703 ;; come next
2704 (setq j 0)
2705 (while (< j ntokens)
2706 ;; and record this rule as the rule to use if that token
2707 ;; follows.
2708 (if (wisent-BITISSET (aref LA i) j)
2709 (aset actrow j (- (aref LAruleno i)))
2710 )
2711 (setq j (1+ j)))
2712 (setq i (1- i)))))
2713
2714 ;; Now see which tokens are allowed for shifts in this state. For
2715 ;; them, record the shift as the thing to do. So shift is
2716 ;; preferred to reduce.
2717 (setq shiftp (aref shift-table state))
2718 (when shiftp
2719 (setq k (shifts-nshifts shiftp)
2720 v (shifts-shifts shiftp)
2721 i 0)
2722 (while (< i k)
2723 (setq shift-state (aref v i))
2724 (if (zerop shift-state)
2725 nil ;; continue
2726 (setq symbol (aref accessing-symbol shift-state))
2727 (if (wisent-ISVAR symbol)
2728 (setq i k) ;; break
2729 (aset actrow symbol shift-state)
2730 ;; Do not use any default reduction if there is a shift
2731 ;; for error
2732 (if (= symbol error-token-number)
2733 (setq nodefault t))))
2734 (setq i (1+ i))))
2735
2736 ;; See which tokens are an explicit error in this state (due to
2737 ;; %nonassoc). For them, record error as the action.
2738 (setq errp (aref err-table state))
2739 (when errp
2740 (setq k (errs-nerrs errp)
2741 v (errs-errs errp)
2742 i 0)
2743 (while (< i k)
2744 (aset actrow (aref v i) wisent-error-tag)
2745 (setq i (1+ i))))
2746
2747 ;; Now find the most common reduction and make it the default
2748 ;; action for this state.
2749 (when (and (>= nreds 1) (not nodefault))
2750 (if (aref consistent state)
2751 (setq default-rule (- (aref (reductions-rules redp) 0)))
2752 (setq max 0
2753 i m)
2754 (while (< i n)
2755 (setq count 0
2756 rule (- (aref LAruleno i))
2757 j 0)
2758 (while (< j ntokens)
2759 (if (and (numberp (aref actrow j))
2760 (= (aref actrow j) rule))
2761 (setq count (1+ count)))
2762 (setq j (1+ j)))
2763 (if (> count max)
2764 (setq max count
2765 default-rule rule))
2766 (setq i (1+ i)))
2767 ;; actions which match the default are replaced with zero,
2768 ;; which means "use the default"
2769 (when (> max 0)
2770 (setq j 0)
2771 (while (< j ntokens)
2772 (if (and (numberp (aref actrow j))
2773 (= (aref actrow j) default-rule))
2774 (aset actrow j nil))
2775 (setq j (1+ j)))
2776 )))
2777
2778 ;; If have no default rule, if this is the final state the default
2779 ;; is accept else it is an error. So replace any action which
2780 ;; says "error" with "use default".
2781 (when (zerop default-rule)
2782 (if (= final-state state)
2783 (setq default-rule wisent-accept-tag)
2784 (setq j 0)
2785 (while (< j ntokens)
2786 (if (eq (aref actrow j) wisent-error-tag)
2787 (aset actrow j nil))
2788 (setq j (1+ j)))
2789 (setq default-rule wisent-error-tag)))
2790 default-rule))
2791
2792 (defconst wisent-default-tag 'default
2793 "Tag used in an action table to indicate a default action.")
2794
2795 ;; These variables only exist locally in the function
2796 ;; `wisent-state-actions' and are shared by all other nested callees.
2797 (wisent-defcontext semantic-actions
2798 ;; Uninterned symbols used in code generation.
2799 stack sp gotos state
2800 ;; Name of the current semantic action
2801 NAME)
2802
2803 (defun wisent-state-actions ()
2804 "Figure out the actions for every state.
2805 Return the action table."
2806 ;; Store the semantic action obarray in (unused) RCODE[0].
2807 (aset rcode 0 (make-vector 13 0))
2808 (let (i j action-table actrow action)
2809 (setq action-table (make-vector nstates nil)
2810 actrow (make-vector ntokens nil)
2811 i 0)
2812 (wisent-with-context semantic-actions
2813 (setq stack (make-symbol "stack")
2814 sp (make-symbol "sp")
2815 gotos (make-symbol "gotos")
2816 state (make-symbol "state"))
2817 (while (< i nstates)
2818 (setq action (wisent-action-row i actrow))
2819 ;; Translate a reduction into semantic action
2820 (and (integerp action) (< action 0)
2821 (setq action (wisent-semantic-action (- action))))
2822 (aset action-table i (list (cons wisent-default-tag action)))
2823 (setq j 0)
2824 (while (< j ntokens)
2825 (when (setq action (aref actrow j))
2826 ;; Translate a reduction into semantic action
2827 (and (integerp action) (< action 0)
2828 (setq action (wisent-semantic-action (- action))))
2829 (aset action-table i (cons (cons (aref tags j) action)
2830 (aref action-table i)))
2831 )
2832 (setq j (1+ j)))
2833 (aset action-table i (nreverse (aref action-table i)))
2834 (setq i (1+ i)))
2835 action-table)))
2836
2837 (defun wisent-goto-actions ()
2838 "Figure out what to do after reducing with each rule.
2839 Depending on the saved state from before the beginning of parsing the
2840 data that matched this rule. Return the goto table."
2841 (let (i j m n symbol state goto-table)
2842 (setq goto-table (make-vector nstates nil)
2843 i ntokens)
2844 (while (< i nsyms)
2845 (setq symbol (- i ntokens)
2846 m (aref goto-map symbol)
2847 n (aref goto-map (1+ symbol))
2848 j m)
2849 (while (< j n)
2850 (setq state (aref from-state j))
2851 (aset goto-table state
2852 (cons (cons (aref tags i) (aref to-state j))
2853 (aref goto-table state)))
2854 (setq j (1+ j)))
2855 (setq i (1+ i)))
2856 goto-table))
2857
2858 (defsubst wisent-quote-p (sym)
2859 "Return non-nil if SYM is bound to the `quote' function."
2860 (condition-case nil
2861 (eq (indirect-function sym)
2862 (indirect-function 'quote))
2863 (error nil)))
2864
2865 (defsubst wisent-backquote-p (sym)
2866 "Return non-nil if SYM is bound to the `backquote' function."
2867 (condition-case nil
2868 (eq (indirect-function sym)
2869 (indirect-function 'backquote))
2870 (error nil)))
2871
2872 (defun wisent-check-$N (x m)
2873 "Return non-nil if X is a valid $N or $regionN symbol.
2874 That is if X is a $N or $regionN symbol with N >= 1 and N <= M.
2875 Also warn if X is a $N or $regionN symbol with N < 1 or N > M."
2876 (when (symbolp x)
2877 (let* ((n (symbol-name x))
2878 (i (and (string-match "\\`\\$\\(region\\)?\\([0-9]+\\)\\'" n)
2879 (string-to-number (match-string 2 n)))))
2880 (when i
2881 (if (and (>= i 1) (<= i m))
2882 t
2883 (message
2884 "*** In %s, %s might be a free variable (rule has %s)"
2885 NAME x (format (cond ((< m 1) "no component")
2886 ((= m 1) "%d component")
2887 ("%d components"))
2888 m))
2889 nil)))))
2890
2891 (defun wisent-semantic-action-expand-body (body n &optional found)
2892 "Parse BODY of semantic action.
2893 N is the maximum number of $N variables that can be referenced in
2894 BODY. Warn on references out of permitted range.
2895 Optional argument FOUND is the accumulated list of $N references
2896 encountered so far.
2897 Return a cons (FOUND . XBODY), where FOUND is the list of $N
2898 references found in BODY, and XBODY is BODY expression with
2899 `backquote' forms expanded."
2900 (if (not (listp body))
2901 ;; BODY is an atom, no expansion needed
2902 (progn
2903 (if (wisent-check-$N body n)
2904 ;; Accumulate $i symbol
2905 (pushnew body found :test #'equal))
2906 (cons found body))
2907 ;; BODY is a list, expand inside it
2908 (let (xbody sexpr)
2909 ;; If backquote expand it first
2910 (if (wisent-backquote-p (car body))
2911 (setq body (macroexpand body)))
2912 (while body
2913 (setq sexpr (car body)
2914 body (cdr body))
2915 (cond
2916 ;; Function call excepted quote expression
2917 ((and (consp sexpr)
2918 (not (wisent-quote-p (car sexpr))))
2919 (setq sexpr (wisent-semantic-action-expand-body sexpr n found)
2920 found (car sexpr)
2921 sexpr (cdr sexpr)))
2922 ;; $i symbol
2923 ((wisent-check-$N sexpr n)
2924 ;; Accumulate $i symbol
2925 (pushnew sexpr found :test #'equal))
2926 )
2927 ;; Accumulate expanded forms
2928 (setq xbody (nconc xbody (list sexpr))))
2929 (cons found xbody))))
2930
2931 (defun wisent-semantic-action (r)
2932 "Set up the Elisp function for semantic action at rule R.
2933 On entry RCODE[R] contains a vector [BODY N (NTERM I)] where BODY is the
2934 body of the semantic action, N is the maximum number of values
2935 available in the parser's stack, NTERM is the nonterminal the semantic
2936 action belongs to, and I is the index of the semantic action inside
2937 NTERM definition. Return the semantic action symbol.
2938 The semantic action function accepts three arguments:
2939
2940 - the state/value stack
2941 - the top-of-stack index
2942 - the goto table
2943
2944 And returns the updated top-of-stack index."
2945 (if (not (aref ruseful r))
2946 (aset rcode r nil)
2947 (let* ((actn (aref rcode r))
2948 (n (aref actn 1)) ; nb of val avail. in stack
2949 (NAME (apply 'format "%s:%d" (aref actn 2)))
2950 (form (wisent-semantic-action-expand-body (aref actn 0) n))
2951 ($l (car form)) ; list of $vars used in body
2952 (form (cdr form)) ; expanded form of body
2953 (nt (aref rlhs r)) ; nonterminal item no.
2954 (bl nil) ; `let*' binding list
2955 $v i j)
2956
2957 ;; Compute $N and $regionN bindings
2958 (setq i n)
2959 (while (> i 0)
2960 (setq j (1+ (* 2 (- n i))))
2961 ;; Only bind $regionI if used in action
2962 (setq $v (intern (format "$region%d" i)))
2963 (if (memq $v $l)
2964 (setq bl (cons `(,$v (cdr (aref ,stack (- ,sp ,j)))) bl)))
2965 ;; Only bind $I if used in action
2966 (setq $v (intern (format "$%d" i)))
2967 (if (memq $v $l)
2968 (setq bl (cons `(,$v (car (aref ,stack (- ,sp ,j)))) bl)))
2969 (setq i (1- i)))
2970
2971 ;; Compute J, the length of rule's RHS. It will give the
2972 ;; current parser state at STACK[SP - 2*J], and where to push
2973 ;; the new semantic value and the next state, respectively at:
2974 ;; STACK[SP - 2*J + 1] and STACK[SP - 2*J + 2]. Generally N,
2975 ;; the maximum number of values available in the stack, is equal
2976 ;; to J. But, for mid-rule actions, N is the number of rule
2977 ;; elements before the action and J is always 0 (empty rule).
2978 (setq i (aref rrhs r)
2979 j 0)
2980 (while (> (aref ritem i) 0)
2981 (setq j (1+ j)
2982 i (1+ i)))
2983
2984 ;; Create the semantic action symbol.
2985 (setq actn (intern NAME (aref rcode 0)))
2986
2987 ;; Store source code in function cell of the semantic action
2988 ;; symbol. It will be byte-compiled at automaton's compilation
2989 ;; time. Using a byte-compiled automaton can significantly
2990 ;; speed up parsing!
2991 (fset actn
2992 `(lambda (,stack ,sp ,gotos)
2993 (let* (,@bl
2994 ($region
2995 ,(cond
2996 ((= n 1)
2997 (if (assq '$region1 bl)
2998 '$region1
2999 `(cdr (aref ,stack (1- ,sp)))))
3000 ((> n 1)
3001 `(wisent-production-bounds
3002 ,stack (- ,sp ,(1- (* 2 n))) (1- ,sp)))))
3003 ($action ,NAME)
3004 ($nterm ',(aref tags nt))
3005 ,@(and (> j 0) `((,sp (- ,sp ,(* j 2)))))
3006 (,state (cdr (assq $nterm
3007 (aref ,gotos
3008 (aref ,stack ,sp))))))
3009 (setq ,sp (+ ,sp 2))
3010 ;; push semantic value
3011 (aset ,stack (1- ,sp) (cons ,form $region))
3012 ;; push next state
3013 (aset ,stack ,sp ,state)
3014 ;; return new top of stack
3015 ,sp)))
3016
3017 ;; Return the semantic action symbol
3018 actn)))
3019 \f
3020 ;;;; ----------------------------
3021 ;;;; Build parser LALR automaton.
3022 ;;;; ----------------------------
3023
3024 (defun wisent-parser-automaton ()
3025 "Compute and return LALR(1) automaton from GRAMMAR.
3026 GRAMMAR is in internal format. GRAM/ACTS are grammar rules
3027 in internal format. STARTS defines the start symbols."
3028 ;; Check for useless stuff
3029 (wisent-reduce-grammar)
3030
3031 (wisent-set-derives)
3032 (wisent-set-nullable)
3033 ;; convert to nondeterministic finite state machine.
3034 (wisent-generate-states)
3035 ;; make it deterministic.
3036 (wisent-lalr)
3037 ;; Find and record any conflicts: places where one token of
3038 ;; lookahead is not enough to disambiguate the parsing. Also
3039 ;; resolve s/r conflicts based on precedence declarations.
3040 (wisent-resolve-conflicts)
3041 (wisent-print-results)
3042
3043 (vector (wisent-state-actions) ; action table
3044 (wisent-goto-actions) ; goto table
3045 start-table ; start symbols
3046 (aref rcode 0) ; sem. action symbol obarray
3047 )
3048 )
3049 \f
3050 ;;;; -------------------
3051 ;;;; Parse input grammar
3052 ;;;; -------------------
3053
3054 (defconst wisent-reserved-symbols (list wisent-error-term)
3055 "The list of reserved symbols.
3056 Also all symbols starting with a character defined in
3057 `wisent-reserved-capitals' are reserved for internal use.")
3058
3059 (defconst wisent-reserved-capitals '(?\$ ?\@)
3060 "The list of reserved capital letters.
3061 All symbol starting with one of these letters are reserved for
3062 internal use.")
3063
3064 (defconst wisent-starts-nonterm '$STARTS
3065 "Main start symbol.
3066 It gives the rules for start symbols.")
3067
3068 (defvar wisent-single-start-flag nil
3069 "Non-nil means allows only one start symbol like in Bison.
3070 That is don't add extra start rules to the grammar. This is
3071 useful to compare the Wisent's generated automaton with the Bison's
3072 one.")
3073
3074 (defsubst wisent-ISVALID-VAR (x)
3075 "Return non-nil if X is a character or an allowed symbol."
3076 (and x (symbolp x)
3077 (not (memq (aref (symbol-name x) 0) wisent-reserved-capitals))
3078 (not (memq x wisent-reserved-symbols))))
3079
3080 (defsubst wisent-ISVALID-TOKEN (x)
3081 "Return non-nil if X is a character or an allowed symbol."
3082 (or (wisent-char-p x)
3083 (wisent-ISVALID-VAR x)))
3084
3085 (defun wisent-push-token (symbol &optional nocheck)
3086 "Push a new SYMBOL in the list of tokens.
3087 Bypass checking if NOCHECK is non-nil."
3088 ;; Check
3089 (or nocheck (wisent-ISVALID-TOKEN symbol)
3090 (error "Invalid terminal symbol: %S" symbol))
3091 (if (memq symbol token-list)
3092 (message "*** duplicate terminal `%s' ignored" symbol)
3093 ;; Set up properties
3094 (wisent-set-prec symbol nil)
3095 (wisent-set-assoc symbol nil)
3096 (wisent-set-item-number symbol ntokens)
3097 ;; Add
3098 (setq ntokens (1+ ntokens)
3099 token-list (cons symbol token-list))))
3100
3101 (defun wisent-push-var (symbol &optional nocheck)
3102 "Push a new SYMBOL in the list of nonterminals.
3103 Bypass checking if NOCHECK is non-nil."
3104 ;; Check
3105 (unless nocheck
3106 (or (wisent-ISVALID-VAR symbol)
3107 (error "Invalid nonterminal symbol: %S" symbol))
3108 (if (memq symbol var-list)
3109 (error "Nonterminal `%s' already defined" symbol)))
3110 ;; Set up properties
3111 (wisent-set-item-number symbol nvars)
3112 ;; Add
3113 (setq nvars (1+ nvars)
3114 var-list (cons symbol var-list)))
3115
3116 (defun wisent-parse-nonterminals (defs)
3117 "Parse nonterminal definitions in DEFS.
3118 Fill in each element of the global arrays RPREC, RCODE, RUSEFUL with
3119 respectively rule precedence level, semantic action code and
3120 usefulness flag. Return a list of rules of the form (LHS . RHS) where
3121 LHS and RHS are respectively the Left Hand Side and Right Hand Side of
3122 the rule."
3123 (setq rprec nil
3124 rcode nil
3125 nitems 0
3126 nrules 0)
3127 (let (def nonterm rlist rule rules rhs rest item items
3128 rhl plevel semact @n @count iactn)
3129 (setq @count 0)
3130 (while defs
3131 (setq def (car defs)
3132 defs (cdr defs)
3133 nonterm (car def)
3134 rlist (cdr def)
3135 iactn 0)
3136 (or (consp rlist)
3137 (error "Invalid nonterminal definition syntax: %S" def))
3138 (while rlist
3139 (setq rule (car rlist)
3140 rlist (cdr rlist)
3141 items (car rule)
3142 rest (cdr rule)
3143 rhl 0
3144 rhs nil)
3145
3146 ;; Check & count items
3147 (setq nitems (1+ nitems)) ;; LHS item
3148 (while items
3149 (setq item (car items)
3150 items (cdr items)
3151 nitems (1+ nitems)) ;; RHS items
3152 (if (listp item)
3153 ;; Mid-rule action
3154 (progn
3155 (setq @count (1+ @count)
3156 @n (intern (format "@%d" @count)))
3157 (wisent-push-var @n t)
3158 ;; Push a new empty rule with the mid-rule action
3159 (setq semact (vector item rhl (list nonterm iactn))
3160 iactn (1+ iactn)
3161 plevel nil
3162 rcode (cons semact rcode)
3163 rprec (cons plevel rprec)
3164 item @n ;; Replace action by @N nonterminal
3165 rules (cons (list item) rules)
3166 nitems (1+ nitems)
3167 nrules (1+ nrules)))
3168 ;; Check terminal or nonterminal symbol
3169 (cond
3170 ((or (memq item token-list) (memq item var-list)))
3171 ;; Create new literal character token
3172 ((wisent-char-p item) (wisent-push-token item t))
3173 ((error "Symbol `%s' is used, but is not defined as a token and has no rules"
3174 item))))
3175 (setq rhl (1+ rhl)
3176 rhs (cons item rhs)))
3177
3178 ;; Check & collect rule precedence level
3179 (setq plevel (when (vectorp (car rest))
3180 (setq item (car rest)
3181 rest (cdr rest))
3182 (if (and (= (length item) 1)
3183 (memq (aref item 0) token-list)
3184 (wisent-prec (aref item 0)))
3185 (wisent-item-number (aref item 0))
3186 (error "Invalid rule precedence level syntax: %S" item)))
3187 rprec (cons plevel rprec))
3188
3189 ;; Check & collect semantic action body
3190 (setq semact (vector
3191 (if rest
3192 (if (cdr rest)
3193 (error "Invalid semantic action syntax: %S" rest)
3194 (car rest))
3195 ;; Give a default semantic action body: nil
3196 ;; for an empty rule or $1, the value of the
3197 ;; first symbol in the rule, otherwise.
3198 (if (> rhl 0) '$1 '()))
3199 rhl
3200 (list nonterm iactn))
3201 iactn (1+ iactn)
3202 rcode (cons semact rcode))
3203 (setq rules (cons (cons nonterm (nreverse rhs)) rules)
3204 nrules (1+ nrules))))
3205
3206 (setq ruseful (make-vector (1+ nrules) t)
3207 rprec (vconcat (cons nil (nreverse rprec)))
3208 rcode (vconcat (cons nil (nreverse rcode))))
3209 (nreverse rules)
3210 ))
3211
3212 (defun wisent-parse-grammar (grammar &optional start-list)
3213 "Parse GRAMMAR and build a suitable internal representation.
3214 Optional argument START-LIST defines the start symbols.
3215 GRAMMAR is a list of form: (TOKENS ASSOCS . NONTERMS)
3216
3217 TOKENS is a list of terminal symbols (tokens).
3218
3219 ASSOCS is nil or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements
3220 describing the associativity of TOKENS. ASSOC-TYPE must be one of the
3221 `default-prec' `nonassoc', `left' or `right' symbols. When ASSOC-TYPE
3222 is `default-prec', ASSOC-VALUE must be nil or t (the default).
3223 Otherwise it is a list of tokens which must have been previously
3224 declared in TOKENS.
3225
3226 NONTERMS is the list of non terminal definitions (see function
3227 `wisent-parse-nonterminals')."
3228 (or (and (consp grammar) (> (length grammar) 2))
3229 (error "Bad input grammar"))
3230
3231 (let (i r rhs pre dpre lst start-var assoc rules item
3232 token var def tokens defs ep-token ep-var ep-def)
3233
3234 ;; Built-in tokens
3235 (setq ntokens 0 nvars 0)
3236 (wisent-push-token wisent-eoi-term t)
3237 (wisent-push-token wisent-error-term t)
3238
3239 ;; Check/collect terminals
3240 (setq lst (car grammar))
3241 (while lst
3242 (wisent-push-token (car lst))
3243 (setq lst (cdr lst)))
3244
3245 ;; Check/Set up tokens precedence & associativity
3246 (setq lst (nth 1 grammar)
3247 pre 0
3248 defs nil
3249 dpre nil
3250 default-prec t)
3251 (while lst
3252 (setq def (car lst)
3253 assoc (car def)
3254 tokens (cdr def)
3255 lst (cdr lst))
3256 (if (eq assoc 'default-prec)
3257 (progn
3258 (or (null (cdr tokens))
3259 (memq (car tokens) '(t nil))
3260 (error "Invalid default-prec value: %S" tokens))
3261 (setq default-prec (car tokens))
3262 (if dpre
3263 (message "*** redefining default-prec to %s"
3264 default-prec))
3265 (setq dpre t))
3266 (or (memq assoc '(left right nonassoc))
3267 (error "Invalid associativity syntax: %S" assoc))
3268 (setq pre (1+ pre))
3269 (while tokens
3270 (setq token (car tokens)
3271 tokens (cdr tokens))
3272 (if (memq token defs)
3273 (message "*** redefining precedence of `%s'" token))
3274 (or (memq token token-list)
3275 ;; Define token not previously declared.
3276 (wisent-push-token token))
3277 (setq defs (cons token defs))
3278 ;; Record the precedence and associativity of the terminal.
3279 (wisent-set-prec token pre)
3280 (wisent-set-assoc token assoc))))
3281
3282 ;; Check/Collect nonterminals
3283 (setq lst (nthcdr 2 grammar)
3284 defs nil)
3285 (while lst
3286 (setq def (car lst)
3287 lst (cdr lst))
3288 (or (consp def)
3289 (error "Invalid nonterminal definition: %S" def))
3290 (if (memq (car def) token-list)
3291 (error "Nonterminal `%s' already defined as token" (car def)))
3292 (wisent-push-var (car def))
3293 (setq defs (cons def defs)))
3294 (or defs
3295 (error "No input grammar"))
3296 (setq defs (nreverse defs))
3297
3298 ;; Set up the start symbol.
3299 (setq start-table nil)
3300 (cond
3301
3302 ;; 1. START-LIST is nil, the start symbol is the first
3303 ;; nonterminal defined in the grammar (Bison like).
3304 ((null start-list)
3305 (setq start-var (caar defs)))
3306
3307 ;; 2. START-LIST contains only one element, it is the start
3308 ;; symbol (Bison like).
3309 ((or wisent-single-start-flag (null (cdr start-list)))
3310 (setq start-var (car start-list))
3311 (or (assq start-var defs)
3312 (error "Start symbol `%s' has no rule" start-var)))
3313
3314 ;; 3. START-LIST contains more than one element. All defines
3315 ;; potential start symbols. One of them (the first one by
3316 ;; default) will be given at parse time to be the parser goal.
3317 ;; If `wisent-single-start-flag' is non-nil that feature is
3318 ;; disabled and the first nonterminal in START-LIST defines
3319 ;; the start symbol, like in case 2 above.
3320 ((not wisent-single-start-flag)
3321
3322 ;; START-LIST is a list of nonterminals '(nt0 ... ntN).
3323 ;; Build and push ad hoc start rules in the grammar:
3324
3325 ;; ($STARTS ((nt0) $1) ((nt1) $1) ... ((ntN) $1))
3326 ;; ($nt1 (($$nt1 nt1) $2))
3327 ;; ...
3328 ;; ($ntN (($$ntN ntN) $2))
3329
3330 ;; Where internal symbols $ntI and $$ntI are respectively
3331 ;; nonterminals and terminals.
3332
3333 ;; The internal start symbol $STARTS is used to build the
3334 ;; LALR(1) automaton. The true default start symbol used by the
3335 ;; parser is the first nonterminal in START-LIST (nt0).
3336 (setq start-var wisent-starts-nonterm
3337 lst (nreverse start-list))
3338 (while lst
3339 (setq var (car lst)
3340 lst (cdr lst))
3341 (or (memq var var-list)
3342 (error "Start symbol `%s' has no rule" var))
3343 (unless (assq var start-table) ;; Ignore duplicates
3344 ;; For each nt start symbol
3345 (setq ep-var (intern (format "$%s" var))
3346 ep-token (intern (format "$$%s" var)))
3347 (wisent-push-token ep-token t)
3348 (wisent-push-var ep-var t)
3349 (setq
3350 ;; Add entry (nt . $$nt) to start-table
3351 start-table (cons (cons var ep-token) start-table)
3352 ;; Add rule ($nt (($$nt nt) $2))
3353 defs (cons (list ep-var (list (list ep-token var) '$2)) defs)
3354 ;; Add start rule (($nt) $1)
3355 ep-def (cons (list (list ep-var) '$1) ep-def))
3356 ))
3357 (wisent-push-var start-var t)
3358 (setq defs (cons (cons start-var ep-def) defs))))
3359
3360 ;; Set up rules main data structure & RPREC, RCODE, RUSEFUL
3361 (setq rules (wisent-parse-nonterminals defs))
3362
3363 ;; Set up the terminal & nonterminal lists.
3364 (setq nsyms (+ ntokens nvars)
3365 token-list (nreverse token-list)
3366 lst var-list
3367 var-list nil)
3368 (while lst
3369 (setq var (car lst)
3370 lst (cdr lst)
3371 var-list (cons var var-list))
3372 (wisent-set-item-number ;; adjust nonterminal item number to
3373 var (+ ntokens (wisent-item-number var)))) ;; I += NTOKENS
3374
3375 ;; Store special item numbers
3376 (setq error-token-number (wisent-item-number wisent-error-term)
3377 start-symbol (wisent-item-number start-var))
3378
3379 ;; Keep symbols in the TAGS vector so that TAGS[I] is the symbol
3380 ;; associated to item number I.
3381 (setq tags (vconcat token-list var-list))
3382 ;; Set up RLHS RRHS & RITEM data structures from list of rules
3383 ;; (LHS . RHS) received from `wisent-parse-nonterminals'.
3384 (setq rlhs (make-vector (1+ nrules) nil)
3385 rrhs (make-vector (1+ nrules) nil)
3386 ritem (make-vector (1+ nitems) nil)
3387 i 0
3388 r 1)
3389 (while rules
3390 (aset rlhs r (wisent-item-number (caar rules)))
3391 (aset rrhs r i)
3392 (setq rhs (cdar rules)
3393 pre nil)
3394 (while rhs
3395 (setq item (wisent-item-number (car rhs)))
3396 ;; Get default precedence level of rule, that is the
3397 ;; precedence of the last terminal in it.
3398 (and (wisent-ISTOKEN item)
3399 default-prec
3400 (setq pre item))
3401
3402 (aset ritem i item)
3403 (setq i (1+ i)
3404 rhs (cdr rhs)))
3405 ;; Setup the precedence level of the rule, that is the one
3406 ;; specified by %prec or the default one.
3407 (and (not (aref rprec r)) ;; Already set by %prec
3408 pre
3409 (wisent-prec (aref tags pre))
3410 (aset rprec r pre))
3411 (aset ritem i (- r))
3412 (setq i (1+ i)
3413 r (1+ r))
3414 (setq rules (cdr rules)))
3415 ))
3416 \f
3417 ;;;; ---------------------
3418 ;;;; Compile input grammar
3419 ;;;; ---------------------
3420
3421 (defun wisent-compile-grammar (grammar &optional start-list)
3422 "Compile the LALR(1) GRAMMAR.
3423
3424 GRAMMAR is a list (TOKENS ASSOCS . NONTERMS) where:
3425
3426 - TOKENS is a list of terminal symbols (tokens).
3427
3428 - ASSOCS is nil, or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements
3429 describing the associativity of TOKENS. ASSOC-TYPE must be one of
3430 the `default-prec' `nonassoc', `left' or `right' symbols. When
3431 ASSOC-TYPE is `default-prec', ASSOC-VALUE must be nil or t (the
3432 default). Otherwise it is a list of tokens which must have been
3433 previously declared in TOKENS.
3434
3435 - NONTERMS is a list of nonterminal definitions.
3436
3437 Optional argument START-LIST specify the possible grammar start
3438 symbols. This is a list of nonterminals which must have been
3439 previously declared in GRAMMAR's NONTERMS form. By default, the start
3440 symbol is the first nonterminal defined. When START-LIST contains
3441 only one element, it is the start symbol. Otherwise, all elements are
3442 possible start symbols, unless `wisent-single-start-flag' is non-nil.
3443 In that case, the first element is the start symbol, and others are
3444 ignored.
3445
3446 Return an automaton as a vector: [ACTIONS GOTOS STARTS FUNCTIONS]
3447 where:
3448
3449 - ACTIONS is a state/token matrix telling the parser what to do at
3450 every state based on the current lookahead token. That is shift,
3451 reduce, accept or error.
3452
3453 - GOTOS is a state/nonterminal matrix telling the parser the next
3454 state to go to after reducing with each rule.
3455
3456 - STARTS is an alist which maps the allowed start nonterminal symbols
3457 to tokens that will be first shifted into the parser stack.
3458
3459 - FUNCTIONS is an obarray of semantic action symbols. Each symbol's
3460 function definition is the semantic action lambda expression."
3461 (if (wisent-automaton-p grammar)
3462 grammar ;; Grammar already compiled just return it
3463 (wisent-with-context compile-grammar
3464 (let* ((gc-cons-threshold 1000000))
3465 (garbage-collect)
3466 (setq wisent-new-log-flag t)
3467 ;; Parse input grammar
3468 (wisent-parse-grammar grammar start-list)
3469 ;; Generate the LALR(1) automaton
3470 (wisent-parser-automaton)))))
3471 \f
3472 ;;;; --------------------------
3473 ;;;; Byte compile input grammar
3474 ;;;; --------------------------
3475
3476 (require 'bytecomp)
3477
3478 (defun wisent-byte-compile-grammar (form)
3479 "Byte compile the `wisent-compile-grammar' FORM.
3480 Automatically called by the Emacs Lisp byte compiler as a
3481 `byte-compile' handler."
3482 ;; Eval the `wisent-compile-grammar' form to obtain an LALR
3483 ;; automaton internal data structure. Then, because the internal
3484 ;; data structure contains an obarray, convert it to a lisp form so
3485 ;; it can be byte-compiled.
3486 (byte-compile-form
3487 ;; FIXME: we macroexpand here since `byte-compile-form' expects
3488 ;; macroexpanded code, but that's just a workaround: for lexical-binding
3489 ;; the lisp form should have to pass through closure-conversion and
3490 ;; `wisent-byte-compile-grammar' is called much too late for that.
3491 ;; Why isn't this `wisent-automaton-lisp-form' performed at
3492 ;; macroexpansion time? --Stef
3493 (macroexpand-all
3494 (wisent-automaton-lisp-form (eval form)))))
3495
3496 ;; FIXME: We shouldn't use a `byte-compile' handler. Maybe using a hash-table
3497 ;; instead of an obarray would work around the problem that obarrays
3498 ;; aren't printable. Then (put 'wisent-compile-grammar 'side-effect-free t).
3499 (put 'wisent-compile-grammar 'byte-compile 'wisent-byte-compile-grammar)
3500
3501 (defun wisent-automaton-lisp-form (automaton)
3502 "Return a Lisp form that produces AUTOMATON.
3503 See also `wisent-compile-grammar' for more details on AUTOMATON."
3504 (or (wisent-automaton-p automaton)
3505 (signal 'wrong-type-argument
3506 (list 'wisent-automaton-p automaton)))
3507 (let ((obn (make-symbol "ob")) ; Generated obarray name
3508 (obv (aref automaton 3)) ; Semantic actions obarray
3509 )
3510 `(let ((,obn (make-vector 13 0)))
3511 ;; Generate code to initialize the semantic actions obarray,
3512 ;; in local variable OBN.
3513 ,@(let (obcode)
3514 (mapatoms
3515 #'(lambda (s)
3516 (setq obcode
3517 (cons `(fset (intern ,(symbol-name s) ,obn)
3518 #',(symbol-function s))
3519 obcode)))
3520 obv)
3521 obcode)
3522 ;; Generate code to create the automaton.
3523 (vector
3524 ;; In code generated to initialize the action table, take
3525 ;; care of symbols that are interned in the semantic actions
3526 ;; obarray.
3527 (vector
3528 ,@(mapcar
3529 #'(lambda (state) ;; for each state
3530 `(list
3531 ,@(mapcar
3532 #'(lambda (tr) ;; for each transition
3533 (let ((k (car tr)) ; token
3534 (a (cdr tr))) ; action
3535 (if (and (symbolp a)
3536 (intern-soft (symbol-name a) obv))
3537 `(cons ,(if (symbolp k) `(quote ,k) k)
3538 (intern-soft ,(symbol-name a) ,obn))
3539 `(quote ,tr))))
3540 state)))
3541 (aref automaton 0)))
3542 ;; The code of the goto table is unchanged.
3543 ,(aref automaton 1)
3544 ;; The code of the alist of start symbols is unchanged.
3545 ',(aref automaton 2)
3546 ;; The semantic actions obarray is in the local variable OBN.
3547 ,obn))))
3548
3549 (provide 'semantic/wisent/comp)
3550
3551 ;; Disable messages with regards to lexical scoping, since this will
3552 ;; produce a bunch of 'lacks a prefix' warnings with the
3553 ;; `wisent-defcontext' trickery above.
3554
3555 ;; Local variables:
3556 ;; byte-compile-warnings: (not lexical)
3557 ;; generated-autoload-load-name: "semantic/wisent/comp"
3558 ;; End:
3559
3560 ;;; semantic/wisent/comp.el ends here