]> code.delx.au - gnu-emacs-elpa/blob - packages/trie/trie.el
509887c9b70f75e0e0b9d11e749396af54dc5f39
[gnu-emacs-elpa] / packages / trie / trie.el
1 ;;; trie.el --- Trie data structure
2
3 ;; Copyright (C) 2008-2010, 2012 Free Software Foundation, Inc
4
5 ;; Author: Toby Cubitt <toby-predictive@dr-qubit.org>
6 ;; Version: 0.2.5
7 ;; Keywords: extensions, matching, data structures
8 ;; trie, ternary search tree, tree, completion, regexp
9 ;; Package-Requires: ((tNFA "0.1.1") (heap "0.3"))
10 ;; URL: http://www.dr-qubit.org/emacs.php
11 ;; Repository: http://www.dr-qubit.org/git/predictive.git
12
13 ;; This file is part of Emacs.
14 ;;
15 ;; GNU Emacs is free software: you can redistribute it and/or modify it under
16 ;; the terms of the GNU General Public License as published by the Free
17 ;; Software Foundation, either version 3 of the License, or (at your option)
18 ;; any later version.
19 ;;
20 ;; GNU Emacs is distributed in the hope that it will be useful, but WITHOUT
21 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
22 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
23 ;; more details.
24 ;;
25 ;; You should have received a copy of the GNU General Public License along
26 ;; with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
27
28
29 ;;; Commentary:
30 ;;
31 ;; Quick Overview
32 ;; --------------
33 ;; A trie is a data structure used to store keys that are ordered sequences of
34 ;; elements (vectors, lists or strings in Elisp; strings are by far the most
35 ;; common), in such a way that both storage and retrieval are space- and
36 ;; time-efficient. But, more importantly, a variety of more advanced queries
37 ;; can also be performed efficiently: for example, returning all strings with
38 ;; a given prefix, searching for keys matching a given wildcard pattern or
39 ;; regular expression, or searching for all keys that match any of the above
40 ;; to within a given Lewenstein distance (though this last is not yet
41 ;; implemented in this package - code contributions welcome!).
42 ;;
43 ;; You create a trie using `make-trie', create an association using
44 ;; `trie-insert', retrieve an association using `trie-lookup', and map over a
45 ;; trie using `trie-map', `trie-mapc', `trie-mapcar', or `trie-mapf'. You can
46 ;; find completions of a prefix sequence using `trie-complete', or search for
47 ;; keys matching a regular expression using `trie-regexp-search'. Using
48 ;; `trie-stack', you can create an object that allows the contents of the trie
49 ;; to be used like a stack, useful for building other algorithms on top of
50 ;; tries; `trie-stack-pop' pops elements off the stack one-by-one, in
51 ;; "lexical" order, whilst `trie-stack-push' pushes things onto the
52 ;; stack. Similarly, `trie-complete-stack', and `trie-regexp-stack' create
53 ;; "lexically-ordered" stacks of query results.
54 ;;
55 ;; Note that there are two uses for a trie: as a lookup table, in which case
56 ;; only the presence or absence of a key in the trie is significant, or as an
57 ;; associative array, in which case each key carries some associated
58 ;; data. Libraries for other data structure often only implement lookup
59 ;; tables, leaving it up to you to implement an associative array on top of
60 ;; this (by storing key+data pairs in the data structure's keys, then defining
61 ;; a comparison function that only compares the key part). For a trie,
62 ;; however, the underlying data structures naturally support associative
63 ;; arrays at no extra cost, so this package does the opposite: it implements
64 ;; associative arrays, and leaves it up to you to use them as lookup tables if
65 ;; you so desire.
66 ;;
67 ;;
68 ;; Different Types of Trie
69 ;; -----------------------
70 ;; There are numerous ways to implement trie data structures internally, each
71 ;; with its own time- and space-efficiency trade-offs. By viewing a trie as a
72 ;; tree whose nodes are themselves lookup tables for key elements, this
73 ;; package is able to support all types of trie in a uniform manner. This
74 ;; relies on there existing (or you writing!) an Elisp implementation of the
75 ;; corresponding type of lookup table. The best type of trie to use will
76 ;; depend on what trade-offs are appropriate for your particular
77 ;; application. The following gives an overview of the advantages and
78 ;; disadvantages of various types of trie. (Not all of the underlying lookup
79 ;; tables have been implemented in Elisp yet, so using some of the trie types
80 ;; described below would require writing the missing Elisp package!)
81 ;;
82 ;;
83 ;; One of the most effective all-round implementations of a trie is a ternary
84 ;; search tree, which can be viewed as a tree of binary trees. If basic binary
85 ;; search trees are used for the nodes of the trie, we get a standard ternary
86 ;; search tree. If self-balancing binary trees are used (e.g. AVL or red-black
87 ;; trees), we get a self-balancing ternary search tree. If splay trees are
88 ;; used, we get yet another self-organising variant of a ternary search
89 ;; tree. All ternary search trees have, in common, good space-efficiency. The
90 ;; time-efficiency of the various trie operations is also good, assuming the
91 ;; underlying binary trees are balanced. Under that assumption, all variants
92 ;; of ternary search trees described below have the same asymptotic
93 ;; time-complexity for all trie operations.
94 ;;
95 ;; Self-balancing trees ensure the underlying binary trees are always close to
96 ;; perfectly balanced, with the usual trade-offs between the different the
97 ;; types of self-balancing binary tree: AVL trees are slightly more efficient
98 ;; for lookup operations than red-black trees, at a cost of slightly less
99 ;; efficienct insertion operations, and less efficient deletion
100 ;; operations. Splay trees give good average-case complexity and are simpler
101 ;; to implement than AVL or red-black trees (which can mean they're faster in
102 ;; practice!), at the expense of poor worst-case complexity.
103 ;;
104 ;; If your tries are going to be static (i.e. created once and rarely
105 ;; modified), then using perfectly balanced binary search trees might be
106 ;; appropriate. Perfectly balancing the binary trees is very inefficient, but
107 ;; it only has to be when the trie is first created or modified. Lookup
108 ;; operations will then be as efficient as possible for ternary search trees,
109 ;; and the implementation will also be simpler (so probably faster) than a
110 ;; self-balancing tree, without the space and time overhead required to keep
111 ;; track of rebalancing.
112 ;;
113 ;; On the other hand, adding data to a binary search tree in a random order
114 ;; usually results in a reasonably balanced tree. If this is the likely
115 ;; scenario, using a basic binary tree without bothering to balance it at all
116 ;; might be quite efficient, and, being even simpler to implement, could be
117 ;; quite fast overall.
118 ;;
119 ;;
120 ;; A digital trie is a different implementation of a trie, which can be viewed
121 ;; as a tree of arrays, and has different space- and time-complexities than a
122 ;; ternary search tree. Roughly speaking, a digital trie has worse
123 ;; space-complexity, but better time-complexity. Using hash tables instead of
124 ;; arrays for the nodes gives something similar to a digital trie, potentially
125 ;; with better space-complexity and the same amortised time-complexity, but at
126 ;; the expense of occasional significant inefficiency when inserting and
127 ;; deleting (whenever a hash table has to be resized). Indeed, an array can be
128 ;; viewed as a perfect hash table, but as such it requires the number of
129 ;; possible values to be known in advance.
130 ;;
131 ;; Finally, if you really need optimal efficiency from your trie, you could
132 ;; even write a custom type of underlying lookup table, optimised for your
133 ;; specific needs.
134 ;;
135 ;; This package uses the AVL tree package avl-tree.el, the tagged NFA package
136 ;; tNFA.el, and the heap package heap.el.
137
138
139 ;;; Change Log:
140 ;;
141 ;; Version 0.2.5
142 ;; * removed `trie--avl-transform-for-print' and
143 ;; `trie--avl-transform-from-read', since Emacs has supported printing and
144 ;; reading circular data structures for a long time now, rendering these
145 ;; transormers obsolete (note that `print-circle' *must* be enabled now when
146 ;; printing an avl trie)
147 ;;
148 ;; Version 0.2.4
149 ;; * minor bug-fix to `trie--edebug-pretty-print' to print "nil" instead
150 ;; of "()"
151 ;;
152 ;; Version 0.2.3
153 ;; * bug-fix in `trie--edebug-pretty-print'
154 ;;
155 ;; Version 0.2.2
156 ;; * added `edebug-prin1' and `edebug-prin1-to-string' advice to prevent
157 ;; edebug hanging whilst printing large tries
158 ;;
159 ;; Version 0.2.1
160 ;; * bug-fix to result accumulation in `trie--do-regexp-search'
161 ;;
162 ;; Version 0.2
163 ;; * Replaced wildcard searches with regexp searches, using the tNFA.el tagged
164 ;; non-deterministic finite state automata library. This is both more
165 ;; general *and* more efficient.
166 ;; * bug fix in `trie--do-regexp-search'
167 ;;
168 ;; Version 0.1
169 ;; * Initial release (complete rewrite from scratch of tstree.el!)
170 ;; * Ternary search trees are now implemented as a tree of avl trees, which
171 ;; has numerous advantages: self-balancing trees guarantee O(log n)
172 ;; complexity regardless of how the tree is built; deletion is now done
173 ;; properly.
174 ;; * Unlike tstree.el, trie.el is general enough to implement all sorts of
175 ;; tries, not just ternary search trees (though these remain the default).
176 ;; * Up to "tstree"->"trie" renaming, many functions are drop-in replacements
177 ;; for tstree.el functions. However, insertion and rank functions are no
178 ;; longer stored in the data structure, so corresponidng arguments are no
179 ;; longer optional. A single `trie-complete' function now deals with sorting
180 ;; completions in both lexical or arbitrary order, the ranking function
181 ;; being passed as an optional argument in the latter case. And functions
182 ;; can no longer operate over multiple data structures at once; i.e. they no
183 ;; longer accept lists of trees as arguments. (These features belong in
184 ;; higher level libraries, and the efficiency loss is negligible.)
185 ;; * `trie-wildcard-search' implements efficient shell-glob-like wildcard
186 ;; searches of tries!
187
188
189
190 ;;; Code:
191
192 (eval-when-compile (require 'cl))
193 (require 'avl-tree)
194 (require 'heap)
195 (require 'tNFA)
196
197
198
199 ;;; ================================================================
200 ;;; Pre-defined trie types
201
202 (defconst trie--types '(avl))
203
204
205 ;; --- avl-tree ---
206 (put 'avl :trie-createfun
207 (lambda (cmpfun seq) (avl-tree-create cmpfun)))
208 (put 'avl :trie-insertfun 'avl-tree-enter)
209 (put 'avl :trie-deletefun 'avl-tree-delete)
210 (put 'avl :trie-lookupfun 'avl-tree-member)
211 (put 'avl :trie-mapfun 'avl-tree-mapc)
212 (put 'avl :trie-emptyfun 'avl-tree-empty)
213 (put 'avl :trie-stack-createfun 'avl-tree-stack)
214 (put 'avl :trie-stack-popfun 'avl-tree-stack-pop)
215 (put 'avl :trie-stack-emptyfun 'avl-tree-stack-empty-p)
216
217
218
219 ;;; ================================================================
220 ;;; Internal utility functions and macros
221
222 ;;; ----------------------------------------------------------------
223 ;;; Functions and macros for handling a trie.
224
225 ;; symbol used to denote a trie leaf node
226 (defconst trie--terminator '--trie--terminator)
227
228 (defstruct
229 (trie-
230 :named
231 (:constructor nil)
232 (:constructor trie--create
233 (comparison-function &optional (type 'avl)
234 &aux
235 (dummy
236 (or (memq type trie--types)
237 (error "trie--create: unknown trie TYPE, %s" type)))
238 (createfun (get type :trie-createfun))
239 (insertfun (get type :trie-insertfun))
240 (deletefun (get type :trie-deletefun))
241 (lookupfun (get type :trie-lookupfun))
242 (mapfun (get type :trie-mapfun))
243 (emptyfun (get type :trie-emptyfun))
244 (stack-createfun (get type :trie-stack-createfun))
245 (stack-popfun (get type :trie-stack-popfun))
246 (stack-emptyfun (get type :trie-stack-emptyfun))
247 (transform-for-print (get type :trie-transform-for-print))
248 (transform-from-read (get type :trie-transform-from-read))
249 (cmpfun (trie--wrap-cmpfun comparison-function))
250 (root (trie--node-create-root createfun cmpfun))
251 ))
252 (:constructor trie--create-custom
253 (comparison-function
254 &key
255 (createfun 'avl-tree-create-bare)
256 (insertfun 'avl-tree-enter)
257 (deletefun 'avl-tree-delete)
258 (lookupfun 'avl-tree-member)
259 (mapfun 'avl-tree-mapc)
260 (emptyfun 'avl-tree-empty)
261 (stack-createfun 'avl-tree-stack)
262 (stack-popfun 'avl-tree-stack-pop)
263 (stack-emptyfun 'avl-tree-stack-empty-p)
264 (transform-for-print nil)
265 (transform-from-read nil)
266 &aux
267 (cmpfun (trie--wrap-cmpfun comparison-function))
268 (root (trie--node-create-root createfun cmpfun))
269 ))
270 (:copier nil))
271 root comparison-function cmpfun
272 createfun insertfun deletefun lookupfun mapfun emptyfun
273 stack-createfun stack-popfun stack-emptyfun
274 transform-for-print transform-from-read print-form)
275
276
277 (defun trie--wrap-cmpfun (cmpfun)
278 ;; wrap CMPFUN for use in a subtree
279 `(lambda (a b)
280 (setq a (trie--node-split a)
281 b (trie--node-split b))
282 (cond ((eq a trie--terminator)
283 (if (eq b trie--terminator) nil t))
284 ((eq b trie--terminator) nil)
285 (t (,cmpfun a b)))))
286
287
288 (defun trie--construct-equality-function (comparison-function)
289 ;; create equality function from trie comparison function
290 `(lambda (a b)
291 (and (not (,comparison-function a b))
292 (not (,comparison-function b a)))))
293
294
295
296 ;;; ----------------------------------------------------------------
297 ;;; Functions and macros for handling a trie node.
298
299 (defstruct
300 (trie--node
301 (:type vector)
302 (:constructor nil)
303 (:constructor trie--node-create
304 (split seq trie
305 &aux (subtree (funcall (trie--createfun trie)
306 (trie--cmpfun trie) seq))))
307 (:constructor trie--node-create-data
308 (data &aux (split trie--terminator) (subtree data)))
309 (:constructor trie--node-create-dummy
310 (split &aux (subtree nil)))
311 (:constructor trie--node-create-root
312 (createfun cmpfun
313 &aux
314 (split nil)
315 (subtree (funcall createfun cmpfun []))))
316 (:copier nil))
317 split subtree)
318
319 ;; data is stored in the subtree cell of a terminal node
320 (defalias 'trie--node-data 'trie--node-subtree)
321
322 (defsetf trie--node-data (node) `(setf (trie--node-subtree ,node)))
323
324 (defmacro trie--node-data-p (node)
325 ;; Return t if NODE is a data node, nil otherwise.
326 `(eq (trie--node-split ,node) trie--terminator))
327
328 (defmacro trie--node-p (node)
329 ;; Return t if NODE is a TRIE trie--node, nil otherwise. Have to
330 ;; define this ourselves, because we created a defstruct without any
331 ;; identifying tags (i.e. (:type vector)) for efficiency, but this
332 ;; means we can only perform a rudimentary and very unreliable test.
333 `(and (vectorp ,node) (= (length ,node) 2)))
334
335
336 (defun trie--node-find (node seq lookupfun)
337 ;; Returns the node below NODE corresponding to SEQ, or nil if none
338 ;; found.
339 (let ((len (length seq))
340 (i -1))
341 ;; descend trie until we find SEQ or run out of trie
342 (while (and node (< (incf i) len))
343 (setq node
344 (funcall lookupfun
345 (trie--node-subtree node)
346 (trie--node-create-dummy (elt seq i))
347 nil)))
348 node))
349
350
351 (defmacro trie--find-data-node (node lookupfun)
352 ;; Return data node from NODE's subtree, or nil if NODE has no data
353 ;; node in its subtree.
354 `(funcall ,lookupfun
355 (trie--node-subtree ,node)
356 (trie--node-create-dummy trie--terminator)
357 nil))
358
359
360 (defmacro trie--find-data (node lookupfun)
361 ;; Return data associated with sequence corresponding to NODE, or nil
362 ;; if sequence has no associated data.
363 `(let ((node (trie--find-data-node ,node ,lookupfun)))
364 (when node (trie--node-data node))))
365
366
367
368 ;;; ----------------------------------------------------------------
369 ;;; print/read transformation functions
370
371 (defun trie-transform-for-print (trie)
372 "Transform TRIE to print form."
373 (when (trie--transform-for-print trie)
374 (if (trie--print-form trie)
375 (warn "Trie has already been transformed to print-form")
376 (funcall (trie--transform-for-print trie) trie)
377 (setf (trie--print-form trie) t))))
378
379
380 (defun trie-transform-from-read (trie)
381 "Transform TRIE from print form."
382 (when (trie--transform-from-read trie)
383 (if (not (trie--print-form trie))
384 (warn "Trie is not in print-form")
385 (funcall (trie--transform-from-read trie) trie)
386 (setf (trie--print-form trie) nil))))
387
388
389 (defmacro trie-transform-from-read-warn (trie)
390 "Transform TRIE from print form, with warning."
391 `(when (trie--print-form ,trie)
392 (warn (concat "Attempt to operate on trie in print-form;\
393 converting to normal form"))
394 (trie-transform-from-read ,trie)))
395
396
397 (defun trie--avl-transform-for-print (trie)
398 ;; transform avl-tree based TRIE to print form.
399 (trie-mapc-internal
400 (lambda (avl seq) (setf (avl-tree--cmpfun avl) nil))
401 trie))
402
403
404 (defun trie--avl-transform-from-read (trie)
405 ;; transform avl-tree based TRIE from print form."
406 (let ((--trie-avl-transform--cmpfun (trie--cmpfun trie)))
407 (trie-mapc-internal
408 (lambda (avl seq)
409 (setf (avl-tree--cmpfun avl) --trie-avl-transform--cmpfun))
410 trie)))
411
412
413
414 ;;; ----------------------------------------------------------------
415 ;;; Replacements for CL functions
416
417 ;; copied from cl-extra.el
418 (defun trie--subseq (seq start &optional end)
419 "Return the subsequence of SEQ from START to END.
420 If END is omitted, it defaults to the length of the sequence.
421 If START or END is negative, it counts from the end."
422 (if (stringp seq) (substring seq start end)
423 (let (len)
424 (and end (< end 0) (setq end (+ end (setq len (length seq)))))
425 (when (< start 0)
426 (setq start (+ start (or len (setq len (length seq))))))
427 (cond ((listp seq)
428 (if (> start 0) (setq seq (nthcdr start seq)))
429 (if end
430 (let ((res nil))
431 (while (>= (setq end (1- end)) start)
432 (push (pop seq) res))
433 (nreverse res))
434 (copy-sequence seq)))
435 (t
436 (or end (setq end (or len (length seq))))
437 (let ((res (make-vector (max (- end start) 0) nil))
438 (i 0))
439 (while (< start end)
440 (aset res i (aref seq start))
441 (setq i (1+ i) start (1+ start)))
442 res))))))
443
444
445 (defun trie--position (item list)
446 "Find the first occurrence of ITEM in LIST.
447 Return the index of the matching item, or nil of not found.
448 Comparison is done with 'equal."
449 (let ((i 0))
450 (catch 'found
451 (while (progn
452 (when (equal item (car list)) (throw 'found i))
453 (setq i (1+ i))
454 (setq list (cdr list))))
455 nil)))
456
457
458 (defsubst trie--seq-append (seq el)
459 "Append EL to the end of sequence SEQ."
460 (cond
461 ((stringp seq) (concat seq (string el)))
462 ((vectorp seq) (vconcat seq (vector el)))
463 ((listp seq) (append seq (list el)))))
464
465
466 (defsubst trie--seq-concat (seq &rest sequences)
467 "Concatenate SEQ and SEQUENCES, and make the result the same
468 type of sequence as SEQ."
469 (cond
470 ((stringp seq) (apply 'concat seq sequences))
471 ((vectorp seq) (apply 'vconcat seq sequences))
472 ((listp seq) (apply 'append seq sequences))))
473
474
475
476
477 ;;; ================================================================
478 ;;; Basic trie operations
479
480 ;;;###autoload
481 (defalias 'make-trie 'trie--create
482 "Return a new trie that uses comparison function COMPARISON-FUNCTION.
483
484 A trie stores sequences (strings, vectors or lists) along with
485 associated data. COMPARISON-FUNCTEION should accept two
486 arguments, each being an element of such a sequence, and return t
487 if the first is strictly smaller than the second.
488
489 The optional argument TYPE specifies the type of trie to
490 create. However, the only one that is currently implemented is
491 the default, so this argument is useless for now.
492
493 (See also `make-trie-custom'.)")
494
495
496 ;;;###autoload
497 (defalias 'trie-create 'make-trie)
498
499
500 ;;;###autoload
501 (defalias 'make-trie-custom 'trie--create-custom
502 "Return a new trie that uses comparison function COMPARISON-FUNCTION.
503
504 A trie stores sequences (strings, vectors or lists) along with
505 associated data. COMPARISON-FUNCTION should accept two arguments,
506 each being an element of such a sequence, and return t if the
507 first is strictly smaller than the second.
508
509 The remaining keyword arguments: :CREATEFUN, :INSERTFUN, :DELETEFUN,
510 :LOOKUPFUN, :MAPFUN, :EMPTYFUN, :STACK-CREATEFUN, :STACK-POPFUN,
511 :STACK-EMPTYFUN, :TRANSFORM-FOR-PRINT and :TRANSFORM-FROM-READ
512 determine the type of trie that is created.
513
514 CREATEFUN is called as follows:
515
516 (CREATEFUN COMPARISON-FUNCTION SEQ)
517
518 and should return a data structure (\"ARRAY\") that can be used
519 as an associative array, where two elements A and B are equal if
520 the following is non-nil:
521
522 (and (COMPARISON-FUNCTION b a)
523 (COMPARISON-FUNCTION b a))
524
525 The SEQ argument is a vector containing the sequence that will
526 correspond to the newly created array in the trie. For most types
527 of trie, this value is ignored. It is passed to CREATEFUN only in
528 order to allow the creation of \"hybrid\" trie structures, in
529 which different types of associative array are used in different
530 parts of the trie. For example, the type of associative array
531 could be chosen based on the depth in the trie, given by \(length
532 SEQ\). (Note that all the other functions described below must be
533 able to correctly handle *any* of the types of associate array
534 that might be created by CREATEFUN.)
535
536 INSERTFUN, DELETEFUN, LOOKUPFUN, MAPFUN and EMPTYFUN should
537 insert, delete, lookup, map over, and check-if-there-exist-any
538 elements in an associative array. They are called as follows:
539
540 (INSERTFUN array element &optional updatefun)
541 (DELETEFUN array element &optional predicate nilflag)
542 (LOOKUPFUN array element &optional nilflag)
543 (MAPFUN function array &optional reverse)
544 (EMPTYFUN array)
545
546 INSERTFUN should insert ELEMENT into ARRAY and return the new
547 element, which will be ELEMENT itself unless UPDATEFUN is
548 specified. In that case, if and only if an element matching
549 ELEMENT already exists in the associative array, INSERTFUN should
550 instead pass ELEMENT and the matching element as arguments to
551 UPDATEFUN, replace the matching element with the return value,
552 and return that return value.
553
554 DELETEFUN should delete the element in the associative array that
555 matches ELEMENT, and return the deleted element. However, if
556 PREDICATE is specified and a matching element exists in ARRAY,
557 DELETEFUN should first pass the matching element as an argument
558 to PREDICATE before deleting, and should only delete the element
559 if PREDICATE returns non-nil. DELETEFUN should return NILFLAG if
560 no element was deleted (either becuase no matching element was
561 found, or because TESTFUN returned nil).
562
563 LOOKUPFUN should return the element from the associative array
564 that matches ELEMENT, or NILFLAG if no matching element exists.
565
566 MAPFUN should map FUNCTION over all elements in the order defined by
567 COMPARISON-FUNCTION, or in reverse order if REVERSE is non-nil.
568
569
570 STACK-CREATEFUN, STACK-POPFUN and STACK-EMPTYFUN should allow the
571 associative array to be used as a stack. STACK-CREATEFUN is
572 called as follows:
573
574 (STACK-CREATEFUN array)
575
576 and should return a data structure (\"STACK\") that behaves like
577 a sorted stack of all elements in the associative array. I.e.
578 successive calls to
579
580 (STACK-POPFUN stack)
581
582 should return elements from the associative array in the order
583 defined by COMPARISON-FUNCTION, and
584
585 (STACK-EMPTYFUN stack)
586
587 should return non-nil if the stack is empty, nil otherwise.
588
589 The stack functions are optional, in that all trie operations
590 other than the stack-related ones will work correctly. However,
591 any code that makes use of trie-stacks will complain if supplied
592 with this type of trie.
593
594
595 The :TRANSFORM-FOR-PRINT and :TRANSFORM-FROM-READ arguments are
596 optional. If supplied, they can be used to transform the trie
597 into a format suitable for passing to Elisp's `print'
598 functions (typically used to persistently store the trie by
599 writing it to file), and transform from that format back to the
600 original usable form.
601
602
603 Warning: to avoid nasty dynamic scoping bugs, the supplied
604 functions must *never* bind any variables with names commencing
605 \"--\".")
606
607
608 ;;;###autoload
609 (defalias 'trie-create-custom 'make-trie-custom)
610
611
612
613 (defalias 'trie-comparison-function 'trie--comparison-function
614 "Return the comparison function for TRIE.")
615
616
617 (defalias 'trie-p 'trie--p
618 "Return t if argument is a trie, nil otherwise.")
619
620
621 (defun trie-empty (trie)
622 "Return t if the TRIE is empty, nil otherwise."
623 (trie-transform-from-read-warn trie)
624 (funcall (trie--emptyfun trie)
625 (trie--node-subtree (trie--root trie))))
626
627
628 (defun trie-construct-sortfun (cmpfun &optional reverse)
629 "Construct function to compare key sequences, based on a CMPFUN
630 that compares individual elements of the sequence. Order is
631 reversed if REVERSE is non-nil."
632 (if reverse
633 `(lambda (a b)
634 (let (cmp)
635 (catch 'compared
636 (dotimes (i (min (length a) (length b)))
637 (cond ((,cmpfun (elt b i) (elt a i))
638 (throw 'compared t))
639 ((,cmpfun (elt a i) (elt b i))
640 (throw 'compared nil))))
641 (< (length a) (length b)))))
642 `(lambda (a b)
643 (let (cmp)
644 (catch 'compared
645 (dotimes (i (min (length a) (length b)))
646 (cond ((,cmpfun (elt a i) (elt b i))
647 (throw 'compared t))
648 ((,cmpfun (elt b i) (elt a i))
649 (throw 'compared nil))))
650 (< (length a) (length b)))))))
651
652
653
654 ;; ----------------------------------------------------------------
655 ;; Inserting data
656
657 (defun trie-insert (trie key &optional data updatefun)
658 "Associate DATA with KEY in TRIE.
659
660 If KEY already exists in TRIE, then DATA replaces the existing
661 association, unless UPDATEFUN is supplied. Note that if DATA is
662 *not* supplied, this means that the existing association of KEY
663 will be replaced by nil.
664
665 If UPDATEFUN is supplied and KEY already exists in TRIE,
666 UPDATEFUN is called with two arguments: DATA and the existing
667 association of KEY. Its return value becomes the new association
668 for KEY.
669
670 Returns the new association of KEY.
671
672 Note: to avoid nasty dynamic scoping bugs, UPDATEFUN must *not*
673 bind any variables with names commencing \"--\"."
674
675 ;; convert trie from print-form if necessary
676 (trie-transform-from-read-warn trie)
677
678 ;; absurd variable names are an attempt to avoid dynamic scoping bugs
679 (let ((--trie-insert--updatefun updatefun)
680 --trie-insert--old-node-flag
681 (node (trie--root trie))
682 (len (length key))
683 (i -1))
684 ;; Descend trie, adding nodes for non-existent elements of KEY. The
685 ;; update function passed to `trie--insertfun' ensures that existing
686 ;; nodes are left intact.
687 (while (< (incf i) len)
688 (setq --trie-insert--old-node-flag nil)
689 (setq node (funcall (trie--insertfun trie)
690 (trie--node-subtree node)
691 (trie--node-create (elt key i) key trie)
692 (lambda (a b)
693 (setq --trie-insert--old-node-flag t) b))))
694 ;; Create or update data node.
695 (setq node (funcall (trie--insertfun trie)
696 (trie--node-subtree node)
697 (trie--node-create-data data)
698 ;; if using existing data node, wrap UPDATEFUN
699 ;; if any was supplied
700 (when (and --trie-insert--old-node-flag
701 --trie-insert--updatefun)
702 (lambda (new old)
703 (setf (trie--node-data old)
704 (funcall --trie-insert--updatefun
705 (trie--node-data new)
706 (trie--node-data old)))
707 old))))
708 (trie--node-data node))) ; return new data
709
710
711
712 ;; ----------------------------------------------------------------
713 ;; Deleting data
714
715 (defun trie-delete (trie key &optional test)
716 "Delete KEY and its associated data from TRIE.
717
718 If KEY was deleted, a cons cell containing KEY and its
719 association is returned. Returns nil if KEY does not exist in
720 TRIE.
721
722 If TEST is supplied, it should be a function that accepts two
723 arguments: the key being deleted, and its associated data. The
724 key will then only be deleted if TEST returns non-nil.
725
726 Note: to avoid nasty dynamic scoping bugs, TEST must *not* bind
727 any variables with names commencing \"--\"."
728 ;; convert trie from print-form if necessary
729 (trie-transform-from-read-warn trie)
730 ;; set up deletion (real work is done by `trie--do-delete'
731 (let (--trie-deleted--node
732 (--trie-delete--key key))
733 (declare (special --trie-deleted--node)
734 (special --trie-delete--key))
735 (trie--do-delete (trie--root trie) key test
736 (trie--deletefun trie)
737 (trie--emptyfun trie)
738 (trie--cmpfun trie))
739 (when --trie-deleted--node
740 (cons key (trie--node-data --trie-deleted--node)))))
741
742
743 (defun trie--do-delete (node --trie--do-delete--seq
744 --trie--do-delete--test
745 --trie--do-delete--deletefun
746 --trie--do-delete--emptyfun
747 --trie--do-delete--cmpfun)
748 ;; Delete --TRIE--DO-DELETE--SEQ starting from trie node NODE, and
749 ;; return non-nil if we deleted a node. If --TRIE--DO-DELETE--TEST is
750 ;; supplied, it is called with two arguments, the key being deleted
751 ;; and the associated data, and the deletion is only carried out if it
752 ;; returns non-nil.
753
754 ;; The absurd argument names are to lessen the likelihood of dynamical
755 ;; scoping bugs caused by a supplied function binding a variable with
756 ;; the same name as one of the arguments, which would cause a nasty
757 ;; bug when the lambda's (below) are called.
758 (declare (special --trie-deleted--node)
759 (special --trie-delete--key))
760 ;; if --TRIE--DO-DELETE--SEQ is empty, try to delete data node and
761 ;; return non-nil if we did (return value of
762 ;; --TRIE--DO-DELETE--DELETEFUN is the deleted data, which is always
763 ;; non-nil for a trie)
764 (if (= (length --trie--do-delete--seq) 0)
765 (setq --trie-deleted--node
766 (funcall --trie--do-delete--deletefun
767 (trie--node-subtree node)
768 (trie--node-create-dummy trie--terminator)
769 (when --trie--do-delete--test
770 (lambda (n)
771 (funcall --trie--do-delete--test
772 --trie-delete--key (trie--node-data n))))
773 nil))
774 ;; otherwise, delete on down (return value of
775 ;; --TRIE--DO-DELETE--DELETEFUN is the deleted data, which is always
776 ;; non-nil for a trie)
777 (funcall --trie--do-delete--deletefun
778 (trie--node-subtree node)
779 (trie--node-create-dummy (elt --trie--do-delete--seq 0))
780 (lambda (n)
781 (and (trie--do-delete
782 n (trie--subseq --trie--do-delete--seq 1)
783 --trie--do-delete--test
784 --trie--do-delete--deletefun
785 --trie--do-delete--emptyfun
786 --trie--do-delete--cmpfun)
787 (funcall --trie--do-delete--emptyfun
788 (trie--node-subtree n))))
789 nil)))
790
791
792
793 ;; ----------------------------------------------------------------
794 ;; Retrieving data
795
796 (defun trie-lookup (trie key &optional nilflag)
797 "Return the data associated with KEY in the TRIE,
798 or nil if KEY does not exist in TRIE.
799
800 Optional argument NILFLAG specifies a value to return instead of
801 nil if KEY does not exist in TRIE. This allows a non-existent KEY
802 to be distinguished from an element with a null association. (See
803 also `trie-member-p', which does this for you.)"
804 ;; convert trie from print-form if necessary
805 (trie-transform-from-read-warn trie)
806 ;; find node corresponding to key, then find data node, then return
807 ;; data
808 (let (node)
809 (or (and (setq node (trie--node-find (trie--root trie) key
810 (trie--lookupfun trie)))
811 (trie--find-data node (trie--lookupfun trie)))
812 nilflag)))
813
814 (defalias 'trie-member 'trie-lookup)
815
816
817 (defun trie-member-p (trie key)
818 "Return t if KEY exists in TRIE, nil otherwise."
819 ;; convert trie from print-form if necessary
820 (trie-transform-from-read-warn trie)
821 (let ((flag '(nil)))
822 (not (eq flag (trie-member trie key flag)))))
823
824
825
826
827 ;;; ================================================================
828 ;;; Mapping over tries
829
830 (defun trie--mapc (--trie--mapc--function --trie--mapc--mapfun
831 --trie--mapc--root --trie--mapc--seq
832 &optional --trie--mapc--reverse)
833 ;; Apply TRIE--MAPC--FUNCTION to all elements in a trie beneath
834 ;; TRIE--MAPC--ROOT, which should correspond to the sequence
835 ;; TRIE--MAPC--SEQ. TRIE--MAPC--FUNCTION is passed two arguments: the
836 ;; trie node itself and the sequence it corresponds to. It is applied
837 ;; in ascending order, or descending order if TRIE--MAPC--REVERSE is
838 ;; non-nil.
839
840 ;; The absurd argument names are to lessen the likelihood of dynamical
841 ;; scoping bugs caused by a supplied function binding a variable with
842 ;; the same name as one of the arguments.
843 (funcall
844 --trie--mapc--mapfun
845 (lambda (--trie--mapc--node)
846 ;; data node: apply function
847 (if (trie--node-data-p --trie--mapc--node)
848 (funcall --trie--mapc--function
849 --trie--mapc--node
850 --trie--mapc--seq)
851 ;; internal node: append split value to seq and keep descending
852 (trie--mapc --trie--mapc--function
853 --trie--mapc--mapfun
854 --trie--mapc--node
855 (trie--seq-append
856 (copy-sequence --trie--mapc--seq)
857 (trie--node-split --trie--mapc--node))
858 --trie--mapc--reverse)))
859 ;; --TRIE--MAPC--MAPFUN target
860 (trie--node-subtree --trie--mapc--root)
861 --trie--mapc--reverse))
862
863
864 (defun trie-mapc-internal (function trie &optional type)
865 "Apply FUNCTION to all internal associative arrays within TRIE.
866 FUNCTION is passed two arguments: an associative array, and the
867 sequence it corresponds to.
868
869 Optional argument TYPE (one of the symbols vector, lisp or
870 string) sets the type of sequence passed to FUNCTION. Defaults to
871 vector."
872 (trie--mapc-internal function (trie--mapfun trie) (trie--root trie)
873 (cond ((eq type 'string) "")
874 ((eq type 'lisp) ())
875 (t []))))
876
877
878 (defun trie--mapc-internal (--trie--mapc-internal--function
879 --trie--mapc-internal--mapfun
880 --trie--mapc-internal--root
881 --trie--mapc-internal--seq)
882 (funcall
883 --trie--mapc-internal--mapfun
884 (lambda (--trie--mapc-internal--node)
885 ;; data node
886 (unless (trie--node-data-p --trie--mapc-internal--node)
887 (funcall --trie--mapc-internal--function
888 (trie--node-subtree --trie--mapc-internal--node)
889 --trie--mapc-internal--seq)
890 (trie--mapc-internal
891 --trie--mapc-internal--function
892 --trie--mapc-internal--mapfun
893 --trie--mapc-internal--node
894 (trie--seq-append
895 (copy-sequence --trie--mapc-internal--seq)
896 (trie--node-split --trie--mapc-internal--node)))))
897 (trie--node-subtree --trie--mapc-internal--root)))
898
899
900 (defun trie-map (function trie &optional type reverse)
901 "Modify all elements in TRIE by applying FUNCTION to them.
902
903 FUNCTION should take two arguments: a sequence stored in the trie
904 and its associated data. Its return value replaces the existing
905 data.
906
907 Optional argument TYPE (one of the symbols vector, lisp or
908 string) sets the type of sequence passed to FUNCTION. Defaults to
909 vector.
910
911 FUNCTION is applied in ascending order, or descending order if
912 REVERSE is non-nil.
913
914 Note: to avoid nasty dynamic scoping bugs, FUNCTION must *not*
915 bind any variables with names commencing \"--\"."
916 ;; convert from print-form if necessary
917 (trie-transform-from-read-warn trie)
918 ;; map FUNCTION over TRIE
919 (let ((--trie-map--function function)) ; avoid dynamic scoping bugs
920 (trie--mapc
921 (lambda (node seq)
922 (setf (trie--node-data node)
923 (funcall --trie-map--function seq (trie--node-data node))))
924 (trie--mapfun trie)
925 (trie--root trie)
926 (cond ((eq type 'string) "") ((eq type 'lisp) ()) (t []))
927 reverse)))
928
929
930 (defun trie-mapc (function trie &optional type reverse)
931 "Apply FUNCTION to all elements in TRIE for side effect only.
932
933 FUNCTION should take two arguments: a sequence stored in the trie
934 and its associated data.
935
936 Optional argument TYPE (one of the symbols vector, lisp or
937 string) sets the type of sequence passed to FUNCTION. Defaults to
938 vector.
939
940 FUNCTION is applied in ascending order, or descending order if
941 REVERSE is non-nil.
942
943 Note: to avoid nasty dynamic scoping bugs, FUNCTION must *not*
944 bind any variables with names commencing \"--\"."
945 ;; convert from print-form if necessary
946 (trie-transform-from-read-warn trie)
947 ;; map FUNCTION over TRIE
948 (let ((--trie-mapc--function function)) ; avoid dynamic scoping bugs
949 (trie--mapc
950 (lambda (node seq)
951 (funcall --trie-mapc--function seq (trie--node-data node)))
952 (trie--mapfun trie)
953 (trie--root trie)
954 (cond ((eq type 'string) "") ((eq type 'lisp) ()) (t []))
955 reverse)))
956
957
958 (defun trie-mapf (function combinator trie &optional type reverse)
959 "Apply FUNCTION to all elements in TRIE, and combine the results
960 using COMBINATOR.
961
962 FUNCTION should take two arguments: a sequence stored in the
963 trie, and its associated data.
964
965 Optional argument TYPE (one of the symbols vector, lisp or
966 string; defaults to vector) sets the type of sequence passed to
967 FUNCTION. If TYPE is 'string, it must be possible to apply the
968 function `string' to the individual elements of key sequences
969 stored in TRIE.
970
971 The FUNCTION is applied and the results combined in ascending
972 order, or descending order if REVERSE is non-nil.
973
974 Note: to avoid nasty dynamic scoping bugs, FUNCTION and
975 COMBINATOR must *not* bind any variables with names
976 commencing \"--\"."
977 ;; convert from print-form if necessary
978 (trie-transform-from-read-warn trie)
979 ;; map FUNCTION over TRIE, combining results with COMBINATOR
980 (let ((--trie-mapf--function function) ; avoid dynamic scoping bugs
981 --trie-mapf--accumulate)
982 (trie--mapc
983 (lambda (node seq)
984 (setq --trie-mapf--accumulate
985 (funcall combinator
986 (funcall --trie-mapf--function
987 seq (trie--node-data node))
988 --trie-mapf--accumulate)))
989 (trie--mapfun trie)
990 (trie--root trie)
991 (cond ((eq type 'string) "") ((eq type 'lisp) ()) (t []))
992 reverse)
993 --trie-mapf--accumulate))
994
995
996 (defun trie-mapcar (function trie &optional type reverse)
997 "Apply FUNCTION to all elements in TRIE,
998 and make a list of the results.
999
1000 FUNCTION should take two arguments: a sequence stored in the trie
1001 and its associated data.
1002
1003 Optional argument TYPE (one of the symbols vector, lisp or
1004 string) sets the type of sequence passed to FUNCTION. Defaults to
1005 vector.
1006
1007 The FUNCTION is applied and the list constructed in ascending
1008 order, or descending order if REVERSE is non-nil.
1009
1010 Note that if you don't care about the order in which FUNCTION is
1011 applied, just that the resulting list is in the correct order,
1012 then
1013
1014 (trie-mapf function 'cons trie type (not reverse))
1015
1016 is more efficient.
1017
1018 Note: to avoid nasty dynamic scoping bugs, FUNCTION must *not*
1019 bind any variables with names commencing \"--\"."
1020 ;; convert from print-form if necessary
1021 (trie-transform-from-read-warn trie)
1022 ;; map FUNCTION over TRIE and accumulate in a list
1023 (nreverse (trie-mapf function 'cons trie type reverse)))
1024
1025
1026
1027
1028 ;;; ================================================================
1029 ;;; Using tries as stacks
1030
1031 (defstruct (trie--stack
1032 (:constructor nil)
1033 (:constructor
1034 trie--stack-create
1035 (trie
1036 &optional
1037 (type 'vector)
1038 reverse
1039 &aux
1040 (comparison-function (trie--comparison-function trie))
1041 (lookupfun (trie--lookupfun trie))
1042 (stack-createfun (trie--stack-createfun trie))
1043 (stack-popfun (trie--stack-popfun trie))
1044 (stack-emptyfun (trie--stack-emptyfun trie))
1045 (repopulatefun 'trie--stack-repopulate)
1046 (store
1047 (if (trie-empty trie)
1048 nil
1049 (trie--stack-repopulate
1050 (list (cons
1051 (cond ((eq type 'list) ())
1052 ((eq type 'string) "")
1053 (t []))
1054 (funcall
1055 stack-createfun
1056 (trie--node-subtree (trie--root trie))
1057 reverse)))
1058 reverse
1059 comparison-function lookupfun
1060 stack-createfun stack-popfun stack-emptyfun)))
1061 (pushed '())
1062 ))
1063 (:constructor
1064 trie--completion-stack-create
1065 (trie prefix
1066 &optional
1067 reverse
1068 &aux
1069 (comparison-function (trie--comparison-function trie))
1070 (lookupfun (trie--lookupfun trie))
1071 (stack-createfun (trie--stack-createfun trie))
1072 (stack-popfun (trie--stack-popfun trie))
1073 (stack-emptyfun (trie--stack-emptyfun trie))
1074 (repopulatefun 'trie--stack-repopulate)
1075 (store (trie--completion-stack-construct-store
1076 trie prefix reverse))
1077 (pushed '())
1078 ))
1079 (:constructor
1080 trie--regexp-stack-create
1081 (trie regexp
1082 &optional
1083 reverse
1084 &aux
1085 (comparison-function (trie--comparison-function trie))
1086 (lookupfun (trie--lookupfun trie))
1087 (stack-createfun (trie--stack-createfun trie))
1088 (stack-popfun (trie--stack-popfun trie))
1089 (stack-emptyfun (trie--stack-emptyfun trie))
1090 (repopulatefun 'trie--regexp-stack-repopulate)
1091 (store (trie--regexp-stack-construct-store
1092 trie regexp reverse))
1093 (pushed '())
1094 ))
1095 (:copier nil))
1096 reverse comparison-function lookupfun
1097 stack-createfun stack-popfun stack-emptyfun
1098 repopulatefun store pushed)
1099
1100
1101 (defun trie-stack (trie &optional type reverse)
1102 "Return an object that allows TRIE to be accessed as a stack.
1103
1104 The stack is sorted in \"lexical\" order, i.e. the order defined
1105 by the trie's comparison function, or in reverse order if REVERSE
1106 is non-nil. Calling `trie-stack-pop' pops the top element (a key
1107 and its associated data) from the stack.
1108
1109 Optional argument TYPE (one of the symbols vector, lisp or
1110 string) sets the type of sequence used for the keys.
1111
1112 Note that any modification to TRIE *immediately* invalidates all
1113 trie-stacks created before the modification (in particular,
1114 calling `trie-stack-pop' will give unpredictable results).
1115
1116 Operations on trie-stacks are significantly more efficient than
1117 constructing a real stack from the trie and using standard stack
1118 functions. As such, they can be useful in implementing efficient
1119 algorithms on tries. However, in cases where mapping functions
1120 `trie-mapc', `trie-mapcar' or `trie-mapf' would be sufficient, it
1121 is better to use one of those instead."
1122 ;; convert trie from print-form if necessary
1123 (trie-transform-from-read-warn trie)
1124 ;; if stack functions aren't defined for trie type, throw error
1125 (if (not (functionp (trie--stack-createfun trie)))
1126 (error "Trie type does not support stack operations")
1127 ;; otherwise, create and initialise a stack
1128 (trie--stack-create trie type reverse)))
1129
1130
1131 (defun trie-stack-pop (trie-stack &optional nilflag)
1132 "Pop the first element from TRIE-STACK.
1133
1134 Returns nil if the stack is empty, or NILFLAG if specified. (The
1135 latter allows an empty stack to be distinguished from a null
1136 element stored in the trie.)"
1137 ;; return nilflag if stack is empty
1138 (if (trie-stack-empty-p trie-stack)
1139 nilflag
1140 ;; if elements have been pushed onto the stack, pop those first
1141 (if (trie--stack-pushed trie-stack)
1142 (pop (trie--stack-pushed trie-stack))
1143 ;; otherwise, pop first element from trie-stack and repopulate it
1144 (prog1
1145 (pop (trie--stack-store trie-stack))
1146 (setf (trie--stack-store trie-stack)
1147 (funcall (trie--stack-repopulatefun trie-stack)
1148 (trie--stack-store trie-stack)
1149 (trie--stack-reverse trie-stack)
1150 (trie--stack-comparison-function trie-stack)
1151 (trie--stack-lookupfun trie-stack)
1152 (trie--stack-stack-createfun trie-stack)
1153 (trie--stack-stack-popfun trie-stack)
1154 (trie--stack-stack-emptyfun trie-stack)))))))
1155
1156
1157 (defun trie-stack-push (element trie-stack)
1158 "Push ELEMENT onto TRIE-STACK."
1159 (push element (trie--stack-pushed trie-stack)))
1160
1161
1162 (defun trie-stack-first (trie-stack &optional nilflag)
1163 "Return the first element from TRIE-STACK, without removing it
1164 from the stack.
1165
1166 Returns nil if the stack is empty, or NILFLAG if specified. (The
1167 latter allows an empty stack to be distinguished from a null
1168 element stored in the trie.)"
1169 ;; return nilflag if stack is empty
1170 (if (trie-stack-empty-p trie-stack)
1171 nilflag
1172 ;; if elements have been pushed onto the stack, return first of
1173 ;; those
1174 (if (trie--stack-pushed trie-stack)
1175 (car (trie--stack-pushed trie-stack))
1176 ;; otherwise, return first element from trie-stack
1177 (car (trie--stack-store trie-stack)))))
1178
1179
1180 (defalias 'trie-stack-p 'trie--stack-p
1181 "Return t if argument is a trie-stack, nil otherwise.")
1182
1183
1184 (defun trie-stack-empty-p (trie-stack)
1185 "Return t if TRIE-STACK is empty, nil otherwise."
1186 (and (null (trie--stack-store trie-stack))
1187 (null (trie--stack-pushed trie-stack))))
1188
1189
1190 (defun trie--stack-repopulate
1191 (store reverse comparison-function lookupfun
1192 stack-createfun stack-popfun stack-emptyfun)
1193 ;; Recursively push children of the node at the head of STORE onto the
1194 ;; front of STORE, until a data node is reached.
1195
1196 ;; nothing to do if stack is empty
1197 (when store
1198 (let ((node (funcall stack-popfun (cdar store)))
1199 (seq (caar store)))
1200 (when (funcall stack-emptyfun (cdar store))
1201 ;; (pop store) here produces irritating compiler warnings
1202 (setq store (cdr store)))
1203
1204 (while (not (trie--node-data-p node))
1205 (push
1206 (cons (trie--seq-append seq (trie--node-split node))
1207 (funcall stack-createfun
1208 (trie--node-subtree node) reverse))
1209 store)
1210 (setq node (funcall stack-popfun (cdar store))
1211 seq (caar store))
1212 (when (funcall stack-emptyfun (cdar store))
1213 ;; (pop store) here produces irritating compiler warnings
1214 (setq store (cdr store))))
1215
1216 (push (cons seq (trie--node-data node)) store))))
1217
1218
1219
1220
1221 ;; ================================================================
1222 ;; Query-building utility macros
1223
1224 ;; Implementation Note
1225 ;; -------------------
1226 ;; For queries ranked in anything other than lexical order, we use a
1227 ;; partial heap-sort to find the k=MAXNUM highest ranked matches among
1228 ;; the n possibile matches. This has worst-case time complexity
1229 ;; O(n log k), and is both simple and elegant. An optimal algorithm
1230 ;; (e.g. partial quick-sort discarding the irrelevant partition at each
1231 ;; step) would have complexity O(n + k log k), but is probably not worth
1232 ;; the extra coding effort, and would have worse space complexity unless
1233 ;; coded to work "in-place", which would be highly non-trivial. (I
1234 ;; haven't done any benchmarking, though, so feel free to do so and let
1235 ;; me know the results!)
1236
1237 (defmacro trie--construct-accumulator (maxnum filter resultfun)
1238 ;; Does what it says on the tin! | sed -e 's/tin/macro name/'
1239 `(cond
1240 ;; filter, maxnum, resultfun
1241 ((and ,filter ,maxnum ,resultfun)
1242 (lambda (seq data)
1243 (when (funcall ,filter seq data)
1244 (aset trie--accumulate 0
1245 (cons (funcall ,resultfun seq data)
1246 (aref trie--accumulate 0)))
1247 (and (>= (length (aref trie--accumulate 0)) ,maxnum)
1248 (throw 'trie-accumulate--done nil)))))
1249 ;; filter, maxnum, !resultfun
1250 ((and ,filter ,maxnum (not ,resultfun))
1251 (lambda (seq data)
1252 (when (funcall ,filter seq data)
1253 (aset trie--accumulate 0
1254 (cons (cons seq data)
1255 (aref trie--accumulate 0)))
1256 (and (>= (length (aref trie--accumulate 0)) ,maxnum)
1257 (throw 'trie-accumulate--done nil)))))
1258 ;; filter, !maxnum, resultfun
1259 ((and ,filter (not ,maxnum) ,resultfun)
1260 (lambda (seq data)
1261 (when (funcall ,filter seq data)
1262 (aset trie--accumulate 0
1263 (cons (funcall ,resultfun seq data)
1264 (aref trie--accumulate 0))))))
1265 ;; filter, !maxnum, !resultfun
1266 ((and ,filter (not ,maxnum) (not ,resultfun))
1267 (lambda (seq data)
1268 (when (funcall ,filter seq data)
1269 (aset trie--accumulate 0
1270 (cons (cons seq data)
1271 (aref trie--accumulate 0))))))
1272 ;; !filter, maxnum, resultfun
1273 ((and (not ,filter) ,maxnum ,resultfun)
1274 (lambda (seq data)
1275 (aset trie--accumulate 0
1276 (cons (funcall ,resultfun seq data)
1277 (aref trie--accumulate 0)))
1278 (and (>= (length (aref trie--accumulate 0)) ,maxnum)
1279 (throw 'trie-accumulate--done nil))))
1280 ;; !filter, maxnum, !resultfun
1281 ((and (not ,filter) ,maxnum (not ,resultfun))
1282 (lambda (seq data)
1283 (aset trie--accumulate 0
1284 (cons (cons seq data)
1285 (aref trie--accumulate 0)))
1286 (and (>= (length (aref trie--accumulate 0)) ,maxnum)
1287 (throw 'trie-accumulate--done nil))))
1288 ;; !filter, !maxnum, resultfun
1289 ((and (not ,filter) (not ,maxnum) ,resultfun)
1290 (lambda (seq data)
1291 (aset trie--accumulate 0
1292 (cons (funcall ,resultfun seq data)
1293 (aref trie--accumulate 0)))))
1294 ;; !filter, !maxnum, !resultfun
1295 ((and (not ,filter) (not ,maxnum) (not ,resultfun))
1296 (lambda (seq data)
1297 (aset trie--accumulate 0
1298 (cons (cons seq data)
1299 (aref trie--accumulate 0)))))
1300 ))
1301
1302
1303
1304 (defmacro trie--construct-ranked-accumulator (maxnum filter)
1305 ;; Does what it says on the tin! | sed -e 's/tin/macro name/'
1306 `(cond
1307 ;; filter, maxnum
1308 ((and ,filter ,maxnum)
1309 (lambda (seq data)
1310 (when (funcall ,filter seq data)
1311 (heap-add trie--accumulate (cons seq data))
1312 (and (> (heap-size trie--accumulate) ,maxnum)
1313 (heap-delete-root trie--accumulate)))))
1314 ;; filter, !maxnum
1315 ((and ,filter (not ,maxnum))
1316 (lambda (seq data)
1317 (when (funcall ,filter seq data)
1318 (heap-add trie--accumulate (cons seq data)))))
1319 ;; !filter, maxnum
1320 ((and (not ,filter) ,maxnum)
1321 (lambda (seq data)
1322 (heap-add trie--accumulate (cons seq data))
1323 (and (> (heap-size trie--accumulate) ,maxnum)
1324 (heap-delete-root trie--accumulate))))
1325 ;; !filter, !maxnum
1326 ((and (not ,filter) (not ,maxnum))
1327 (lambda (seq data)
1328 (heap-add trie--accumulate (cons seq data))))))
1329
1330
1331
1332 (defmacro trie--accumulate-results
1333 (rankfun maxnum reverse filter resultfun accfun duplicates &rest body)
1334 ;; Accumulate results of running BODY code, and return them in
1335 ;; appropriate order. BODY should call ACCFUN to accumulate a result,
1336 ;; passing it two arguments: a trie data node, and the corresponding
1337 ;; sequence. BODY can throw 'trie-accumulate--done to terminate the
1338 ;; accumulation and return the results. A non-null DUPLICATES flag
1339 ;; signals that the accumulated results might contain duplicates,
1340 ;; which should be deleted. Note that DUPLICATES is ignored if RANKFUN
1341 ;; is null. The other arguments should be passed straight through from
1342 ;; the query function.
1343
1344 ;; rename functions to help avoid dynamic-scoping bugs
1345 `(let* ((--trie-accumulate--rankfun ,rankfun)
1346 (--trie-accumulate--filter ,filter)
1347 (--trie-accumulate--resultfun ,resultfun)
1348 ;; construct structure in which to accumulate results
1349 (trie--accumulate
1350 (if ,rankfun
1351 (heap-create ; heap order is inverse of rank order
1352 (if ,reverse
1353 (lambda (a b)
1354 (funcall --trie-accumulate--rankfun a b))
1355 (lambda (a b)
1356 (not (funcall --trie-accumulate--rankfun a b))))
1357 (when ,maxnum (1+ ,maxnum)))
1358 (make-vector 1 nil)))
1359 ;; construct function to accumulate completions
1360 (,accfun
1361 (if ,rankfun
1362 (trie--construct-ranked-accumulator
1363 ,maxnum --trie-accumulate--filter)
1364 (trie--construct-accumulator
1365 ,maxnum --trie-accumulate--filter
1366 --trie-accumulate--resultfun))))
1367
1368 ;; accumulate results
1369 (catch 'trie-accumulate--done ,@body)
1370
1371 ;; return list of completions
1372 (cond
1373 ;; for a ranked query, extract completions from heap
1374 (,rankfun
1375 (let (completions)
1376 ;; check for and delete duplicates if flag is set
1377 (if ,duplicates
1378 (while (not (heap-empty trie--accumulate))
1379 (if (equal (car (heap-root trie--accumulate))
1380 (caar completions))
1381 (heap-delete-root trie--accumulate)
1382 (push (heap-delete-root trie--accumulate)
1383 completions)))
1384 ;; skip duplicate checking if flag is not set
1385 (while (not (heap-empty trie--accumulate))
1386 (if ,resultfun
1387 (let ((res (heap-delete-root trie--accumulate)))
1388 (push (funcall ,resultfun (car res) (cdr res))
1389 completions))
1390 (push (heap-delete-root trie--accumulate)
1391 completions))))
1392 completions))
1393
1394 ;; for lexical query, reverse result list if MAXNUM supplied
1395 (,maxnum (nreverse (aref trie--accumulate 0)))
1396 ;; otherwise, just return list
1397 (t (aref trie--accumulate 0)))))
1398
1399
1400
1401
1402 ;; ================================================================
1403 ;; Completing
1404
1405 (defun trie-complete
1406 (trie prefix &optional rankfun maxnum reverse filter resultfun)
1407 "Return an alist containing all completions of PREFIX in TRIE
1408 along with their associated data, in the order defined by
1409 RANKFUN, defaulting to \"lexical\" order (i.e. the order defined
1410 by the trie's comparison function). If REVERSE is non-nil, the
1411 completions are sorted in the reverse order. Returns nil if no
1412 completions are found.
1413
1414 PREFIX must be a sequence (vector, list or string) containing
1415 elements of the type used to reference data in the trie. (If
1416 PREFIX is a string, it must be possible to apply `string' to
1417 individual elements of the sequences stored in the trie.) The
1418 completions returned in the alist will be sequences of the same
1419 type as KEY. If PREFIX is a list of sequences, completions of all
1420 sequences in the list are included in the returned alist. All
1421 sequences in the list must be of the same type.
1422
1423 The optional integer argument MAXNUM limits the results to the
1424 first MAXNUM completions. Otherwise, all completions are
1425 returned.
1426
1427 If specified, RANKFUN must accept two arguments, both cons
1428 cells. The car contains a sequence from the trie (of the same
1429 type as PREFIX), the cdr contains its associated data. It should
1430 return non-nil if first argument is ranked strictly higher than
1431 the second, nil otherwise.
1432
1433 The FILTER argument sets a filter function for the
1434 completions. If supplied, it is called for each possible
1435 completion with two arguments: the completion, and its associated
1436 data. If the filter function returns nil, the completion is not
1437 included in the results, and does not count towards MAXNUM.
1438
1439 RESULTFUN defines a function used to process results before
1440 adding them to the final result list. If specified, it should
1441 accept two arguments: a key and its associated data. It's return
1442 value is what gets added to the final result list, instead of the
1443 default key-data cons cell."
1444
1445 ;; convert trie from print-form if necessary
1446 (trie-transform-from-read-warn trie)
1447 ;; wrap prefix in a list if necessary
1448 ;; FIXME: the test for a list of prefixes, below, will fail if the
1449 ;; PREFIX sequence is a list, and the elements of PREFIX are
1450 ;; themselves lists (there might be no easy way to fully fix
1451 ;; this...)
1452 (if (or (atom prefix)
1453 (and (listp prefix) (not (sequencep (car prefix)))))
1454 (setq prefix (list prefix))
1455 ;; sort list of prefixes if sorting completions lexically
1456 (when (null rankfun)
1457 (setq prefix
1458 (sort prefix (trie-construct-sortfun
1459 (trie--comparison-function trie))))))
1460
1461 ;; accumulate completions
1462 (let (node)
1463 (declare (special accumulator))
1464 (trie--accumulate-results
1465 rankfun maxnum reverse filter resultfun accumulator nil
1466 (mapc (lambda (pfx)
1467 (setq node (trie--node-find (trie--root trie) pfx
1468 (trie--lookupfun trie)))
1469 (when node
1470 (trie--mapc
1471 (lambda (node seq)
1472 (funcall accumulator seq (trie--node-data node)))
1473 (trie--mapfun trie) node pfx
1474 (if maxnum reverse (not reverse)))))
1475 prefix))
1476 ))
1477
1478
1479
1480 (defun trie-complete-stack (trie prefix &optional reverse)
1481 "Return an object that allows completions of PREFIX to be accessed
1482 as if they were a stack.
1483
1484 The stack is sorted in \"lexical\" order, i.e. the order defined
1485 by TRIE's comparison function, or in reverse order if REVERSE is
1486 non-nil. Calling `trie-stack-pop' pops the top element (a key and
1487 its associated data) from the stack.
1488
1489 PREFIX must be a sequence (vector, list or string) that forms the
1490 initial part of a TRIE key, or a list of such sequences. (If
1491 PREFIX is a string, it must be possible to apply `string' to
1492 individual elements of TRIE keys.) The completions returned in
1493 the alist will be sequences of the same type as KEY. If PREFIX is
1494 a list of sequences, completions of all sequences in the list are
1495 included in the stack. All sequences in the list must be of the
1496 same type.
1497
1498 Note that any modification to TRIE *immediately* invalidates all
1499 trie-stacks created before the modification (in particular,
1500 calling `trie-stack-pop' will give unpredictable results).
1501
1502 Operations on trie-stacks are significantly more efficient than
1503 constructing a real stack from completions of PREFIX in TRIE and
1504 using standard stack functions. As such, they can be useful in
1505 implementing efficient algorithms on tries. However, in cases
1506 where `trie-complete' or `trie-complete-ordered' is sufficient,
1507 it is better to use one of those instead."
1508 ;; convert trie from print-form if necessary
1509 (trie-transform-from-read-warn trie)
1510 ;; if stack functions aren't defined for trie type, throw error
1511 (if (not (functionp (trie--stack-createfun trie)))
1512 (error "Trie type does not support stack operations")
1513 ;; otherwise, create and initialise a stack
1514 (trie--completion-stack-create trie prefix reverse)))
1515
1516
1517 (defun trie--completion-stack-construct-store (trie prefix reverse)
1518 ;; Construct store for completion stack based on TRIE.
1519 (let (store node)
1520 (if (or (atom prefix)
1521 (and (listp prefix)
1522 (not (sequencep (car prefix)))))
1523 (setq prefix (list prefix))
1524 (setq prefix
1525 (sort prefix
1526 (trie-construct-sortfun
1527 (trie--comparison-function trie)
1528 (not reverse)))))
1529 (dolist (pfx prefix)
1530 (when (setq node (trie--node-find (trie--root trie) pfx
1531 (trie--lookupfun trie)))
1532 (push (cons pfx (funcall (trie--stack-createfun trie)
1533 (trie--node-subtree node)
1534 reverse))
1535 store)))
1536 (trie--stack-repopulate
1537 store reverse
1538 (trie--comparison-function trie)
1539 (trie--lookupfun trie)
1540 (trie--stack-createfun trie)
1541 (trie--stack-popfun trie)
1542 (trie--stack-emptyfun trie))))
1543
1544
1545
1546
1547 ;; ================================================================
1548 ;; Regexp search
1549
1550 (defun trie-regexp-search
1551 (trie regexp &optional rankfun maxnum reverse filter resultfun type)
1552 "Return an alist containing all matches for REGEXP in TRIE
1553 along with their associated data, in the order defined by
1554 RANKFUN, defauling to \"lexical\" order (i.e. the order defined
1555 by the trie's comparison function). If REVERSE is non-nil, the
1556 completions are sorted in the reverse order. Returns nil if no
1557 completions are found.
1558
1559 REGEXP is a regular expression, but it need not necessarily be a
1560 string. It must be a sequence (vector, list of string) whose
1561 elements are either elements of the same type as elements of the
1562 trie keys (which behave as literals in the regexp), or any of the
1563 usual regexp special characters and backslash constructs. If
1564 REGEXP is a string, it must be possible to apply `string' to
1565 individual elements of the keys stored in the trie. The matches
1566 returned in the alist will be sequences of the same type as KEY.
1567
1568 Only a subset of the full Emacs regular expression syntax is
1569 supported. There is no support for regexp constructs that are
1570 only meaningful for strings (character ranges and character
1571 classes inside character alternatives, and syntax-related
1572 backslash constructs). Back-references and non-greedy postfix
1573 operators are not supported, so `?' after a postfix operator
1574 loses its special meaning. Also, matches are always anchored, so
1575 `$' and `^' lose their special meanings (use `.*' at the
1576 beginning and end of the regexp to get an unanchored match).
1577
1578 If the regexp contains any non-shy grouping constructs, subgroup
1579 match data is included in the results. In this case, the car of
1580 each match is no longer just a key. Instead, it is a list whose
1581 first element is the matching key, and whose remaining elements
1582 are cons cells whose cars and cdrs give the start and end indices
1583 of the elements that matched the corresponding groups, in order.
1584
1585 The optional integer argument MAXNUM limits the results to the
1586 first MAXNUM matches. Otherwise, all matches are returned.
1587
1588 If specified, RANKFUN must accept two arguments, both cons
1589 cells. The car contains a sequence from the trie (of the same
1590 type as PREFIX), the cdr contains its associated data. It should
1591 return non-nil if first argument is ranked strictly higher than
1592 the second, nil otherwise.
1593
1594 The FILTER argument sets a filter function for the matches. If
1595 supplied, it is called for each possible match with two
1596 arguments: the matching key, and its associated data. If the
1597 filter function returns nil, the match is not included in the
1598 results, and does not count towards MAXNUM.
1599
1600 RESULTFUN defines a function used to process results before
1601 adding them to the final result list. If specified, it should
1602 accept two arguments: a key and its associated data. It's return
1603 value is what gets added to the final result list, instead of the
1604 default key-data cons cell."
1605
1606 ;; convert trie from print-form if necessary
1607 (trie-transform-from-read-warn trie)
1608
1609 ;; massage rankfun to cope with grouping data
1610 ;; FIXME: could skip this if REGEXP contains no grouping constructs
1611 (when rankfun
1612 (setq rankfun
1613 `(lambda (a b)
1614 ;; if car of argument contains a key+group list rather than
1615 ;; a straight key, remove group list
1616 ;; FIXME: the test for straight key, below, will fail if
1617 ;; the key is a list, and the first element of the
1618 ;; key is itself a list (there might be no easy way
1619 ;; to fully fix this...)
1620 (unless (or (atom (car a))
1621 (and (listp (car a))
1622 (not (sequencep (caar a)))))
1623 (setq a (cons (caar a) (cdr a))))
1624 (unless (or (atom (car b))
1625 (and (listp (car b))
1626 (not (sequencep (caar b)))))
1627 (setq b (cons (caar b) (cdr b))))
1628 ;; call rankfun on massaged arguments
1629 (,rankfun a b))))
1630
1631 ;; accumulate completions
1632 (declare (special accumulator))
1633 (trie--accumulate-results
1634 rankfun maxnum reverse filter resultfun accumulator nil
1635 (trie--do-regexp-search
1636 (trie--root trie)
1637 (tNFA-from-regexp regexp :test (trie--construct-equality-function
1638 (trie--comparison-function trie)))
1639 (cond ((stringp regexp) "") ((listp regexp) ()) (t [])) 0
1640 (or (and maxnum reverse) (and (not maxnum) (not reverse)))
1641 (trie--comparison-function trie)
1642 (trie--lookupfun trie)
1643 (trie--mapfun trie))))
1644
1645
1646
1647 (defun trie--do-regexp-search
1648 (--trie--regexp-search--node tNFA seq pos reverse
1649 comparison-function lookupfun mapfun)
1650 ;; Search everything below the node --TRIE--REGEXP-SEARCH-NODE for
1651 ;; matches to the regexp encoded in tNFA. SEQ is the sequence
1652 ;; corresponding to NODE, POS is it's length. REVERSE is the usual
1653 ;; query argument, and the remaining arguments are the corresponding
1654 ;; trie functions.
1655 (declare (special accumulator))
1656
1657 ;; if NFA has matched and we're accumulating in normal order, check if
1658 ;; trie contains current string
1659 (when (and (not reverse) (tNFA-match-p tNFA))
1660 (let (node groups)
1661 (when (setq node (trie--find-data-node
1662 --trie--regexp-search--node lookupfun))
1663 (setq groups (tNFA-group-data tNFA))
1664 (funcall accumulator
1665 (if groups (cons seq groups) seq)
1666 (trie--node-data node)))))
1667
1668 (cond
1669 ;; ;; data node
1670 ;; ((trie--node-data-p --trie--regexp-search--node)
1671 ;; (when (tNFA-match-p tNFA)
1672 ;; (let ((groups (tNFA-group-data tNFA)))
1673 ;; (funcall accumulator
1674 ;; (if groups (cons seq groups) seq)
1675 ;; (trie--node-data --trie--regexp-search--node)))))
1676
1677 ;; wildcard transition: map over all nodes in subtree
1678 ((tNFA-wildcard-p tNFA)
1679 (let (state groups)
1680 (funcall mapfun
1681 (lambda (node)
1682 (unless (trie--node-data-p node)
1683 ;; (when (tNFA-match-p tNFA)
1684 ;; (setq groups (tNFA-group-data tNFA))
1685 ;; (funcall accumulator
1686 ;; (if groups (cons seq groups) seq)
1687 ;; (trie--node-data node)))
1688 (when (setq state (tNFA-next-state
1689 tNFA (trie--node-split node) pos))
1690 (trie--do-regexp-search
1691 node state
1692 (trie--seq-append seq (trie--node-split node))
1693 (1+ pos) reverse comparison-function
1694 lookupfun mapfun))))
1695 (trie--node-subtree --trie--regexp-search--node)
1696 reverse)))
1697
1698 (t ;; no wildcard transition: loop over all transitions
1699 (let (node state)
1700 (dolist (chr (sort (tNFA-transitions tNFA)
1701 (if reverse
1702 `(lambda (a b) (,comparison-function b a))
1703 comparison-function)))
1704 (when (and (setq node (trie--node-find
1705 --trie--regexp-search--node
1706 (vector chr) lookupfun))
1707 (setq state (tNFA-next-state tNFA chr pos)))
1708 (trie--do-regexp-search
1709 node state (trie--seq-append seq chr) (1+ pos)
1710 reverse comparison-function lookupfun mapfun))))))
1711
1712 ;; if NFA has matched and we're accumulating in reverse order, check if
1713 ;; trie contains current string
1714 (when (and reverse (tNFA-match-p tNFA))
1715 (let (node groups)
1716 (when (setq node (trie--find-data-node
1717 --trie--regexp-search--node lookupfun))
1718 (setq groups (tNFA-group-data tNFA))
1719 (funcall accumulator
1720 (if groups (cons seq groups) seq)
1721 (trie--node-data node))))))
1722
1723
1724
1725 (defun trie-regexp-stack (trie regexp &optional reverse)
1726 "Return an object that allows matches to REGEXP to be accessed
1727 as if they were a stack.
1728
1729 The stack is sorted in \"lexical\" order, i.e. the order defined
1730 by TRIE's comparison function, or in reverse order if REVERSE is
1731 non-nil. Calling `trie-stack-pop' pops the top element (a cons
1732 cell containing a key and its associated data) from the stack.
1733
1734 REGEXP is a regular expression, but it need not necessarily be a
1735 string. It must be a sequence (vector, list of string) whose
1736 elements are either elements of the same type as elements of the
1737 trie keys (which behave as literals in the regexp), or any of the
1738 usual regexp special characters and backslash constructs. If
1739 REGEXP is a string, it must be possible to apply `string' to
1740 individual elements of the keys stored in the trie. The matches
1741 returned in the alist will be sequences of the same type as KEY.
1742
1743 Back-references and non-greedy postfix operators are *not*
1744 supported, and the matches are always anchored, so `$' and `^'
1745 lose their special meanings.
1746
1747 If the regexp contains any non-shy grouping constructs, subgroup
1748 match data is included in the results. In this case, the car of
1749 each match (as returned by a call to `trie-stack-pop' is no
1750 longer just a key. Instead, it is a list whose first element is
1751 the matching key, and whose remaining elements are cons cells
1752 whose cars and cdrs give the start and end indices of the
1753 elements that matched the corresponding groups, in order."
1754
1755 ;; convert trie from print-form if necessary
1756 (trie-transform-from-read-warn trie)
1757 ;; if stack functions aren't defined for trie type, throw error
1758 (if (not (functionp (trie--stack-createfun trie)))
1759 (error "Trie type does not support stack operations")
1760 ;; otherwise, create and initialise a regexp stack
1761 (trie--regexp-stack-create trie regexp reverse)))
1762
1763
1764 (defun trie--regexp-stack-construct-store
1765 (trie regexp &optional reverse)
1766 ;; Construct store for regexp stack based on TRIE.
1767 (let ((seq (cond ((stringp regexp) "") ((listp regexp) ()) (t [])))
1768 store)
1769 (push (list seq (trie--root trie)
1770 (tNFA-from-regexp
1771 regexp :test (trie--construct-equality-function
1772 (trie--comparison-function trie)))
1773 0)
1774 store)
1775 (trie--regexp-stack-repopulate
1776 store reverse
1777 (trie--comparison-function trie)
1778 (trie--lookupfun trie)
1779 (trie--stack-createfun trie)
1780 (trie--stack-popfun trie)
1781 (trie--stack-emptyfun trie))))
1782
1783
1784 (defun trie--regexp-stack-repopulate
1785 (store reverse comparison-function lookupfun
1786 stack-createfun stack-popfun stack-emptyfun)
1787 ;; Recursively push matching children of the node at the head of STORE
1788 ;; onto STORE, until a data node is reached. REVERSE is the usual
1789 ;; query argument, and the remaining arguments are the corresponding
1790 ;; trie functions.
1791 (let (state seq node pos groups n s)
1792 (while
1793 (progn
1794 (setq pos (pop store)
1795 seq (nth 0 pos)
1796 node (nth 1 pos)
1797 state (nth 2 pos)
1798 pos (nth 3 pos))
1799 (cond
1800 ;; if stack is empty, we're done
1801 ((null node) nil)
1802
1803 ;; if stack element is a trie node...
1804 ((trie--node-p node)
1805 (cond
1806 ;; matching data node: add data to the stack and we're done
1807 ((trie--node-data-p node)
1808 (when (tNFA-match-p state)
1809 (setq groups (tNFA-group-data state))
1810 (push (cons (if groups (cons groups seq) seq)
1811 (trie--node-data node))
1812 store))
1813 nil) ; return nil to exit loop
1814
1815 ;; wildcard transition: add new node stack
1816 ((tNFA-wildcard-p state)
1817 (push (list seq
1818 (funcall stack-createfun
1819 (trie--node-subtree node) reverse)
1820 state pos)
1821 store))
1822
1823 (t ;; non-wildcard transition: add all possible next nodes
1824 (dolist (chr (sort (tNFA-transitions state)
1825 (if reverse
1826 comparison-function
1827 `(lambda (a b)
1828 (,comparison-function b a)))))
1829 (when (and (setq n (trie--node-find
1830 node (vector chr) lookupfun))
1831 (setq s (tNFA-next-state state chr pos)))
1832 (push (list (trie--seq-append seq chr) n s (1+ pos))
1833 store)))
1834 t))) ; return t to keep looping
1835
1836 ;; otherwise, stack element is a node stack...
1837 (t
1838 ;; if node stack is empty, dump it and keep repopulating
1839 (if (funcall stack-emptyfun node)
1840 t ; return t to keep looping
1841 ;; otherwise, add node stack back, and add next node from
1842 ;; stack
1843 (push (list seq node state pos) store)
1844 (setq node (funcall stack-popfun node)
1845 state (tNFA-next-state state
1846 (trie--node-split node) pos))
1847 (when state
1848 ;; matching data node: add data to the stack and we're
1849 ;; done
1850 (if (trie--node-data-p node)
1851 (progn
1852 (push (cons seq (trie--node-data node)) store)
1853 nil) ; return nil to exit loop
1854 ;; normal node: add it to the stack and keep
1855 ;; repopulating
1856 (push (list
1857 (trie--seq-append seq (trie--node-split node))
1858 node state (1+ pos))
1859 store)))))
1860 ))))
1861 store)
1862
1863
1864
1865 ;; ----------------------------------------------------------------
1866 ;; Pretty-print tries during edebug
1867
1868 ;; Note:
1869 ;; -----
1870
1871 ;; We advise the `edebug-prin1' and `edebug-prin1-to-string' functions
1872 ;; (actually, aliases) so that they print "#<trie>" instead of the full
1873 ;; print form for tries.
1874 ;;
1875 ;; This is because, if left to its own devices, edebug hangs for ages
1876 ;; whilst printing large tries, and you either have to wait for a *very*
1877 ;; long time for it to finish, or kill Emacs entirely. (Even C-g C-g
1878 ;; fails!)
1879 ;;
1880 ;; We do this also for lists of tries, since those occur quite often,
1881 ;; but not for other sequence types or deeper nested structures, to keep
1882 ;; the implementation as simple as possible.
1883 ;;
1884 ;; Since the print form of a trie is practically incomprehensible
1885 ;; anyway, we don't lose much by doing this. If you *really* want to
1886 ;; print tries in full whilst edebugging, despite this warning, disable
1887 ;; the advice.
1888 ;;
1889 ;; FIXME: We could use `cedet-edebug-prin1-extensions' instead of advice
1890 ;; when `cedet-edebug' is loaded, though I believe the current
1891 ;; implementation still works in that case.
1892
1893
1894 (eval-when-compile
1895 (require 'edebug)
1896 (require 'advice))
1897
1898
1899 (defun trie--edebug-pretty-print (object)
1900 (cond
1901 ((trie-p object) "#<trie>")
1902 ((null object) "nil")
1903 ((let ((tlist object) (test t))
1904 (while (or (trie-p (car-safe tlist))
1905 (and tlist (setq test nil)))
1906 (setq tlist (cdr tlist)))
1907 test)
1908 (concat "(" (mapconcat (lambda (dummy) "#<trie>") object " ") ")"))
1909 ;; ((vectorp object)
1910 ;; (let ((pretty "[") (len (length object)))
1911 ;; (dotimes (i (1- len))
1912 ;; (setq pretty
1913 ;; (concat pretty
1914 ;; (if (trie-p (aref object i))
1915 ;; "#<trie>" (prin1-to-string (aref object i))) " ")))
1916 ;; (concat pretty
1917 ;; (if (trie-p (aref object (1- len)))
1918 ;; "#<trie>" (prin1-to-string (aref object (1- len))))
1919 ;; "]")))
1920 ))
1921
1922
1923 (when (fboundp 'ad-define-subr-args)
1924 (ad-define-subr-args 'edebug-prin1 '(object &optional printcharfun)))
1925
1926 (defadvice edebug-prin1
1927 (around trie activate compile preactivate)
1928 (let ((pretty (trie--edebug-pretty-print object)))
1929 (if pretty
1930 (progn
1931 (prin1 pretty printcharfun)
1932 (setq ad-return-value pretty))
1933 ad-do-it)))
1934
1935
1936 (when (fboundp 'ad-define-subr-args)
1937 (ad-define-subr-args 'edebug-prin1-to-string '(object &optional noescape)))
1938
1939 (defadvice edebug-prin1-to-string
1940 (around trie activate compile preactivate)
1941 (let ((pretty (trie--edebug-pretty-print object)))
1942 (if pretty
1943 (setq ad-return-value pretty)
1944 ad-do-it)))
1945
1946
1947
1948 (provide 'trie)
1949
1950 ;;; trie.el ends here