1 ;;; trie.el --- Trie data structure
3 ;; Copyright (C) 2008-2010, 2012 Free Software Foundation, Inc
5 ;; Author: Toby Cubitt <toby-predictive@dr-qubit.org>
7 ;; Keywords: extensions, matching, data structures
8 ;; trie, ternary search tree, tree, completion, regexp
9 ;; Package-Requires: ((tNFA "0.1.1") (heap "0.3"))
10 ;; URL: http://www.dr-qubit.org/emacs.php
11 ;; Repository: http://www.dr-qubit.org/git/predictive.git
13 ;; This file is part of Emacs.
15 ;; GNU Emacs is free software: you can redistribute it and/or modify it under
16 ;; the terms of the GNU General Public License as published by the Free
17 ;; Software Foundation, either version 3 of the License, or (at your option)
20 ;; GNU Emacs is distributed in the hope that it will be useful, but WITHOUT
21 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
22 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
25 ;; You should have received a copy of the GNU General Public License along
26 ;; with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
33 ;; A trie is a data structure used to store keys that are ordered sequences of
34 ;; elements (vectors, lists or strings in Elisp; strings are by far the most
35 ;; common), in such a way that both storage and retrieval are space- and
36 ;; time-efficient. But, more importantly, a variety of more advanced queries
37 ;; can also be performed efficiently: for example, returning all strings with
38 ;; a given prefix, searching for keys matching a given wildcard pattern or
39 ;; regular expression, or searching for all keys that match any of the above
40 ;; to within a given Lewenstein distance (though this last is not yet
41 ;; implemented in this package - code contributions welcome!).
43 ;; You create a trie using `make-trie', create an association using
44 ;; `trie-insert', retrieve an association using `trie-lookup', and map over a
45 ;; trie using `trie-map', `trie-mapc', `trie-mapcar', or `trie-mapf'. You can
46 ;; find completions of a prefix sequence using `trie-complete', or search for
47 ;; keys matching a regular expression using `trie-regexp-search'. Using
48 ;; `trie-stack', you can create an object that allows the contents of the trie
49 ;; to be used like a stack, useful for building other algorithms on top of
50 ;; tries; `trie-stack-pop' pops elements off the stack one-by-one, in
51 ;; "lexical" order, whilst `trie-stack-push' pushes things onto the
52 ;; stack. Similarly, `trie-complete-stack', and `trie-regexp-stack' create
53 ;; "lexically-ordered" stacks of query results.
55 ;; Note that there are two uses for a trie: as a lookup table, in which case
56 ;; only the presence or absence of a key in the trie is significant, or as an
57 ;; associative array, in which case each key carries some associated
58 ;; data. Libraries for other data structure often only implement lookup
59 ;; tables, leaving it up to you to implement an associative array on top of
60 ;; this (by storing key+data pairs in the data structure's keys, then defining
61 ;; a comparison function that only compares the key part). For a trie,
62 ;; however, the underlying data structures naturally support associative
63 ;; arrays at no extra cost, so this package does the opposite: it implements
64 ;; associative arrays, and leaves it up to you to use them as lookup tables if
68 ;; Different Types of Trie
69 ;; -----------------------
70 ;; There are numerous ways to implement trie data structures internally, each
71 ;; with its own time- and space-efficiency trade-offs. By viewing a trie as a
72 ;; tree whose nodes are themselves lookup tables for key elements, this
73 ;; package is able to support all types of trie in a uniform manner. This
74 ;; relies on there existing (or you writing!) an Elisp implementation of the
75 ;; corresponding type of lookup table. The best type of trie to use will
76 ;; depend on what trade-offs are appropriate for your particular
77 ;; application. The following gives an overview of the advantages and
78 ;; disadvantages of various types of trie. (Not all of the underlying lookup
79 ;; tables have been implemented in Elisp yet, so using some of the trie types
80 ;; described below would require writing the missing Elisp package!)
83 ;; One of the most effective all-round implementations of a trie is a ternary
84 ;; search tree, which can be viewed as a tree of binary trees. If basic binary
85 ;; search trees are used for the nodes of the trie, we get a standard ternary
86 ;; search tree. If self-balancing binary trees are used (e.g. AVL or red-black
87 ;; trees), we get a self-balancing ternary search tree. If splay trees are
88 ;; used, we get yet another self-organising variant of a ternary search
89 ;; tree. All ternary search trees have, in common, good space-efficiency. The
90 ;; time-efficiency of the various trie operations is also good, assuming the
91 ;; underlying binary trees are balanced. Under that assumption, all variants
92 ;; of ternary search trees described below have the same asymptotic
93 ;; time-complexity for all trie operations.
95 ;; Self-balancing trees ensure the underlying binary trees are always close to
96 ;; perfectly balanced, with the usual trade-offs between the different the
97 ;; types of self-balancing binary tree: AVL trees are slightly more efficient
98 ;; for lookup operations than red-black trees, at a cost of slightly less
99 ;; efficienct insertion operations, and less efficient deletion
100 ;; operations. Splay trees give good average-case complexity and are simpler
101 ;; to implement than AVL or red-black trees (which can mean they're faster in
102 ;; practice!), at the expense of poor worst-case complexity.
104 ;; If your tries are going to be static (i.e. created once and rarely
105 ;; modified), then using perfectly balanced binary search trees might be
106 ;; appropriate. Perfectly balancing the binary trees is very inefficient, but
107 ;; it only has to be when the trie is first created or modified. Lookup
108 ;; operations will then be as efficient as possible for ternary search trees,
109 ;; and the implementation will also be simpler (so probably faster) than a
110 ;; self-balancing tree, without the space and time overhead required to keep
111 ;; track of rebalancing.
113 ;; On the other hand, adding data to a binary search tree in a random order
114 ;; usually results in a reasonably balanced tree. If this is the likely
115 ;; scenario, using a basic binary tree without bothering to balance it at all
116 ;; might be quite efficient, and, being even simpler to implement, could be
117 ;; quite fast overall.
120 ;; A digital trie is a different implementation of a trie, which can be viewed
121 ;; as a tree of arrays, and has different space- and time-complexities than a
122 ;; ternary search tree. Roughly speaking, a digital trie has worse
123 ;; space-complexity, but better time-complexity. Using hash tables instead of
124 ;; arrays for the nodes gives something similar to a digital trie, potentially
125 ;; with better space-complexity and the same amortised time-complexity, but at
126 ;; the expense of occasional significant inefficiency when inserting and
127 ;; deleting (whenever a hash table has to be resized). Indeed, an array can be
128 ;; viewed as a perfect hash table, but as such it requires the number of
129 ;; possible values to be known in advance.
131 ;; Finally, if you really need optimal efficiency from your trie, you could
132 ;; even write a custom type of underlying lookup table, optimised for your
135 ;; This package uses the AVL tree package avl-tree.el, the tagged NFA package
136 ;; tNFA.el, and the heap package heap.el.
141 (eval-when-compile (require 'cl))
148 ;;; ================================================================
149 ;;; Pre-defined trie types
151 (defconst trie--types '(avl))
155 (put 'avl :trie-createfun
156 (lambda (cmpfun seq) (avl-tree-create cmpfun)))
157 (put 'avl :trie-insertfun 'avl-tree-enter)
158 (put 'avl :trie-deletefun 'avl-tree-delete)
159 (put 'avl :trie-lookupfun 'avl-tree-member)
160 (put 'avl :trie-mapfun 'avl-tree-mapc)
161 (put 'avl :trie-emptyfun 'avl-tree-empty)
162 (put 'avl :trie-stack-createfun 'avl-tree-stack)
163 (put 'avl :trie-stack-popfun 'avl-tree-stack-pop)
164 (put 'avl :trie-stack-emptyfun 'avl-tree-stack-empty-p)
165 (put 'avl :trie-transform-for-print 'trie--avl-transform-for-print)
166 (put 'avl :trie-transform-from-read 'trie--avl-transform-from-read)
170 ;;; ================================================================
171 ;;; Internal utility functions and macros
173 ;;; ----------------------------------------------------------------
174 ;;; Functions and macros for handling a trie.
176 ;; symbol used to denote a trie leaf node
177 (defconst trie--terminator '--trie--terminator)
183 (:constructor trie--create
184 (comparison-function &optional (type 'avl)
187 (or (memq type trie--types)
188 (error "trie--create: unknown trie TYPE, %s" type)))
189 (createfun (get type :trie-createfun))
190 (insertfun (get type :trie-insertfun))
191 (deletefun (get type :trie-deletefun))
192 (lookupfun (get type :trie-lookupfun))
193 (mapfun (get type :trie-mapfun))
194 (emptyfun (get type :trie-emptyfun))
195 (stack-createfun (get type :trie-stack-createfun))
196 (stack-popfun (get type :trie-stack-popfun))
197 (stack-emptyfun (get type :trie-stack-emptyfun))
198 (transform-for-print (get type :trie-transform-for-print))
199 (transform-from-read (get type :trie-transform-from-read))
200 (cmpfun (trie--wrap-cmpfun comparison-function))
201 (root (trie--node-create-root createfun cmpfun))
203 (:constructor trie--create-custom
206 (createfun 'avl-tree-create-bare)
207 (insertfun 'avl-tree-enter)
208 (deletefun 'avl-tree-delete)
209 (lookupfun 'avl-tree-member)
210 (mapfun 'avl-tree-mapc)
211 (emptyfun 'avl-tree-empty)
212 (stack-createfun 'avl-tree-stack)
213 (stack-popfun 'avl-tree-stack-pop)
214 (stack-emptyfun 'avl-tree-stack-empty-p)
215 (transform-for-print 'trie--avl-transform-for-print)
216 (transform-from-read 'trie--avl-transform-from-read)
218 (cmpfun (trie--wrap-cmpfun comparison-function))
219 (root (trie--node-create-root createfun cmpfun))
222 root comparison-function cmpfun
223 createfun insertfun deletefun lookupfun mapfun emptyfun
224 stack-createfun stack-popfun stack-emptyfun
225 transform-for-print transform-from-read print-form)
228 (defun trie--wrap-cmpfun (cmpfun)
229 ;; wrap CMPFUN for use in a subtree
231 (setq a (trie--node-split a)
232 b (trie--node-split b))
233 (cond ((eq a trie--terminator)
234 (if (eq b trie--terminator) nil t))
235 ((eq b trie--terminator) nil)
239 (defun trie--construct-equality-function (comparison-function)
240 ;; create equality function from trie comparison function
242 (and (not (,comparison-function a b))
243 (not (,comparison-function b a)))))
247 ;;; ----------------------------------------------------------------
248 ;;; Functions and macros for handling a trie node.
254 (:constructor trie--node-create
256 &aux (subtree (funcall (trie--createfun trie)
257 (trie--cmpfun trie) seq))))
258 (:constructor trie--node-create-data
259 (data &aux (split trie--terminator) (subtree data)))
260 (:constructor trie--node-create-dummy
261 (split &aux (subtree nil)))
262 (:constructor trie--node-create-root
266 (subtree (funcall createfun cmpfun []))))
270 ;; data is stored in the subtree cell of a terminal node
271 (defalias 'trie--node-data 'trie--node-subtree)
273 (defsetf trie--node-data trie--node-set-data)
274 (defmacro trie--node-set-data (node data)
275 `(setf (trie--node-subtree ,node) ,data))
277 (defmacro trie--node-data-p (node)
278 ;; Return t if NODE is a data node, nil otherwise.
279 `(eq (trie--node-split ,node) trie--terminator))
281 (defmacro trie--node-p (node)
282 ;; Return t if NODE is a TRIE trie--node, nil otherwise. Have to
283 ;; define this ourselves, because we created a defstruct without any
284 ;; identifying tags (i.e. (:type vector)) for efficiency, but this
285 ;; means we can only perform a rudimentary and very unreliable test.
286 `(and (vectorp ,node) (= (length ,node) 2)))
289 (defun trie--node-find (node seq lookupfun)
290 ;; Returns the node below NODE corresponding to SEQ, or nil if none
292 (let ((len (length seq))
294 ;; descend trie until we find SEQ or run out of trie
295 (while (and node (< (incf i) len))
298 (trie--node-subtree node)
299 (trie--node-create-dummy (elt seq i))
304 (defmacro trie--find-data-node (node lookupfun)
305 ;; Return data node from NODE's subtree, or nil if NODE has no data
306 ;; node in its subtree.
308 (trie--node-subtree ,node)
309 (trie--node-create-dummy trie--terminator)
313 (defmacro trie--find-data (node lookupfun)
314 ;; Return data associated with sequence corresponding to NODE, or nil
315 ;; if sequence has no associated data.
316 `(let ((node (trie--find-data-node ,node ,lookupfun)))
317 (when node (trie--node-data node))))
321 ;;; ----------------------------------------------------------------
322 ;;; print/read transformation functions
324 (defun trie-transform-for-print (trie)
325 "Transform TRIE to print form."
326 (when (trie--transform-for-print trie)
327 (if (trie--print-form trie)
328 (warn "Trie has already been transformed to print-form")
329 (funcall (trie--transform-for-print trie) trie)
330 (setf (trie--print-form trie) t))))
333 (defun trie-transform-from-read (trie)
334 "Transform TRIE from print form."
335 (when (trie--transform-from-read trie)
336 (if (not (trie--print-form trie))
337 (warn "Trie is not in print-form")
338 (funcall (trie--transform-from-read trie) trie)
339 (setf (trie--print-form trie) nil))))
342 (defmacro trie-transform-from-read-warn (trie)
343 "Transform TRIE from print form, with warning."
344 `(when (trie--print-form ,trie)
345 (warn (concat "Attempt to operate on trie in print-form;\
346 converting to normal form"))
347 (trie-transform-from-read ,trie)))
350 (defun trie--avl-transform-for-print (trie)
351 ;; transform avl-tree based TRIE to print form.
353 (lambda (avl seq) (setf (avl-tree--cmpfun avl) nil))
357 (defun trie--avl-transform-from-read (trie)
358 ;; transform avl-tree based TRIE from print form."
359 (let ((--trie-avl-transform--cmpfun (trie--cmpfun trie)))
362 (setf (avl-tree--cmpfun avl) --trie-avl-transform--cmpfun))
367 ;;; ----------------------------------------------------------------
368 ;;; Replacements for CL functions
370 ;; copied from cl-extra.el
371 (defun trie--subseq (seq start &optional end)
372 "Return the subsequence of SEQ from START to END.
373 If END is omitted, it defaults to the length of the sequence.
374 If START or END is negative, it counts from the end."
375 (if (stringp seq) (substring seq start end)
377 (and end (< end 0) (setq end (+ end (setq len (length seq)))))
379 (setq start (+ start (or len (setq len (length seq))))))
381 (if (> start 0) (setq seq (nthcdr start seq)))
384 (while (>= (setq end (1- end)) start)
385 (push (pop seq) res))
387 (copy-sequence seq)))
389 (or end (setq end (or len (length seq))))
390 (let ((res (make-vector (max (- end start) 0) nil))
393 (aset res i (aref seq start))
394 (setq i (1+ i) start (1+ start)))
398 (defun trie--position (item list)
399 "Find the first occurrence of ITEM in LIST.
400 Return the index of the matching item, or nil of not found.
401 Comparison is done with 'equal."
405 (when (equal item (car list)) (throw 'found i))
407 (setq list (cdr list))))
411 (defsubst trie--seq-append (seq el)
412 "Append EL to the end of sequence SEQ."
414 ((stringp seq) (concat seq (string el)))
415 ((vectorp seq) (vconcat seq (vector el)))
416 ((listp seq) (append seq (list el)))))
419 (defsubst trie--seq-concat (seq &rest sequences)
420 "Concatenate SEQ and SEQUENCES, and make the result the same
421 type of sequence as SEQ."
423 ((stringp seq) (apply 'concat seq sequences))
424 ((vectorp seq) (apply 'vconcat seq sequences))
425 ((listp seq) (apply 'append seq sequences))))
430 ;;; ================================================================
431 ;;; Basic trie operations
434 (defalias 'make-trie 'trie--create
435 "Return a new trie that uses comparison function COMPARISON-FUNCTION.
437 A trie stores sequences (strings, vectors or lists) along with
438 associated data. COMPARISON-FUNCTEION should accept two
439 arguments, each being an element of such a sequence, and return t
440 if the first is strictly smaller than the second.
442 The optional argument TYPE specifies the type of trie to
443 create. However, the only one that is currently implemented is
444 the default, so this argument is useless for now.
446 (See also `make-trie-custom'.)")
450 (defalias 'trie-create 'make-trie)
454 (defalias 'make-trie-custom 'trie--create-custom
455 "Return a new trie that uses comparison function COMPARISON-FUNCTION.
457 A trie stores sequences (strings, vectors or lists) along with
458 associated data. COMPARISON-FUNCTION should accept two arguments,
459 each being an element of such a sequence, and return t if the
460 first is strictly smaller than the second.
462 The remaining keyword arguments: :CREATEFUN, :INSERTFUN, :DELETEFUN,
463 :LOOKUPFUN, :MAPFUN, :EMPTYFUN, :STACK-CREATEFUN, :STACK-POPFUN,
464 :STACK-EMPTYFUN, :TRANSFORM-FOR-PRINT and :TRANSFORM-FROM-READ
465 determine the type of trie that is created.
467 CREATEFUN is called as follows:
469 (CREATEFUN COMPARISON-FUNCTION SEQ)
471 and should return a data structure (\"ARRAY\") that can be used
472 as an associative array, where two elements A and B are equal if
473 the following is non-nil:
475 (and (COMPARISON-FUNCTION b a)
476 (COMPARISON-FUNCTION b a))
478 The SEQ argument is a vector containing the sequence that will
479 correspond to the newly created array in the trie. For most types
480 of trie, this value is ignored. It is passed to CREATEFUN only in
481 order to allow the creation of \"hybrid\" trie structures, in
482 which different types of associative array are used in different
483 parts of the trie. For example, the type of associative array
484 could be chosen based on the depth in the trie, given by \(length
485 SEQ\). (Note that all the other functions described below must be
486 able to correctly handle *any* of the types of associate array
487 that might be created by CREATEFUN.)
489 INSERTFUN, DELETEFUN, LOOKUPFUN, MAPFUN and EMPTYFUN should
490 insert, delete, lookup, map over, and check-if-there-exist-any
491 elements in an associative array. They are called as follows:
493 (INSERTFUN array element &optional updatefun)
494 (DELETEFUN array element &optional predicate nilflag)
495 (LOOKUPFUN array element &optional nilflag)
496 (MAPFUN function array &optional reverse)
499 INSERTFUN should insert ELEMENT into ARRAY and return the new
500 element, which will be ELEMENT itself unless UPDATEFUN is
501 specified. In that case, if and only if an element matching
502 ELEMENT already exists in the associative array, INSERTFUN should
503 instead pass ELEMENT and the matching element as arguments to
504 UPDATEFUN, replace the matching element with the return value,
505 and return that return value.
507 DELETEFUN should delete the element in the associative array that
508 matches ELEMENT, and return the deleted element. However, if
509 PREDICATE is specified and a matching element exists in ARRAY,
510 DELETEFUN should first pass the matching element as an argument
511 to PREDICATE before deleting, and should only delete the element
512 if PREDICATE returns non-nil. DELETEFUN should return NILFLAG if
513 no element was deleted (either becuase no matching element was
514 found, or because TESTFUN returned nil).
516 LOOKUPFUN should return the element from the associative array
517 that matches ELEMENT, or NILFLAG if no matching element exists.
519 MAPFUN should map FUNCTION over all elements in the order defined by
520 COMPARISON-FUNCTION, or in reverse order if REVERSE is non-nil.
523 STACK-CREATEFUN, STACK-POPFUN and STACK-EMPTYFUN should allow the
524 associative array to be used as a stack. STACK-CREATEFUN is
527 (STACK-CREATEFUN array)
529 and should return a data structure (\"STACK\") that behaves like
530 a sorted stack of all elements in the associative array. I.e.
535 should return elements from the associative array in the order
536 defined by COMPARISON-FUNCTION, and
538 (STACK-EMPTYFUN stack)
540 should return non-nil if the stack is empty, nil otherwise.
542 The stack functions are optional, in that all trie operations
543 other than the stack-related ones will work correctly. However,
544 any code that makes use of trie-stacks will complain if supplied
545 with this type of trie.
548 The :TRANSFORM-FOR-PRINT and :TRANSFORM-FROM-READ arguments are
549 optional. If supplied, they can be used to transform the trie
550 into a format suitable for passing to Elisp's `print'
551 functions (typically used to persistently store the trie by
552 writing it to file), and transform from that format back to the
553 original usable form.
556 Warning: to avoid nasty dynamic scoping bugs, the supplied
557 functions must *never* bind any variables with names commencing
562 (defalias 'trie-create-custom 'make-trie-custom)
566 (defalias 'trie-comparison-function 'trie--comparison-function
567 "Return the comparison function for TRIE.")
570 (defalias 'trie-p 'trie--p
571 "Return t if argument is a trie, nil otherwise.")
574 (defun trie-empty (trie)
575 "Return t if the TRIE is empty, nil otherwise."
576 (trie-transform-from-read-warn trie)
577 (funcall (trie--emptyfun trie)
578 (trie--node-subtree (trie--root trie))))
581 (defun trie-construct-sortfun (cmpfun &optional reverse)
582 "Construct function to compare key sequences, based on a CMPFUN
583 that compares individual elements of the sequence. Order is
584 reversed if REVERSE is non-nil."
589 (dotimes (i (min (length a) (length b)))
590 (cond ((,cmpfun (elt b i) (elt a i))
592 ((,cmpfun (elt a i) (elt b i))
593 (throw 'compared nil))))
594 (< (length a) (length b)))))
598 (dotimes (i (min (length a) (length b)))
599 (cond ((,cmpfun (elt a i) (elt b i))
601 ((,cmpfun (elt b i) (elt a i))
602 (throw 'compared nil))))
603 (< (length a) (length b)))))))
607 ;; ----------------------------------------------------------------
610 (defun trie-insert (trie key &optional data updatefun)
611 "Associate DATA with KEY in TRIE.
613 If KEY already exists in TRIE, then DATA replaces the existing
614 association, unless UPDATEFUN is supplied. Note that if DATA is
615 *not* supplied, this means that the existing association of KEY
616 will be replaced by nil.
618 If UPDATEFUN is supplied and KEY already exists in TRIE,
619 UPDATEFUN is called with two arguments: DATA and the existing
620 association of KEY. Its return value becomes the new association
623 Returns the new association of KEY.
625 Note: to avoid nasty dynamic scoping bugs, UPDATEFUN must *not*
626 bind any variables with names commencing \"--\"."
628 ;; convert trie from print-form if necessary
629 (trie-transform-from-read-warn trie)
631 ;; absurd variable names are an attempt to avoid dynamic scoping bugs
632 (let ((--trie-insert--updatefun updatefun)
633 --trie-insert--old-node-flag
634 (node (trie--root trie))
637 ;; Descend trie, adding nodes for non-existent elements of KEY. The
638 ;; update function passed to `trie--insertfun' ensures that existing
639 ;; nodes are left intact.
640 (while (< (incf i) len)
641 (setq --trie-insert--old-node-flag nil)
642 (setq node (funcall (trie--insertfun trie)
643 (trie--node-subtree node)
644 (trie--node-create (elt key i) key trie)
646 (setq --trie-insert--old-node-flag t) b))))
647 ;; Create or update data node.
648 (setq node (funcall (trie--insertfun trie)
649 (trie--node-subtree node)
650 (trie--node-create-data data)
651 ;; if using existing data node, wrap UPDATEFUN
652 ;; if any was supplied
653 (when (and --trie-insert--old-node-flag
654 --trie-insert--updatefun)
656 (setf (trie--node-data old)
657 (funcall --trie-insert--updatefun
658 (trie--node-data new)
659 (trie--node-data old)))
661 (trie--node-data node))) ; return new data
665 ;; ----------------------------------------------------------------
668 (defun trie-delete (trie key &optional test)
669 "Delete KEY and its associated data from TRIE.
671 If KEY was deleted, a cons cell containing KEY and its
672 association is returned. Returns nil if KEY does not exist in
675 If TEST is supplied, it should be a function that accepts two
676 arguments: the key being deleted, and its associated data. The
677 key will then only be deleted if TEST returns non-nil.
679 Note: to avoid nasty dynamic scoping bugs, TEST must *not* bind
680 any variables with names commencing \"--\"."
681 ;; convert trie from print-form if necessary
682 (trie-transform-from-read-warn trie)
683 ;; set up deletion (real work is done by `trie--do-delete'
684 (let (--trie-deleted--node
685 (--trie-delete--key key))
686 (declare (special --trie-deleted--node)
687 (special --trie-delete--key))
688 (trie--do-delete (trie--root trie) key test
689 (trie--deletefun trie)
690 (trie--emptyfun trie)
692 (when --trie-deleted--node
693 (cons key (trie--node-data --trie-deleted--node)))))
696 (defun trie--do-delete (node --trie--do-delete--seq
697 --trie--do-delete--test
698 --trie--do-delete--deletefun
699 --trie--do-delete--emptyfun
700 --trie--do-delete--cmpfun)
701 ;; Delete --TRIE--DO-DELETE--SEQ starting from trie node NODE, and
702 ;; return non-nil if we deleted a node. If --TRIE--DO-DELETE--TEST is
703 ;; supplied, it is called with two arguments, the key being deleted
704 ;; and the associated data, and the deletion is only carried out if it
707 ;; The absurd argument names are to lessen the likelihood of dynamical
708 ;; scoping bugs caused by a supplied function binding a variable with
709 ;; the same name as one of the arguments, which would cause a nasty
710 ;; bug when the lambda's (below) are called.
711 (declare (special --trie-deleted--node)
712 (special --trie-delete--key))
713 ;; if --TRIE--DO-DELETE--SEQ is empty, try to delete data node and
714 ;; return non-nil if we did (return value of
715 ;; --TRIE--DO-DELETE--DELETEFUN is the deleted data, which is always
716 ;; non-nil for a trie)
717 (if (= (length --trie--do-delete--seq) 0)
718 (setq --trie-deleted--node
719 (funcall --trie--do-delete--deletefun
720 (trie--node-subtree node)
721 (trie--node-create-dummy trie--terminator)
722 (when --trie--do-delete--test
724 (funcall --trie--do-delete--test
725 --trie-delete--key (trie--node-data n))))
727 ;; otherwise, delete on down (return value of
728 ;; --TRIE--DO-DELETE--DELETEFUN is the deleted data, which is always
729 ;; non-nil for a trie)
730 (funcall --trie--do-delete--deletefun
731 (trie--node-subtree node)
732 (trie--node-create-dummy (elt --trie--do-delete--seq 0))
734 (and (trie--do-delete
735 n (trie--subseq --trie--do-delete--seq 1)
736 --trie--do-delete--test
737 --trie--do-delete--deletefun
738 --trie--do-delete--emptyfun
739 --trie--do-delete--cmpfun)
740 (funcall --trie--do-delete--emptyfun
741 (trie--node-subtree n))))
746 ;; ----------------------------------------------------------------
749 (defun trie-lookup (trie key &optional nilflag)
750 "Return the data associated with KEY in the TRIE,
751 or nil if KEY does not exist in TRIE.
753 Optional argument NILFLAG specifies a value to return instead of
754 nil if KEY does not exist in TRIE. This allows a non-existent KEY
755 to be distinguished from an element with a null association. (See
756 also `trie-member-p', which does this for you.)"
757 ;; convert trie from print-form if necessary
758 (trie-transform-from-read-warn trie)
759 ;; find node corresponding to key, then find data node, then return
762 (or (and (setq node (trie--node-find (trie--root trie) key
763 (trie--lookupfun trie)))
764 (trie--find-data node (trie--lookupfun trie)))
767 (defalias 'trie-member 'trie-lookup)
770 (defun trie-member-p (trie key)
771 "Return t if KEY exists in TRIE, nil otherwise."
772 ;; convert trie from print-form if necessary
773 (trie-transform-from-read-warn trie)
775 (not (eq flag (trie-member trie key flag)))))
780 ;;; ================================================================
781 ;;; Mapping over tries
783 (defun trie--mapc (--trie--mapc--function --trie--mapc--mapfun
784 --trie--mapc--root --trie--mapc--seq
785 &optional --trie--mapc--reverse)
786 ;; Apply TRIE--MAPC--FUNCTION to all elements in a trie beneath
787 ;; TRIE--MAPC--ROOT, which should correspond to the sequence
788 ;; TRIE--MAPC--SEQ. TRIE--MAPC--FUNCTION is passed two arguments: the
789 ;; trie node itself and the sequence it corresponds to. It is applied
790 ;; in ascending order, or descending order if TRIE--MAPC--REVERSE is
793 ;; The absurd argument names are to lessen the likelihood of dynamical
794 ;; scoping bugs caused by a supplied function binding a variable with
795 ;; the same name as one of the arguments.
798 (lambda (--trie--mapc--node)
799 ;; data node: apply function
800 (if (trie--node-data-p --trie--mapc--node)
801 (funcall --trie--mapc--function
804 ;; internal node: append split value to seq and keep descending
805 (trie--mapc --trie--mapc--function
809 (copy-sequence --trie--mapc--seq)
810 (trie--node-split --trie--mapc--node))
811 --trie--mapc--reverse)))
812 ;; --TRIE--MAPC--MAPFUN target
813 (trie--node-subtree --trie--mapc--root)
814 --trie--mapc--reverse))
817 (defun trie-mapc-internal (function trie &optional type)
818 "Apply FUNCTION to all internal associative arrays within TRIE.
819 FUNCTION is passed two arguments: an associative array, and the
820 sequence it corresponds to.
822 Optional argument TYPE (one of the symbols vector, lisp or
823 string) sets the type of sequence passed to FUNCTION. Defaults to
825 (trie--mapc-internal function (trie--mapfun trie) (trie--root trie)
826 (cond ((eq type 'string) "")
831 (defun trie--mapc-internal (--trie--mapc-internal--function
832 --trie--mapc-internal--mapfun
833 --trie--mapc-internal--root
834 --trie--mapc-internal--seq)
836 --trie--mapc-internal--mapfun
837 (lambda (--trie--mapc-internal--node)
839 (unless (trie--node-data-p --trie--mapc-internal--node)
840 (funcall --trie--mapc-internal--function
841 (trie--node-subtree --trie--mapc-internal--node)
842 --trie--mapc-internal--seq)
844 --trie--mapc-internal--function
845 --trie--mapc-internal--mapfun
846 --trie--mapc-internal--node
848 (copy-sequence --trie--mapc-internal--seq)
849 (trie--node-split --trie--mapc-internal--node)))))
850 (trie--node-subtree --trie--mapc-internal--root)))
853 (defun trie-map (function trie &optional type reverse)
854 "Modify all elements in TRIE by applying FUNCTION to them.
856 FUNCTION should take two arguments: a sequence stored in the trie
857 and its associated data. Its return value replaces the existing
860 Optional argument TYPE (one of the symbols vector, lisp or
861 string) sets the type of sequence passed to FUNCTION. Defaults to
864 FUNCTION is applied in ascending order, or descending order if
867 Note: to avoid nasty dynamic scoping bugs, FUNCTION must *not*
868 bind any variables with names commencing \"--\"."
869 ;; convert from print-form if necessary
870 (trie-transform-from-read-warn trie)
871 ;; map FUNCTION over TRIE
872 (let ((--trie-map--function function)) ; avoid dynamic scoping bugs
875 (setf (trie--node-data node)
876 (funcall --trie-map--function seq (trie--node-data node))))
879 (cond ((eq type 'string) "") ((eq type 'lisp) ()) (t []))
883 (defun trie-mapc (function trie &optional type reverse)
884 "Apply FUNCTION to all elements in TRIE for side effect only.
886 FUNCTION should take two arguments: a sequence stored in the trie
887 and its associated data.
889 Optional argument TYPE (one of the symbols vector, lisp or
890 string) sets the type of sequence passed to FUNCTION. Defaults to
893 FUNCTION is applied in ascending order, or descending order if
896 Note: to avoid nasty dynamic scoping bugs, FUNCTION must *not*
897 bind any variables with names commencing \"--\"."
898 ;; convert from print-form if necessary
899 (trie-transform-from-read-warn trie)
900 ;; map FUNCTION over TRIE
901 (let ((--trie-mapc--function function)) ; avoid dynamic scoping bugs
904 (funcall --trie-mapc--function seq (trie--node-data node)))
907 (cond ((eq type 'string) "") ((eq type 'lisp) ()) (t []))
911 (defun trie-mapf (function combinator trie &optional type reverse)
912 "Apply FUNCTION to all elements in TRIE, and combine the results
915 FUNCTION should take two arguments: a sequence stored in the
916 trie, and its associated data.
918 Optional argument TYPE (one of the symbols vector, lisp or
919 string; defaults to vector) sets the type of sequence passed to
920 FUNCTION. If TYPE is 'string, it must be possible to apply the
921 function `string' to the individual elements of key sequences
924 The FUNCTION is applied and the results combined in ascending
925 order, or descending order if REVERSE is non-nil.
927 Note: to avoid nasty dynamic scoping bugs, FUNCTION and
928 COMBINATOR must *not* bind any variables with names
930 ;; convert from print-form if necessary
931 (trie-transform-from-read-warn trie)
932 ;; map FUNCTION over TRIE, combining results with COMBINATOR
933 (let ((--trie-mapf--function function) ; avoid dynamic scoping bugs
934 --trie-mapf--accumulate)
937 (setq --trie-mapf--accumulate
939 (funcall --trie-mapf--function
940 seq (trie--node-data node))
941 --trie-mapf--accumulate)))
944 (cond ((eq type 'string) "") ((eq type 'lisp) ()) (t []))
946 --trie-mapf--accumulate))
949 (defun trie-mapcar (function trie &optional type reverse)
950 "Apply FUNCTION to all elements in TRIE,
951 and make a list of the results.
953 FUNCTION should take two arguments: a sequence stored in the trie
954 and its associated data.
956 Optional argument TYPE (one of the symbols vector, lisp or
957 string) sets the type of sequence passed to FUNCTION. Defaults to
960 The FUNCTION is applied and the list constructed in ascending
961 order, or descending order if REVERSE is non-nil.
963 Note that if you don't care about the order in which FUNCTION is
964 applied, just that the resulting list is in the correct order,
967 (trie-mapf function 'cons trie type (not reverse))
971 Note: to avoid nasty dynamic scoping bugs, FUNCTION must *not*
972 bind any variables with names commencing \"--\"."
973 ;; convert from print-form if necessary
974 (trie-transform-from-read-warn trie)
975 ;; map FUNCTION over TRIE and accumulate in a list
976 (nreverse (trie-mapf function 'cons trie type reverse)))
981 ;;; ================================================================
982 ;;; Using tries as stacks
984 (defstruct (trie--stack
993 (comparison-function (trie--comparison-function trie))
994 (lookupfun (trie--lookupfun trie))
995 (stack-createfun (trie--stack-createfun trie))
996 (stack-popfun (trie--stack-popfun trie))
997 (stack-emptyfun (trie--stack-emptyfun trie))
998 (repopulatefun 'trie--stack-repopulate)
1000 (if (trie-empty trie)
1002 (trie--stack-repopulate
1004 (cond ((eq type 'list) ())
1005 ((eq type 'string) "")
1009 (trie--node-subtree (trie--root trie))
1012 comparison-function lookupfun
1013 stack-createfun stack-popfun stack-emptyfun)))
1017 trie--completion-stack-create
1022 (comparison-function (trie--comparison-function trie))
1023 (lookupfun (trie--lookupfun trie))
1024 (stack-createfun (trie--stack-createfun trie))
1025 (stack-popfun (trie--stack-popfun trie))
1026 (stack-emptyfun (trie--stack-emptyfun trie))
1027 (repopulatefun 'trie--stack-repopulate)
1028 (store (trie--completion-stack-construct-store
1029 trie prefix reverse))
1033 trie--regexp-stack-create
1038 (comparison-function (trie--comparison-function trie))
1039 (lookupfun (trie--lookupfun trie))
1040 (stack-createfun (trie--stack-createfun trie))
1041 (stack-popfun (trie--stack-popfun trie))
1042 (stack-emptyfun (trie--stack-emptyfun trie))
1043 (repopulatefun 'trie--regexp-stack-repopulate)
1044 (store (trie--regexp-stack-construct-store
1045 trie regexp reverse))
1049 reverse comparison-function lookupfun
1050 stack-createfun stack-popfun stack-emptyfun
1051 repopulatefun store pushed)
1054 (defun trie-stack (trie &optional type reverse)
1055 "Return an object that allows TRIE to be accessed as a stack.
1057 The stack is sorted in \"lexical\" order, i.e. the order defined
1058 by the trie's comparison function, or in reverse order if REVERSE
1059 is non-nil. Calling `trie-stack-pop' pops the top element (a key
1060 and its associated data) from the stack.
1062 Optional argument TYPE (one of the symbols vector, lisp or
1063 string) sets the type of sequence used for the keys.
1065 Note that any modification to TRIE *immediately* invalidates all
1066 trie-stacks created before the modification (in particular,
1067 calling `trie-stack-pop' will give unpredictable results).
1069 Operations on trie-stacks are significantly more efficient than
1070 constructing a real stack from the trie and using standard stack
1071 functions. As such, they can be useful in implementing efficient
1072 algorithms on tries. However, in cases where mapping functions
1073 `trie-mapc', `trie-mapcar' or `trie-mapf' would be sufficient, it
1074 is better to use one of those instead."
1075 ;; convert trie from print-form if necessary
1076 (trie-transform-from-read-warn trie)
1077 ;; if stack functions aren't defined for trie type, throw error
1078 (if (not (functionp (trie--stack-createfun trie)))
1079 (error "Trie type does not support stack operations")
1080 ;; otherwise, create and initialise a stack
1081 (trie--stack-create trie type reverse)))
1084 (defun trie-stack-pop (trie-stack &optional nilflag)
1085 "Pop the first element from TRIE-STACK.
1087 Returns nil if the stack is empty, or NILFLAG if specified. (The
1088 latter allows an empty stack to be distinguished from a null
1089 element stored in the trie.)"
1090 ;; return nilflag if stack is empty
1091 (if (trie-stack-empty-p trie-stack)
1093 ;; if elements have been pushed onto the stack, pop those first
1094 (if (trie--stack-pushed trie-stack)
1095 (pop (trie--stack-pushed trie-stack))
1096 ;; otherwise, pop first element from trie-stack and repopulate it
1098 (pop (trie--stack-store trie-stack))
1099 (setf (trie--stack-store trie-stack)
1100 (funcall (trie--stack-repopulatefun trie-stack)
1101 (trie--stack-store trie-stack)
1102 (trie--stack-reverse trie-stack)
1103 (trie--stack-comparison-function trie-stack)
1104 (trie--stack-lookupfun trie-stack)
1105 (trie--stack-stack-createfun trie-stack)
1106 (trie--stack-stack-popfun trie-stack)
1107 (trie--stack-stack-emptyfun trie-stack)))))))
1110 (defun trie-stack-push (element trie-stack)
1111 "Push ELEMENT onto TRIE-STACK."
1112 (push element (trie--stack-pushed trie-stack)))
1115 (defun trie-stack-first (trie-stack &optional nilflag)
1116 "Return the first element from TRIE-STACK, without removing it
1119 Returns nil if the stack is empty, or NILFLAG if specified. (The
1120 latter allows an empty stack to be distinguished from a null
1121 element stored in the trie.)"
1122 ;; return nilflag if stack is empty
1123 (if (trie-stack-empty-p trie-stack)
1125 ;; if elements have been pushed onto the stack, return first of
1127 (if (trie--stack-pushed trie-stack)
1128 (car (trie--stack-pushed trie-stack))
1129 ;; otherwise, return first element from trie-stack
1130 (car (trie--stack-store trie-stack)))))
1133 (defalias 'trie-stack-p 'trie--stack-p
1134 "Return t if argument is a trie-stack, nil otherwise.")
1137 (defun trie-stack-empty-p (trie-stack)
1138 "Return t if TRIE-STACK is empty, nil otherwise."
1139 (and (null (trie--stack-store trie-stack))
1140 (null (trie--stack-pushed trie-stack))))
1143 (defun trie--stack-repopulate
1144 (store reverse comparison-function lookupfun
1145 stack-createfun stack-popfun stack-emptyfun)
1146 ;; Recursively push children of the node at the head of STORE onto the
1147 ;; front of STORE, until a data node is reached.
1149 ;; nothing to do if stack is empty
1151 (let ((node (funcall stack-popfun (cdar store)))
1153 (when (funcall stack-emptyfun (cdar store))
1154 ;; (pop store) here produces irritating compiler warnings
1155 (setq store (cdr store)))
1157 (while (not (trie--node-data-p node))
1159 (cons (trie--seq-append seq (trie--node-split node))
1160 (funcall stack-createfun
1161 (trie--node-subtree node) reverse))
1163 (setq node (funcall stack-popfun (cdar store))
1165 (when (funcall stack-emptyfun (cdar store))
1166 ;; (pop store) here produces irritating compiler warnings
1167 (setq store (cdr store))))
1169 (push (cons seq (trie--node-data node)) store))))
1174 ;; ================================================================
1175 ;; Query-building utility macros
1177 ;; Implementation Note
1178 ;; -------------------
1179 ;; For queries ranked in anything other than lexical order, we use a
1180 ;; partial heap-sort to find the k=MAXNUM highest ranked matches among
1181 ;; the n possibile matches. This has worst-case time complexity
1182 ;; O(n log k), and is both simple and elegant. An optimal algorithm
1183 ;; (e.g. partial quick-sort discarding the irrelevant partition at each
1184 ;; step) would have complexity O(n + k log k), but is probably not worth
1185 ;; the extra coding effort, and would have worse space complexity unless
1186 ;; coded to work "in-place", which would be highly non-trivial. (I
1187 ;; haven't done any benchmarking, though, so feel free to do so and let
1188 ;; me know the results!)
1190 (defmacro trie--construct-accumulator (maxnum filter resultfun)
1191 ;; Does what it says on the tin! | sed -e 's/tin/macro name/'
1193 ;; filter, maxnum, resultfun
1194 ((and ,filter ,maxnum ,resultfun)
1196 (when (funcall ,filter seq data)
1197 (aset trie--accumulate 0
1198 (cons (funcall ,resultfun seq data)
1199 (aref trie--accumulate 0)))
1200 (and (>= (length (aref trie--accumulate 0)) ,maxnum)
1201 (throw 'trie-accumulate--done nil)))))
1202 ;; filter, maxnum, !resultfun
1203 ((and ,filter ,maxnum (not ,resultfun))
1205 (when (funcall ,filter seq data)
1206 (aset trie--accumulate 0
1207 (cons (cons seq data)
1208 (aref trie--accumulate 0)))
1209 (and (>= (length (aref trie--accumulate 0)) ,maxnum)
1210 (throw 'trie-accumulate--done nil)))))
1211 ;; filter, !maxnum, resultfun
1212 ((and ,filter (not ,maxnum) ,resultfun)
1214 (when (funcall ,filter seq data)
1215 (aset trie--accumulate 0
1216 (cons (funcall ,resultfun seq data)
1217 (aref trie--accumulate 0))))))
1218 ;; filter, !maxnum, !resultfun
1219 ((and ,filter (not ,maxnum) (not ,resultfun))
1221 (when (funcall ,filter seq data)
1222 (aset trie--accumulate 0
1223 (cons (cons seq data)
1224 (aref trie--accumulate 0))))))
1225 ;; !filter, maxnum, resultfun
1226 ((and (not ,filter) ,maxnum ,resultfun)
1228 (aset trie--accumulate 0
1229 (cons (funcall ,resultfun seq data)
1230 (aref trie--accumulate 0)))
1231 (and (>= (length (aref trie--accumulate 0)) ,maxnum)
1232 (throw 'trie-accumulate--done nil))))
1233 ;; !filter, maxnum, !resultfun
1234 ((and (not ,filter) ,maxnum (not ,resultfun))
1236 (aset trie--accumulate 0
1237 (cons (cons seq data)
1238 (aref trie--accumulate 0)))
1239 (and (>= (length (aref trie--accumulate 0)) ,maxnum)
1240 (throw 'trie-accumulate--done nil))))
1241 ;; !filter, !maxnum, resultfun
1242 ((and (not ,filter) (not ,maxnum) ,resultfun)
1244 (aset trie--accumulate 0
1245 (cons (funcall ,resultfun seq data)
1246 (aref trie--accumulate 0)))))
1247 ;; !filter, !maxnum, !resultfun
1248 ((and (not ,filter) (not ,maxnum) (not ,resultfun))
1250 (aset trie--accumulate 0
1251 (cons (cons seq data)
1252 (aref trie--accumulate 0)))))
1257 (defmacro trie--construct-ranked-accumulator (maxnum filter)
1258 ;; Does what it says on the tin! | sed -e 's/tin/macro name/'
1261 ((and ,filter ,maxnum)
1263 (when (funcall ,filter seq data)
1264 (heap-add trie--accumulate (cons seq data))
1265 (and (> (heap-size trie--accumulate) ,maxnum)
1266 (heap-delete-root trie--accumulate)))))
1268 ((and ,filter (not ,maxnum))
1270 (when (funcall ,filter seq data)
1271 (heap-add trie--accumulate (cons seq data)))))
1273 ((and (not ,filter) ,maxnum)
1275 (heap-add trie--accumulate (cons seq data))
1276 (and (> (heap-size trie--accumulate) ,maxnum)
1277 (heap-delete-root trie--accumulate))))
1279 ((and (not ,filter) (not ,maxnum))
1281 (heap-add trie--accumulate (cons seq data))))))
1285 (defmacro trie--accumulate-results
1286 (rankfun maxnum reverse filter resultfun accfun duplicates &rest body)
1287 ;; Accumulate results of running BODY code, and return them in
1288 ;; appropriate order. BODY should call ACCFUN to accumulate a result,
1289 ;; passing it two arguments: a trie data node, and the corresponding
1290 ;; sequence. BODY can throw 'trie-accumulate--done to terminate the
1291 ;; accumulation and return the results. A non-null DUPLICATES flag
1292 ;; signals that the accumulated results might contain duplicates,
1293 ;; which should be deleted. Note that DUPLICATES is ignored if RANKFUN
1294 ;; is null. The other arguments should be passed straight through from
1295 ;; the query function.
1297 ;; rename functions to help avoid dynamic-scoping bugs
1298 `(let* ((--trie-accumulate--rankfun ,rankfun)
1299 (--trie-accumulate--filter ,filter)
1300 (--trie-accumulate--resultfun ,resultfun)
1301 ;; construct structure in which to accumulate results
1304 (heap-create ; heap order is inverse of rank order
1307 (funcall --trie-accumulate--rankfun a b))
1309 (not (funcall --trie-accumulate--rankfun a b))))
1310 (when ,maxnum (1+ ,maxnum)))
1311 (make-vector 1 nil)))
1312 ;; construct function to accumulate completions
1315 (trie--construct-ranked-accumulator
1316 ,maxnum --trie-accumulate--filter)
1317 (trie--construct-accumulator
1318 ,maxnum --trie-accumulate--filter
1319 --trie-accumulate--resultfun))))
1321 ;; accumulate results
1322 (catch 'trie-accumulate--done ,@body)
1324 ;; return list of completions
1326 ;; for a ranked query, extract completions from heap
1329 ;; check for and delete duplicates if flag is set
1331 (while (not (heap-empty trie--accumulate))
1332 (if (equal (car (heap-root trie--accumulate))
1334 (heap-delete-root trie--accumulate)
1335 (push (heap-delete-root trie--accumulate)
1337 ;; skip duplicate checking if flag is not set
1338 (while (not (heap-empty trie--accumulate))
1340 (let ((res (heap-delete-root trie--accumulate)))
1341 (push (funcall ,resultfun (car res) (cdr res))
1343 (push (heap-delete-root trie--accumulate)
1347 ;; for lexical query, reverse result list if MAXNUM supplied
1348 (,maxnum (nreverse (aref trie--accumulate 0)))
1349 ;; otherwise, just return list
1350 (t (aref trie--accumulate 0)))))
1355 ;; ================================================================
1358 (defun trie-complete
1359 (trie prefix &optional rankfun maxnum reverse filter resultfun)
1360 "Return an alist containing all completions of PREFIX in TRIE
1361 along with their associated data, in the order defined by
1362 RANKFUN, defaulting to \"lexical\" order (i.e. the order defined
1363 by the trie's comparison function). If REVERSE is non-nil, the
1364 completions are sorted in the reverse order. Returns nil if no
1365 completions are found.
1367 PREFIX must be a sequence (vector, list or string) containing
1368 elements of the type used to reference data in the trie. (If
1369 PREFIX is a string, it must be possible to apply `string' to
1370 individual elements of the sequences stored in the trie.) The
1371 completions returned in the alist will be sequences of the same
1372 type as KEY. If PREFIX is a list of sequences, completions of all
1373 sequences in the list are included in the returned alist. All
1374 sequences in the list must be of the same type.
1376 The optional integer argument MAXNUM limits the results to the
1377 first MAXNUM completions. Otherwise, all completions are
1380 If specified, RANKFUN must accept two arguments, both cons
1381 cells. The car contains a sequence from the trie (of the same
1382 type as PREFIX), the cdr contains its associated data. It should
1383 return non-nil if first argument is ranked strictly higher than
1384 the second, nil otherwise.
1386 The FILTER argument sets a filter function for the
1387 completions. If supplied, it is called for each possible
1388 completion with two arguments: the completion, and its associated
1389 data. If the filter function returns nil, the completion is not
1390 included in the results, and does not count towards MAXNUM.
1392 RESULTFUN defines a function used to process results before
1393 adding them to the final result list. If specified, it should
1394 accept two arguments: a key and its associated data. It's return
1395 value is what gets added to the final result list, instead of the
1396 default key-data cons cell."
1398 ;; convert trie from print-form if necessary
1399 (trie-transform-from-read-warn trie)
1400 ;; wrap prefix in a list if necessary
1401 ;; FIXME: the test for a list of prefixes, below, will fail if the
1402 ;; PREFIX sequence is a list, and the elements of PREFIX are
1403 ;; themselves lists (there might be no easy way to fully fix
1405 (if (or (atom prefix)
1406 (and (listp prefix) (not (sequencep (car prefix)))))
1407 (setq prefix (list prefix))
1408 ;; sort list of prefixes if sorting completions lexically
1409 (when (null rankfun)
1411 (sort prefix (trie-construct-sortfun
1412 (trie--comparison-function trie))))))
1414 ;; accumulate completions
1416 (declare (special accumulator))
1417 (trie--accumulate-results
1418 rankfun maxnum reverse filter resultfun accumulator nil
1420 (setq node (trie--node-find (trie--root trie) pfx
1421 (trie--lookupfun trie)))
1425 (funcall accumulator seq (trie--node-data node)))
1426 (trie--mapfun trie) node pfx
1427 (if maxnum reverse (not reverse)))))
1433 (defun trie-complete-stack (trie prefix &optional reverse)
1434 "Return an object that allows completions of PREFIX to be accessed
1435 as if they were a stack.
1437 The stack is sorted in \"lexical\" order, i.e. the order defined
1438 by TRIE's comparison function, or in reverse order if REVERSE is
1439 non-nil. Calling `trie-stack-pop' pops the top element (a key and
1440 its associated data) from the stack.
1442 PREFIX must be a sequence (vector, list or string) that forms the
1443 initial part of a TRIE key, or a list of such sequences. (If
1444 PREFIX is a string, it must be possible to apply `string' to
1445 individual elements of TRIE keys.) The completions returned in
1446 the alist will be sequences of the same type as KEY. If PREFIX is
1447 a list of sequences, completions of all sequences in the list are
1448 included in the stack. All sequences in the list must be of the
1451 Note that any modification to TRIE *immediately* invalidates all
1452 trie-stacks created before the modification (in particular,
1453 calling `trie-stack-pop' will give unpredictable results).
1455 Operations on trie-stacks are significantly more efficient than
1456 constructing a real stack from completions of PREFIX in TRIE and
1457 using standard stack functions. As such, they can be useful in
1458 implementing efficient algorithms on tries. However, in cases
1459 where `trie-complete' or `trie-complete-ordered' is sufficient,
1460 it is better to use one of those instead."
1461 ;; convert trie from print-form if necessary
1462 (trie-transform-from-read-warn trie)
1463 ;; if stack functions aren't defined for trie type, throw error
1464 (if (not (functionp (trie--stack-createfun trie)))
1465 (error "Trie type does not support stack operations")
1466 ;; otherwise, create and initialise a stack
1467 (trie--completion-stack-create trie prefix reverse)))
1470 (defun trie--completion-stack-construct-store (trie prefix reverse)
1471 ;; Construct store for completion stack based on TRIE.
1473 (if (or (atom prefix)
1475 (not (sequencep (car prefix)))))
1476 (setq prefix (list prefix))
1479 (trie-construct-sortfun
1480 (trie--comparison-function trie)
1482 (dolist (pfx prefix)
1483 (when (setq node (trie--node-find (trie--root trie) pfx
1484 (trie--lookupfun trie)))
1485 (push (cons pfx (funcall (trie--stack-createfun trie)
1486 (trie--node-subtree node)
1489 (trie--stack-repopulate
1491 (trie--comparison-function trie)
1492 (trie--lookupfun trie)
1493 (trie--stack-createfun trie)
1494 (trie--stack-popfun trie)
1495 (trie--stack-emptyfun trie))))
1500 ;; ================================================================
1503 (defun trie-regexp-search
1504 (trie regexp &optional rankfun maxnum reverse filter resultfun type)
1505 "Return an alist containing all matches for REGEXP in TRIE
1506 along with their associated data, in the order defined by
1507 RANKFUN, defauling to \"lexical\" order (i.e. the order defined
1508 by the trie's comparison function). If REVERSE is non-nil, the
1509 completions are sorted in the reverse order. Returns nil if no
1510 completions are found.
1512 REGEXP is a regular expression, but it need not necessarily be a
1513 string. It must be a sequence (vector, list of string) whose
1514 elements are either elements of the same type as elements of the
1515 trie keys (which behave as literals in the regexp), or any of the
1516 usual regexp special characters and backslash constructs. If
1517 REGEXP is a string, it must be possible to apply `string' to
1518 individual elements of the keys stored in the trie. The matches
1519 returned in the alist will be sequences of the same type as KEY.
1521 Only a subset of the full Emacs regular expression syntax is
1522 supported. There is no support for regexp constructs that are
1523 only meaningful for strings (character ranges and character
1524 classes inside character alternatives, and syntax-related
1525 backslash constructs). Back-references and non-greedy postfix
1526 operators are not supported, so `?' after a postfix operator
1527 loses its special meaning. Also, matches are always anchored, so
1528 `$' and `^' lose their special meanings (use `.*' at the
1529 beginning and end of the regexp to get an unanchored match).
1531 If the regexp contains any non-shy grouping constructs, subgroup
1532 match data is included in the results. In this case, the car of
1533 each match is no longer just a key. Instead, it is a list whose
1534 first element is the matching key, and whose remaining elements
1535 are cons cells whose cars and cdrs give the start and end indices
1536 of the elements that matched the corresponding groups, in order.
1538 The optional integer argument MAXNUM limits the results to the
1539 first MAXNUM matches. Otherwise, all matches are returned.
1541 If specified, RANKFUN must accept two arguments, both cons
1542 cells. The car contains a sequence from the trie (of the same
1543 type as PREFIX), the cdr contains its associated data. It should
1544 return non-nil if first argument is ranked strictly higher than
1545 the second, nil otherwise.
1547 The FILTER argument sets a filter function for the matches. If
1548 supplied, it is called for each possible match with two
1549 arguments: the matching key, and its associated data. If the
1550 filter function returns nil, the match is not included in the
1551 results, and does not count towards MAXNUM.
1553 RESULTFUN defines a function used to process results before
1554 adding them to the final result list. If specified, it should
1555 accept two arguments: a key and its associated data. It's return
1556 value is what gets added to the final result list, instead of the
1557 default key-data cons cell."
1559 ;; convert trie from print-form if necessary
1560 (trie-transform-from-read-warn trie)
1562 ;; massage rankfun to cope with grouping data
1563 ;; FIXME: could skip this if REGEXP contains no grouping constructs
1567 ;; if car of argument contains a key+group list rather than
1568 ;; a straight key, remove group list
1569 ;; FIXME: the test for straight key, below, will fail if
1570 ;; the key is a list, and the first element of the
1571 ;; key is itself a list (there might be no easy way
1572 ;; to fully fix this...)
1573 (unless (or (atom (car a))
1574 (and (listp (car a))
1575 (not (sequencep (caar a)))))
1576 (setq a (cons (caar a) (cdr a))))
1577 (unless (or (atom (car b))
1578 (and (listp (car b))
1579 (not (sequencep (caar b)))))
1580 (setq b (cons (caar b) (cdr b))))
1581 ;; call rankfun on massaged arguments
1584 ;; accumulate completions
1585 (declare (special accumulator))
1586 (trie--accumulate-results
1587 rankfun maxnum reverse filter resultfun accumulator nil
1588 (trie--do-regexp-search
1590 (tNFA-from-regexp regexp :test (trie--construct-equality-function
1591 (trie--comparison-function trie)))
1592 (cond ((stringp regexp) "") ((listp regexp) ()) (t [])) 0
1593 (or (and maxnum reverse) (and (not maxnum) (not reverse)))
1594 (trie--comparison-function trie)
1595 (trie--lookupfun trie)
1596 (trie--mapfun trie))))
1600 (defun trie--do-regexp-search
1601 (--trie--regexp-search--node tNFA seq pos reverse
1602 comparison-function lookupfun mapfun)
1603 ;; Search everything below the node --TRIE--REGEXP-SEARCH-NODE for
1604 ;; matches to the regexp encoded in tNFA. SEQ is the sequence
1605 ;; corresponding to NODE, POS is it's length. REVERSE is the usual
1606 ;; query argument, and the remaining arguments are the corresponding
1608 (declare (special accumulator))
1610 ;; if NFA has matched and we're accumulating in normal order, check if
1611 ;; trie contains current string
1612 (when (and (not reverse) (tNFA-match-p tNFA))
1614 (when (setq node (trie--find-data-node
1615 --trie--regexp-search--node lookupfun))
1616 (setq groups (tNFA-group-data tNFA))
1617 (funcall accumulator
1618 (if groups (cons seq groups) seq)
1619 (trie--node-data node)))))
1623 ;; ((trie--node-data-p --trie--regexp-search--node)
1624 ;; (when (tNFA-match-p tNFA)
1625 ;; (let ((groups (tNFA-group-data tNFA)))
1626 ;; (funcall accumulator
1627 ;; (if groups (cons seq groups) seq)
1628 ;; (trie--node-data --trie--regexp-search--node)))))
1630 ;; wildcard transition: map over all nodes in subtree
1631 ((tNFA-wildcard-p tNFA)
1635 (unless (trie--node-data-p node)
1636 ;; (when (tNFA-match-p tNFA)
1637 ;; (setq groups (tNFA-group-data tNFA))
1638 ;; (funcall accumulator
1639 ;; (if groups (cons seq groups) seq)
1640 ;; (trie--node-data node)))
1641 (when (setq state (tNFA-next-state
1642 tNFA (trie--node-split node) pos))
1643 (trie--do-regexp-search
1645 (trie--seq-append seq (trie--node-split node))
1646 (1+ pos) reverse comparison-function
1647 lookupfun mapfun))))
1648 (trie--node-subtree --trie--regexp-search--node)
1651 (t ;; no wildcard transition: loop over all transitions
1653 (dolist (chr (sort (tNFA-transitions tNFA)
1655 `(lambda (a b) (,comparison-function b a))
1656 comparison-function)))
1657 (when (and (setq node (trie--node-find
1658 --trie--regexp-search--node
1659 (vector chr) lookupfun))
1660 (setq state (tNFA-next-state tNFA chr pos)))
1661 (trie--do-regexp-search
1662 node state (trie--seq-append seq chr) (1+ pos)
1663 reverse comparison-function lookupfun mapfun))))))
1665 ;; if NFA has matched and we're accumulating in reverse order, check if
1666 ;; trie contains current string
1667 (when (and reverse (tNFA-match-p tNFA))
1669 (when (setq node (trie--find-data-node
1670 --trie--regexp-search--node lookupfun))
1671 (setq groups (tNFA-group-data tNFA))
1672 (funcall accumulator
1673 (if groups (cons seq groups) seq)
1674 (trie--node-data node))))))
1678 (defun trie-regexp-stack (trie regexp &optional reverse)
1679 "Return an object that allows matches to REGEXP to be accessed
1680 as if they were a stack.
1682 The stack is sorted in \"lexical\" order, i.e. the order defined
1683 by TRIE's comparison function, or in reverse order if REVERSE is
1684 non-nil. Calling `trie-stack-pop' pops the top element (a cons
1685 cell containing a key and its associated data) from the stack.
1687 REGEXP is a regular expression, but it need not necessarily be a
1688 string. It must be a sequence (vector, list of string) whose
1689 elements are either elements of the same type as elements of the
1690 trie keys (which behave as literals in the regexp), or any of the
1691 usual regexp special characters and backslash constructs. If
1692 REGEXP is a string, it must be possible to apply `string' to
1693 individual elements of the keys stored in the trie. The matches
1694 returned in the alist will be sequences of the same type as KEY.
1696 Back-references and non-greedy postfix operators are *not*
1697 supported, and the matches are always anchored, so `$' and `^'
1698 lose their special meanings.
1700 If the regexp contains any non-shy grouping constructs, subgroup
1701 match data is included in the results. In this case, the car of
1702 each match (as returned by a call to `trie-stack-pop' is no
1703 longer just a key. Instead, it is a list whose first element is
1704 the matching key, and whose remaining elements are cons cells
1705 whose cars and cdrs give the start and end indices of the
1706 elements that matched the corresponding groups, in order."
1708 ;; convert trie from print-form if necessary
1709 (trie-transform-from-read-warn trie)
1710 ;; if stack functions aren't defined for trie type, throw error
1711 (if (not (functionp (trie--stack-createfun trie)))
1712 (error "Trie type does not support stack operations")
1713 ;; otherwise, create and initialise a regexp stack
1714 (trie--regexp-stack-create trie regexp reverse)))
1717 (defun trie--regexp-stack-construct-store
1718 (trie regexp &optional reverse)
1719 ;; Construct store for regexp stack based on TRIE.
1720 (let ((seq (cond ((stringp regexp) "") ((listp regexp) ()) (t [])))
1722 (push (list seq (trie--root trie)
1724 regexp :test (trie--construct-equality-function
1725 (trie--comparison-function trie)))
1728 (trie--regexp-stack-repopulate
1730 (trie--comparison-function trie)
1731 (trie--lookupfun trie)
1732 (trie--stack-createfun trie)
1733 (trie--stack-popfun trie)
1734 (trie--stack-emptyfun trie))))
1737 (defun trie--regexp-stack-repopulate
1738 (store reverse comparison-function lookupfun
1739 stack-createfun stack-popfun stack-emptyfun)
1740 ;; Recursively push matching children of the node at the head of STORE
1741 ;; onto STORE, until a data node is reached. REVERSE is the usual
1742 ;; query argument, and the remaining arguments are the corresponding
1744 (let (state seq node pos groups n s)
1747 (setq pos (pop store)
1753 ;; if stack is empty, we're done
1756 ;; if stack element is a trie node...
1757 ((trie--node-p node)
1759 ;; matching data node: add data to the stack and we're done
1760 ((trie--node-data-p node)
1761 (when (tNFA-match-p state)
1762 (setq groups (tNFA-group-data state))
1763 (push (cons (if groups (cons groups seq) seq)
1764 (trie--node-data node))
1766 nil) ; return nil to exit loop
1768 ;; wildcard transition: add new node stack
1769 ((tNFA-wildcard-p state)
1771 (funcall stack-createfun
1772 (trie--node-subtree node) reverse)
1776 (t ;; non-wildcard transition: add all possible next nodes
1777 (dolist (chr (sort (tNFA-transitions state)
1781 (,comparison-function b a)))))
1782 (when (and (setq n (trie--node-find
1783 node (vector chr) lookupfun))
1784 (setq s (tNFA-next-state state chr pos)))
1785 (push (list (trie--seq-append seq chr) n s (1+ pos))
1787 t))) ; return t to keep looping
1789 ;; otherwise, stack element is a node stack...
1791 ;; if node stack is empty, dump it and keep repopulating
1792 (if (funcall stack-emptyfun node)
1793 t ; return t to keep looping
1794 ;; otherwise, add node stack back, and add next node from
1796 (push (list seq node state pos) store)
1797 (setq node (funcall stack-popfun node)
1798 state (tNFA-next-state state
1799 (trie--node-split node) pos))
1801 ;; matching data node: add data to the stack and we're
1803 (if (trie--node-data-p node)
1805 (push (cons seq (trie--node-data node)) store)
1806 nil) ; return nil to exit loop
1807 ;; normal node: add it to the stack and keep
1810 (trie--seq-append seq (trie--node-split node))
1811 node state (1+ pos))
1818 ;; ----------------------------------------------------------------
1819 ;; Pretty-print tries during edebug
1824 ;; We advise the `edebug-prin1' and `edebug-prin1-to-string' functions
1825 ;; (actually, aliases) so that they print "#<trie>" instead of the full
1826 ;; print form for tries.
1828 ;; This is because, if left to its own devices, edebug hangs for ages
1829 ;; whilst printing large tries, and you either have to wait for a *very*
1830 ;; long time for it to finish, or kill Emacs entirely. (Even C-g C-g
1833 ;; We do this also for lists of tries, since those occur quite often,
1834 ;; but not for other sequence types or deeper nested structures, to keep
1835 ;; the implementation as simple as possible.
1837 ;; Since the print form of a trie is practically incomprehensible
1838 ;; anyway, we don't lose much by doing this. If you *really* want to
1839 ;; print tries in full whilst edebugging, despite this warning, disable
1842 ;; FIXME: We could use `cedet-edebug-prin1-extensions' instead of advice
1843 ;; when `cedet-edebug' is loaded, though I believe the current
1844 ;; implementation still works in that case.
1852 (defun trie--edebug-pretty-print (object)
1854 ((trie-p object) "#<trie>")
1855 ((null object) "nil")
1856 ((let ((tlist object) (test t))
1857 (while (or (trie-p (car-safe tlist))
1858 (and tlist (setq test nil)))
1859 (setq tlist (cdr tlist)))
1861 (concat "(" (mapconcat (lambda (dummy) "#<trie>") object " ") ")"))
1862 ;; ((vectorp object)
1863 ;; (let ((pretty "[") (len (length object)))
1864 ;; (dotimes (i (1- len))
1867 ;; (if (trie-p (aref object i))
1868 ;; "#<trie>" (prin1-to-string (aref object i))) " ")))
1870 ;; (if (trie-p (aref object (1- len)))
1871 ;; "#<trie>" (prin1-to-string (aref object (1- len))))
1876 (when (fboundp 'ad-define-subr-args)
1877 (ad-define-subr-args 'edebug-prin1 '(object &optional printcharfun)))
1879 (defadvice edebug-prin1
1880 (around trie activate compile preactivate)
1881 (let ((pretty (trie--edebug-pretty-print object)))
1884 (prin1 pretty printcharfun)
1885 (setq ad-return-value pretty))
1889 (when (fboundp 'ad-define-subr-args)
1890 (ad-define-subr-args 'edebug-prin1-to-string '(object &optional noescape)))
1892 (defadvice edebug-prin1-to-string
1893 (around trie activate compile preactivate)
1894 (let ((pretty (trie--edebug-pretty-print object)))
1896 (setq ad-return-value pretty)
1903 ;;; trie.el ends here