1 ;;; trie.el --- Trie data structure
3 ;; Copyright (C) 2008-2010, 2012, 2014 Free Software Foundation, Inc
5 ;; Author: Toby Cubitt <toby-predictive@dr-qubit.org>
7 ;; Keywords: extensions, matching, data structures
8 ;; trie, ternary search tree, tree, completion, regexp
9 ;; Package-Requires: ((tNFA "0.1.1") (heap "0.3"))
10 ;; URL: http://www.dr-qubit.org/emacs.php
11 ;; Repository: http://www.dr-qubit.org/git/predictive.git
13 ;; This file is part of Emacs.
15 ;; GNU Emacs is free software: you can redistribute it and/or modify it under
16 ;; the terms of the GNU General Public License as published by the Free
17 ;; Software Foundation, either version 3 of the License, or (at your option)
20 ;; GNU Emacs is distributed in the hope that it will be useful, but WITHOUT
21 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
22 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
25 ;; You should have received a copy of the GNU General Public License along
26 ;; with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
33 ;; A trie is a data structure used to store keys that are ordered sequences of
34 ;; elements (vectors, lists or strings in Elisp; strings are by far the most
35 ;; common), in such a way that both storage and retrieval are space- and
36 ;; time-efficient. But, more importantly, a variety of more advanced queries
37 ;; can also be performed efficiently: for example, returning all strings with
38 ;; a given prefix, searching for keys matching a given wildcard pattern or
39 ;; regular expression, or searching for all keys that match any of the above
40 ;; to within a given Lewenstein distance (though this last is not yet
41 ;; implemented in this package - code contributions welcome!).
43 ;; You create a trie using `make-trie', create an association using
44 ;; `trie-insert', retrieve an association using `trie-lookup', and map over a
45 ;; trie using `trie-map', `trie-mapc', `trie-mapcar', or `trie-mapf'. You can
46 ;; find completions of a prefix sequence using `trie-complete', or search for
47 ;; keys matching a regular expression using `trie-regexp-search'. Using
48 ;; `trie-stack', you can create an object that allows the contents of the trie
49 ;; to be used like a stack, useful for building other algorithms on top of
50 ;; tries; `trie-stack-pop' pops elements off the stack one-by-one, in
51 ;; "lexical" order, whilst `trie-stack-push' pushes things onto the
52 ;; stack. Similarly, `trie-complete-stack', and `trie-regexp-stack' create
53 ;; "lexically-ordered" stacks of query results.
55 ;; Note that there are two uses for a trie: as a lookup table, in which case
56 ;; only the presence or absence of a key in the trie is significant, or as an
57 ;; associative array, in which case each key carries some associated
58 ;; data. Libraries for other data structure often only implement lookup
59 ;; tables, leaving it up to you to implement an associative array on top of
60 ;; this (by storing key+data pairs in the data structure's keys, then defining
61 ;; a comparison function that only compares the key part). For a trie,
62 ;; however, the underlying data structures naturally support associative
63 ;; arrays at no extra cost, so this package does the opposite: it implements
64 ;; associative arrays, and leaves it up to you to use them as lookup tables if
68 ;; Different Types of Trie
69 ;; -----------------------
70 ;; There are numerous ways to implement trie data structures internally, each
71 ;; with its own time- and space-efficiency trade-offs. By viewing a trie as a
72 ;; tree whose nodes are themselves lookup tables for key elements, this
73 ;; package is able to support all types of trie in a uniform manner. This
74 ;; relies on there existing (or you writing!) an Elisp implementation of the
75 ;; corresponding type of lookup table. The best type of trie to use will
76 ;; depend on what trade-offs are appropriate for your particular
77 ;; application. The following gives an overview of the advantages and
78 ;; disadvantages of various types of trie. (Not all of the underlying lookup
79 ;; tables have been implemented in Elisp yet, so using some of the trie types
80 ;; described below would require writing the missing Elisp package!)
83 ;; One of the most effective all-round implementations of a trie is a ternary
84 ;; search tree, which can be viewed as a tree of binary trees. If basic binary
85 ;; search trees are used for the nodes of the trie, we get a standard ternary
86 ;; search tree. If self-balancing binary trees are used (e.g. AVL or red-black
87 ;; trees), we get a self-balancing ternary search tree. If splay trees are
88 ;; used, we get yet another self-organising variant of a ternary search
89 ;; tree. All ternary search trees have, in common, good space-efficiency. The
90 ;; time-efficiency of the various trie operations is also good, assuming the
91 ;; underlying binary trees are balanced. Under that assumption, all variants
92 ;; of ternary search trees described below have the same asymptotic
93 ;; time-complexity for all trie operations.
95 ;; Self-balancing trees ensure the underlying binary trees are always close to
96 ;; perfectly balanced, with the usual trade-offs between the different the
97 ;; types of self-balancing binary tree: AVL trees are slightly more efficient
98 ;; for lookup operations than red-black trees, at a cost of slightly less
99 ;; efficienct insertion operations, and less efficient deletion
100 ;; operations. Splay trees give good average-case complexity and are simpler
101 ;; to implement than AVL or red-black trees (which can mean they're faster in
102 ;; practice!), at the expense of poor worst-case complexity.
104 ;; If your tries are going to be static (i.e. created once and rarely
105 ;; modified), then using perfectly balanced binary search trees might be
106 ;; appropriate. Perfectly balancing the binary trees is very inefficient, but
107 ;; it only has to be when the trie is first created or modified. Lookup
108 ;; operations will then be as efficient as possible for ternary search trees,
109 ;; and the implementation will also be simpler (so probably faster) than a
110 ;; self-balancing tree, without the space and time overhead required to keep
111 ;; track of rebalancing.
113 ;; On the other hand, adding data to a binary search tree in a random order
114 ;; usually results in a reasonably balanced tree. If this is the likely
115 ;; scenario, using a basic binary tree without bothering to balance it at all
116 ;; might be quite efficient, and, being even simpler to implement, could be
117 ;; quite fast overall.
120 ;; A digital trie is a different implementation of a trie, which can be viewed
121 ;; as a tree of arrays, and has different space- and time-complexities than a
122 ;; ternary search tree. Roughly speaking, a digital trie has worse
123 ;; space-complexity, but better time-complexity. Using hash tables instead of
124 ;; arrays for the nodes gives something similar to a digital trie, potentially
125 ;; with better space-complexity and the same amortised time-complexity, but at
126 ;; the expense of occasional significant inefficiency when inserting and
127 ;; deleting (whenever a hash table has to be resized). Indeed, an array can be
128 ;; viewed as a perfect hash table, but as such it requires the number of
129 ;; possible values to be known in advance.
131 ;; Finally, if you really need optimal efficiency from your trie, you could
132 ;; even write a custom type of underlying lookup table, optimised for your
135 ;; This package uses the AVL tree package avl-tree.el, the tagged NFA package
136 ;; tNFA.el, and the heap package heap.el.
141 (eval-when-compile (require 'cl))
148 ;;; ================================================================
149 ;;; Pre-defined trie types
151 (defconst trie--types '(avl))
155 (put 'avl :trie-createfun
156 (lambda (cmpfun seq) (avl-tree-create cmpfun)))
157 (put 'avl :trie-insertfun 'avl-tree-enter)
158 (put 'avl :trie-deletefun 'avl-tree-delete)
159 (put 'avl :trie-lookupfun 'avl-tree-member)
160 (put 'avl :trie-mapfun 'avl-tree-mapc)
161 (put 'avl :trie-emptyfun 'avl-tree-empty)
162 (put 'avl :trie-stack-createfun 'avl-tree-stack)
163 (put 'avl :trie-stack-popfun 'avl-tree-stack-pop)
164 (put 'avl :trie-stack-emptyfun 'avl-tree-stack-empty-p)
165 (put 'avl :trie-transform-for-print 'trie--avl-transform-for-print)
166 (put 'avl :trie-transform-from-read 'trie--avl-transform-from-read)
170 ;;; ================================================================
171 ;;; Internal utility functions and macros
173 ;;; ----------------------------------------------------------------
174 ;;; Functions and macros for handling a trie.
176 ;; symbol used to denote a trie leaf node
177 (defconst trie--terminator '--trie--terminator)
183 (:constructor trie--create
184 (comparison-function &optional (type 'avl)
187 (or (memq type trie--types)
188 (error "trie--create: unknown trie TYPE, %s" type)))
189 (createfun (get type :trie-createfun))
190 (insertfun (get type :trie-insertfun))
191 (deletefun (get type :trie-deletefun))
192 (lookupfun (get type :trie-lookupfun))
193 (mapfun (get type :trie-mapfun))
194 (emptyfun (get type :trie-emptyfun))
195 (stack-createfun (get type :trie-stack-createfun))
196 (stack-popfun (get type :trie-stack-popfun))
197 (stack-emptyfun (get type :trie-stack-emptyfun))
198 (transform-for-print (get type :trie-transform-for-print))
199 (transform-from-read (get type :trie-transform-from-read))
200 (cmpfun (trie--wrap-cmpfun comparison-function))
201 (root (trie--node-create-root createfun cmpfun))
203 (:constructor trie--create-custom
206 (createfun 'avl-tree-create-bare)
207 (insertfun 'avl-tree-enter)
208 (deletefun 'avl-tree-delete)
209 (lookupfun 'avl-tree-member)
210 (mapfun 'avl-tree-mapc)
211 (emptyfun 'avl-tree-empty)
212 (stack-createfun 'avl-tree-stack)
213 (stack-popfun 'avl-tree-stack-pop)
214 (stack-emptyfun 'avl-tree-stack-empty-p)
215 (transform-for-print 'trie--avl-transform-for-print)
216 (transform-from-read 'trie--avl-transform-from-read)
218 (cmpfun (trie--wrap-cmpfun comparison-function))
219 (root (trie--node-create-root createfun cmpfun))
222 root comparison-function cmpfun
223 createfun insertfun deletefun lookupfun mapfun emptyfun
224 stack-createfun stack-popfun stack-emptyfun
225 transform-for-print transform-from-read print-form)
228 (defun trie--wrap-cmpfun (cmpfun)
229 ;; wrap CMPFUN for use in a subtree
231 (setq a (trie--node-split a)
232 b (trie--node-split b))
233 (cond ((eq a trie--terminator)
234 (if (eq b trie--terminator) nil t))
235 ((eq b trie--terminator) nil)
239 (defun trie--construct-equality-function (comparison-function)
240 ;; create equality function from trie comparison function
242 (and (not (,comparison-function a b))
243 (not (,comparison-function b a)))))
247 ;;; ----------------------------------------------------------------
248 ;;; Functions and macros for handling a trie node.
254 (:constructor trie--node-create
256 &aux (subtree (funcall (trie--createfun trie)
257 (trie--cmpfun trie) seq))))
258 (:constructor trie--node-create-data
259 (data &aux (split trie--terminator) (subtree data)))
260 (:constructor trie--node-create-dummy
261 (split &aux (subtree nil)))
262 (:constructor trie--node-create-root
266 (subtree (funcall createfun cmpfun []))))
270 ;; data is stored in the subtree cell of a terminal node
271 (defalias 'trie--node-data 'trie--node-subtree)
273 (defsetf trie--node-data (node) (data)
274 `(setf (trie--node-subtree ,node) ,data))
276 (defmacro trie--node-data-p (node)
277 ;; Return t if NODE is a data node, nil otherwise.
278 `(eq (trie--node-split ,node) trie--terminator))
280 (defmacro trie--node-p (node)
281 ;; Return t if NODE is a TRIE trie--node, nil otherwise. Have to
282 ;; define this ourselves, because we created a defstruct without any
283 ;; identifying tags (i.e. (:type vector)) for efficiency, but this
284 ;; means we can only perform a rudimentary and very unreliable test.
285 `(and (vectorp ,node) (= (length ,node) 2)))
288 (defun trie--node-find (node seq lookupfun)
289 ;; Returns the node below NODE corresponding to SEQ, or nil if none
291 (let ((len (length seq))
293 ;; descend trie until we find SEQ or run out of trie
294 (while (and node (< (incf i) len))
297 (trie--node-subtree node)
298 (trie--node-create-dummy (elt seq i))
303 (defmacro trie--find-data-node (node lookupfun)
304 ;; Return data node from NODE's subtree, or nil if NODE has no data
305 ;; node in its subtree.
307 (trie--node-subtree ,node)
308 (trie--node-create-dummy trie--terminator)
312 (defmacro trie--find-data (node lookupfun)
313 ;; Return data associated with sequence corresponding to NODE, or nil
314 ;; if sequence has no associated data.
315 `(let ((node (trie--find-data-node ,node ,lookupfun)))
316 (when node (trie--node-data node))))
320 ;;; ----------------------------------------------------------------
321 ;;; print/read transformation functions
323 (defun trie-transform-for-print (trie)
324 "Transform TRIE to print form."
325 (when (trie--transform-for-print trie)
326 (if (trie--print-form trie)
327 (warn "Trie has already been transformed to print-form")
328 (funcall (trie--transform-for-print trie) trie)
329 (setf (trie--print-form trie) t))))
332 (defun trie-transform-from-read (trie)
333 "Transform TRIE from print form."
334 (when (trie--transform-from-read trie)
335 (if (not (trie--print-form trie))
336 (warn "Trie is not in print-form")
337 (funcall (trie--transform-from-read trie) trie)
338 (setf (trie--print-form trie) nil))))
341 (defmacro trie-transform-from-read-warn (trie)
342 "Transform TRIE from print form, with warning."
343 `(when (trie--print-form ,trie)
344 (warn (concat "Attempt to operate on trie in print-form;\
345 converting to normal form"))
346 (trie-transform-from-read ,trie)))
349 (defun trie--avl-transform-for-print (trie)
350 ;; transform avl-tree based TRIE to print form.
352 (lambda (avl seq) (setf (avl-tree--cmpfun avl) nil))
356 (defun trie--avl-transform-from-read (trie)
357 ;; transform avl-tree based TRIE from print form."
358 (let ((--trie-avl-transform--cmpfun (trie--cmpfun trie)))
361 (setf (avl-tree--cmpfun avl) --trie-avl-transform--cmpfun))
366 ;;; ----------------------------------------------------------------
367 ;;; Replacements for CL functions
369 ;; copied from cl-extra.el
370 (defun trie--subseq (seq start &optional end)
371 "Return the subsequence of SEQ from START to END.
372 If END is omitted, it defaults to the length of the sequence.
373 If START or END is negative, it counts from the end."
374 (if (stringp seq) (substring seq start end)
376 (and end (< end 0) (setq end (+ end (setq len (length seq)))))
378 (setq start (+ start (or len (setq len (length seq))))))
380 (if (> start 0) (setq seq (nthcdr start seq)))
383 (while (>= (setq end (1- end)) start)
384 (push (pop seq) res))
386 (copy-sequence seq)))
388 (or end (setq end (or len (length seq))))
389 (let ((res (make-vector (max (- end start) 0) nil))
392 (aset res i (aref seq start))
393 (setq i (1+ i) start (1+ start)))
397 (defun trie--position (item list)
398 "Find the first occurrence of ITEM in LIST.
399 Return the index of the matching item, or nil of not found.
400 Comparison is done with 'equal."
404 (when (equal item (car list)) (throw 'found i))
406 (setq list (cdr list))))
410 (defsubst trie--seq-append (seq el)
411 "Append EL to the end of sequence SEQ."
413 ((stringp seq) (concat seq (string el)))
414 ((vectorp seq) (vconcat seq (vector el)))
415 ((listp seq) (append seq (list el)))))
418 (defsubst trie--seq-concat (seq &rest sequences)
419 "Concatenate SEQ and SEQUENCES, and make the result the same
420 type of sequence as SEQ."
422 ((stringp seq) (apply 'concat seq sequences))
423 ((vectorp seq) (apply 'vconcat seq sequences))
424 ((listp seq) (apply 'append seq sequences))))
429 ;;; ================================================================
430 ;;; Basic trie operations
433 (defalias 'make-trie 'trie--create
434 "Return a new trie that uses comparison function COMPARISON-FUNCTION.
436 A trie stores sequences (strings, vectors or lists) along with
437 associated data. COMPARISON-FUNCTEION should accept two
438 arguments, each being an element of such a sequence, and return t
439 if the first is strictly smaller than the second.
441 The optional argument TYPE specifies the type of trie to
442 create. However, the only one that is currently implemented is
443 the default, so this argument is useless for now.
445 (See also `make-trie-custom'.)")
449 (defalias 'trie-create 'make-trie)
453 (defalias 'make-trie-custom 'trie--create-custom
454 "Return a new trie that uses comparison function COMPARISON-FUNCTION.
456 A trie stores sequences (strings, vectors or lists) along with
457 associated data. COMPARISON-FUNCTION should accept two arguments,
458 each being an element of such a sequence, and return t if the
459 first is strictly smaller than the second.
461 The remaining keyword arguments: :CREATEFUN, :INSERTFUN, :DELETEFUN,
462 :LOOKUPFUN, :MAPFUN, :EMPTYFUN, :STACK-CREATEFUN, :STACK-POPFUN,
463 :STACK-EMPTYFUN, :TRANSFORM-FOR-PRINT and :TRANSFORM-FROM-READ
464 determine the type of trie that is created.
466 CREATEFUN is called as follows:
468 (CREATEFUN COMPARISON-FUNCTION SEQ)
470 and should return a data structure (\"ARRAY\") that can be used
471 as an associative array, where two elements A and B are equal if
472 the following is non-nil:
474 (and (COMPARISON-FUNCTION b a)
475 (COMPARISON-FUNCTION b a))
477 The SEQ argument is a vector containing the sequence that will
478 correspond to the newly created array in the trie. For most types
479 of trie, this value is ignored. It is passed to CREATEFUN only in
480 order to allow the creation of \"hybrid\" trie structures, in
481 which different types of associative array are used in different
482 parts of the trie. For example, the type of associative array
483 could be chosen based on the depth in the trie, given by \(length
484 SEQ\). (Note that all the other functions described below must be
485 able to correctly handle *any* of the types of associate array
486 that might be created by CREATEFUN.)
488 INSERTFUN, DELETEFUN, LOOKUPFUN, MAPFUN and EMPTYFUN should
489 insert, delete, lookup, map over, and check-if-there-exist-any
490 elements in an associative array. They are called as follows:
492 (INSERTFUN array element &optional updatefun)
493 (DELETEFUN array element &optional predicate nilflag)
494 (LOOKUPFUN array element &optional nilflag)
495 (MAPFUN function array &optional reverse)
498 INSERTFUN should insert ELEMENT into ARRAY and return the new
499 element, which will be ELEMENT itself unless UPDATEFUN is
500 specified. In that case, if and only if an element matching
501 ELEMENT already exists in the associative array, INSERTFUN should
502 instead pass ELEMENT and the matching element as arguments to
503 UPDATEFUN, replace the matching element with the return value,
504 and return that return value.
506 DELETEFUN should delete the element in the associative array that
507 matches ELEMENT, and return the deleted element. However, if
508 PREDICATE is specified and a matching element exists in ARRAY,
509 DELETEFUN should first pass the matching element as an argument
510 to PREDICATE before deleting, and should only delete the element
511 if PREDICATE returns non-nil. DELETEFUN should return NILFLAG if
512 no element was deleted (either becuase no matching element was
513 found, or because TESTFUN returned nil).
515 LOOKUPFUN should return the element from the associative array
516 that matches ELEMENT, or NILFLAG if no matching element exists.
518 MAPFUN should map FUNCTION over all elements in the order defined by
519 COMPARISON-FUNCTION, or in reverse order if REVERSE is non-nil.
522 STACK-CREATEFUN, STACK-POPFUN and STACK-EMPTYFUN should allow the
523 associative array to be used as a stack. STACK-CREATEFUN is
526 (STACK-CREATEFUN array)
528 and should return a data structure (\"STACK\") that behaves like
529 a sorted stack of all elements in the associative array. I.e.
534 should return elements from the associative array in the order
535 defined by COMPARISON-FUNCTION, and
537 (STACK-EMPTYFUN stack)
539 should return non-nil if the stack is empty, nil otherwise.
541 The stack functions are optional, in that all trie operations
542 other than the stack-related ones will work correctly. However,
543 any code that makes use of trie-stacks will complain if supplied
544 with this type of trie.
547 The :TRANSFORM-FOR-PRINT and :TRANSFORM-FROM-READ arguments are
548 optional. If supplied, they can be used to transform the trie
549 into a format suitable for passing to Elisp's `print'
550 functions (typically used to persistently store the trie by
551 writing it to file), and transform from that format back to the
552 original usable form.
555 Warning: to avoid nasty dynamic scoping bugs, the supplied
556 functions must *never* bind any variables with names commencing
561 (defalias 'trie-create-custom 'make-trie-custom)
565 (defalias 'trie-comparison-function 'trie--comparison-function
566 "Return the comparison function for TRIE.")
569 (defalias 'trie-p 'trie--p
570 "Return t if argument is a trie, nil otherwise.")
573 (defun trie-empty (trie)
574 "Return t if the TRIE is empty, nil otherwise."
575 (trie-transform-from-read-warn trie)
576 (funcall (trie--emptyfun trie)
577 (trie--node-subtree (trie--root trie))))
580 (defun trie-construct-sortfun (cmpfun &optional reverse)
581 "Construct function to compare key sequences, based on a CMPFUN
582 that compares individual elements of the sequence. Order is
583 reversed if REVERSE is non-nil."
588 (dotimes (i (min (length a) (length b)))
589 (cond ((,cmpfun (elt b i) (elt a i))
591 ((,cmpfun (elt a i) (elt b i))
592 (throw 'compared nil))))
593 (< (length a) (length b)))))
597 (dotimes (i (min (length a) (length b)))
598 (cond ((,cmpfun (elt a i) (elt b i))
600 ((,cmpfun (elt b i) (elt a i))
601 (throw 'compared nil))))
602 (< (length a) (length b)))))))
606 ;; ----------------------------------------------------------------
609 (defun trie-insert (trie key &optional data updatefun)
610 "Associate DATA with KEY in TRIE.
612 If KEY already exists in TRIE, then DATA replaces the existing
613 association, unless UPDATEFUN is supplied. Note that if DATA is
614 *not* supplied, this means that the existing association of KEY
615 will be replaced by nil.
617 If UPDATEFUN is supplied and KEY already exists in TRIE,
618 UPDATEFUN is called with two arguments: DATA and the existing
619 association of KEY. Its return value becomes the new association
622 Returns the new association of KEY.
624 Note: to avoid nasty dynamic scoping bugs, UPDATEFUN must *not*
625 bind any variables with names commencing \"--\"."
627 ;; convert trie from print-form if necessary
628 (trie-transform-from-read-warn trie)
630 ;; absurd variable names are an attempt to avoid dynamic scoping bugs
631 (let ((--trie-insert--updatefun updatefun)
632 --trie-insert--old-node-flag
633 (node (trie--root trie))
636 ;; Descend trie, adding nodes for non-existent elements of KEY. The
637 ;; update function passed to `trie--insertfun' ensures that existing
638 ;; nodes are left intact.
639 (while (< (incf i) len)
640 (setq --trie-insert--old-node-flag nil)
641 (setq node (funcall (trie--insertfun trie)
642 (trie--node-subtree node)
643 (trie--node-create (elt key i) key trie)
645 (setq --trie-insert--old-node-flag t) b))))
646 ;; Create or update data node.
647 (setq node (funcall (trie--insertfun trie)
648 (trie--node-subtree node)
649 (trie--node-create-data data)
650 ;; if using existing data node, wrap UPDATEFUN
651 ;; if any was supplied
652 (when (and --trie-insert--old-node-flag
653 --trie-insert--updatefun)
655 (setf (trie--node-data old)
656 (funcall --trie-insert--updatefun
657 (trie--node-data new)
658 (trie--node-data old)))
660 (trie--node-data node))) ; return new data
664 ;; ----------------------------------------------------------------
667 (defun trie-delete (trie key &optional test)
668 "Delete KEY and its associated data from TRIE.
670 If KEY was deleted, a cons cell containing KEY and its
671 association is returned. Returns nil if KEY does not exist in
674 If TEST is supplied, it should be a function that accepts two
675 arguments: the key being deleted, and its associated data. The
676 key will then only be deleted if TEST returns non-nil.
678 Note: to avoid nasty dynamic scoping bugs, TEST must *not* bind
679 any variables with names commencing \"--\"."
680 ;; convert trie from print-form if necessary
681 (trie-transform-from-read-warn trie)
682 ;; set up deletion (real work is done by `trie--do-delete'
683 (let (--trie-deleted--node
684 (--trie-delete--key key))
685 (declare (special --trie-deleted--node)
686 (special --trie-delete--key))
687 (trie--do-delete (trie--root trie) key test
688 (trie--deletefun trie)
689 (trie--emptyfun trie)
691 (when --trie-deleted--node
692 (cons key (trie--node-data --trie-deleted--node)))))
695 (defun trie--do-delete (node --trie--do-delete--seq
696 --trie--do-delete--test
697 --trie--do-delete--deletefun
698 --trie--do-delete--emptyfun
699 --trie--do-delete--cmpfun)
700 ;; Delete --TRIE--DO-DELETE--SEQ starting from trie node NODE, and
701 ;; return non-nil if we deleted a node. If --TRIE--DO-DELETE--TEST is
702 ;; supplied, it is called with two arguments, the key being deleted
703 ;; and the associated data, and the deletion is only carried out if it
706 ;; The absurd argument names are to lessen the likelihood of dynamical
707 ;; scoping bugs caused by a supplied function binding a variable with
708 ;; the same name as one of the arguments, which would cause a nasty
709 ;; bug when the lambda's (below) are called.
710 (declare (special --trie-deleted--node)
711 (special --trie-delete--key))
712 ;; if --TRIE--DO-DELETE--SEQ is empty, try to delete data node and
713 ;; return non-nil if we did (return value of
714 ;; --TRIE--DO-DELETE--DELETEFUN is the deleted data, which is always
715 ;; non-nil for a trie)
716 (if (= (length --trie--do-delete--seq) 0)
717 (setq --trie-deleted--node
718 (funcall --trie--do-delete--deletefun
719 (trie--node-subtree node)
720 (trie--node-create-dummy trie--terminator)
721 (when --trie--do-delete--test
723 (funcall --trie--do-delete--test
724 --trie-delete--key (trie--node-data n))))
726 ;; otherwise, delete on down (return value of
727 ;; --TRIE--DO-DELETE--DELETEFUN is the deleted data, which is always
728 ;; non-nil for a trie)
729 (funcall --trie--do-delete--deletefun
730 (trie--node-subtree node)
731 (trie--node-create-dummy (elt --trie--do-delete--seq 0))
733 (and (trie--do-delete
734 n (trie--subseq --trie--do-delete--seq 1)
735 --trie--do-delete--test
736 --trie--do-delete--deletefun
737 --trie--do-delete--emptyfun
738 --trie--do-delete--cmpfun)
739 (funcall --trie--do-delete--emptyfun
740 (trie--node-subtree n))))
745 ;; ----------------------------------------------------------------
748 (defun trie-lookup (trie key &optional nilflag)
749 "Return the data associated with KEY in the TRIE,
750 or nil if KEY does not exist in TRIE.
752 Optional argument NILFLAG specifies a value to return instead of
753 nil if KEY does not exist in TRIE. This allows a non-existent KEY
754 to be distinguished from an element with a null association. (See
755 also `trie-member-p', which does this for you.)"
756 ;; convert trie from print-form if necessary
757 (trie-transform-from-read-warn trie)
758 ;; find node corresponding to key, then find data node, then return
761 (or (and (setq node (trie--node-find (trie--root trie) key
762 (trie--lookupfun trie)))
763 (trie--find-data node (trie--lookupfun trie)))
766 (defalias 'trie-member 'trie-lookup)
769 (defun trie-member-p (trie key)
770 "Return t if KEY exists in TRIE, nil otherwise."
771 ;; convert trie from print-form if necessary
772 (trie-transform-from-read-warn trie)
774 (not (eq flag (trie-member trie key flag)))))
779 ;;; ================================================================
780 ;;; Mapping over tries
782 (defun trie--mapc (--trie--mapc--function --trie--mapc--mapfun
783 --trie--mapc--root --trie--mapc--seq
784 &optional --trie--mapc--reverse)
785 ;; Apply TRIE--MAPC--FUNCTION to all elements in a trie beneath
786 ;; TRIE--MAPC--ROOT, which should correspond to the sequence
787 ;; TRIE--MAPC--SEQ. TRIE--MAPC--FUNCTION is passed two arguments: the
788 ;; trie node itself and the sequence it corresponds to. It is applied
789 ;; in ascending order, or descending order if TRIE--MAPC--REVERSE is
792 ;; The absurd argument names are to lessen the likelihood of dynamical
793 ;; scoping bugs caused by a supplied function binding a variable with
794 ;; the same name as one of the arguments.
797 (lambda (--trie--mapc--node)
798 ;; data node: apply function
799 (if (trie--node-data-p --trie--mapc--node)
800 (funcall --trie--mapc--function
803 ;; internal node: append split value to seq and keep descending
804 (trie--mapc --trie--mapc--function
808 (copy-sequence --trie--mapc--seq)
809 (trie--node-split --trie--mapc--node))
810 --trie--mapc--reverse)))
811 ;; --TRIE--MAPC--MAPFUN target
812 (trie--node-subtree --trie--mapc--root)
813 --trie--mapc--reverse))
816 (defun trie-mapc-internal (function trie &optional type)
817 "Apply FUNCTION to all internal associative arrays within TRIE.
818 FUNCTION is passed two arguments: an associative array, and the
819 sequence it corresponds to.
821 Optional argument TYPE (one of the symbols vector, lisp or
822 string) sets the type of sequence passed to FUNCTION. Defaults to
824 (trie--mapc-internal function (trie--mapfun trie) (trie--root trie)
825 (cond ((eq type 'string) "")
830 (defun trie--mapc-internal (--trie--mapc-internal--function
831 --trie--mapc-internal--mapfun
832 --trie--mapc-internal--root
833 --trie--mapc-internal--seq)
835 --trie--mapc-internal--mapfun
836 (lambda (--trie--mapc-internal--node)
838 (unless (trie--node-data-p --trie--mapc-internal--node)
839 (funcall --trie--mapc-internal--function
840 (trie--node-subtree --trie--mapc-internal--node)
841 --trie--mapc-internal--seq)
843 --trie--mapc-internal--function
844 --trie--mapc-internal--mapfun
845 --trie--mapc-internal--node
847 (copy-sequence --trie--mapc-internal--seq)
848 (trie--node-split --trie--mapc-internal--node)))))
849 (trie--node-subtree --trie--mapc-internal--root)))
852 (defun trie-map (function trie &optional type reverse)
853 "Modify all elements in TRIE by applying FUNCTION to them.
855 FUNCTION should take two arguments: a sequence stored in the trie
856 and its associated data. Its return value replaces the existing
859 Optional argument TYPE (one of the symbols vector, lisp or
860 string) sets the type of sequence passed to FUNCTION. Defaults to
863 FUNCTION is applied in ascending order, or descending order if
866 Note: to avoid nasty dynamic scoping bugs, FUNCTION must *not*
867 bind any variables with names commencing \"--\"."
868 ;; convert from print-form if necessary
869 (trie-transform-from-read-warn trie)
870 ;; map FUNCTION over TRIE
871 (let ((--trie-map--function function)) ; avoid dynamic scoping bugs
874 (setf (trie--node-data node)
875 (funcall --trie-map--function seq (trie--node-data node))))
878 (cond ((eq type 'string) "") ((eq type 'lisp) ()) (t []))
882 (defun trie-mapc (function trie &optional type reverse)
883 "Apply FUNCTION to all elements in TRIE for side effect only.
885 FUNCTION should take two arguments: a sequence stored in the trie
886 and its associated data.
888 Optional argument TYPE (one of the symbols vector, lisp or
889 string) sets the type of sequence passed to FUNCTION. Defaults to
892 FUNCTION is applied in ascending order, or descending order if
895 Note: to avoid nasty dynamic scoping bugs, FUNCTION must *not*
896 bind any variables with names commencing \"--\"."
897 ;; convert from print-form if necessary
898 (trie-transform-from-read-warn trie)
899 ;; map FUNCTION over TRIE
900 (let ((--trie-mapc--function function)) ; avoid dynamic scoping bugs
903 (funcall --trie-mapc--function seq (trie--node-data node)))
906 (cond ((eq type 'string) "") ((eq type 'lisp) ()) (t []))
910 (defun trie-mapf (function combinator trie &optional type reverse)
911 "Apply FUNCTION to all elements in TRIE, and combine the results
914 FUNCTION should take two arguments: a sequence stored in the
915 trie, and its associated data.
917 Optional argument TYPE (one of the symbols vector, lisp or
918 string; defaults to vector) sets the type of sequence passed to
919 FUNCTION. If TYPE is 'string, it must be possible to apply the
920 function `string' to the individual elements of key sequences
923 The FUNCTION is applied and the results combined in ascending
924 order, or descending order if REVERSE is non-nil.
926 Note: to avoid nasty dynamic scoping bugs, FUNCTION and
927 COMBINATOR must *not* bind any variables with names
929 ;; convert from print-form if necessary
930 (trie-transform-from-read-warn trie)
931 ;; map FUNCTION over TRIE, combining results with COMBINATOR
932 (let ((--trie-mapf--function function) ; avoid dynamic scoping bugs
933 --trie-mapf--accumulate)
936 (setq --trie-mapf--accumulate
938 (funcall --trie-mapf--function
939 seq (trie--node-data node))
940 --trie-mapf--accumulate)))
943 (cond ((eq type 'string) "") ((eq type 'lisp) ()) (t []))
945 --trie-mapf--accumulate))
948 (defun trie-mapcar (function trie &optional type reverse)
949 "Apply FUNCTION to all elements in TRIE,
950 and make a list of the results.
952 FUNCTION should take two arguments: a sequence stored in the trie
953 and its associated data.
955 Optional argument TYPE (one of the symbols vector, lisp or
956 string) sets the type of sequence passed to FUNCTION. Defaults to
959 The FUNCTION is applied and the list constructed in ascending
960 order, or descending order if REVERSE is non-nil.
962 Note that if you don't care about the order in which FUNCTION is
963 applied, just that the resulting list is in the correct order,
966 (trie-mapf function 'cons trie type (not reverse))
970 Note: to avoid nasty dynamic scoping bugs, FUNCTION must *not*
971 bind any variables with names commencing \"--\"."
972 ;; convert from print-form if necessary
973 (trie-transform-from-read-warn trie)
974 ;; map FUNCTION over TRIE and accumulate in a list
975 (nreverse (trie-mapf function 'cons trie type reverse)))
980 ;;; ================================================================
981 ;;; Using tries as stacks
983 (defstruct (trie--stack
992 (comparison-function (trie--comparison-function trie))
993 (lookupfun (trie--lookupfun trie))
994 (stack-createfun (trie--stack-createfun trie))
995 (stack-popfun (trie--stack-popfun trie))
996 (stack-emptyfun (trie--stack-emptyfun trie))
997 (repopulatefun 'trie--stack-repopulate)
999 (if (trie-empty trie)
1001 (trie--stack-repopulate
1003 (cond ((eq type 'list) ())
1004 ((eq type 'string) "")
1008 (trie--node-subtree (trie--root trie))
1011 comparison-function lookupfun
1012 stack-createfun stack-popfun stack-emptyfun)))
1016 trie--completion-stack-create
1021 (comparison-function (trie--comparison-function trie))
1022 (lookupfun (trie--lookupfun trie))
1023 (stack-createfun (trie--stack-createfun trie))
1024 (stack-popfun (trie--stack-popfun trie))
1025 (stack-emptyfun (trie--stack-emptyfun trie))
1026 (repopulatefun 'trie--stack-repopulate)
1027 (store (trie--completion-stack-construct-store
1028 trie prefix reverse))
1032 trie--regexp-stack-create
1037 (comparison-function (trie--comparison-function trie))
1038 (lookupfun (trie--lookupfun trie))
1039 (stack-createfun (trie--stack-createfun trie))
1040 (stack-popfun (trie--stack-popfun trie))
1041 (stack-emptyfun (trie--stack-emptyfun trie))
1042 (repopulatefun 'trie--regexp-stack-repopulate)
1043 (store (trie--regexp-stack-construct-store
1044 trie regexp reverse))
1048 reverse comparison-function lookupfun
1049 stack-createfun stack-popfun stack-emptyfun
1050 repopulatefun store pushed)
1053 (defun trie-stack (trie &optional type reverse)
1054 "Return an object that allows TRIE to be accessed as a stack.
1056 The stack is sorted in \"lexical\" order, i.e. the order defined
1057 by the trie's comparison function, or in reverse order if REVERSE
1058 is non-nil. Calling `trie-stack-pop' pops the top element (a key
1059 and its associated data) from the stack.
1061 Optional argument TYPE (one of the symbols vector, lisp or
1062 string) sets the type of sequence used for the keys.
1064 Note that any modification to TRIE *immediately* invalidates all
1065 trie-stacks created before the modification (in particular,
1066 calling `trie-stack-pop' will give unpredictable results).
1068 Operations on trie-stacks are significantly more efficient than
1069 constructing a real stack from the trie and using standard stack
1070 functions. As such, they can be useful in implementing efficient
1071 algorithms on tries. However, in cases where mapping functions
1072 `trie-mapc', `trie-mapcar' or `trie-mapf' would be sufficient, it
1073 is better to use one of those instead."
1074 ;; convert trie from print-form if necessary
1075 (trie-transform-from-read-warn trie)
1076 ;; if stack functions aren't defined for trie type, throw error
1077 (if (not (functionp (trie--stack-createfun trie)))
1078 (error "Trie type does not support stack operations")
1079 ;; otherwise, create and initialise a stack
1080 (trie--stack-create trie type reverse)))
1083 (defun trie-stack-pop (trie-stack &optional nilflag)
1084 "Pop the first element from TRIE-STACK.
1086 Returns nil if the stack is empty, or NILFLAG if specified. (The
1087 latter allows an empty stack to be distinguished from a null
1088 element stored in the trie.)"
1089 ;; return nilflag if stack is empty
1090 (if (trie-stack-empty-p trie-stack)
1092 ;; if elements have been pushed onto the stack, pop those first
1093 (if (trie--stack-pushed trie-stack)
1094 (pop (trie--stack-pushed trie-stack))
1095 ;; otherwise, pop first element from trie-stack and repopulate it
1097 (pop (trie--stack-store trie-stack))
1098 (setf (trie--stack-store trie-stack)
1099 (funcall (trie--stack-repopulatefun trie-stack)
1100 (trie--stack-store trie-stack)
1101 (trie--stack-reverse trie-stack)
1102 (trie--stack-comparison-function trie-stack)
1103 (trie--stack-lookupfun trie-stack)
1104 (trie--stack-stack-createfun trie-stack)
1105 (trie--stack-stack-popfun trie-stack)
1106 (trie--stack-stack-emptyfun trie-stack)))))))
1109 (defun trie-stack-push (element trie-stack)
1110 "Push ELEMENT onto TRIE-STACK."
1111 (push element (trie--stack-pushed trie-stack)))
1114 (defun trie-stack-first (trie-stack &optional nilflag)
1115 "Return the first element from TRIE-STACK, without removing it
1118 Returns nil if the stack is empty, or NILFLAG if specified. (The
1119 latter allows an empty stack to be distinguished from a null
1120 element stored in the trie.)"
1121 ;; return nilflag if stack is empty
1122 (if (trie-stack-empty-p trie-stack)
1124 ;; if elements have been pushed onto the stack, return first of
1126 (if (trie--stack-pushed trie-stack)
1127 (car (trie--stack-pushed trie-stack))
1128 ;; otherwise, return first element from trie-stack
1129 (car (trie--stack-store trie-stack)))))
1132 (defalias 'trie-stack-p 'trie--stack-p
1133 "Return t if argument is a trie-stack, nil otherwise.")
1136 (defun trie-stack-empty-p (trie-stack)
1137 "Return t if TRIE-STACK is empty, nil otherwise."
1138 (and (null (trie--stack-store trie-stack))
1139 (null (trie--stack-pushed trie-stack))))
1142 (defun trie--stack-repopulate
1143 (store reverse comparison-function lookupfun
1144 stack-createfun stack-popfun stack-emptyfun)
1145 ;; Recursively push children of the node at the head of STORE onto the
1146 ;; front of STORE, until a data node is reached.
1148 ;; nothing to do if stack is empty
1150 (let ((node (funcall stack-popfun (cdar store)))
1152 (when (funcall stack-emptyfun (cdar store))
1153 ;; (pop store) here produces irritating compiler warnings
1154 (setq store (cdr store)))
1156 (while (not (trie--node-data-p node))
1158 (cons (trie--seq-append seq (trie--node-split node))
1159 (funcall stack-createfun
1160 (trie--node-subtree node) reverse))
1162 (setq node (funcall stack-popfun (cdar store))
1164 (when (funcall stack-emptyfun (cdar store))
1165 ;; (pop store) here produces irritating compiler warnings
1166 (setq store (cdr store))))
1168 (push (cons seq (trie--node-data node)) store))))
1173 ;; ================================================================
1174 ;; Query-building utility macros
1176 ;; Implementation Note
1177 ;; -------------------
1178 ;; For queries ranked in anything other than lexical order, we use a
1179 ;; partial heap-sort to find the k=MAXNUM highest ranked matches among
1180 ;; the n possibile matches. This has worst-case time complexity
1181 ;; O(n log k), and is both simple and elegant. An optimal algorithm
1182 ;; (e.g. partial quick-sort discarding the irrelevant partition at each
1183 ;; step) would have complexity O(n + k log k), but is probably not worth
1184 ;; the extra coding effort, and would have worse space complexity unless
1185 ;; coded to work "in-place", which would be highly non-trivial. (I
1186 ;; haven't done any benchmarking, though, so feel free to do so and let
1187 ;; me know the results!)
1189 (defmacro trie--construct-accumulator (maxnum filter resultfun)
1190 ;; Does what it says on the tin! | sed -e 's/tin/macro name/'
1192 ;; filter, maxnum, resultfun
1193 ((and ,filter ,maxnum ,resultfun)
1195 (when (funcall ,filter seq data)
1196 (aset trie--accumulate 0
1197 (cons (funcall ,resultfun seq data)
1198 (aref trie--accumulate 0)))
1199 (and (>= (length (aref trie--accumulate 0)) ,maxnum)
1200 (throw 'trie-accumulate--done nil)))))
1201 ;; filter, maxnum, !resultfun
1202 ((and ,filter ,maxnum (not ,resultfun))
1204 (when (funcall ,filter seq data)
1205 (aset trie--accumulate 0
1206 (cons (cons seq data)
1207 (aref trie--accumulate 0)))
1208 (and (>= (length (aref trie--accumulate 0)) ,maxnum)
1209 (throw 'trie-accumulate--done nil)))))
1210 ;; filter, !maxnum, resultfun
1211 ((and ,filter (not ,maxnum) ,resultfun)
1213 (when (funcall ,filter seq data)
1214 (aset trie--accumulate 0
1215 (cons (funcall ,resultfun seq data)
1216 (aref trie--accumulate 0))))))
1217 ;; filter, !maxnum, !resultfun
1218 ((and ,filter (not ,maxnum) (not ,resultfun))
1220 (when (funcall ,filter seq data)
1221 (aset trie--accumulate 0
1222 (cons (cons seq data)
1223 (aref trie--accumulate 0))))))
1224 ;; !filter, maxnum, resultfun
1225 ((and (not ,filter) ,maxnum ,resultfun)
1227 (aset trie--accumulate 0
1228 (cons (funcall ,resultfun seq data)
1229 (aref trie--accumulate 0)))
1230 (and (>= (length (aref trie--accumulate 0)) ,maxnum)
1231 (throw 'trie-accumulate--done nil))))
1232 ;; !filter, maxnum, !resultfun
1233 ((and (not ,filter) ,maxnum (not ,resultfun))
1235 (aset trie--accumulate 0
1236 (cons (cons seq data)
1237 (aref trie--accumulate 0)))
1238 (and (>= (length (aref trie--accumulate 0)) ,maxnum)
1239 (throw 'trie-accumulate--done nil))))
1240 ;; !filter, !maxnum, resultfun
1241 ((and (not ,filter) (not ,maxnum) ,resultfun)
1243 (aset trie--accumulate 0
1244 (cons (funcall ,resultfun seq data)
1245 (aref trie--accumulate 0)))))
1246 ;; !filter, !maxnum, !resultfun
1247 ((and (not ,filter) (not ,maxnum) (not ,resultfun))
1249 (aset trie--accumulate 0
1250 (cons (cons seq data)
1251 (aref trie--accumulate 0)))))
1256 (defmacro trie--construct-ranked-accumulator (maxnum filter)
1257 ;; Does what it says on the tin! | sed -e 's/tin/macro name/'
1260 ((and ,filter ,maxnum)
1262 (when (funcall ,filter seq data)
1263 (heap-add trie--accumulate (cons seq data))
1264 (and (> (heap-size trie--accumulate) ,maxnum)
1265 (heap-delete-root trie--accumulate)))))
1267 ((and ,filter (not ,maxnum))
1269 (when (funcall ,filter seq data)
1270 (heap-add trie--accumulate (cons seq data)))))
1272 ((and (not ,filter) ,maxnum)
1274 (heap-add trie--accumulate (cons seq data))
1275 (and (> (heap-size trie--accumulate) ,maxnum)
1276 (heap-delete-root trie--accumulate))))
1278 ((and (not ,filter) (not ,maxnum))
1280 (heap-add trie--accumulate (cons seq data))))))
1284 (defmacro trie--accumulate-results
1285 (rankfun maxnum reverse filter resultfun accfun duplicates &rest body)
1286 ;; Accumulate results of running BODY code, and return them in
1287 ;; appropriate order. BODY should call ACCFUN to accumulate a result,
1288 ;; passing it two arguments: a trie data node, and the corresponding
1289 ;; sequence. BODY can throw 'trie-accumulate--done to terminate the
1290 ;; accumulation and return the results. A non-null DUPLICATES flag
1291 ;; signals that the accumulated results might contain duplicates,
1292 ;; which should be deleted. Note that DUPLICATES is ignored if RANKFUN
1293 ;; is null. The other arguments should be passed straight through from
1294 ;; the query function.
1296 ;; rename functions to help avoid dynamic-scoping bugs
1297 `(let* ((--trie-accumulate--rankfun ,rankfun)
1298 (--trie-accumulate--filter ,filter)
1299 (--trie-accumulate--resultfun ,resultfun)
1300 ;; construct structure in which to accumulate results
1303 (heap-create ; heap order is inverse of rank order
1306 (funcall --trie-accumulate--rankfun a b))
1308 (not (funcall --trie-accumulate--rankfun a b))))
1309 (when ,maxnum (1+ ,maxnum)))
1310 (make-vector 1 nil)))
1311 ;; construct function to accumulate completions
1314 (trie--construct-ranked-accumulator
1315 ,maxnum --trie-accumulate--filter)
1316 (trie--construct-accumulator
1317 ,maxnum --trie-accumulate--filter
1318 --trie-accumulate--resultfun))))
1320 ;; accumulate results
1321 (catch 'trie-accumulate--done ,@body)
1323 ;; return list of completions
1325 ;; for a ranked query, extract completions from heap
1328 ;; check for and delete duplicates if flag is set
1330 (while (not (heap-empty trie--accumulate))
1331 (if (equal (car (heap-root trie--accumulate))
1333 (heap-delete-root trie--accumulate)
1334 (push (heap-delete-root trie--accumulate)
1336 ;; skip duplicate checking if flag is not set
1337 (while (not (heap-empty trie--accumulate))
1339 (let ((res (heap-delete-root trie--accumulate)))
1340 (push (funcall ,resultfun (car res) (cdr res))
1342 (push (heap-delete-root trie--accumulate)
1346 ;; for lexical query, reverse result list if MAXNUM supplied
1347 (,maxnum (nreverse (aref trie--accumulate 0)))
1348 ;; otherwise, just return list
1349 (t (aref trie--accumulate 0)))))
1354 ;; ================================================================
1357 (defun trie-complete
1358 (trie prefix &optional rankfun maxnum reverse filter resultfun)
1359 "Return an alist containing all completions of PREFIX in TRIE
1360 along with their associated data, in the order defined by
1361 RANKFUN, defaulting to \"lexical\" order (i.e. the order defined
1362 by the trie's comparison function). If REVERSE is non-nil, the
1363 completions are sorted in the reverse order. Returns nil if no
1364 completions are found.
1366 PREFIX must be a sequence (vector, list or string) containing
1367 elements of the type used to reference data in the trie. (If
1368 PREFIX is a string, it must be possible to apply `string' to
1369 individual elements of the sequences stored in the trie.) The
1370 completions returned in the alist will be sequences of the same
1371 type as KEY. If PREFIX is a list of sequences, completions of all
1372 sequences in the list are included in the returned alist. All
1373 sequences in the list must be of the same type.
1375 The optional integer argument MAXNUM limits the results to the
1376 first MAXNUM completions. Otherwise, all completions are
1379 If specified, RANKFUN must accept two arguments, both cons
1380 cells. The car contains a sequence from the trie (of the same
1381 type as PREFIX), the cdr contains its associated data. It should
1382 return non-nil if first argument is ranked strictly higher than
1383 the second, nil otherwise.
1385 The FILTER argument sets a filter function for the
1386 completions. If supplied, it is called for each possible
1387 completion with two arguments: the completion, and its associated
1388 data. If the filter function returns nil, the completion is not
1389 included in the results, and does not count towards MAXNUM.
1391 RESULTFUN defines a function used to process results before
1392 adding them to the final result list. If specified, it should
1393 accept two arguments: a key and its associated data. It's return
1394 value is what gets added to the final result list, instead of the
1395 default key-data cons cell."
1397 ;; convert trie from print-form if necessary
1398 (trie-transform-from-read-warn trie)
1399 ;; wrap prefix in a list if necessary
1400 ;; FIXME: the test for a list of prefixes, below, will fail if the
1401 ;; PREFIX sequence is a list, and the elements of PREFIX are
1402 ;; themselves lists (there might be no easy way to fully fix
1404 (if (or (atom prefix)
1405 (and (listp prefix) (not (sequencep (car prefix)))))
1406 (setq prefix (list prefix))
1407 ;; sort list of prefixes if sorting completions lexically
1408 (when (null rankfun)
1410 (sort prefix (trie-construct-sortfun
1411 (trie--comparison-function trie))))))
1413 ;; accumulate completions
1415 (declare (special accumulator))
1416 (trie--accumulate-results
1417 rankfun maxnum reverse filter resultfun accumulator nil
1419 (setq node (trie--node-find (trie--root trie) pfx
1420 (trie--lookupfun trie)))
1424 (funcall accumulator seq (trie--node-data node)))
1425 (trie--mapfun trie) node pfx
1426 (if maxnum reverse (not reverse)))))
1432 (defun trie-complete-stack (trie prefix &optional reverse)
1433 "Return an object that allows completions of PREFIX to be accessed
1434 as if they were a stack.
1436 The stack is sorted in \"lexical\" order, i.e. the order defined
1437 by TRIE's comparison function, or in reverse order if REVERSE is
1438 non-nil. Calling `trie-stack-pop' pops the top element (a key and
1439 its associated data) from the stack.
1441 PREFIX must be a sequence (vector, list or string) that forms the
1442 initial part of a TRIE key, or a list of such sequences. (If
1443 PREFIX is a string, it must be possible to apply `string' to
1444 individual elements of TRIE keys.) The completions returned in
1445 the alist will be sequences of the same type as KEY. If PREFIX is
1446 a list of sequences, completions of all sequences in the list are
1447 included in the stack. All sequences in the list must be of the
1450 Note that any modification to TRIE *immediately* invalidates all
1451 trie-stacks created before the modification (in particular,
1452 calling `trie-stack-pop' will give unpredictable results).
1454 Operations on trie-stacks are significantly more efficient than
1455 constructing a real stack from completions of PREFIX in TRIE and
1456 using standard stack functions. As such, they can be useful in
1457 implementing efficient algorithms on tries. However, in cases
1458 where `trie-complete' or `trie-complete-ordered' is sufficient,
1459 it is better to use one of those instead."
1460 ;; convert trie from print-form if necessary
1461 (trie-transform-from-read-warn trie)
1462 ;; if stack functions aren't defined for trie type, throw error
1463 (if (not (functionp (trie--stack-createfun trie)))
1464 (error "Trie type does not support stack operations")
1465 ;; otherwise, create and initialise a stack
1466 (trie--completion-stack-create trie prefix reverse)))
1469 (defun trie--completion-stack-construct-store (trie prefix reverse)
1470 ;; Construct store for completion stack based on TRIE.
1472 (if (or (atom prefix)
1474 (not (sequencep (car prefix)))))
1475 (setq prefix (list prefix))
1478 (trie-construct-sortfun
1479 (trie--comparison-function trie)
1481 (dolist (pfx prefix)
1482 (when (setq node (trie--node-find (trie--root trie) pfx
1483 (trie--lookupfun trie)))
1484 (push (cons pfx (funcall (trie--stack-createfun trie)
1485 (trie--node-subtree node)
1488 (trie--stack-repopulate
1490 (trie--comparison-function trie)
1491 (trie--lookupfun trie)
1492 (trie--stack-createfun trie)
1493 (trie--stack-popfun trie)
1494 (trie--stack-emptyfun trie))))
1499 ;; ================================================================
1502 (defun trie-regexp-search
1503 (trie regexp &optional rankfun maxnum reverse filter resultfun type)
1504 "Return an alist containing all matches for REGEXP in TRIE
1505 along with their associated data, in the order defined by
1506 RANKFUN, defauling to \"lexical\" order (i.e. the order defined
1507 by the trie's comparison function). If REVERSE is non-nil, the
1508 completions are sorted in the reverse order. Returns nil if no
1509 completions are found.
1511 REGEXP is a regular expression, but it need not necessarily be a
1512 string. It must be a sequence (vector, list of string) whose
1513 elements are either elements of the same type as elements of the
1514 trie keys (which behave as literals in the regexp), or any of the
1515 usual regexp special characters and backslash constructs. If
1516 REGEXP is a string, it must be possible to apply `string' to
1517 individual elements of the keys stored in the trie. The matches
1518 returned in the alist will be sequences of the same type as KEY.
1520 Only a subset of the full Emacs regular expression syntax is
1521 supported. There is no support for regexp constructs that are
1522 only meaningful for strings (character ranges and character
1523 classes inside character alternatives, and syntax-related
1524 backslash constructs). Back-references and non-greedy postfix
1525 operators are not supported, so `?' after a postfix operator
1526 loses its special meaning. Also, matches are always anchored, so
1527 `$' and `^' lose their special meanings (use `.*' at the
1528 beginning and end of the regexp to get an unanchored match).
1530 If the regexp contains any non-shy grouping constructs, subgroup
1531 match data is included in the results. In this case, the car of
1532 each match is no longer just a key. Instead, it is a list whose
1533 first element is the matching key, and whose remaining elements
1534 are cons cells whose cars and cdrs give the start and end indices
1535 of the elements that matched the corresponding groups, in order.
1537 The optional integer argument MAXNUM limits the results to the
1538 first MAXNUM matches. Otherwise, all matches are returned.
1540 If specified, RANKFUN must accept two arguments, both cons
1541 cells. The car contains a sequence from the trie (of the same
1542 type as PREFIX), the cdr contains its associated data. It should
1543 return non-nil if first argument is ranked strictly higher than
1544 the second, nil otherwise.
1546 The FILTER argument sets a filter function for the matches. If
1547 supplied, it is called for each possible match with two
1548 arguments: the matching key, and its associated data. If the
1549 filter function returns nil, the match is not included in the
1550 results, and does not count towards MAXNUM.
1552 RESULTFUN defines a function used to process results before
1553 adding them to the final result list. If specified, it should
1554 accept two arguments: a key and its associated data. It's return
1555 value is what gets added to the final result list, instead of the
1556 default key-data cons cell."
1558 ;; convert trie from print-form if necessary
1559 (trie-transform-from-read-warn trie)
1561 ;; massage rankfun to cope with grouping data
1562 ;; FIXME: could skip this if REGEXP contains no grouping constructs
1566 ;; if car of argument contains a key+group list rather than
1567 ;; a straight key, remove group list
1568 ;; FIXME: the test for straight key, below, will fail if
1569 ;; the key is a list, and the first element of the
1570 ;; key is itself a list (there might be no easy way
1571 ;; to fully fix this...)
1572 (unless (or (atom (car a))
1573 (and (listp (car a))
1574 (not (sequencep (caar a)))))
1575 (setq a (cons (caar a) (cdr a))))
1576 (unless (or (atom (car b))
1577 (and (listp (car b))
1578 (not (sequencep (caar b)))))
1579 (setq b (cons (caar b) (cdr b))))
1580 ;; call rankfun on massaged arguments
1583 ;; accumulate completions
1584 (declare (special accumulator))
1585 (trie--accumulate-results
1586 rankfun maxnum reverse filter resultfun accumulator nil
1587 (trie--do-regexp-search
1589 (tNFA-from-regexp regexp :test (trie--construct-equality-function
1590 (trie--comparison-function trie)))
1591 (cond ((stringp regexp) "") ((listp regexp) ()) (t [])) 0
1592 (or (and maxnum reverse) (and (not maxnum) (not reverse)))
1593 (trie--comparison-function trie)
1594 (trie--lookupfun trie)
1595 (trie--mapfun trie))))
1599 (defun trie--do-regexp-search
1600 (--trie--regexp-search--node tNFA seq pos reverse
1601 comparison-function lookupfun mapfun)
1602 ;; Search everything below the node --TRIE--REGEXP-SEARCH-NODE for
1603 ;; matches to the regexp encoded in tNFA. SEQ is the sequence
1604 ;; corresponding to NODE, POS is it's length. REVERSE is the usual
1605 ;; query argument, and the remaining arguments are the corresponding
1607 (declare (special accumulator))
1609 ;; if NFA has matched and we're accumulating in normal order, check if
1610 ;; trie contains current string
1611 (when (and (not reverse) (tNFA-match-p tNFA))
1613 (when (setq node (trie--find-data-node
1614 --trie--regexp-search--node lookupfun))
1615 (setq groups (tNFA-group-data tNFA))
1616 (funcall accumulator
1617 (if groups (cons seq groups) seq)
1618 (trie--node-data node)))))
1622 ;; ((trie--node-data-p --trie--regexp-search--node)
1623 ;; (when (tNFA-match-p tNFA)
1624 ;; (let ((groups (tNFA-group-data tNFA)))
1625 ;; (funcall accumulator
1626 ;; (if groups (cons seq groups) seq)
1627 ;; (trie--node-data --trie--regexp-search--node)))))
1629 ;; wildcard transition: map over all nodes in subtree
1630 ((tNFA-wildcard-p tNFA)
1634 (unless (trie--node-data-p node)
1635 ;; (when (tNFA-match-p tNFA)
1636 ;; (setq groups (tNFA-group-data tNFA))
1637 ;; (funcall accumulator
1638 ;; (if groups (cons seq groups) seq)
1639 ;; (trie--node-data node)))
1640 (when (setq state (tNFA-next-state
1641 tNFA (trie--node-split node) pos))
1642 (trie--do-regexp-search
1644 (trie--seq-append seq (trie--node-split node))
1645 (1+ pos) reverse comparison-function
1646 lookupfun mapfun))))
1647 (trie--node-subtree --trie--regexp-search--node)
1650 (t ;; no wildcard transition: loop over all transitions
1652 (dolist (chr (sort (tNFA-transitions tNFA)
1654 `(lambda (a b) (,comparison-function b a))
1655 comparison-function)))
1656 (when (and (setq node (trie--node-find
1657 --trie--regexp-search--node
1658 (vector chr) lookupfun))
1659 (setq state (tNFA-next-state tNFA chr pos)))
1660 (trie--do-regexp-search
1661 node state (trie--seq-append seq chr) (1+ pos)
1662 reverse comparison-function lookupfun mapfun))))))
1664 ;; if NFA has matched and we're accumulating in reverse order, check if
1665 ;; trie contains current string
1666 (when (and reverse (tNFA-match-p tNFA))
1668 (when (setq node (trie--find-data-node
1669 --trie--regexp-search--node lookupfun))
1670 (setq groups (tNFA-group-data tNFA))
1671 (funcall accumulator
1672 (if groups (cons seq groups) seq)
1673 (trie--node-data node))))))
1677 (defun trie-regexp-stack (trie regexp &optional reverse)
1678 "Return an object that allows matches to REGEXP to be accessed
1679 as if they were a stack.
1681 The stack is sorted in \"lexical\" order, i.e. the order defined
1682 by TRIE's comparison function, or in reverse order if REVERSE is
1683 non-nil. Calling `trie-stack-pop' pops the top element (a cons
1684 cell containing a key and its associated data) from the stack.
1686 REGEXP is a regular expression, but it need not necessarily be a
1687 string. It must be a sequence (vector, list of string) whose
1688 elements are either elements of the same type as elements of the
1689 trie keys (which behave as literals in the regexp), or any of the
1690 usual regexp special characters and backslash constructs. If
1691 REGEXP is a string, it must be possible to apply `string' to
1692 individual elements of the keys stored in the trie. The matches
1693 returned in the alist will be sequences of the same type as KEY.
1695 Back-references and non-greedy postfix operators are *not*
1696 supported, and the matches are always anchored, so `$' and `^'
1697 lose their special meanings.
1699 If the regexp contains any non-shy grouping constructs, subgroup
1700 match data is included in the results. In this case, the car of
1701 each match (as returned by a call to `trie-stack-pop' is no
1702 longer just a key. Instead, it is a list whose first element is
1703 the matching key, and whose remaining elements are cons cells
1704 whose cars and cdrs give the start and end indices of the
1705 elements that matched the corresponding groups, in order."
1707 ;; convert trie from print-form if necessary
1708 (trie-transform-from-read-warn trie)
1709 ;; if stack functions aren't defined for trie type, throw error
1710 (if (not (functionp (trie--stack-createfun trie)))
1711 (error "Trie type does not support stack operations")
1712 ;; otherwise, create and initialise a regexp stack
1713 (trie--regexp-stack-create trie regexp reverse)))
1716 (defun trie--regexp-stack-construct-store
1717 (trie regexp &optional reverse)
1718 ;; Construct store for regexp stack based on TRIE.
1719 (let ((seq (cond ((stringp regexp) "") ((listp regexp) ()) (t [])))
1721 (push (list seq (trie--root trie)
1723 regexp :test (trie--construct-equality-function
1724 (trie--comparison-function trie)))
1727 (trie--regexp-stack-repopulate
1729 (trie--comparison-function trie)
1730 (trie--lookupfun trie)
1731 (trie--stack-createfun trie)
1732 (trie--stack-popfun trie)
1733 (trie--stack-emptyfun trie))))
1736 (defun trie--regexp-stack-repopulate
1737 (store reverse comparison-function lookupfun
1738 stack-createfun stack-popfun stack-emptyfun)
1739 ;; Recursively push matching children of the node at the head of STORE
1740 ;; onto STORE, until a data node is reached. REVERSE is the usual
1741 ;; query argument, and the remaining arguments are the corresponding
1743 (let (state seq node pos groups n s)
1746 (setq pos (pop store)
1752 ;; if stack is empty, we're done
1755 ;; if stack element is a trie node...
1756 ((trie--node-p node)
1758 ;; matching data node: add data to the stack and we're done
1759 ((trie--node-data-p node)
1760 (when (tNFA-match-p state)
1761 (setq groups (tNFA-group-data state))
1762 (push (cons (if groups (cons groups seq) seq)
1763 (trie--node-data node))
1765 nil) ; return nil to exit loop
1767 ;; wildcard transition: add new node stack
1768 ((tNFA-wildcard-p state)
1770 (funcall stack-createfun
1771 (trie--node-subtree node) reverse)
1775 (t ;; non-wildcard transition: add all possible next nodes
1776 (dolist (chr (sort (tNFA-transitions state)
1780 (,comparison-function b a)))))
1781 (when (and (setq n (trie--node-find
1782 node (vector chr) lookupfun))
1783 (setq s (tNFA-next-state state chr pos)))
1784 (push (list (trie--seq-append seq chr) n s (1+ pos))
1786 t))) ; return t to keep looping
1788 ;; otherwise, stack element is a node stack...
1790 ;; if node stack is empty, dump it and keep repopulating
1791 (if (funcall stack-emptyfun node)
1792 t ; return t to keep looping
1793 ;; otherwise, add node stack back, and add next node from
1795 (push (list seq node state pos) store)
1796 (setq node (funcall stack-popfun node)
1797 state (tNFA-next-state state
1798 (trie--node-split node) pos))
1800 ;; matching data node: add data to the stack and we're
1802 (if (trie--node-data-p node)
1804 (push (cons seq (trie--node-data node)) store)
1805 nil) ; return nil to exit loop
1806 ;; normal node: add it to the stack and keep
1809 (trie--seq-append seq (trie--node-split node))
1810 node state (1+ pos))
1817 ;; ----------------------------------------------------------------
1818 ;; Pretty-print tries during edebug
1823 ;; We advise the `edebug-prin1' and `edebug-prin1-to-string' functions
1824 ;; (actually, aliases) so that they print "#<trie>" instead of the full
1825 ;; print form for tries.
1827 ;; This is because, if left to its own devices, edebug hangs for ages
1828 ;; whilst printing large tries, and you either have to wait for a *very*
1829 ;; long time for it to finish, or kill Emacs entirely. (Even C-g C-g
1832 ;; We do this also for lists of tries, since those occur quite often,
1833 ;; but not for other sequence types or deeper nested structures, to keep
1834 ;; the implementation as simple as possible.
1836 ;; Since the print form of a trie is practically incomprehensible
1837 ;; anyway, we don't lose much by doing this. If you *really* want to
1838 ;; print tries in full whilst edebugging, despite this warning, disable
1841 ;; FIXME: We could use `cedet-edebug-prin1-extensions' instead of advice
1842 ;; when `cedet-edebug' is loaded, though I believe the current
1843 ;; implementation still works in that case.
1851 (defun trie--edebug-pretty-print (object)
1853 ((trie-p object) "#<trie>")
1854 ((null object) "nil")
1855 ((let ((tlist object) (test t))
1856 (while (or (trie-p (car-safe tlist))
1857 (and tlist (setq test nil)))
1858 (setq tlist (cdr tlist)))
1860 (concat "(" (mapconcat (lambda (dummy) "#<trie>") object " ") ")"))
1861 ;; ((vectorp object)
1862 ;; (let ((pretty "[") (len (length object)))
1863 ;; (dotimes (i (1- len))
1866 ;; (if (trie-p (aref object i))
1867 ;; "#<trie>" (prin1-to-string (aref object i))) " ")))
1869 ;; (if (trie-p (aref object (1- len)))
1870 ;; "#<trie>" (prin1-to-string (aref object (1- len))))
1874 (defun trie--edebug-prin1 (orig object &optional printcharfun)
1875 (let ((pretty (trie--edebug-pretty-print object)))
1878 (prin1 pretty printcharfun)
1880 (funcall orig object printcharfun))))
1882 (defun trie--edebug-prin1-to-string (orig object &optional noescape)
1883 (or (trie--edebug-pretty-print object)
1884 (funcall orig object noescape)))
1886 (if (fboundp 'advice-add)
1888 (advice-add 'edebug-prin1 :around #'trie--edebug-prin1)
1889 (advice-add 'edebug-prin1-to-string
1890 :around #'trie--edebug-prin1-to-string))
1892 (when (fboundp 'ad-define-subr-args)
1893 (ad-define-subr-args 'edebug-prin1 '(object &optional printcharfun)))
1895 (defadvice edebug-prin1
1896 (around trie activate compile preactivate)
1897 (setq ad-return-value
1898 (trie--edebug-prin1 (lambda (object printcharfun) ad-do-it)
1899 object printcharfun)))
1901 (when (fboundp 'ad-define-subr-args)
1902 (ad-define-subr-args 'edebug-prin1-to-string '(object &optional noescape)))
1904 (defadvice edebug-prin1-to-string
1905 (around trie activate compile preactivate)
1906 (setq ad-return-value
1907 (trie--edebug-prin1-to-string (lambda (object noescape) ad-do-it)
1914 ;;; trie.el ends here