]> code.delx.au - gnu-emacs-elpa/blob - packages/vlf/vlf-search.el
* packages/vlf: Version 1.6. Automatically tune batch size to
[gnu-emacs-elpa] / packages / vlf / vlf-search.el
1 ;;; vlf-search.el --- Search functionality for VLF -*- lexical-binding: t -*-
2
3 ;; Copyright (C) 2014 Free Software Foundation, Inc.
4
5 ;; Keywords: large files, search
6 ;; Author: Andrey Kotlarski <m00naticus@gmail.com>
7 ;; URL: https://github.com/m00natic/vlfi
8
9 ;; This file is free software; you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; any later version.
13
14 ;; This file is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
18
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs; see the file COPYING. If not, write to
21 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 ;; Boston, MA 02111-1307, USA.
23
24 ;;; Commentary:
25 ;; This package provides search utilities for dealing with large files
26 ;; in constant memory.
27
28 ;;; Code:
29
30 (require 'vlf)
31
32 (defun vlf-re-search (regexp count backward batch-step
33 &optional reporter time)
34 "Search for REGEXP COUNT number of times forward or BACKWARD.
35 BATCH-STEP is amount of overlap between successive chunks.
36 Use existing REPORTER and start TIME if given.
37 Return t if search has been at least partially successful."
38 (if (<= count 0)
39 (error "Count must be positive"))
40 (run-hook-with-args 'vlf-before-batch-functions 'search)
41 (or reporter (setq reporter (make-progress-reporter
42 (concat "Searching for " regexp "...")
43 (if backward
44 (- vlf-file-size vlf-end-pos)
45 vlf-start-pos)
46 vlf-file-size)))
47 (or time (setq time (float-time)))
48 (let* ((tramp-verbose (if (boundp 'tramp-verbose)
49 (min tramp-verbose 2)))
50 (case-fold-search t)
51 (match-chunk-start vlf-start-pos)
52 (match-chunk-end vlf-end-pos)
53 (match-start-pos (+ vlf-start-pos (position-bytes (point))))
54 (match-end-pos match-start-pos)
55 (to-find count)
56 (is-hexl (derived-mode-p 'hexl-mode))
57 (tune-types (if is-hexl '(:hexl :dehexlify :insert :encode)
58 '(:insert :encode)))
59 (font-lock font-lock-mode))
60 (font-lock-mode 0)
61 (vlf-with-undo-disabled
62 (unwind-protect
63 (catch 'end-of-file
64 (if backward
65 (while (not (zerop to-find))
66 (cond ((re-search-backward regexp nil t)
67 (setq to-find (1- to-find)
68 match-chunk-start vlf-start-pos
69 match-chunk-end vlf-end-pos
70 match-start-pos (+ vlf-start-pos
71 (position-bytes
72 (match-beginning 0)))
73 match-end-pos (+ vlf-start-pos
74 (position-bytes
75 (match-end 0)))))
76 ((zerop vlf-start-pos)
77 (throw 'end-of-file nil))
78 (t (vlf-tune-batch tune-types)
79 (let ((batch-move (- vlf-start-pos
80 (- vlf-batch-size
81 batch-step))))
82 (vlf-move-to-batch
83 (if (or is-hexl
84 (<= batch-move match-start-pos))
85 batch-move
86 (- match-start-pos vlf-batch-size)) t))
87 (goto-char (if (or is-hexl
88 (<= vlf-end-pos
89 match-start-pos))
90 (point-max)
91 (or (byte-to-position
92 (- match-start-pos
93 vlf-start-pos))
94 (point-max))))
95 (progress-reporter-update
96 reporter (- vlf-file-size
97 vlf-start-pos)))))
98 (while (not (zerop to-find))
99 (cond ((re-search-forward regexp nil t)
100 (setq to-find (1- to-find)
101 match-chunk-start vlf-start-pos
102 match-chunk-end vlf-end-pos
103 match-start-pos (+ vlf-start-pos
104 (position-bytes
105 (match-beginning 0)))
106 match-end-pos (+ vlf-start-pos
107 (position-bytes
108 (match-end 0)))))
109 ((= vlf-end-pos vlf-file-size)
110 (throw 'end-of-file nil))
111 (t (vlf-tune-batch tune-types)
112 (let ((batch-move (- vlf-end-pos batch-step)))
113 (vlf-move-to-batch
114 (if (or is-hexl
115 (< match-end-pos batch-move))
116 batch-move
117 match-end-pos) t))
118 (goto-char (if (or is-hexl
119 (<= match-end-pos vlf-start-pos))
120 (point-min)
121 (or (byte-to-position
122 (- match-end-pos
123 vlf-start-pos))
124 (point-min))))
125 (progress-reporter-update reporter
126 vlf-end-pos)))))
127 (progress-reporter-done reporter))
128 (set-buffer-modified-p nil)
129 (if is-hexl (vlf-tune-hexlify))
130 (if font-lock (font-lock-mode 1))
131 (let ((result
132 (if backward
133 (vlf-goto-match match-chunk-start match-chunk-end
134 match-end-pos match-start-pos
135 count to-find time)
136 (vlf-goto-match match-chunk-start match-chunk-end
137 match-start-pos match-end-pos
138 count to-find time))))
139 (run-hook-with-args 'vlf-after-batch-functions 'search)
140 result)))))
141
142 (defun vlf-goto-match (match-chunk-start match-chunk-end
143 match-pos-start match-pos-end
144 count to-find time)
145 "Move to MATCH-CHUNK-START MATCH-CHUNK-END surrounding\
146 MATCH-POS-START and MATCH-POS-END.
147 According to COUNT and left TO-FIND, show if search has been
148 successful. Use start TIME to report how much it took.
149 Return nil if nothing found."
150 (if (= count to-find)
151 (progn (vlf-move-to-chunk match-chunk-start match-chunk-end)
152 (goto-char (or (byte-to-position (- match-pos-start
153 vlf-start-pos))
154 (point-max)))
155 (message "Not found (%f secs)" (- (float-time) time))
156 nil)
157 (let ((success (zerop to-find)))
158 (if success
159 (vlf-update-buffer-name)
160 (vlf-move-to-chunk match-chunk-start match-chunk-end))
161 (setq vlf-batch-size (vlf-tune-optimal-load
162 (if (derived-mode-p 'hexl-mode)
163 '(:hexl :dehexlify :insert :encode)
164 '(:insert :encode))))
165 (let* ((match-end (or (byte-to-position (- match-pos-end
166 vlf-start-pos))
167 (point-max)))
168 (overlay (make-overlay (byte-to-position
169 (- match-pos-start
170 vlf-start-pos))
171 match-end)))
172 (overlay-put overlay 'face 'match)
173 (if success
174 (message "Match found (%f secs)" (- (float-time) time))
175 (goto-char match-end)
176 (message "Moved to the %d match which is last (%f secs)"
177 (- count to-find) (- (float-time) time)))
178 (unwind-protect (sit-for 3)
179 (delete-overlay overlay))
180 t))))
181
182 (defun vlf-re-search-forward (regexp count)
183 "Search forward for REGEXP prefix COUNT number of times.
184 Search is performed chunk by chunk in `vlf-batch-size' memory."
185 (interactive (if (vlf-no-modifications)
186 (list (read-regexp "Search whole file"
187 (if regexp-history
188 (car regexp-history)))
189 (or current-prefix-arg 1))))
190 (let ((batch-size vlf-batch-size))
191 (or (vlf-re-search regexp count nil (min 1024 (/ vlf-batch-size 8)))
192 (setq vlf-batch-size batch-size))))
193
194 (defun vlf-re-search-backward (regexp count)
195 "Search backward for REGEXP prefix COUNT number of times.
196 Search is performed chunk by chunk in `vlf-batch-size' memory."
197 (interactive (if (vlf-no-modifications)
198 (list (read-regexp "Search whole file backward"
199 (if regexp-history
200 (car regexp-history)))
201 (or current-prefix-arg 1))))
202 (let ((batch-size vlf-batch-size))
203 (or (vlf-re-search regexp count t (min 1024 (/ vlf-batch-size 8)))
204 (setq vlf-batch-size batch-size))))
205
206 (defun vlf-goto-line (n)
207 "Go to line N. If N is negative, count from the end of file."
208 (interactive (if (vlf-no-modifications)
209 (list (read-number "Go to line: "))))
210 (run-hook-with-args 'vlf-before-batch-functions 'goto-line)
211 (vlf-verify-size)
212 (let ((tramp-verbose (if (boundp 'tramp-verbose)
213 (min tramp-verbose 2)))
214 (start-pos vlf-start-pos)
215 (end-pos vlf-end-pos)
216 (batch-size vlf-batch-size)
217 (pos (point))
218 (is-hexl (derived-mode-p 'hexl-mode))
219 (font-lock font-lock-mode)
220 (time (float-time))
221 (success nil))
222 (font-lock-mode 0)
223 (vlf-tune-batch '(:raw))
224 (unwind-protect
225 (if (< 0 n)
226 (let ((start 0)
227 (end (min vlf-batch-size vlf-file-size))
228 (reporter (make-progress-reporter
229 (concat "Searching for line "
230 (number-to-string n) "...")
231 0 vlf-file-size))
232 (inhibit-read-only t))
233 (setq n (1- n))
234 (vlf-with-undo-disabled
235 (or is-hexl
236 (while (and (< (- end start) n)
237 (< n (- vlf-file-size start)))
238 (erase-buffer)
239 (vlf-tune-insert-file-contents-literally start end)
240 (goto-char (point-min))
241 (while (re-search-forward "[\n\C-m]" nil t)
242 (setq n (1- n)))
243 (vlf-verify-size)
244 (vlf-tune-batch '(:raw))
245 (setq start end
246 end (min vlf-file-size
247 (+ start vlf-batch-size)))
248 (progress-reporter-update reporter start)))
249 (when (< n (- vlf-file-size end))
250 (vlf-tune-batch (if is-hexl
251 '(:hexl :dehexlify :insert :encode)
252 '(:insert :encode)))
253 (vlf-move-to-chunk-2 start (+ start vlf-batch-size))
254 (goto-char (point-min))
255 (setq success (vlf-re-search "[\n\C-m]" n nil 0
256 reporter time)))))
257 (let ((start (max 0 (- vlf-file-size vlf-batch-size)))
258 (end vlf-file-size)
259 (reporter (make-progress-reporter
260 (concat "Searching for line -"
261 (number-to-string n) "...")
262 0 vlf-file-size))
263 (inhibit-read-only t))
264 (setq n (- n))
265 (vlf-with-undo-disabled
266 (or is-hexl
267 (while (and (< (- end start) n) (< n end))
268 (erase-buffer)
269 (vlf-tune-insert-file-contents-literally start end)
270 (goto-char (point-max))
271 (while (re-search-backward "[\n\C-m]" nil t)
272 (setq n (1- n)))
273 (vlf-tune-batch '(:raw))
274 (setq end start
275 start (max 0 (- end vlf-batch-size)))
276 (progress-reporter-update reporter
277 (- vlf-file-size end))))
278 (when (< n end)
279 (vlf-tune-batch (if is-hexl
280 '(:hexl :dehexlify :insert :encode)
281 '(:insert :encode)))
282 (vlf-move-to-chunk-2 (- end vlf-batch-size) end)
283 (goto-char (point-max))
284 (setq success (vlf-re-search "[\n\C-m]" n t 0
285 reporter time))))))
286 (if font-lock (font-lock-mode 1))
287 (unless success
288 (vlf-with-undo-disabled
289 (vlf-move-to-chunk-2 start-pos end-pos))
290 (vlf-update-buffer-name)
291 (goto-char pos)
292 (setq vlf-batch-size batch-size)
293 (message "Unable to find line"))
294 (run-hook-with-args 'vlf-after-batch-functions 'goto-line))))
295
296 (provide 'vlf-search)
297
298 ;;; vlf-search.el ends here