]> code.delx.au - gnu-emacs/blob - lisp/progmodes/cc-guess.el
Update copyright year to 2016
[gnu-emacs] / lisp / progmodes / cc-guess.el
1 ;;; cc-guess.el --- guess indentation values by scanning existing code
2
3 ;; Copyright (C) 1985, 1987, 1992-2006, 2011-2016 Free Software
4 ;; Foundation, Inc.
5
6 ;; Author: 1994-1995 Barry A. Warsaw
7 ;; 2011- Masatake YAMATO
8 ;; Maintainer: bug-cc-mode@gnu.org
9 ;; Created: August 1994, split from cc-mode.el
10 ;; Keywords: c languages oop
11
12 ;; This file is part of GNU Emacs.
13
14 ;; GNU Emacs is free software: you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation, either version 3 of the License, or
17 ;; (at your option) any later version.
18
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
23
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
26
27 ;;; Commentary:
28 ;;
29 ;; This file contains routines that help guess the cc-mode style in a
30 ;; particular region/buffer. Here style means `c-offsets-alist' and
31 ;; `c-basic-offset'.
32 ;;
33 ;; The main entry point of this program is `c-guess' command but there
34 ;; are some variants.
35 ;;
36 ;; Suppose the major mode for the current buffer is one of the modes
37 ;; provided by cc-mode. `c-guess' guesses the indentation style by
38 ;; examining the indentation in the region between beginning of buffer
39 ;; and `c-guess-region-max'.
40
41 ;; and installs the guessed style. The name for installed style is given
42 ;; by `c-guess-style-name'.
43 ;;
44 ;; `c-guess-buffer' does the same but in the whole buffer.
45 ;; `c-guess-region' does the same but in the region between the point
46 ;; and the mark. `c-guess-no-install', `c-guess-buffer-no-install'
47 ;; and `c-guess-region-no-install' guess the indentation style but
48 ;; don't install it. You can review a guessed style with `c-guess-view'.
49 ;; After reviewing, use `c-guess-install' to install the style
50 ;; if you prefer it.
51 ;;
52 ;; If you want to reuse the guessed style in another buffer,
53 ;; run `c-set-style' command with the name of the guessed style:
54 ;; "*c-guess*:<name-of-file-which-examined-when-guessing>".
55 ;; Once the guessed style is installed explicitly with `c-guess-install'
56 ;; or implicitly with `c-guess', `c-guess-buffer', or `c-guess-region',
57 ;; a style name is given by `c-guess-style-name' with the above form.
58 ;;
59 ;; If you want to reuse the guessed style in future emacs sessions,
60 ;; you may want to put it to your .emacs. `c-guess-view' is for
61 ;; you. It emits emacs lisp code which defines the last guessed
62 ;; style, in a temporary buffer. You can put the emitted code into
63 ;; your .emacs. This command was suggested by Alan Mackenzie.
64
65 ;;; Code:
66
67 (eval-when-compile
68 (let ((load-path
69 (if (and (boundp 'byte-compile-dest-file)
70 (stringp byte-compile-dest-file))
71 (cons (file-name-directory byte-compile-dest-file) load-path)
72 load-path)))
73 (load "cc-bytecomp" nil t)))
74
75 (cc-require 'cc-defs)
76 (cc-require 'cc-engine)
77 (cc-require 'cc-styles)
78
79 \f
80
81 (defcustom c-guess-offset-threshold 10
82 "Threshold of acceptable offsets when examining indent information.
83 Discard an examined offset if its absolute value is greater than this.
84
85 The offset of a line included in the indent information returned by
86 `c-guess-basic-syntax'."
87 :version "24.1"
88 :type 'integer
89 :group 'c)
90
91 (defcustom c-guess-region-max 50000
92 "The maximum region size for examining indent information with `c-guess'.
93 It takes a long time to examine indent information from a large region;
94 this option helps you limit that time. nil means no limit."
95 :version "24.1"
96 :type 'integer
97 :group 'c)
98
99 \f
100 ;;;###autoload
101 (defvar c-guess-guessed-offsets-alist nil
102 "Currently guessed offsets-alist.")
103 ;;;###autoload
104 (defvar c-guess-guessed-basic-offset nil
105 "Currently guessed basic-offset.")
106
107 (defvar c-guess-accumulator nil)
108 ;; Accumulated examined indent information. Information is represented
109 ;; in a list. Each element in it has following structure:
110 ;;
111 ;; (syntactic-symbol ((indentation-offset1 . number-of-times1)
112 ;; (indentation-offset2 . number-of-times2)
113 ;; ...))
114 ;;
115 ;; This structure is built by `c-guess-accumulate-offset'.
116 ;;
117 ;; Here we call the pair (indentation-offset1 . number-of-times1) a
118 ;; counter. `c-guess-sort-accumulator' sorts the order of
119 ;; counters by number-of-times.
120 ;; Use `c-guess-dump-accumulator' to see the value.
121
122 (defconst c-guess-conversions
123 '((c . c-lineup-C-comments)
124 (inher-cont . c-lineup-multi-inher)
125 (string . -1000)
126 (comment-intro . c-lineup-comment)
127 (arglist-cont-nonempty . c-lineup-arglist)
128 (arglist-close . c-lineup-close-paren)
129 (cpp-macro . -1000)))
130
131
132 ;;;###autoload
133 (defun c-guess (&optional accumulate)
134 "Guess the style in the region up to `c-guess-region-max', and install it.
135
136 The style is given a name based on the file's absolute file name.
137
138 If given a prefix argument (or if the optional argument ACCUMULATE is
139 non-nil) then the previous guess is extended, otherwise a new guess is
140 made from scratch."
141 (interactive "P")
142 (c-guess-region (point-min)
143 (min (point-max) (or c-guess-region-max
144 (point-max)))
145 accumulate))
146
147 ;;;###autoload
148 (defun c-guess-no-install (&optional accumulate)
149 "Guess the style in the region up to `c-guess-region-max'; don't install it.
150
151 If given a prefix argument (or if the optional argument ACCUMULATE is
152 non-nil) then the previous guess is extended, otherwise a new guess is
153 made from scratch."
154 (interactive "P")
155 (c-guess-region-no-install (point-min)
156 (min (point-max) (or c-guess-region-max
157 (point-max)))
158 accumulate))
159
160 ;;;###autoload
161 (defun c-guess-buffer (&optional accumulate)
162 "Guess the style on the whole current buffer, and install it.
163
164 The style is given a name based on the file's absolute file name.
165
166 If given a prefix argument (or if the optional argument ACCUMULATE is
167 non-nil) then the previous guess is extended, otherwise a new guess is
168 made from scratch."
169 (interactive "P")
170 (c-guess-region (point-min)
171 (point-max)
172 accumulate))
173
174 ;;;###autoload
175 (defun c-guess-buffer-no-install (&optional accumulate)
176 "Guess the style on the whole current buffer; don't install it.
177
178 If given a prefix argument (or if the optional argument ACCUMULATE is
179 non-nil) then the previous guess is extended, otherwise a new guess is
180 made from scratch."
181 (interactive "P")
182 (c-guess-region-no-install (point-min)
183 (point-max)
184 accumulate))
185
186 ;;;###autoload
187 (defun c-guess-region (start end &optional accumulate)
188 "Guess the style on the region and install it.
189
190 The style is given a name based on the file's absolute file name.
191
192 If given a prefix argument (or if the optional argument ACCUMULATE is
193 non-nil) then the previous guess is extended, otherwise a new guess is
194 made from scratch."
195 (interactive "r\nP")
196 (c-guess-region-no-install start end accumulate)
197 (c-guess-install))
198
199
200 (defsubst c-guess-empty-line-p ()
201 (eq (line-beginning-position)
202 (line-end-position)))
203
204 ;;;###autoload
205 (defun c-guess-region-no-install (start end &optional accumulate)
206 "Guess the style on the region; don't install it.
207
208 Every line of code in the region is examined and values for the following two
209 variables are guessed:
210
211 * `c-basic-offset', and
212 * the indentation values of the various syntactic symbols in
213 `c-offsets-alist'.
214
215 The guessed values are put into `c-guess-guessed-basic-offset' and
216 `c-guess-guessed-offsets-alist'.
217
218 Frequencies of use are taken into account when guessing, so minor
219 inconsistencies in the indentation style shouldn't produce wrong guesses.
220
221 If given a prefix argument (or if the optional argument ACCUMULATE is
222 non-nil) then the previous examination is extended, otherwise a new
223 guess is made from scratch.
224
225 Note that the larger the region to guess in, the slower the guessing.
226 So you can limit the region with `c-guess-region-max'."
227 (interactive "r\nP")
228 (let ((accumulator (when accumulate c-guess-accumulator)))
229 (setq c-guess-accumulator (c-guess-examine start end accumulator))
230 (let ((pair (c-guess-guess c-guess-accumulator)))
231 (setq c-guess-guessed-basic-offset (car pair)
232 c-guess-guessed-offsets-alist (cdr pair)))))
233
234
235 (defun c-guess-examine (start end accumulator)
236 (let ((reporter (when (fboundp 'make-progress-reporter)
237 (make-progress-reporter "Examining Indentation "
238 start
239 end))))
240 (save-excursion
241 (goto-char start)
242 (while (< (point) end)
243 (unless (c-guess-empty-line-p)
244 (mapc (lambda (s)
245 (setq accumulator (or (c-guess-accumulate accumulator s)
246 accumulator)))
247 (c-save-buffer-state () (c-guess-basic-syntax))))
248 (when reporter (progress-reporter-update reporter (point)))
249 (forward-line 1)))
250 (when reporter (progress-reporter-done reporter)))
251 (c-guess-sort-accumulator accumulator))
252
253 (defun c-guess-guess (accumulator)
254 ;; Guess basic-offset and offsets-alist from ACCUMULATOR,
255 ;; then return them as a cons: (basic-offset . offsets-alist).
256 ;; See the comments at `c-guess-accumulator' about the format
257 ;; ACCUMULATOR.
258 (let* ((basic-offset (c-guess-make-basic-offset accumulator))
259 (typical-offsets-alist (c-guess-make-offsets-alist
260 accumulator))
261 (symbolic-offsets-alist (c-guess-symbolize-offsets-alist
262 typical-offsets-alist
263 basic-offset))
264 (merged-offsets-alist (c-guess-merge-offsets-alists
265 (copy-tree c-guess-conversions)
266 symbolic-offsets-alist)))
267 (cons basic-offset merged-offsets-alist)))
268
269 (defun c-guess-current-offset (relpos)
270 ;; Calculate relative indentation (point) to RELPOS.
271 (- (progn (back-to-indentation)
272 (current-column))
273 (save-excursion
274 (goto-char relpos)
275 (current-column))))
276
277 (defun c-guess-accumulate (accumulator syntax-element)
278 ;; Add SYNTAX-ELEMENT to ACCUMULATOR.
279 (let ((symbol (car syntax-element))
280 (relpos (cadr syntax-element)))
281 (when (numberp relpos)
282 (let ((offset (c-guess-current-offset relpos)))
283 (when (< (abs offset) c-guess-offset-threshold)
284 (c-guess-accumulate-offset accumulator
285 symbol
286 offset))))))
287
288 (defun c-guess-accumulate-offset (accumulator symbol offset)
289 ;; Added SYMBOL and OFFSET to ACCUMULATOR. See
290 ;; `c-guess-accumulator' about the structure of ACCUMULATOR.
291 (let* ((entry (assoc symbol accumulator))
292 (counters (cdr entry))
293 counter)
294 (if entry
295 (progn
296 (setq counter (assoc offset counters))
297 (if counter
298 (setcdr counter (1+ (cdr counter)))
299 (setq counters (cons (cons offset 1) counters))
300 (setcdr entry counters))
301 accumulator)
302 (cons (cons symbol (cons (cons offset 1) nil)) accumulator))))
303
304 (defun c-guess-sort-accumulator (accumulator)
305 ;; Sort each element of ACCUMULATOR by the number-of-times. See
306 ;; `c-guess-accumulator' for more details.
307 (mapcar
308 (lambda (entry)
309 (let ((symbol (car entry))
310 (counters (cdr entry)))
311 (cons symbol (sort counters
312 (lambda (a b)
313 (if (> (cdr a) (cdr b))
314 t
315 (and
316 (eq (cdr a) (cdr b))
317 (< (car a) (car b)))))))))
318 accumulator))
319
320 (defun c-guess-make-offsets-alist (accumulator)
321 ;; Throw away the rare cases in accumulator and make an offsets-alist structure.
322 (mapcar
323 (lambda (entry)
324 (cons (car entry)
325 (car (car (cdr entry)))))
326 accumulator))
327
328 (defun c-guess-merge-offsets-alists (strong weak)
329 ;; Merge two offsets-alists into one.
330 ;; When two offsets-alists have the same symbol
331 ;; entry, give STRONG priority over WEAK.
332 (mapc
333 (lambda (weak-elt)
334 (unless (assoc (car weak-elt) strong)
335 (setq strong (cons weak-elt strong))))
336 weak)
337 strong)
338
339 (defun c-guess-make-basic-offset (accumulator)
340 ;; As candidate for `c-basic-offset', find the most frequently appearing
341 ;; indentation-offset in ACCUMULATOR.
342 (let* (;; Drop the value related to `c' syntactic-symbol.
343 ;; (`c': Inside a multiline C style block comment.)
344 ;; The impact for values of `c' is too large for guessing
345 ;; `basic-offset' if the target source file is small and its license
346 ;; notice is at top of the file.
347 (accumulator (assq-delete-all 'c (copy-tree accumulator)))
348 ;; Drop syntactic-symbols from ACCUMULATOR.
349 (alist (apply #'append (mapcar (lambda (elts)
350 (mapcar (lambda (elt)
351 (cons (abs (car elt))
352 (cdr elt)))
353 (cdr elts)))
354 accumulator)))
355 ;; Gather all indentation-offsets other than 0.
356 ;; 0 is meaningless as `basic-offset'.
357 (offset-list (delete 0
358 (delete-dups (mapcar
359 (lambda (elt) (car elt))
360 alist))))
361 ;; Sum of number-of-times for offset:
362 ;; (offset . sum)
363 (summed (mapcar (lambda (offset)
364 (cons offset
365 (apply #'+
366 (mapcar (lambda (a)
367 (if (eq (car a) offset)
368 (cdr a)
369 0))
370 alist))))
371 offset-list)))
372 ;;
373 ;; Find the majority.
374 ;;
375 (let ((majority '(nil . 0)))
376 (while summed
377 (when (< (cdr majority) (cdr (car summed)))
378 (setq majority (car summed)))
379 (setq summed (cdr summed)))
380 (car majority))))
381
382 (defun c-guess-symbolize-offsets-alist (offsets-alist basic-offset)
383 ;; Convert the representation of OFFSETS-ALIST to an alist using
384 ;; `+', `-', `++', `--', `*', or `/'. These symbols represent
385 ;; a value relative to BASIC-OFFSET. Their meaning can be found
386 ;; in the CC Mode manual.
387 (mapcar
388 (lambda (elt)
389 (let ((s (car elt))
390 (v (cdr elt)))
391 (cond
392 ((integerp v)
393 (cons s (c-guess-symbolize-integer v
394 basic-offset)))
395 (t elt))))
396 offsets-alist))
397
398 (defun c-guess-symbolize-integer (int basic-offset)
399 (let ((aint (abs int)))
400 (cond
401 ((eq int basic-offset) '+)
402 ((eq aint basic-offset) '-)
403 ((eq int (* 2 basic-offset)) '++)
404 ((eq aint (* 2 basic-offset)) '--)
405 ((eq (* 2 int) basic-offset) '*)
406 ((eq (* 2 aint) basic-offset) '-)
407 (t int))))
408
409 (defun c-guess-style-name ()
410 ;; Make a style name for the guessed style.
411 (format "*c-guess*:%s" (buffer-file-name)))
412
413 (defun c-guess-make-style (basic-offset offsets-alist)
414 (when basic-offset
415 ;; Make a style from guessed values.
416 (let* ((offsets-alist (c-guess-merge-offsets-alists
417 offsets-alist
418 c-offsets-alist)))
419 `((c-basic-offset . ,basic-offset)
420 (c-offsets-alist . ,offsets-alist)))))
421
422 ;;;###autoload
423 (defun c-guess-install (&optional style-name)
424 "Install the latest guessed style into the current buffer.
425 \(This guessed style is a combination of `c-guess-guessed-basic-offset',
426 `c-guess-guessed-offsets-alist' and `c-offsets-alist'.)
427
428 The style is entered into CC Mode's style system by
429 `c-add-style'. Its name is either STYLE-NAME, or a name based on
430 the absolute file name of the file if STYLE-NAME is nil."
431 (interactive "sNew style name (empty for default name): ")
432 (let* ((style (c-guess-make-style c-guess-guessed-basic-offset
433 c-guess-guessed-offsets-alist)))
434 (if style
435 (let ((style-name (or (if (equal style-name "")
436 nil
437 style-name)
438 (c-guess-style-name))))
439 (c-add-style style-name style t)
440 (message "Style \"%s\" is installed" style-name))
441 (error "Not yet guessed"))))
442
443 (defun c-guess-dump-accumulator ()
444 "Show `c-guess-accumulator'."
445 (interactive)
446 (with-output-to-temp-buffer "*Accumulated Examined Indent Information*"
447 (pp c-guess-accumulator)))
448
449 (defun c-guess-reset-accumulator ()
450 "Reset `c-guess-accumulator'."
451 (interactive)
452 (setq c-guess-accumulator nil))
453
454 (defun c-guess-dump-guessed-values ()
455 "Show `c-guess-guessed-basic-offset' and `c-guess-guessed-offsets-alist'."
456 (interactive)
457 (with-output-to-temp-buffer "*Guessed Values*"
458 (princ "basic-offset: \n\t")
459 (pp c-guess-guessed-basic-offset)
460 (princ "\n\n")
461 (princ "offsets-alist: \n")
462 (pp c-guess-guessed-offsets-alist)
463 ))
464
465 (defun c-guess-dump-guessed-style (&optional printer)
466 "Show the guessed style.
467 `pp' is used to print the style but if PRINTER is given,
468 PRINTER is used instead. If PRINTER is not nil, it
469 is called with one argument, the guessed style."
470 (interactive)
471 (let ((style (c-guess-make-style c-guess-guessed-basic-offset
472 c-guess-guessed-offsets-alist)))
473 (if style
474 (with-output-to-temp-buffer "*Guessed Style*"
475 (funcall (if printer printer 'pp) style))
476 (error "Not yet guessed"))))
477
478 (defun c-guess-guessed-syntactic-symbols ()
479 ;; Return syntactic symbols in c-guess-guessed-offsets-alist
480 ;; but not in c-guess-conversions.
481 (let ((alist c-guess-guessed-offsets-alist)
482 elt
483 (symbols nil))
484 (while alist
485 (setq elt (car alist)
486 alist (cdr alist))
487 (unless (assq (car elt) c-guess-conversions)
488 (setq symbols (cons (car elt)
489 symbols))))
490 symbols))
491
492 (defun c-guess-view-reorder-offsets-alist-in-style (style guessed-syntactic-symbols)
493 ;; Reorder the `c-offsets-alist' field of STYLE.
494 ;; If an entry in `c-offsets-alist' holds a guessed value, move it to
495 ;; front in the field. In addition alphabetical sort by entry name is done.
496 (setq style (copy-tree style))
497 (let ((offsets-alist-cell (assq 'c-offsets-alist style))
498 (guessed-syntactic-symbols (c-guess-guessed-syntactic-symbols)))
499 (setcdr offsets-alist-cell
500 (sort (cdr offsets-alist-cell)
501 (lambda (a b)
502 (let ((a-guessed? (memq (car a) guessed-syntactic-symbols))
503 (b-guessed? (memq (car b) guessed-syntactic-symbols)))
504 (cond
505 ((or (and a-guessed? b-guessed?)
506 (not (or a-guessed? b-guessed?)))
507 (string-lessp (car a) (car b)))
508 (a-guessed? t)
509 (b-guessed? nil)))))))
510 style)
511
512 (defun c-guess-view-mark-guessed-entries (guessed-syntactic-symbols)
513 ;; Put " ; Guess value" markers on all entries which hold
514 ;; guessed values.
515 ;; `c-basic-offset' is always considered as holding a guessed value.
516 (let ((needs-markers (cons 'c-basic-offset
517 guessed-syntactic-symbols)))
518 (while needs-markers
519 (goto-char (point-min))
520 (when (search-forward (concat "("
521 (symbol-name (car needs-markers))
522 " ")
523 nil t)
524 (move-end-of-line 1)
525 (comment-dwim nil)
526 (insert " Guessed value"))
527 (setq needs-markers
528 (cdr needs-markers)))))
529
530 (defun c-guess-view (&optional with-name)
531 "Emit emacs lisp code which defines the last guessed style.
532 So you can put the code into .emacs if you prefer the
533 guessed code.
534 \"STYLE NAME HERE\" is used as the name for the style in the
535 emitted code. If WITH-NAME is given, it is used instead.
536 WITH-NAME is expected as a string but if this function
537 called interactively with prefix argument, the value for
538 WITH-NAME is asked to the user."
539 (interactive "P")
540 (let* ((temporary-style-name (cond
541 ((stringp with-name) with-name)
542 (with-name (read-from-minibuffer
543 "New style name: "))
544 (t
545 "STYLE NAME HERE")))
546 (guessed-style-name (c-guess-style-name))
547 (current-style-name c-indentation-style)
548 (parent-style-name (if (string-equal guessed-style-name
549 current-style-name)
550 ;; The guessed style is already installed.
551 ;; It cannot be used as the parent style.
552 ;; Use the default style for the current
553 ;; major mode as the parent style.
554 (cc-choose-style-for-mode
555 major-mode
556 c-default-style)
557 ;; The guessed style is not installed yet.
558 current-style-name)))
559 (c-guess-dump-guessed-style
560 (lambda (style)
561 (let ((guessed-syntactic-symbols (c-guess-guessed-syntactic-symbols)))
562 (pp `(c-add-style ,temporary-style-name
563 ',(cons parent-style-name
564 (c-guess-view-reorder-offsets-alist-in-style
565 style
566 guessed-syntactic-symbols))))
567 (with-current-buffer standard-output
568 (lisp-interaction-mode)
569 (c-guess-view-mark-guessed-entries
570 guessed-syntactic-symbols)
571 (buffer-enable-undo)))))))
572
573 \f
574 (cc-provide 'cc-guess)
575
576 ;; Local Variables:
577 ;; indent-tabs-mode: t
578 ;; tab-width: 8
579 ;; End:
580 ;;; cc-guess.el ends here