]> code.delx.au - gnu-emacs/blob - lisp/progmodes/cc-guess.el
Add 2012 to FSF copyright years for Emacs files
[gnu-emacs] / lisp / progmodes / cc-guess.el
1 ;;; cc-guess.el --- guess indentation values by scanning existing code
2
3 ;; Copyright (C) 1985, 1987, 1992-2006, 2011-2012
4 ;; Free Software Foundation, Inc.
5
6 ;; Author: 1994-1995 Barry A. Warsaw
7 ;; 2011- Masatake YAMATO
8 ;; Maintainer: bug-cc-mode@gnu.org
9 ;; Created: August 1994, split from cc-mode.el
10 ;; Version: See cc-mode.el
11 ;; Keywords: c languages oop
12
13 ;; This file is part of GNU Emacs.
14
15 ;; GNU Emacs is free software: you can redistribute it and/or modify
16 ;; it under the terms of the GNU General Public License as published by
17 ;; the Free Software Foundation, either version 3 of the License, or
18 ;; (at your option) any later version.
19
20 ;; GNU Emacs is distributed in the hope that it will be useful,
21 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ;; GNU General Public License for more details.
24
25 ;; You should have received a copy of the GNU General Public License
26 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
27
28 ;;; Commentary:
29 ;;
30 ;; This file contains routines that help guess the cc-mode style in a
31 ;; particular region/buffer. Here style means `c-offsets-alist' and
32 ;; `c-basic-offset'.
33 ;;
34 ;; The main entry point of this program is `c-guess' command but there
35 ;; are some variants.
36 ;;
37 ;; Suppose the major mode for the current buffer is one of the modes
38 ;; provided by cc-mode. `c-guess' guesses the indentation style by
39 ;; examining the indentation in the region between beginning of buffer
40 ;; and `c-guess-region-max'.
41
42 ;; and installs the guessed style. The name for installed style is given
43 ;; by `c-guess-style-name'.
44 ;;
45 ;; `c-guess-buffer' does the same but in the whole buffer.
46 ;; `c-guess-region' does the same but in the region between the point
47 ;; and the mark. `c-guess-no-install', `c-guess-buffer-no-install'
48 ;; and `c-guess-region-no-install' guess the indentation style but
49 ;; don't install it. You can review a guessed style with `c-guess-view'.
50 ;; After reviewing, use `c-guess-install' to install the style
51 ;; if you prefer it.
52 ;;
53 ;; If you want to reuse the guessed style in another buffer,
54 ;; run `c-set-style' command with the name of the guessed style:
55 ;; "*c-guess*:<name-of-file-which-examined-when-guessing>".
56 ;; Once the guessed style is installed explicitly with `c-guess-install'
57 ;; or implicitly with `c-guess', `c-guess-buffer', or `c-guess-region',
58 ;; a style name is given by `c-guess-style-name' with the above form.
59 ;;
60 ;; If you want to reuse the guessed style in future emacs sessions,
61 ;; you may want to put it to your .emacs. `c-guess-view' is for
62 ;; you. It emits emacs lisp code which defines the last guessed
63 ;; style, in a temporary buffer. You can put the emitted code into
64 ;; your .emacs. This command was suggested by Alan Mackenzie.
65
66 ;;; Code:
67
68 (eval-when-compile
69 (let ((load-path
70 (if (and (boundp 'byte-compile-dest-file)
71 (stringp byte-compile-dest-file))
72 (cons (file-name-directory byte-compile-dest-file) load-path)
73 load-path)))
74 (load "cc-bytecomp" nil t)))
75
76 (cc-require 'cc-defs)
77 (cc-require 'cc-engine)
78 (cc-require 'cc-styles)
79
80 \f
81
82 (defcustom c-guess-offset-threshold 10
83 "Threshold of acceptable offsets when examining indent information.
84 Discard an examined offset if its absolute value is greater than this.
85
86 The offset of a line included in the indent information returned by
87 `c-guess-basic-syntax'."
88 :type 'integer
89 :group 'c)
90
91 (defcustom c-guess-region-max 50000
92 "The maximum region size for examining indent information with `c-guess'.
93 It takes a long time to examine indent information from a large region;
94 this option helps you limit that time. `nil' means no limit."
95 :type 'integer
96 :group 'c)
97
98 \f
99 ;;;###autoload
100 (defvar c-guess-guessed-offsets-alist nil
101 "Currently guessed offsets-alist.")
102 ;;;###autoload
103 (defvar c-guess-guessed-basic-offset nil
104 "Currently guessed basic-offset.")
105
106 (defvar c-guess-accumulator nil)
107 ;; Accumulated examined indent information. Information is represented
108 ;; in a list. Each element in it has following structure:
109 ;;
110 ;; (syntactic-symbol ((indentation-offset1 . number-of-times1)
111 ;; (indentation-offset2 . number-of-times2)
112 ;; ...))
113 ;;
114 ;; This structure is built by `c-guess-accumulate-offset'.
115 ;;
116 ;; Here we call the pair (indentation-offset1 . number-of-times1) a
117 ;; counter. `c-guess-sort-accumulator' sorts the order of
118 ;; counters by number-of-times.
119 ;; Use `c-guess-dump-accumulator' to see the value.
120
121 (defconst c-guess-conversions
122 '((c . c-lineup-C-comments)
123 (inher-cont . c-lineup-multi-inher)
124 (string . -1000)
125 (comment-intro . c-lineup-comment)
126 (arglist-cont-nonempty . c-lineup-arglist)
127 (arglist-close . c-lineup-close-paren)
128 (cpp-macro . -1000)))
129
130
131 ;;;###autoload
132 (defun c-guess (&optional accumulate)
133 "Guess the style in the region up to `c-guess-region-max', and install it.
134
135 The style is given a name based on the file's absolute file name.
136
137 If given a prefix argument (or if the optional argument ACCUMULATE is
138 non-nil) then the previous guess is extended, otherwise a new guess is
139 made from scratch."
140 (interactive "P")
141 (c-guess-region (point-min)
142 (min (point-max) (or c-guess-region-max
143 (point-max)))
144 accumulate))
145
146 ;;;###autoload
147 (defun c-guess-no-install (&optional accumulate)
148 "Guess the style in the region up to `c-guess-region-max'; don't install it.
149
150 If given a prefix argument (or if the optional argument ACCUMULATE is
151 non-nil) then the previous guess is extended, otherwise a new guess is
152 made from scratch."
153 (interactive "P")
154 (c-guess-region-no-install (point-min)
155 (min (point-max) (or c-guess-region-max
156 (point-max)))
157 accumulate))
158
159 ;;;###autoload
160 (defun c-guess-buffer (&optional accumulate)
161 "Guess the style on the whole current buffer, and install it.
162
163 The style is given a name based on the file's absolute file name.
164
165 If given a prefix argument (or if the optional argument ACCUMULATE is
166 non-nil) then the previous guess is extended, otherwise a new guess is
167 made from scratch."
168 (interactive "P")
169 (c-guess-region (point-min)
170 (point-max)
171 accumulate))
172
173 ;;;###autoload
174 (defun c-guess-buffer-no-install (&optional accumulate)
175 "Guess the style on the whole current buffer; don't install it.
176
177 If given a prefix argument (or if the optional argument ACCUMULATE is
178 non-nil) then the previous guess is extended, otherwise a new guess is
179 made from scratch."
180 (interactive "P")
181 (c-guess-region-no-install (point-min)
182 (point-max)
183 accumulate))
184
185 ;;;###autoload
186 (defun c-guess-region (start end &optional accumulate)
187 "Guess the style on the region and install it.
188
189 The style is given a name based on the file's absolute file name.
190
191 If given a prefix argument (or if the optional argument ACCUMULATE is
192 non-nil) then the previous guess is extended, otherwise a new guess is
193 made from scratch."
194 (interactive "r\nP")
195 (c-guess-region-no-install start end accumulate)
196 (c-guess-install))
197
198
199 (defsubst c-guess-empty-line-p ()
200 (eq (line-beginning-position)
201 (line-end-position)))
202
203 ;;;###autoload
204 (defun c-guess-region-no-install (start end &optional accumulate)
205 "Guess the style on the region; don't install it.
206
207 Every line of code in the region is examined and values for the following two
208 variables are guessed:
209
210 * `c-basic-offset', and
211 * the indentation values of the various syntactic symbols in
212 `c-offsets-alist'.
213
214 The guessed values are put into `c-guess-guessed-basic-offset' and
215 `c-guess-guessed-offsets-alist'.
216
217 Frequencies of use are taken into account when guessing, so minor
218 inconsistencies in the indentation style shouldn't produce wrong guesses.
219
220 If given a prefix argument (or if the optional argument ACCUMULATE is
221 non-nil) then the previous examination is extended, otherwise a new
222 guess is made from scratch.
223
224 Note that the larger the region to guess in, the slower the guessing.
225 So you can limit the region with `c-guess-region-max'."
226 (interactive "r\nP")
227 (let ((accumulator (when accumulate c-guess-accumulator)))
228 (setq c-guess-accumulator (c-guess-examine start end accumulator))
229 (let ((pair (c-guess-guess c-guess-accumulator)))
230 (setq c-guess-guessed-basic-offset (car pair)
231 c-guess-guessed-offsets-alist (cdr pair)))))
232
233
234 (defun c-guess-examine (start end accumulator)
235 (let ((reporter (when (fboundp 'make-progress-reporter)
236 (make-progress-reporter "Examining Indentation "
237 start
238 end))))
239 (save-excursion
240 (goto-char start)
241 (while (< (point) end)
242 (unless (c-guess-empty-line-p)
243 (mapc (lambda (s)
244 (setq accumulator (or (c-guess-accumulate accumulator s)
245 accumulator)))
246 (c-save-buffer-state () (c-guess-basic-syntax))))
247 (when reporter (progress-reporter-update reporter (point)))
248 (forward-line 1)))
249 (when reporter (progress-reporter-done reporter)))
250 (c-guess-sort-accumulator accumulator))
251
252 (defun c-guess-guess (accumulator)
253 ;; Guess basic-offset and offsets-alist from ACCUMULATOR,
254 ;; then return them as a cons: (basic-offset . offsets-alist).
255 ;; See the comments at `c-guess-accumulator' about the format
256 ;; ACCUMULATOR.
257 (let* ((basic-offset (c-guess-make-basic-offset accumulator))
258 (typical-offsets-alist (c-guess-make-offsets-alist
259 accumulator))
260 (symbolic-offsets-alist (c-guess-symbolize-offsets-alist
261 typical-offsets-alist
262 basic-offset))
263 (merged-offsets-alist (c-guess-merge-offsets-alists
264 (copy-tree c-guess-conversions)
265 symbolic-offsets-alist)))
266 (cons basic-offset merged-offsets-alist)))
267
268 (defun c-guess-current-offset (relpos)
269 ;; Calculate relative indentation (point) to RELPOS.
270 (- (progn (back-to-indentation)
271 (current-column))
272 (save-excursion
273 (goto-char relpos)
274 (current-column))))
275
276 (defun c-guess-accumulate (accumulator syntax-element)
277 ;; Add SYNTAX-ELEMENT to ACCUMULATOR.
278 (let ((symbol (car syntax-element))
279 (relpos (cadr syntax-element)))
280 (when (numberp relpos)
281 (let ((offset (c-guess-current-offset relpos)))
282 (when (< (abs offset) c-guess-offset-threshold)
283 (c-guess-accumulate-offset accumulator
284 symbol
285 offset))))))
286
287 (defun c-guess-accumulate-offset (accumulator symbol offset)
288 ;; Added SYMBOL and OFFSET to ACCUMULATOR. See
289 ;; `c-guess-accumulator' about the structure of ACCUMULATOR.
290 (let* ((entry (assoc symbol accumulator))
291 (counters (cdr entry))
292 counter)
293 (if entry
294 (progn
295 (setq counter (assoc offset counters))
296 (if counter
297 (setcdr counter (1+ (cdr counter)))
298 (setq counters (cons (cons offset 1) counters))
299 (setcdr entry counters))
300 accumulator)
301 (cons (cons symbol (cons (cons offset 1) nil)) accumulator))))
302
303 (defun c-guess-sort-accumulator (accumulator)
304 ;; Sort each element of ACCUMULATOR by the number-of-times. See
305 ;; `c-guess-accumulator' for more details.
306 (mapcar
307 (lambda (entry)
308 (let ((symbol (car entry))
309 (counters (cdr entry)))
310 (cons symbol (sort counters
311 (lambda (a b)
312 (if (> (cdr a) (cdr b))
313 t
314 (and
315 (eq (cdr a) (cdr b))
316 (< (car a) (car b)))))))))
317 accumulator))
318
319 (defun c-guess-make-offsets-alist (accumulator)
320 ;; Throw away the rare cases in accumulator and make an offsets-alist structure.
321 (mapcar
322 (lambda (entry)
323 (cons (car entry)
324 (car (car (cdr entry)))))
325 accumulator))
326
327 (defun c-guess-merge-offsets-alists (strong weak)
328 ;; Merge two offsets-alists into one.
329 ;; When two offsets-alists have the same symbol
330 ;; entry, give STRONG priority over WEAK.
331 (mapc
332 (lambda (weak-elt)
333 (unless (assoc (car weak-elt) strong)
334 (setq strong (cons weak-elt strong))))
335 weak)
336 strong)
337
338 (defun c-guess-make-basic-offset (accumulator)
339 ;; As candidate for `c-basic-offset', find the most frequently appearing
340 ;; indentation-offset in ACCUMULATOR.
341 (let* (;; Drop the value related to `c' syntactic-symbol.
342 ;; (`c': Inside a multiline C style block comment.)
343 ;; The impact for values of `c' is too large for guessing
344 ;; `basic-offset' if the target source file is small and its license
345 ;; notice is at top of the file.
346 (accumulator (assq-delete-all 'c (copy-tree accumulator)))
347 ;; Drop syntactic-symbols from ACCUMULATOR.
348 (alist (apply #'append (mapcar (lambda (elts)
349 (mapcar (lambda (elt)
350 (cons (abs (car elt))
351 (cdr elt)))
352 (cdr elts)))
353 accumulator)))
354 ;; Gather all indentation-offsets other than 0.
355 ;; 0 is meaningless as `basic-offset'.
356 (offset-list (delete 0
357 (delete-dups (mapcar
358 (lambda (elt) (car elt))
359 alist))))
360 ;; Sum of number-of-times for offset:
361 ;; (offset . sum)
362 (summed (mapcar (lambda (offset)
363 (cons offset
364 (apply #'+
365 (mapcar (lambda (a)
366 (if (eq (car a) offset)
367 (cdr a)
368 0))
369 alist))))
370 offset-list)))
371 ;;
372 ;; Find the majority.
373 ;;
374 (let ((majority '(nil . 0)))
375 (while summed
376 (when (< (cdr majority) (cdr (car summed)))
377 (setq majority (car summed)))
378 (setq summed (cdr summed)))
379 (car majority))))
380
381 (defun c-guess-symbolize-offsets-alist (offsets-alist basic-offset)
382 ;; Convert the representation of OFFSETS-ALIST to an alist using
383 ;; `+', `-', `++', `--', `*', or `/'. These symbols represent
384 ;; a value relative to BASIC-OFFSET. Their meaning can be found
385 ;; in the CC Mode manual.
386 (mapcar
387 (lambda (elt)
388 (let ((s (car elt))
389 (v (cdr elt)))
390 (cond
391 ((integerp v)
392 (cons s (c-guess-symbolize-integer v
393 basic-offset)))
394 (t elt))))
395 offsets-alist))
396
397 (defun c-guess-symbolize-integer (int basic-offset)
398 (let ((aint (abs int)))
399 (cond
400 ((eq int basic-offset) '+)
401 ((eq aint basic-offset) '-)
402 ((eq int (* 2 basic-offset)) '++)
403 ((eq aint (* 2 basic-offset)) '--)
404 ((eq (* 2 int) basic-offset) '*)
405 ((eq (* 2 aint) basic-offset) '-)
406 (t int))))
407
408 (defun c-guess-style-name ()
409 ;; Make a style name for the guessed style.
410 (format "*c-guess*:%s" (buffer-file-name)))
411
412 (defun c-guess-make-style (basic-offset offsets-alist)
413 (when basic-offset
414 ;; Make a style from guessed values.
415 (let* ((offsets-alist (c-guess-merge-offsets-alists
416 offsets-alist
417 c-offsets-alist)))
418 `((c-basic-offset . ,basic-offset)
419 (c-offsets-alist . ,offsets-alist)))))
420
421 ;;;###autoload
422 (defun c-guess-install (&optional style-name)
423 "Install the latest guessed style into the current buffer.
424 \(This guessed style is a combination of `c-guess-guessed-basic-offset',
425 `c-guess-guessed-offsets-alist' and `c-offsets-alist'.)
426
427 The style is entered into CC Mode's style system by
428 `c-add-style'. Its name is either STYLE-NAME, or a name based on
429 the absolute file name of the file if STYLE-NAME is nil."
430 (interactive "sNew style name (empty for default name): ")
431 (let* ((style (c-guess-make-style c-guess-guessed-basic-offset
432 c-guess-guessed-offsets-alist)))
433 (if style
434 (let ((style-name (or (if (equal style-name "")
435 nil
436 style-name)
437 (c-guess-style-name))))
438 (c-add-style style-name style t)
439 (message "Style \"%s\" is installed" style-name))
440 (error "Not yet guessed"))))
441
442 (defun c-guess-dump-accumulator ()
443 "Show `c-guess-accumulator'."
444 (interactive)
445 (with-output-to-temp-buffer "*Accumulated Examined Indent Information*"
446 (pp c-guess-accumulator)))
447
448 (defun c-guess-reset-accumulator ()
449 "Reset `c-guess-accumulator'."
450 (interactive)
451 (setq c-guess-accumulator nil))
452
453 (defun c-guess-dump-guessed-values ()
454 "Show `c-guess-guessed-basic-offset' and `c-guess-guessed-offsets-alist'."
455 (interactive)
456 (with-output-to-temp-buffer "*Guessed Values*"
457 (princ "basic-offset: \n\t")
458 (pp c-guess-guessed-basic-offset)
459 (princ "\n\n")
460 (princ "offsets-alist: \n")
461 (pp c-guess-guessed-offsets-alist)
462 ))
463
464 (defun c-guess-dump-guessed-style (&optional printer)
465 "Show the guessed style.
466 `pp' is used to print the style but if PRINTER is given,
467 PRINTER is used instead. If PRINTER is not `nil', it
468 is called with one argument, the guessed style."
469 (interactive)
470 (let ((style (c-guess-make-style c-guess-guessed-basic-offset
471 c-guess-guessed-offsets-alist)))
472 (if style
473 (with-output-to-temp-buffer "*Guessed Style*"
474 (funcall (if printer printer 'pp) style))
475 (error "Not yet guessed"))))
476
477 (defun c-guess-guessed-syntactic-symbols ()
478 ;; Return syntactic symbols in c-guess-guessed-offsets-alist
479 ;; but not in c-guess-conversions.
480 (let ((alist c-guess-guessed-offsets-alist)
481 elt
482 (symbols nil))
483 (while alist
484 (setq elt (car alist)
485 alist (cdr alist))
486 (unless (assq (car elt) c-guess-conversions)
487 (setq symbols (cons (car elt)
488 symbols))))
489 symbols))
490
491 (defun c-guess-view-reorder-offsets-alist-in-style (style guessed-syntactic-symbols)
492 ;; Reorder the `c-offsets-alist' field of STYLE.
493 ;; If an entry in `c-offsets-alist' holds a guessed value, move it to
494 ;; front in the field. In addition alphabetical sort by entry name is done.
495 (setq style (copy-tree style))
496 (let ((offsets-alist-cell (assq 'c-offsets-alist style))
497 (guessed-syntactic-symbols (c-guess-guessed-syntactic-symbols)))
498 (setcdr offsets-alist-cell
499 (sort (cdr offsets-alist-cell)
500 (lambda (a b)
501 (let ((a-guessed? (memq (car a) guessed-syntactic-symbols))
502 (b-guessed? (memq (car b) guessed-syntactic-symbols)))
503 (cond
504 ((or (and a-guessed? b-guessed?)
505 (not (or a-guessed? b-guessed?)))
506 (string-lessp (symbol-name (car a))
507 (symbol-name (car b))))
508 (a-guessed? t)
509 (b-guessed? nil)))))))
510 style)
511
512 (defun c-guess-view-mark-guessed-entries (guessed-syntactic-symbols)
513 ;; Put " ; Guess value" markers on all entries which hold
514 ;; guessed values.
515 ;; `c-basic-offset' is always considered as holding a guessed value.
516 (let ((needs-markers (cons 'c-basic-offset
517 guessed-syntactic-symbols)))
518 (while needs-markers
519 (goto-char (point-min))
520 (when (search-forward (concat "("
521 (symbol-name (car needs-markers))
522 " ") nil t)
523 (move-end-of-line 1)
524 (comment-dwim nil)
525 (insert " Guessed value"))
526 (setq needs-markers
527 (cdr needs-markers)))))
528
529 (defun c-guess-view (&optional with-name)
530 "Emit emacs lisp code which defines the last guessed style.
531 So you can put the code into .emacs if you prefer the
532 guessed code.
533 \"STYLE NAME HERE\" is used as the name for the style in the
534 emitted code. If WITH-NAME is given, it is used instead.
535 WITH-NAME is expected as a string but if this function
536 called interactively with prefix argument, the value for
537 WITH-NAME is asked to the user."
538 (interactive "P")
539 (let* ((temporary-style-name (cond
540 ((stringp with-name) with-name)
541 (with-name (read-from-minibuffer
542 "New style name: "))
543 (t
544 "STYLE NAME HERE")))
545 (guessed-style-name (c-guess-style-name))
546 (current-style-name c-indentation-style)
547 (parent-style-name (if (string-equal guessed-style-name
548 current-style-name)
549 ;; The guessed style is already installed.
550 ;; It cannot be used as the parent style.
551 ;; Use the default style for the current
552 ;; major mode as the parent style.
553 (cc-choose-style-for-mode
554 major-mode
555 c-default-style)
556 ;; The guessed style is not installed yet.
557 current-style-name)))
558 (c-guess-dump-guessed-style
559 (lambda (style)
560 (let ((guessed-syntactic-symbols (c-guess-guessed-syntactic-symbols)))
561 (pp `(c-add-style ,temporary-style-name
562 ',(cons parent-style-name
563 (c-guess-view-reorder-offsets-alist-in-style
564 style
565 guessed-syntactic-symbols))))
566 (with-current-buffer standard-output
567 (lisp-interaction-mode)
568 (c-guess-view-mark-guessed-entries
569 guessed-syntactic-symbols)
570 (buffer-enable-undo)))))))
571
572 \f
573 (cc-provide 'cc-guess)
574 ;;; cc-guess.el ends here