]> code.delx.au - gnu-emacs/blob - lisp/cedet/semantic/wisent/python.el
Add 2012 to FSF copyright years for Emacs files
[gnu-emacs] / lisp / cedet / semantic / wisent / python.el
1 ;;; wisent-python.el --- Semantic support for Python
2
3 ;; Copyright (C) 2002, 2004, 2006-2012 Free Software Foundation, Inc.
4
5 ;; Author: Richard Kim <emacs18@gmail.com>
6 ;; Maintainer: Richard Kim <emacs18@gmail.com>
7 ;; Created: June 2002
8 ;; Keywords: syntax
9
10 ;; This file is part of GNU Emacs.
11
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
16
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
21
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
24
25 ;;; Commentary:
26 ;;
27 ;; Parser support for Python.
28
29 ;;; Code:
30
31 (require 'semantic/wisent)
32 (require 'semantic/wisent/python-wy)
33 (require 'semantic/dep)
34 (require 'semantic/ctxt)
35
36 \f
37 ;;; Lexical analysis
38 ;;
39
;; Python string literals are delimited by either single quotes or
;; double quotes, e.g., "I'm a string" and 'I too am a string'.
;; In addition a literal can carry a prefix, in any letter case:
;;   r          raw, i.e., normal backslash substitutions are
;;              suppressed.  For example, r"01\n34" is a string with
;;              six characters 0, 1, \, n, 3 and 4;
;;   u, ur      the following string is Unicode (optionally raw);
;;   b, br, rb  the following literal is a bytes object (optionally raw);
;;   f, fr, rf  the following string is a formatted string literal
;;              (optionally raw).
;; The prefix, when present, is captured as match group 1;
;; `wisent-python-forward-string' uses that group to skip it.
(defconst wisent-python-string-re
  (concat (regexp-opt '("r" "R" "u" "U" "b" "B" "f" "F"
                        "ur" "uR" "Ur" "UR"
                        "br" "bR" "Br" "BR"
                        "rb" "rB" "Rb" "RB"
                        "fr" "fR" "Fr" "FR"
                        "rf" "rF" "Rf" "RF")
                      t)
          "?['\"]")
  "Regexp matching beginning of a Python string.
It matches an optional string prefix followed by the opening quote
character.  Match group 1 is the prefix; it is unset when the literal
has no prefix.")
51
;; This file only reads the variable, through
;; `wisent-python-implicit-line-joining-p'; nothing here sets it.
;; NOTE(review): presumably let-bound by the grammar support code in
;; semantic/wisent/python-wy while a block is re-parsed -- confirm.
(defvar wisent-python-EXPANDING-block nil
  "Non-nil when expanding a paren block for Python lexical analyzer.")
54
(defun wisent-python-implicit-line-joining-p ()
  "Return non-nil if implicit line joining is active.
That is, if inside an expression in parentheses, square brackets or
curly braces.
The value returned is simply that of `wisent-python-EXPANDING-block'."
  wisent-python-EXPANDING-block)
60
(defsubst wisent-python-forward-string ()
  "Move point at the end of the Python string at point.
Do nothing if point is not at the beginning of a string, as decided
by `wisent-python-string-re'.

A triple-quoted string is scanned textually up to the first closing
triple quote that is not escaped by a backslash.  This covers empty
triple-quoted strings and strings whose text begins with, or contains
escaped occurrences of, the quote character.  Any other string is
skipped with `forward-sexp' and therefore follows the syntax table of
the current buffer.

Signal an error if the string is not terminated."
  (when (looking-at wisent-python-string-re)
    ;; Skip the prefix, if any (match group 1 of the regexp).
    (and (match-end 1) (goto-char (match-end 1)))
    ;; Skip the quoted part.
    (if (looking-at "\"\"\"\\|'''")
        (let ((quote-delim (match-string 0))
              (found-end nil))
          ;; Step over the opening delimiter.
          (goto-char (match-end 0))
          (while (not found-end)
            ;; Signals an error when no further delimiter exists, which
            ;; is how an unterminated string is reported to the caller.
            (search-forward quote-delim)
            (if (save-excursion
                  (goto-char (match-beginning 0))
                  ;; An odd number of backslashes right before the
                  ;; match means its first quote is escaped.
                  (/= 0 (% (skip-chars-backward "\\\\") 2)))
                ;; Not the real end.  Resume one character after the
                ;; escaped quote, because the true closing delimiter
                ;; may overlap the text just matched.
                (goto-char (1+ (match-beginning 0)))
              (setq found-end t))))
      (forward-sexp 1))))
73
(defun wisent-python-forward-line ()
  "Advance point to the start of the following logical line.
A logical line usually ends with the current physical line, but it
extends further when strings, bracketed expressions or a trailing
backslash continue it.  Blank lines and comment lines that follow are
skipped as well, so that point ends up at the beginning of the next
line holding code.  When the logical line runs up to the end of the
buffer, point is left there."
  ;; Walk across the logical line one syntactic item at a time.
  (while (not (eolp))
    (let ((origin (point)))
      (cond
       ;; Python string literal: jump to its end.
       ((looking-at wisent-python-string-re)
        (wisent-python-forward-string))
       ;; Comment starter: nothing but comment text up to end of line.
       ((looking-at "\\s<")
        (end-of-line))
       ;; Open bracket or generic string quote: skip the whole sexp.
       ((looking-at "\\(\\s(\\|\\s\"\\)")
        (forward-sexp 1))
       ;; Explicit line continuation character (backslash): carry on
       ;; with the next physical line.
       ((looking-at "\\s\\")
        (forward-line 1))
       ;; Anything else: run over white space, word, symbol,
       ;; punctuation, paired delimiter and close bracket characters.
       ((skip-syntax-forward "-w_.$)")))
      ;; Each clause is expected to move point; otherwise we would
      ;; spin forever.
      (if (= origin (point))
          (error "python-forward-line endless loop detected"))))
  ;; Point is at end of line: pass over blank and comment lines.
  (forward-comment (point-max))
  ;; Settle at the beginning of the line reached.
  (or (eobp) (beginning-of-line)))
107
(defun wisent-python-forward-line-skip-indented ()
  "Move point to the next logical line that is not indented deeper.
Starting from the current line, advance one logical line at a time
and stop at the first one whose indentation is less than or equal to
the indentation of the starting line, or at the end of the buffer."
  (let ((base-indent (current-indentation)))
    ;; Always advance at least once, then keep going while the line
    ;; reached is nested deeper than the starting line.
    (wisent-python-forward-line)
    (while (not (or (eobp)
                    (<= (current-indentation) base-indent)))
      (wisent-python-forward-line))))
116
(defun wisent-python-end-of-block ()
  "Move point to the end of the current block.
The block is made of the current line and the following logical lines
indented at least as much as it.  Comments trailing the block are left
out of it."
  (let ((block-indent (current-indentation)))
    ;; Hop over sibling statements (and their nested lines) until a
    ;; line indented less than the block, or the end of buffer, shows up.
    (while (not (or (eobp)
                    (< (current-indentation) block-indent)))
      (wisent-python-forward-line-skip-indented)))
  ;; Back up over final comments so they stay outside the block bounds.
  (forward-comment (- (point-max)))
  ;; Finish on a line boundary.
  (or (bolp) (forward-line 1)))
126
127 ;; Indentation stack, what the Python (2.3) language spec. says:
128 ;;
129 ;; The indentation levels of consecutive lines are used to generate
130 ;; INDENT and DEDENT tokens, using a stack, as follows.
131 ;;
132 ;; Before the first line of the file is read, a single zero is pushed
133 ;; on the stack; this will never be popped off again. The numbers
134 ;; pushed on the stack will always be strictly increasing from bottom
135 ;; to top. At the beginning of each logical line, the line's
136 ;; indentation level is compared to the top of the stack. If it is
137 ;; equal, nothing happens. If it is larger, it is pushed on the stack,
138 ;; and one INDENT token is generated. If it is smaller, it must be one
139 ;; of the numbers occurring on the stack; all numbers on the stack
140 ;; that are larger are popped off, and for each number popped off a
141 ;; DEDENT token is generated. At the end of the file, a DEDENT token
142 ;; is generated for each number remaining on the stack that is larger
143 ;; than zero.
;;
;; The stack itself, most recent indentation first.  It gets no global
;; value here: the `semantic-lex' override for `python-mode' defined in
;; this file binds it to (list 0) around each run of the lexer, and
;; `wisent-python-lex-beginning-of-line' pushes to and pops from it.
(defvar wisent-python-indent-stack)
145
(define-lex-analyzer wisent-python-lex-beginning-of-line
  "Detect and create Python indentation tokens at beginning of line.
Only active at the beginning of a line, and when implicit line joining
is not in effect.  The indentation of the line is compared with the
top of `wisent-python-indent-stack':
- blank and comment-only lines are skipped without touching the stack;
- an equal indentation produces no token;
- a larger indentation produces an INDENT token and is pushed on the
  stack, or produces a single INDENT_BLOCK token covering the whole
  block when `semantic-lex-maximum-depth' is reached;
- a smaller indentation produces one DEDENT token for each level
  popped off the stack."
  (and
   (bolp) (not (wisent-python-implicit-line-joining-p))
   (let ((last-indent (car wisent-python-indent-stack))
         ;; Position of the beginning of line, start of the tokens made.
         (last-pos (point))
         (curr-indent (current-indentation)))
     ;; Move to the first non-white space character of the line.
     (skip-syntax-forward "-")
     (cond
      ;; Skip comments and blank lines. No change in indentation.
      ((or (eolp) (looking-at semantic-lex-comment-regex))
       (forward-comment (point-max))
       (or (eobp) (beginning-of-line))
       (setq semantic-lex-end-point (point))
       ;; Loop lexer to handle the next line.
       t)
      ;; No change in indentation.
      ((= curr-indent last-indent)
       (setq semantic-lex-end-point (point))
       ;; Try next analyzers.
       nil)
      ;; Indentation increased
      ((> curr-indent last-indent)
       (if (or (not semantic-lex-maximum-depth)
               (< semantic-lex-current-depth semantic-lex-maximum-depth))
           (progn
             ;; Return an INDENT lexical token
             (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
             (push curr-indent wisent-python-indent-stack)
             (semantic-lex-push-token
              (semantic-lex-token 'INDENT last-pos (point))))
         ;; Add an INDENT_BLOCK token
         ;; It spans from the beginning of the current line to the end
         ;; of the block; nothing is pushed on the indentation stack.
         (semantic-lex-push-token
          (semantic-lex-token
           'INDENT_BLOCK
           (progn (beginning-of-line) (point))
           (semantic-lex-unterminated-syntax-protection 'INDENT_BLOCK
             (wisent-python-end-of-block)
             (point)))))
       ;; Loop lexer to handle tokens in current line.
       t)
      ;; Indentation decreased
      (t
       ;; Pop items from indentation stack
       ;; One DEDENT token is made for each level popped.
       (while (< curr-indent last-indent)
         (pop wisent-python-indent-stack)
         (setq semantic-lex-current-depth (1- semantic-lex-current-depth)
               last-indent (car wisent-python-indent-stack))
         (semantic-lex-push-token
          (semantic-lex-token 'DEDENT last-pos (point))))
       ;; If pos did not change, then we must return nil so that
       ;; other lexical analyzers can be run.
       (/= last-pos (point))))))
  ;; All the work was done in the above analyzer matching condition.
  )
201
(define-lex-regex-analyzer wisent-python-lex-end-of-line
  "Detect the end of a line and create a Python NEWLINE token for it.
When implicit line joining is active the following line continues the
current one, so the newline is merely stepped over and no token is
created."
  "\\(\n\\|\\s>\\)"
  (cond
   ;; Inside a bracketed expression: the line break is not significant.
   ((wisent-python-implicit-line-joining-p)
    (setq semantic-lex-end-point (match-end 0)))
   ;; Otherwise the line break terminates the logical line.
   (t
    (semantic-lex-push-token
     (semantic-lex-token 'NEWLINE (point) (match-end 0))))))
211
(define-lex-regex-analyzer wisent-python-lex-string
  "Detect a Python string and create a STRING_LITERAL token for it.
The token starts at point, that is at the string prefix if there is
one, and ends where `wisent-python-forward-string' leaves point."
  wisent-python-string-re
  (let ((literal-beg (point)))
    (semantic-lex-push-token
     (semantic-lex-token
      'STRING_LITERAL
      literal-beg
      ;; Scanning errors are handed over to Semantic as unterminated
      ;; STRING_LITERAL syntax.
      (semantic-lex-unterminated-syntax-protection 'STRING_LITERAL
        (wisent-python-forward-string)
        (point))))))
222
(define-lex-regex-analyzer wisent-python-lex-ignore-backslash
  "Detect and step over a backslash used for explicit line joining.
Such a backslash must be the last token of a physical line; anywhere
else outside a string literal it is illegal.  No token is created."
  "\\s\\\\s-*$"
  ;; Lexing resumes at the first non-whitespace character of the next
  ;; physical line, leaving the backslash and the line break behind.
  (setq semantic-lex-end-point
        (progn
          (forward-line 1)
          (skip-syntax-forward "-")
          (point))))
233
;; The order of the analyzers listed below matters, as the comments on
;; the individual entries explain.
(define-lex wisent-python-lexer
  "Lexical Analyzer for Python code."
  ;; Must analyze beginning of line first to handle indentation.
  wisent-python-lex-beginning-of-line
  ;; Creates NEWLINE tokens, or skips newlines on implicit line joining.
  wisent-python-lex-end-of-line
  ;; Must analyze string before symbol to handle string prefix.
  wisent-python-lex-string
  ;; Analyzers auto-generated from grammar.
  wisent-python-wy--<number>-regexp-analyzer
  wisent-python-wy--<keyword>-keyword-analyzer
  wisent-python-wy--<symbol>-regexp-analyzer
  wisent-python-wy--<block>-block-analyzer
  wisent-python-wy--<punctuation>-string-analyzer
  ;; Ignored things.
  wisent-python-lex-ignore-backslash
  semantic-lex-ignore-whitespace
  semantic-lex-ignore-comments
  ;; Signal error on unhandled syntax.
  semantic-lex-default-action)
253 \f
254 ;;; Overridden Semantic API.
255 ;;
(define-mode-local-override semantic-lex python-mode
  (start end &optional depth length)
  "Lexically analyze Python code in current buffer.
START, END, DEPTH and LENGTH have the meaning described for the
function `semantic-lex'.
The analysis proper is done by `wisent-python-lexer', run with a fresh
`wisent-python-indent-stack'.  One Python DEDENT token, located at END,
is then appended to the resulting token stream for every indentation
level left open on that stack."
  (let ((wisent-python-indent-stack (list 0)))
    ;; The lexer runs before `semantic-lex-token-stream' is rebound, so
    ;; the rebinding only collects the closing DEDENT tokens.
    (let ((stream (wisent-python-lexer start end depth length))
          (semantic-lex-token-stream nil))
      ;; Close whatever remains open above the bottom 0 of the stack.
      (while (> (car wisent-python-indent-stack) 0)
        (pop wisent-python-indent-stack)
        (semantic-lex-push-token (semantic-lex-token 'DEDENT end end)))
      (nconc stream (nreverse semantic-lex-token-stream)))))
271
(define-mode-local-override semantic-get-local-variables python-mode ()
  "Get the local variables based on point's context.
To be implemented for Python!  For now just return nil."
  ;; Placeholder: no local variable is ever reported in `python-mode'.
  nil)
276
;; The include path for `python-mode' starts out empty (nil).
;; NOTE(review): the macro presumably comes from semantic/dep, required
;; at the top of this file, and declares the named variable as a
;; customizable, mode-local include path -- confirm against its
;; definition.
(defcustom-mode-local-semantic-dependency-system-include-path
  python-mode semantic-python-dependency-system-include-path
  nil
  "The system include path used by Python language.")
281
282 ;;; Enable Semantic in `python-mode'.
283 ;;
284
285 ;;;###autoload
(defun wisent-python-default-setup ()
  "Set up the current buffer for parsing Python code with Semantic.
Install the parser generated from the Python grammar, then set the
Semantic and imenu variables that describe the language."
  (wisent-python-wy--install-parser)
  (make-local-variable 'parse-sexp-ignore-comments)
  (setq parse-sexp-ignore-comments t)
  ;; Character used to separate a parent from its child.
  (setq semantic-type-relation-separator-character '("."))
  (setq semantic-command-separation-character ";")
  ;; Setting `semantic-lex-analyzer' to `wisent-python-lexer' is no
  ;; longer necessary, as `semantic-lex' is overridden in python-mode.

  ;; Let Semantic take over from the imenu index function provided by
  ;; python.  The python one, if it uses the senator advice, will hang
  ;; Emacs unrecoverably.
  (setq imenu-create-index-function 'semantic-create-imenu-index)
  ;; I need a python guru to update these lists:
  (setq semantic-symbol->name-assoc-list-for-type-parts
        '((variable . "Variables")
          (function . "Methods")))
  (setq semantic-symbol->name-assoc-list
        '((type . "Classes")
          (variable . "Variables")
          (function . "Functions")
          (include . "Imports")
          (package . "Package")
          (code . "Code"))))
313
;; Run the Semantic setup from the hook of `python-mode'.
;;;###autoload
(add-hook 'python-mode-hook 'wisent-python-default-setup)

;; Make sure the newer python modes pull in the same python
;; mode overrides.
(define-child-mode python-2-mode python-mode "Python 2 mode")
(define-child-mode python-3-mode python-mode "Python 3 mode")
321
322 \f
323 ;;; Test
324 ;;
(defun wisent-python-lex-buffer ()
  "Lex the whole current buffer and display the resulting tokens.
The buffer is analyzed with `semantic-lex' at depth 0, and the token
stream is pretty-printed into the buffer *wisent-python-lexer*, which
is then popped up."
  (interactive)
  (semantic-lex-init)
  (let* ((tokens (semantic-lex (point-min) (point-max) 0))
         (output (get-buffer-create "*wisent-python-lexer*")))
    (with-current-buffer output
      ;; Replace any previous report with the new one.
      (erase-buffer)
      (pp tokens output)
      (goto-char (point-min))
      (pop-to-buffer output))))
335
336 (provide 'semantic/wisent/python)
337
338 ;; Local variables:
339 ;; generated-autoload-file: "../loaddefs.el"
340 ;; generated-autoload-load-name: "semantic/wisent/python"
341 ;; End:
342
343 ;;; semantic/wisent/python.el ends here