]> code.delx.au - gnu-emacs/blob - lisp/url/url-parse.el
(ido-enable-prefix): Improve previous doc fix.
[gnu-emacs] / lisp / url / url-parse.el
1 ;;; url-parse.el --- Uniform Resource Locator parser
2
3 ;; Copyright (C) 1996, 1997, 1998, 1999, 2004,
4 ;; 2005, 2006 Free Software Foundation, Inc.
5
6 ;; Keywords: comm, data, processes
7
8 ;; This file is part of GNU Emacs.
9 ;;
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14 ;;
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19 ;;
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 ;; Boston, MA 02110-1301, USA.
24
25 ;;; Commentary:
26
27 ;;; Code:
28
29 (require 'url-vars)
30
31 (autoload 'url-scheme-get-property "url-methods")
32
(defmacro url-type (urlobj)
  "Return the TYPE slot (element 0) of the parsed URL vector URLOBJ.
The expansion is a plain `aref' form."
  (list 'aref urlobj 0))
35
(defmacro url-user (urlobj)
  "Return the USER slot (element 1) of the parsed URL vector URLOBJ.
The expansion is a plain `aref' form."
  (list 'aref urlobj 1))
38
(defmacro url-password (urlobj)
  "Return the PASSWORD slot (element 2) of the parsed URL vector URLOBJ.
The expansion is a plain `aref' form."
  (list 'aref urlobj 2))
41
(defmacro url-host (urlobj)
  "Return the HOST slot (element 3) of the parsed URL vector URLOBJ.
The expansion is a plain `aref' form."
  (list 'aref urlobj 3))
44
(defmacro url-port (urlobj)
  "Return the port of the parsed URL vector URLOBJ.
This is the PORT slot (element 4) when that is non-nil.  Otherwise,
if URLOBJ is marked as full (see `url-fullness'), it is the
`default-port' property of the URL's type as reported by
`url-scheme-get-property'; else it is nil.
The URLOBJ form is evaluated exactly once."
  ;; Bind the argument to an uninterned symbol so that a URLOBJ form
  ;; with side effects is not evaluated up to three times, and so that
  ;; the binding cannot capture a variable used in the caller's form.
  (let ((obj (make-symbol "urlobj")))
    `(let ((,obj ,urlobj))
       (or (aref ,obj 4)
           (if (url-fullness ,obj)
               (url-scheme-get-property (url-type ,obj) 'default-port))))))
49
(defmacro url-filename (urlobj)
  "Return the FILE slot (element 5) of the parsed URL vector URLOBJ.
The expansion is a plain `aref' form."
  (list 'aref urlobj 5))
52
(defmacro url-target (urlobj)
  "Return the TARGET slot (element 6) of the parsed URL vector URLOBJ.
The expansion is a plain `aref' form."
  (list 'aref urlobj 6))
55
(defmacro url-attributes (urlobj)
  "Return the ATTRIBUTES slot (element 7) of the parsed URL vector URLOBJ.
The expansion is a plain `aref' form."
  (list 'aref urlobj 7))
58
(defmacro url-fullness (urlobj)
  "Return the FULL slot (element 8) of the parsed URL vector URLOBJ.
The expansion is a plain `aref' form."
  (list 'aref urlobj 8))
61
(defmacro url-set-type (urlobj type)
  "Store TYPE in the TYPE slot (element 0) of the URL vector URLOBJ.
The expansion is a plain `aset' form."
  (list 'aset urlobj 0 type))
64
(defmacro url-set-user (urlobj user)
  "Store USER in the USER slot (element 1) of the URL vector URLOBJ.
The expansion is a plain `aset' form."
  (list 'aset urlobj 1 user))
67
(defmacro url-set-password (urlobj pass)
  "Store PASS in the PASSWORD slot (element 2) of the URL vector URLOBJ.
The expansion is a plain `aset' form."
  (list 'aset urlobj 2 pass))
70
(defmacro url-set-host (urlobj host)
  "Store HOST in the HOST slot (element 3) of the URL vector URLOBJ.
The expansion is a plain `aset' form."
  (list 'aset urlobj 3 host))
73
(defmacro url-set-port (urlobj port)
  "Store PORT in the PORT slot (element 4) of the URL vector URLOBJ.
The expansion is a plain `aset' form."
  (list 'aset urlobj 4 port))
76
(defmacro url-set-filename (urlobj file)
  "Store FILE in the FILE slot (element 5) of the URL vector URLOBJ.
The expansion is a plain `aset' form."
  (list 'aset urlobj 5 file))
79
(defmacro url-set-target (urlobj targ)
  "Store TARG in the TARGET slot (element 6) of the URL vector URLOBJ.
The expansion is a plain `aset' form."
  (list 'aset urlobj 6 targ))
82
(defmacro url-set-attributes (urlobj targ)
  "Store TARG in the ATTRIBUTES slot (element 7) of the URL vector URLOBJ.
Despite its name, TARG here is the new attributes value.
The expansion is a plain `aset' form."
  (list 'aset urlobj 7 targ))
85
(defmacro url-set-full (urlobj val)
  "Store VAL in the FULL slot (element 8) of the URL vector URLOBJ.
The expansion is a plain `aset' form."
  (list 'aset urlobj 8 val))
88
89 ;;;###autoload
(defun url-recreate-url (urlobj)
  "Recreate a URL string from the parsed URLOBJ.
URLOBJ is a vector as returned by `url-generic-parse-url'.  The
result is the concatenation of: the type followed by \":\" (omitted
when URLOBJ has no type, i.e. for a relative URL); \"//\" when a
host is present; \"USER@\" or \"USER:PASSWORD@\" when a user is
present; the host; \":PORT\" when the port differs from the
`default-port' property of the type; the filename, or \"/\" when
that is nil; the attributes as rendered by
`url-recreate-url-attributes'; and \"#TARGET\" when a target is
present."
  (let ((type (url-type urlobj))
        (user (url-user urlobj))
        (pass (url-password urlobj))
        (host (url-host urlobj))
        (port (url-port urlobj))
        (target (url-target urlobj)))
    (concat
     ;; A relative URL has no type; emitting a bare ":" in front of
     ;; its filename would produce a string that is not a URL at all.
     (if type (concat type ":"))
     (if host "//" "")
     (if user
         (concat user
                 (if pass (concat ":" pass))
                 "@"))
     host
     ;; Only spell out the port when it is not the default for the type.
     (if (and port
              (not (equal port
                          (url-scheme-get-property type 'default-port))))
         (format ":%d" port))
     (or (url-filename urlobj) "/")
     (url-recreate-url-attributes urlobj)
     (if target (concat "#" target)))))
107
(defun url-recreate-url-attributes (urlobj)
  "Return the attributes of URLOBJ rendered as part of a URL string.
The result is nil when URLOBJ has no attributes.  Otherwise it is
\"?\" followed by the attributes joined with \";\", where each
attribute is a cons cell rendered as \"CAR=CDR\", or as just
\"CAR\" when its cdr is nil."
  (let ((attrs (url-attributes urlobj)))
    (if attrs
        (concat "?"
                (mapconcat (lambda (pair)
                             (let ((key (car pair))
                                   (val (cdr pair)))
                               (if val
                                   (concat key "=" val)
                                 key)))
                           attrs
                           ";")))))
117
118 ;;;###autoload
(defun url-generic-parse-url (url)
  "Return a vector of the parts of URL.
Format is:
\[TYPE USER PASSWORD HOST PORT FILE TARGET ATTRIBUTES FULL\]

If URL is nil, every element is nil.  If URL does not match
`url-nonrelative-link', or starts with a slash, it is treated as
relative: the whole string is stored in FILE and every other
element is nil.

Otherwise URL is split along the lines of RFC 3986.  TYPE is the
downcased scheme.  USER and PASSWORD come from a \"USER:PASSWORD@\"
prefix of the authority.  HOST is downcased, and percent-escapes
in it are decoded with `url-unhex-string'.  PORT is a number; when
URL gives none it is the `default-port' property of TYPE.  FILE is
the path.  ATTRIBUTES is the query as parsed by `url-parse-args'.
TARGET is the fragment.  FULL is t when URL has a \"//\" authority
part."
  ;; See RFC 3986.
  (cond
   ((null url)
    (make-vector 9 nil))
   ((or (not (string-match url-nonrelative-link url))
        (= ?/ (string-to-char url)))
    ;; This isn't correct, as a relative URL can be a fragment link
    ;; (e.g. "#foo") and many other things (see section 4.2).
    ;; However, let's not fix something that isn't broken, especially
    ;; when close to a release.
    (let ((retval (make-vector 9 nil)))
      (url-set-filename retval url)
      (url-set-full retval nil)
      retval))
   (t
    ;; Parse in a fresh temporary buffer: it starts out empty and
    ;; writable, is killed on exit, and the caller's current buffer
    ;; is restored afterwards.
    (with-temp-buffer
      (set-syntax-table url-parse-syntax-table)
      (let ((save-pos nil)
            (prot nil)
            (user nil)
            (pass nil)
            (host nil)
            (port nil)
            (file nil)
            (refs nil)
            (attr nil)
            (full nil))
        (insert url)
        (goto-char (point-min))
        (setq save-pos (point))

        ;; 3.1. Scheme
        ;; Digits are legal scheme characters (e.g. "h323:"), so they
        ;; must be part of the set skipped here.
        (if (not (looking-at "//"))
            (progn
              (skip-chars-forward "a-zA-Z0-9+.\\-")
              (setq prot (downcase (buffer-substring save-pos (point))))
              (skip-chars-forward ":")
              (setq save-pos (point))))

        ;; 3.2. Authority
        (if (looking-at "//")
            (progn
              (setq full t)
              (forward-char 2)
              (setq save-pos (point))
              (skip-chars-forward "^/\\?#")
              (setq host (buffer-substring save-pos (point)))
              ;; Split off "USER@" or "USER:PASSWORD@".
              (if (string-match "^\\([^@]+\\)@" host)
                  (setq user (match-string 1 host)
                        host (substring host (match-end 0) nil)))
              (if (and user (string-match "\\([^:]+\\):\\(.*\\)" user))
                  (setq pass (match-string 2 user)
                        user (match-string 1 user)))
              ;; The port is the run of digits after the LAST colon.
              ;; Anchoring at the end of the string keeps the colons
              ;; inside a bracketed IPv6 literal such as "[::1]" from
              ;; being mistaken for a port separator.
              (if (string-match ":\\([0-9]+\\)\\'" host)
                  (setq port (string-to-number (match-string 1 host))
                        host (substring host 0 (match-beginning 0))))
              ;; A trailing colon with no digits means "no port".
              (if (string-match ":$" host)
                  (setq host (substring host 0 (match-beginning 0))))
              (setq host (downcase host)
                    save-pos (point))))

        (if (not port)
            (setq port (url-scheme-get-property prot 'default-port)))

        ;; 3.3. Path
        (setq save-pos (point))
        (skip-chars-forward "^#?")
        (setq file (buffer-substring save-pos (point)))

        ;; 3.4. Query
        (when (looking-at "\\?")
          (forward-char 1)
          (setq save-pos (point))
          (skip-chars-forward "^#")
          ;; RFC 3986 specifies no general way of parsing the query
          ;; string, but `url-parse-args' seems universal enough.
          (setq attr (url-parse-args (buffer-substring save-pos (point)) t)
                attr (nreverse attr)))

        ;; 3.5. Fragment
        (when (looking-at "#")
          (forward-char 1)
          (setq refs (buffer-substring (point) (point-max))))

        ;; Percent-escapes are two HEX digits, so letters A-F must be
        ;; accepted here as well as 0-9.
        (if (and host (string-match "%[0-9a-fA-F][0-9a-fA-F]" host))
            (setq host (url-unhex-string host)))
        (vector prot user pass host port file refs attr full))))))
215
216 (provide 'url-parse)
217
218 ;; arch-tag: f338325f-71ab-4bee-93cc-78fb9a03d403
219 ;;; url-parse.el ends here