changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/obj/uri/uri.lisp

changeset 698: 96958d3eb5b0
parent: 78ef6145e272
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
1 ;;; lib/obj/uri.lisp --- URIs -*- mode: common-lisp; -*-
2 
3 ;; URI/IRI/URN support based on Franz's URI support library for
4 ;; Allegro.
5 
6 ;; For general URI information see RFC 3986.
7 
8 ;; For general IRI information see RFC 3987.
9 
10 ;; For general URN information see RFC 8141.
11 
12 ;; For IPv6 changes see RFC 6874.
13 
14 ;; examples of URIs:
15 #|
16 ftp://ftp.is.co.za/rfc/rfc1808.txt
17 https://www.ietf.org/rfc/rfc2396.txt
18 ldap://[2001:db8::7]/c=GB?objectClass?one
19 mailto:John.Doe@example.com
20 news:comp.infosystems.www.servers.unix
21 tel:+1-816-555-1212
22 telnet://192.0.2.16:80/
23 urn:oasis:names:specification:docbook:dtd:xml:4.1.2
24 |#
25 
26 ;;; Code:
27 (in-package :obj/uri)
28 
29 ;; This does not persist past the end of compile-file
30 (eval-when (:compile-toplevel) (declaim (optimize (speed 3))))
31 
32 (eval-always
33  (defvar *strict-parse* t))
34 
35 
36 (defclass uri ()
37  (
38 ;;;; external:
39  ;; uri-host is computed and cached. See the hand-written method below.
40  ;; uri-ipv6 and uri-zone-id are read-only by users, so they are in the
41  ;; internal section below.
42 
43 ;;;; These slots are special: when they are changed, the string and
44 ;;;; hashcode slots need to be set to nil. For path, parsed-path also
45 ;;;; needs to be set to nil. See define-special-uri-slot-setters below.
46  (scheme :initarg :scheme :initform nil :accessor uri-scheme)
47  (userinfo :initarg :userinfo :initform nil :accessor uri-userinfo)
48  (port :initarg :port :initform nil :accessor uri-port)
49  (path :initarg :path :initform nil :accessor uri-path)
50  (query :initarg :query :initform nil :accessor uri-query)
51  (fragment :initarg :fragment :initform nil :accessor uri-fragment)
52 ;;;; ...end special slots.
53  (plist :initarg :plist :initform nil :accessor uri-plist)
54 ;;;; internal:
55  (%host ;; where part of the value for uri-host is stored
56  ;; The values stored here are for URIs with names or IPv4 addresses.
57  ;; IPv6 addresses are stored in the .ipv6 and .zone-id slots.
58  ;; NOTE:
59  ;; I'm conflicted over the fact that .host is both computed and NOT
60  ;; computed. It is computed for IPv6, but it holds the actual values
61  ;; from the parse for names or IPv4 addresses. It might be a tiny bit
62  ;; more clear to have a separate slot for the computed value, but
63  ;; would that extra clarity be worth the extra space at runtime?
64  :initarg :host :initform nil :accessor %uri-host)
65  (%ipv6 ;; the pure IPv6 portion of the uri-host, nil otherwise
66  ;; This value is the actual IPv6 address that would be suitable for use
67  ;; in networking functions. It does NOT include the zone-id or the
68  ;; URI [] syntax.
69  :initarg :ipv6 :initform nil :accessor %uri-ipv6)
70  (%zone-id ;; used if IPv6 has a zone ID
71  :initarg :zone-id :initform nil :accessor %uri-zone-id)
72  (escaped ;; non-nil if parsed input contained pct encoded characters
73  :initarg :escaped :initform nil :accessor uri-escaped)
74  (string ;; the cached printable representation of the URI
75  ;; It might be different than the original string, because of percent
76  ;; encoding. Use of slot setf methods may reset this slot to nil,
77  ;; causing it to be recomputed when needed.
78  :initarg :string :initform nil :accessor uri-string)
79  (parsed-path ;; the cached parsed representation of the URI path
80  :initarg :parsed-path
81  :initform nil
82  :accessor %uri-parsed-path)
83  (hashcode ;; cached sxhash, so we don't have to compute it more than once
84  :initarg :hashcode :initform nil :accessor uri-hashcode)))
85 
86  #+has-clos-fixed-index-feature (:metaclass fixed-index-class)
87 
88 ;;; IRI
89 ;; - The grammar for IRIs is identical to that of URIs, except the allowed
90 ;; character set for URIs is limited to ASCII, while IRIs characters can
91 ;; be from the sequence of characters from the Universal Character Set
92 ;; (Unicode/ISO 10646).
93 ;; - The actual grammar differences are:
94 ;; - `unreserved' is now `iunreserved', which adds the alternation case
95 ;; `ucschar' (see ucscharp below).
96 ;; - `query' is now `iquery', which adds the alternation case
97 ;; `iprivate' (see iprivatep below).
98 ;; - The IRI parser, string-to-iri, uses the URI parser, but it binds
99 ;; .iri-mode. to T, which changes how character validation is done. In
100 ;; IRI mode, ucscharp and iprivatep are used in the appropriate places.
101 ;;
102 ;; See the comments for make-char-bitvector for more details.
103 
104 (defclass iri (uri) ())
105 
106 (defvar %iri-mode
107  ;; Bound to T when we are parsing in IRI mode
108  nil)
109 
110 (defmethod uri-host ((uri uri))
111  ;; Return the computed host for URI. It is the value which could be used
112  ;; by networking functions or programs to perform communication with the
113  ;; resource designated by URI.
114  (let ((host (%uri-host uri))
115  ipv6 zone-id)
116  ;; If HOST has a value, then use that. Otherwise, if IPV6 has a value,
117  ;; then return the IPv6 address, which will include the zone-id, if
118  ;; non-nil. Otherwise, return nil.
119  (if* host
120  thenret
121  elseif (setq ipv6 (%uri-ipv6 uri))
122  then ;; This setf clears the cached printed value (string slot)
123  (setf (%uri-host uri)
124  (if* (setq zone-id (%uri-zone-id uri))
125  then (concatenate 'string ipv6 "%" zone-id)
126  else ipv6)))))
127 
128 ;; It is by design there are no public setf methods for these
129 (defmethod uri-ipv6 ((uri uri)) (%uri-ipv6 uri))
130 (defmethod uri-zone-id ((uri uri)) (%uri-zone-id uri))
131 
132 ;; The .HOST slot is computed, for IPv6, or the actual name or IPv4
133 ;; address. To ensure all three slots are kept consistent, define a
134 ;; function to set them.
135 (defun set-host (uri name-or-ipv4 ipv6 zone-id)
136  (when (and name-or-ipv4 ipv6)
137  (error "Both the IPv4/name and IPv6 values cannot be non-nil: ~s, ~s."
138  name-or-ipv4 ipv6))
139  (setf (%uri-host uri) name-or-ipv4
140  (%uri-ipv6 uri) ipv6
141  (%uri-zone-id uri) zone-id))
142 
143 (defmethod (setf uri-host) (v (uri uri))
144  (prog1
145  (if* (null v)
146  then (set-host uri nil nil nil)
147  elseif (stringp v)
148  then (multiple-value-bind (found whole ipv6 zone-id)
149  ;; This embodies knowledge of the URI IPv6 syntax
150  (cl-ppcre:scan "^(.*:.*?)(%.*)?$" v)
151  (declare (ignore whole))
152  (if* found
153  then (set-host uri nil ipv6 zone-id)
154  else (set-host uri v nil nil))
155  v)
156  else (error "host value must be a string: ~s." v))
157  ;; This slot doesn't use clear-computed-uri-slots, so we must do this
158  ;; manually:
159  (setf (uri-string uri) nil)
160  (setf (uri-hashcode uri) nil)))
161 
162 (defclass urn (uri)
163  ;; NOTE: the q-component is stored in the `query' slot and the
164  ;; f-component is stored in the `fragment' slot of the of the
165  ;; parent class (uri).
166  ;; The slots below have no place in the parent class.
167  ((nid :initarg :nid :initform nil :accessor urn-nid)
168  (nss :initarg :nss :initform nil :accessor urn-nss)
169  ;; q-component is stored in the `query'
170  ;; f-component is stored in the `fragment'
171  (r-component ;; ignored in comparisons
172  :initarg :r-component :initform nil :accessor urn-r-component)))
173 
174 #+has-clos-fixed-index-feature (:metaclass fixed-index-class)
175 
176 (defmethod make-load-form ((self uri) &optional env)
177  (declare (ignore env))
178  `(make-instance ',(class-name (class-of self))
179  :scheme ,(uri-scheme self)
180  :host ,(%uri-host self)
181  :ipv6 ,(%uri-ipv6 self)
182  :zone-id ,(%uri-zone-id self)
183  :userinfo ,(uri-userinfo self)
184  :port ,(uri-port self)
185  :path ',(uri-path self)
186  :query ,(uri-query self)
187  :fragment ,(uri-fragment self)
188  :plist ',(uri-plist self)
189  :string ,(uri-string self)
190  :parsed-path ',(%uri-parsed-path self)))
191 
192 (defmethod make-load-form ((self urn) &optional env)
193  (declare (ignore env))
194  `(make-instance ',(class-name (class-of self))
195  :scheme ,(uri-scheme self)
196  :host ,(%uri-host self)
197  :ipv6 ,(%uri-ipv6 self)
198  :zone-id ,(%uri-zone-id self)
199  :userinfo ,(uri-userinfo self)
200  :port ,(uri-port self)
201  :path ',(uri-path self)
202  :query ,(uri-query self) ; q-component
203  :fragment ,(uri-fragment self) ; f-component
204  :plist ',(uri-plist self)
205  :string ,(uri-string self)
206  :parsed-path ',(%uri-parsed-path self)
207  ;;; URN-specific:
208  :nid ,(urn-nid self)
209  :nss ,(urn-nss self)
210  :r-component ,(urn-r-component self)))
211 
212 (define-condition uri-parse-error (parse-error)
213  ((string :initarg :string :reader uri-parse-error-string)))
214 
215 (defmethod uri-p ((thing uri)) t)
216 (defmethod uri-p ((thing t)) nil)
217 
218 (defmethod iri-p ((thing iri)) t)
219 (defmethod iri-p ((thing t)) nil)
220 
221 (defun copy-uri (uri
222  &key place
223  (scheme (when uri (uri-scheme uri)))
224  (host (when uri (%uri-host uri)))
225  (ipv6 (when uri (%uri-ipv6 uri)))
226  (zone-id (when uri (%uri-zone-id uri)))
227  (userinfo (when uri (uri-userinfo uri)))
228  (port (when uri (uri-port uri)))
229  (path (when uri (uri-path uri)))
230  (parsed-path
231  (when uri (copy-list (%uri-parsed-path uri))))
232  (query (when uri (uri-query uri)))
233  (fragment (when uri (uri-fragment uri)))
234  (plist (when uri (copy-list (uri-plist uri))))
235  (class (when uri (class-of uri)))
236  &aux (escaped (when uri (uri-escaped uri))))
237  (if* place
238  then (setf (uri-scheme place) scheme)
239  (set-host place host ipv6 zone-id)
240  (setf (uri-userinfo place) userinfo)
241  (setf (uri-port place) port)
242  (setf (uri-path place) path)
243  (setf (%uri-parsed-path place) parsed-path)
244  (setf (uri-query place) query)
245  (setf (uri-fragment place) fragment)
246  (setf (uri-plist place) plist)
247  (setf (uri-escaped place) escaped)
248  (setf (uri-hashcode place) nil)
249  place
250  elseif (eq 'uri class)
251  then ;; allow the compiler to optimize the call to make-instance:
252  (make-instance 'uri
253  :scheme scheme :host host :ipv6 ipv6 :zone-id zone-id
254  :userinfo userinfo :port port
255  :path path :parsed-path parsed-path
256  :query query :fragment fragment :plist plist
257  :escaped escaped :string nil :hashcode nil)
258  else (make-instance class
259  :scheme scheme :host host :ipv6 ipv6 :zone-id zone-id
260  :userinfo userinfo :port port
261  :path path :parsed-path parsed-path
262  :query query :fragment fragment :plist plist
263  :escaped escaped :string nil :hashcode nil)))
264 
265 (defmethod uri-parsed-path ((uri uri))
266  (let ((p (uri-path uri)))
267  (when p
268  (if* (%uri-parsed-path uri)
269  thenret
270  else (setf (%uri-parsed-path uri)
271  (parse-path (uri-path uri) (uri-escaped uri)))))))
272 
273 (defmethod (setf uri-parsed-path) (path-list (uri uri))
274  (if* (null path-list)
275  then (setf (uri-path uri) nil)
276  (setf (%uri-parsed-path uri) nil)
277  path-list
278  else (when (not (and (consp path-list)
279  (or (member (car path-list) '(:absolute :relative)
280  :test #'eq))))
281  (error "internal error: path-list is ~s." path-list))
282  (setf (uri-path uri) (render-parsed-path path-list t))
283  (setf (%uri-parsed-path uri) path-list)
284  path-list))
285 
286 (defun uri-authority (uri)
287  (when (uri-host uri)
288  (let ((*print-pretty* nil))
289  (format nil "~@[~a@~]~a~@[:~a~]" (uri-userinfo uri)
290  (uri-host uri) (uri-port uri)))))
291 
292 (defun uri-nid (uri)
293  (if* (equalp "urn" (uri-scheme uri))
294  then ;; Intentionally did not use .uri-host:
295  (uri-host uri)
296  else (error "URI is not a URN: ~s." uri)))
297 
298 (defun uri-nss (uri)
299  (if* (equalp "urn" (uri-scheme uri))
300  then (uri-path uri)
301  else (error "URI is not a URN: ~s." uri)))
302 
303 (defmethod urn-q-component ((urn urn)) (uri-query urn))
304 (defmethod urn-f-component ((urn urn)) (uri-fragment urn))
305 
306 (defmethod uri ((thing uri)) thing)
307 (defmethod uri ((thing string)) (parse-uri thing))
308 (defmethod uri ((thing t)) (error "Cannot coerce ~s to a uri." thing))