Mercurial > core / lisp/lib/obj/uri/uri.lisp
changeset 698: |
96958d3eb5b0 |
parent: |
78ef6145e272
|
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Fri, 04 Oct 2024 22:04:59 -0400 |
permissions: |
-rw-r--r-- |
description: |
fixes |
1 ;;; lib/obj/uri.lisp --- URIs -*- mode: common-lisp; -*- 3 ;; URI/IRI/URN support based on Franz's URI support library for 6 ;; For general URI information see RFC 3986. 8 ;; For general IRI information see RFC 3987. 10 ;; For general URN information see RFC 8141. 12 ;; For IPv6 changes see RFC 6874. 16 ftp://ftp.is.co.za/rfc/rfc1808.txt 17 https://www.ietf.org/rfc/rfc2396.txt 18 ldap://[2001:db8::7]/c=GB?objectClass?one 19 mailto:John.Doe@example.com 20 news:comp.infosystems.www.servers.unix 22 telnet://192.0.2.16:80/ 23 urn:oasis:names:specification:docbook:dtd:xml:4.1.2 29 ;; This does not persist past the end of compile-file 30 (eval-when (:compile-toplevel) (declaim (optimize (speed 3)))) 33 (defvar *strict-parse* t)) 39 ;; uri-host is computed and cached. See the hand-written method below. 40 ;; uri-ipv6 and uri-zone-id are read-only by users, so they are in the 41 ;; internal section below. 43 ;;;; These slots are special: when they are changed, the string and 44 ;;;; hashcode slots need to be set to nil. For path, parsed-path also 45 ;;;; needs to be set to nil. See define-special-uri-slot-setters below. 46 (scheme :initarg :scheme :initform nil :accessor uri-scheme) 47 (userinfo :initarg :userinfo :initform nil :accessor uri-userinfo) 48 (port :initarg :port :initform nil :accessor uri-port) 49 (path :initarg :path :initform nil :accessor uri-path) 50 (query :initarg :query :initform nil :accessor uri-query) 51 (fragment :initarg :fragment :initform nil :accessor uri-fragment) 52 ;;;; ...end special slots. 53 (plist :initarg :plist :initform nil :accessor uri-plist) 55 (%host ;; where part of the value for uri-host is stored 56 ;; The values stored here are for URIs with names or IPv4 addresses. 57 ;; IPv6 addresses are stored in the .ipv6 and .zone-id slots. 59 ;; I'm conflicted over the fact that .host is both computed and NOT 60 ;; computed. It is computed for IPv6, but it holds the actual values 61 ;; from the parse for names or IPv4 addresses. It might be a tiny bit 62 ;; more clear to have a separate slot for the computed value, but 63 ;; would that extra clarity be worth the extra space at runtime? 64 :initarg :host :initform nil :accessor %uri-host) 65 (%ipv6 ;; the pure IPv6 portion of the uri-host, nil otherwise 66 ;; This value is the actual IPv6 address that would be suitable for use 67 ;; in networking functions. It does NOT include the zone-id or the 69 :initarg :ipv6 :initform nil :accessor %uri-ipv6) 70 (%zone-id ;; used if IPv6 has a zone ID 71 :initarg :zone-id :initform nil :accessor %uri-zone-id) 72 (escaped ;; non-nil if parsed input contained pct encoded characters 73 :initarg :escaped :initform nil :accessor uri-escaped) 74 (string ;; the cached printable representation of the URI 75 ;; It might be different than the original string, because of percent 76 ;; encoding. Use of slot setf methods may reset this slot to nil, 77 ;; causing it to be recomputed when needed. 78 :initarg :string :initform nil :accessor uri-string) 79 (parsed-path ;; the cached parsed representation of the URI path 82 :accessor %uri-parsed-path) 83 (hashcode ;; cached sxhash, so we don't have to compute it more than once 84 :initarg :hashcode :initform nil :accessor uri-hashcode))) 86 #+has-clos-fixed-index-feature (:metaclass fixed-index-class) 89 ;; - The grammar for IRIs is identical to that of URIs, except the allowed 90 ;; character set for URIs is limited to ASCII, while IRIs characters can 91 ;; be from the sequence of characters from the Universal Character Set 92 ;; (Unicode/ISO 10646). 93 ;; - The actual grammar differences are: 94 ;; - `unreserved' is now `iunreserved', which adds the alternation case 95 ;; `ucschar' (see ucscharp below). 96 ;; - `query' is now `iquery', which adds the alternation case 97 ;; `iprivate' (see iprivatep below). 98 ;; - The IRI parser, string-to-iri, uses the URI parser, but it binds 99 ;; .iri-mode. to T, which changes how character validation is done. In 100 ;; IRI mode, ucscharp and iprivatep are used in the appropriate places. 102 ;; See the comments for make-char-bitvector for more details. 104 (defclass iri (uri) ()) 107 ;; Bound to T when we are parsing in IRI mode 110 (defmethod uri-host ((uri uri)) 111 ;; Return the computed host for URI. It is the value which could be used 112 ;; by networking functions or programs to perform communication with the 113 ;; resource designated by URI. 114 (let ((host (%uri-host uri)) 116 ;; If HOST has a value, then use that. Otherwise, if IPV6 has a value, 117 ;; then return the IPv6 address, which will include the zone-id, if 118 ;; non-nil. Otherwise, return nil. 121 elseif (setq ipv6 (%uri-ipv6 uri)) 122 then ;; This setf clears the cached printed value (string slot) 123 (setf (%uri-host uri) 124 (if* (setq zone-id (%uri-zone-id uri)) 125 then (concatenate 'string ipv6 "%" zone-id) 128 ;; It is by design there are no public setf methods for these 129 (defmethod uri-ipv6 ((uri uri)) (%uri-ipv6 uri)) 130 (defmethod uri-zone-id ((uri uri)) (%uri-zone-id uri)) 132 ;; The .HOST slot is computed, for IPv6, or the actual name or IPv4 133 ;; address. To ensure all three slots are kept consistent, define a 134 ;; function to set them. 135 (defun set-host (uri name-or-ipv4 ipv6 zone-id) 136 (when (and name-or-ipv4 ipv6) 137 (error "Both the IPv4/name and IPv6 values cannot be non-nil: ~s, ~s." 139 (setf (%uri-host uri) name-or-ipv4 141 (%uri-zone-id uri) zone-id)) 143 (defmethod (setf uri-host) (v (uri uri)) 146 then (set-host uri nil nil nil) 148 then (multiple-value-bind (found whole ipv6 zone-id) 149 ;; This embodies knowledge of the URI IPv6 syntax 150 (cl-ppcre:scan "^(.*:.*?)(%.*)?$" v) 151 (declare (ignore whole)) 153 then (set-host uri nil ipv6 zone-id) 154 else (set-host uri v nil nil)) 156 else (error "host value must be a string: ~s." v)) 157 ;; This slot doesn't use clear-computed-uri-slots, so we must do this 159 (setf (uri-string uri) nil) 160 (setf (uri-hashcode uri) nil))) 163 ;; NOTE: the q-component is stored in the `query' slot and the 164 ;; f-component is stored in the `fragment' slot of the of the 165 ;; parent class (uri). 166 ;; The slots below have no place in the parent class. 167 ((nid :initarg :nid :initform nil :accessor urn-nid) 168 (nss :initarg :nss :initform nil :accessor urn-nss) 169 ;; q-component is stored in the `query' 170 ;; f-component is stored in the `fragment' 171 (r-component ;; ignored in comparisons 172 :initarg :r-component :initform nil :accessor urn-r-component))) 174 #+has-clos-fixed-index-feature (:metaclass fixed-index-class) 176 (defmethod make-load-form ((self uri) &optional env) 177 (declare (ignore env)) 178 `(make-instance ',(class-name (class-of self)) 179 :scheme ,(uri-scheme self) 180 :host ,(%uri-host self) 181 :ipv6 ,(%uri-ipv6 self) 182 :zone-id ,(%uri-zone-id self) 183 :userinfo ,(uri-userinfo self) 184 :port ,(uri-port self) 185 :path ',(uri-path self) 186 :query ,(uri-query self) 187 :fragment ,(uri-fragment self) 188 :plist ',(uri-plist self) 189 :string ,(uri-string self) 190 :parsed-path ',(%uri-parsed-path self))) 192 (defmethod make-load-form ((self urn) &optional env) 193 (declare (ignore env)) 194 `(make-instance ',(class-name (class-of self)) 195 :scheme ,(uri-scheme self) 196 :host ,(%uri-host self) 197 :ipv6 ,(%uri-ipv6 self) 198 :zone-id ,(%uri-zone-id self) 199 :userinfo ,(uri-userinfo self) 200 :port ,(uri-port self) 201 :path ',(uri-path self) 202 :query ,(uri-query self) ; q-component 203 :fragment ,(uri-fragment self) ; f-component 204 :plist ',(uri-plist self) 205 :string ,(uri-string self) 206 :parsed-path ',(%uri-parsed-path self) 210 :r-component ,(urn-r-component self))) 212 (define-condition uri-parse-error (parse-error) 213 ((string :initarg :string :reader uri-parse-error-string))) 215 (defmethod uri-p ((thing uri)) t) 216 (defmethod uri-p ((thing t)) nil) 218 (defmethod iri-p ((thing iri)) t) 219 (defmethod iri-p ((thing t)) nil) 223 (scheme (when uri (uri-scheme uri))) 224 (host (when uri (%uri-host uri))) 225 (ipv6 (when uri (%uri-ipv6 uri))) 226 (zone-id (when uri (%uri-zone-id uri))) 227 (userinfo (when uri (uri-userinfo uri))) 228 (port (when uri (uri-port uri))) 229 (path (when uri (uri-path uri))) 231 (when uri (copy-list (%uri-parsed-path uri)))) 232 (query (when uri (uri-query uri))) 233 (fragment (when uri (uri-fragment uri))) 234 (plist (when uri (copy-list (uri-plist uri)))) 235 (class (when uri (class-of uri))) 236 &aux (escaped (when uri (uri-escaped uri)))) 238 then (setf (uri-scheme place) scheme) 239 (set-host place host ipv6 zone-id) 240 (setf (uri-userinfo place) userinfo) 241 (setf (uri-port place) port) 242 (setf (uri-path place) path) 243 (setf (%uri-parsed-path place) parsed-path) 244 (setf (uri-query place) query) 245 (setf (uri-fragment place) fragment) 246 (setf (uri-plist place) plist) 247 (setf (uri-escaped place) escaped) 248 (setf (uri-hashcode place) nil) 250 elseif (eq 'uri class) 251 then ;; allow the compiler to optimize the call to make-instance: 253 :scheme scheme :host host :ipv6 ipv6 :zone-id zone-id 254 :userinfo userinfo :port port 255 :path path :parsed-path parsed-path 256 :query query :fragment fragment :plist plist 257 :escaped escaped :string nil :hashcode nil) 258 else (make-instance class 259 :scheme scheme :host host :ipv6 ipv6 :zone-id zone-id 260 :userinfo userinfo :port port 261 :path path :parsed-path parsed-path 262 :query query :fragment fragment :plist plist 263 :escaped escaped :string nil :hashcode nil))) 265 (defmethod uri-parsed-path ((uri uri)) 266 (let ((p (uri-path uri))) 268 (if* (%uri-parsed-path uri) 270 else (setf (%uri-parsed-path uri) 271 (parse-path (uri-path uri) (uri-escaped uri))))))) 273 (defmethod (setf uri-parsed-path) (path-list (uri uri)) 274 (if* (null path-list) 275 then (setf (uri-path uri) nil) 276 (setf (%uri-parsed-path uri) nil) 278 else (when (not (and (consp path-list) 279 (or (member (car path-list) '(:absolute :relative) 281 (error "internal error: path-list is ~s." path-list)) 282 (setf (uri-path uri) (render-parsed-path path-list t)) 283 (setf (%uri-parsed-path uri) path-list) 286 (defun uri-authority (uri) 288 (let ((*print-pretty* nil)) 289 (format nil "~@[~a@~]~a~@[:~a~]" (uri-userinfo uri) 290 (uri-host uri) (uri-port uri))))) 293 (if* (equalp "urn" (uri-scheme uri)) 294 then ;; Intentionally did not use .uri-host: 296 else (error "URI is not a URN: ~s." uri))) 299 (if* (equalp "urn" (uri-scheme uri)) 301 else (error "URI is not a URN: ~s." uri))) 303 (defmethod urn-q-component ((urn urn)) (uri-query urn)) 304 (defmethod urn-f-component ((urn urn)) (uri-fragment urn)) 306 (defmethod uri ((thing uri)) thing) 307 (defmethod uri ((thing string)) (parse-uri thing)) 308 (defmethod uri ((thing t)) (error "Cannot coerce ~s to a uri." thing))