Mercurial > core / lisp/lib/obj/uri/parse.lisp
changeset 698: |
96958d3eb5b0 |
parent: |
78ef6145e272
|
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Fri, 04 Oct 2024 22:04:59 -0400 |
permissions: |
-rw-r--r-- |
description: |
fixes |
1 ;;; obj/uri/parse.lisp --- URI Parsers 8 (eval-when (:compile-toplevel :execute :load-toplevel) 9 ;; Generate the parser for URI or IRI. The only difference is the name 10 ;; of the parser and for IRIs the binding of %iri-mode to T. 11 (defmacro gen-xri-parser (name irip) 13 &aux ,@(when irip '((%iri-mode t))) 16 real-host ipv6 zone-id) 17 (declare (optimize (safety 0)) 20 (check-xri-string string) 22 (multiple-value-bind (i scheme userinfo host port path query fragment) 23 (state-absolute-uri string 0 end) 25 (if* (and host (consp host)) 26 then (setq real-host (first host)) 27 (setq ipv6 (second host)) 28 (setq zone-id (third host)) 29 else (setq real-host host)) 31 (setq port (val string port)) 32 (setq port (parse-integer port :radix 10))) 34 (values (val string scheme) 35 (val string real-host) 40 ;; This is only non-nil for URNs 44 (val string zone-id))))) 46 (multiple-value-bind (i scheme userinfo host port path query fragment) 47 (state-uri-reference string 0 end) 49 (if* (and host (consp host)) 50 then (setq real-host (first host)) 51 (setq ipv6 (second host)) 52 (setq zone-id (third host)) 53 else (setq real-host host)) 55 (setq port (val string port)) 56 (setq port (parse-integer port :radix 10))) 58 (values (val string scheme) 59 (val string real-host) 67 (val string zone-id))))) 69 (uri-parse-error string "Couldn't parse uri: ~s." string)))) 71 (defun uri-parse-error (string format-string &rest format-arguments) 72 (error 'uri-parse-error 74 :format-control format-string 75 :format-arguments format-arguments)) 77 (gen-xri-parser parse-uri-string-rfc3986 nil) 78 (gen-xri-parser parse-iri-string-rfc3987 :iri-mode) 80 ;; TODO fix string escapes 81 (defun parse-uri (thing &key (class 'uri) (escape t)) 82 ;; Parse THING into a URI object, an instance of CLASS. 84 ;; If ESCAPE is non-nil, then decode percent-encoded characters in places 85 ;; where they can legally appear, into the raw characters. The exception 86 ;; to this is when those characters are reserved for the component in 87 ;; which they appear, and in this case the percent-encoded character 90 (when (uri-p thing) (return-from parse-uri thing)) 92 (multiple-value-bind (scheme host userinfo port path query fragment 93 pct-encoded ipv6 zone-id) 94 (parse-uri-string-rfc3986 thing) 99 ;; Ordered from most common to least, and the set of known schemes 100 ;; hardwired for efficiency. 101 ((string-equal scheme "https") :https) 102 ((string-equal scheme "http") :http) 103 ((string-equal scheme "ftp") :ftp) 104 ((string-equal scheme "file") :file) 105 ((string-equal scheme "urn") :urn) 106 ((string-equal scheme "telnet") :telnet) 115 (load-time-value (find-package :keyword))))))) 117 (when (and scheme (eq :urn scheme)) 118 (return-from parse-uri 119 (make-instance 'urn :scheme scheme :nid host :nss path 120 :query query :fragment fragment 121 :r-component userinfo))) 123 (when (and escape host) 124 (setq host (percent-decode-string host *reg-name-bitvector*))) 125 (when (and escape userinfo) 126 (setq userinfo (percent-decode-string userinfo *userinfo-bitvector*))) 128 (when (not (numberp port)) (error "port is not a number: ~s." port)) 129 (when (not (plusp port)) 130 (error "port is not a positive integer: ~d." port)) 131 ;; Use `eql' instead of `=' so that scheme's other than the small set 132 ;; below are possible. 133 (when (eql port (case scheme 139 (when (= 0 (length path)) 141 (when (and escape path) 142 (setq path (percent-decode-string path *pchar-bitvector*))) 143 (when (and escape query) 145 (percent-decode-string query 147 then *decode-query-bitvector-strict* 148 else *decode-query-bitvector-non-strict*)))) 149 (when (and escape fragment) 151 (percent-decode-string fragment 153 then *fragment-bitvector-strict* 154 else *fragment-bitvector-non-strict*)))) 156 then ;; allow the compiler to optimize the make-instance call: 167 :escaped (when escape pct-encoded)) 168 else ;; do it the slow way: 177 :escaped (when escape pct-encoded))))) 179 (defmacro gen-string-to-xri (name parser class) 180 `(defun ,name (string) 181 ;; Parse STRING as a xRI and either signal an error if it cannot be 182 ;; parsed or return the xRI object. This function differs from 183 ;; parse-uri in that the query is not decoded. The knowledge of how 184 ;; to properly decode the query is outside the bounds of RFC 3986/7. 185 (multiple-value-bind (scheme host userinfo port path query fragment 186 pct-encoded ;; non-nil if any %xx in any slot 193 ;; Ordered from most common to least, and the set of known schemes 194 ;; hardwired for efficiency. 195 ((string-equal scheme "https") :https) 196 ((string-equal scheme "http") :http) 197 ((string-equal scheme "ftp") :ftp) 198 ((string-equal scheme "file") :file) 199 ((string-equal scheme "urn") :urn) 200 ((string-equal scheme "telnet") :telnet) 209 (load-time-value (find-package :keyword))))))) 211 (when (and scheme (eq :urn scheme)) 213 ;; NOTE: for now, we treat URNs like parse-uri, and do no 215 (make-instance 'urn :scheme scheme :nid host :nss path 216 :query query :fragment fragment 217 :r-component userinfo))) 219 (when (and pct-encoded host) 220 (setq host (percent-decode-string host *reg-name-bitvector*))) 222 (when (and pct-encoded userinfo) 223 (setq userinfo (percent-decode-string userinfo *userinfo-bitvector*))) 226 (when (not (numberp port)) (error "port is not a number: ~s." port)) 227 (when (not (plusp port)) 228 (error "port is not a positive integer: ~d." port)) 229 ;; Use `eql' instead of `=' so that scheme's other than the small set 230 ;; below are possible. 231 (when (eql port (case scheme 238 (when (= 0 (length path)) 240 (when (and pct-encoded path) 241 (setq path (percent-decode-string path *pchar-bitvector*))) 243 ;; query is left alone 245 (when (and pct-encoded fragment) 247 (percent-decode-string fragment 249 then *fragment-bitvector-strict* 250 else *fragment-bitvector-non-strict*)))) 252 (make-instance ,class 262 :escaped pct-encoded)))) 264 (gen-string-to-xri string-to-uri parse-uri-string-rfc3986 'uri) 265 (gen-string-to-xri string-to-iri parse-iri-string-rfc3987 'iri) 267 (defun parse-path (path-string escape) 268 (do* ((xpath-list (uiop:split-string path-string :separator '(#\/))) 270 (let (#+mswindows temp #+mswindows c) 271 (cond ((string= "" (car xpath-list)) 272 (setf (car xpath-list) :absolute)) 273 (t (push :relative xpath-list))) 275 (pl (cdr path-list) (cdr pl)) 277 ((null pl) path-list) 279 (if* (symbolp (car pl)) 280 then ;; Only happens on Windows when we see a path with a drive 281 ;; letter. The lack of #+mswindows doesn't matter here. 283 elseif (cdr (setq segments 284 (if* (string= "" (car pl)) 286 else (uiop:split-string (car pl) :separator '(#\:))))) 287 then ;; there is a param 289 (mapcar #'(lambda (s) 291 then (percent-decode-string s nil) 297 then (percent-decode-string (car segments) nil) 298 else (car segments))))))