changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/obj/uri/parse.lisp

changeset 698: 96958d3eb5b0
parent: 78ef6145e272
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
1 ;;; obj/uri/parse.lisp --- URI Parsers
2 
3 ;;
4 
5 ;;; Code:
6 (in-package :obj/uri)
7 
8 (eval-when (:compile-toplevel :execute :load-toplevel)
9  ;; Generate the parser for URI or IRI. The only difference is the name
10  ;; of the parser and for IRIs the binding of %iri-mode to T.
11  (defmacro gen-xri-parser (name irip)
12  `(defun ,name (string
13  &aux ,@(when irip '((%iri-mode t)))
14  (end (length string))
15  (.pct-encoded. nil)
16  real-host ipv6 zone-id)
17  (declare (optimize (safety 0))
18  (fixnum end))
19 
20  (check-xri-string string)
21 
22  (multiple-value-bind (i scheme userinfo host port path query fragment)
23  (state-absolute-uri string 0 end)
24  (when i
25  (if* (and host (consp host))
26  then (setq real-host (first host))
27  (setq ipv6 (second host))
28  (setq zone-id (third host))
29  else (setq real-host host))
30  (when port
31  (setq port (val string port))
32  (setq port (parse-integer port :radix 10)))
33  (return-from ,name
34  (values (val string scheme)
35  (val string real-host)
36  (val string userinfo)
37  port
38  (val string path)
39  (val string query)
40  ;; This is only non-nil for URNs
41  (val string fragment)
42  .pct-encoded.
43  (val string ipv6)
44  (val string zone-id)))))
45 
46  (multiple-value-bind (i scheme userinfo host port path query fragment)
47  (state-uri-reference string 0 end)
48  (when i
49  (if* (and host (consp host))
50  then (setq real-host (first host))
51  (setq ipv6 (second host))
52  (setq zone-id (third host))
53  else (setq real-host host))
54  (when port
55  (setq port (val string port))
56  (setq port (parse-integer port :radix 10)))
57  (return-from ,name
58  (values (val string scheme)
59  (val string real-host)
60  (val string userinfo)
61  port
62  (val string path)
63  (val string query)
64  (val string fragment)
65  .pct-encoded.
66  (val string ipv6)
67  (val string zone-id)))))
68 
69  (uri-parse-error string "Couldn't parse uri: ~s." string))))
70 
71 (defun uri-parse-error (string format-string &rest format-arguments)
72  (error 'uri-parse-error
73  :string string
74  :format-control format-string
75  :format-arguments format-arguments))
76 
77 (gen-xri-parser parse-uri-string-rfc3986 nil)
78 (gen-xri-parser parse-iri-string-rfc3987 :iri-mode)
79 
80 ;; TODO fix string escapes
81 (defun parse-uri (thing &key (class 'uri) (escape t))
82  ;; Parse THING into a URI object, an instance of CLASS.
83  ;;
84  ;; If ESCAPE is non-nil, then decode percent-encoded characters in places
85  ;; where they can legally appear, into the raw characters. The exception
86  ;; to this is when those characters are reserved for the component in
87  ;; which they appear, and in this case the percent-encoded character
88  ;; stays encoded.
89 
90  (when (uri-p thing) (return-from parse-uri thing))
91 
92  (multiple-value-bind (scheme host userinfo port path query fragment
93  pct-encoded ipv6 zone-id)
94  (parse-uri-string-rfc3986 thing)
95 
96  (when scheme
97  (setq scheme
98  (cond
99  ;; Ordered from most common to least, and the set of known schemes
100  ;; hardwired for efficiency.
101  ((string-equal scheme "https") :https)
102  ((string-equal scheme "http") :http)
103  ((string-equal scheme "ftp") :ftp)
104  ((string-equal scheme "file") :file)
105  ((string-equal scheme "urn") :urn)
106  ((string-equal scheme "telnet") :telnet)
107  (t
108  (intern (funcall
109  (case *print-case*
110  ((:upcase)
111  #'string-upcase)
112  ((:downcase)
113  #'string-downcase))
114  scheme)
115  (load-time-value (find-package :keyword)))))))
116 
117  (when (and scheme (eq :urn scheme))
118  (return-from parse-uri
119  (make-instance 'urn :scheme scheme :nid host :nss path
120  :query query :fragment fragment
121  :r-component userinfo)))
122 
123  (when (and escape host)
124  (setq host (percent-decode-string host *reg-name-bitvector*)))
125  (when (and escape userinfo)
126  (setq userinfo (percent-decode-string userinfo *userinfo-bitvector*)))
127  (when port
128  (when (not (numberp port)) (error "port is not a number: ~s." port))
129  (when (not (plusp port))
130  (error "port is not a positive integer: ~d." port))
131  ;; Use `eql' instead of `=' so that scheme's other than the small set
132  ;; below are possible.
133  (when (eql port (case scheme
134  (:http 80)
135  (:https 443)
136  (:ftp 21)
137  (:telnet 23)))
138  (setq port nil)))
139  (when (= 0 (length path))
140  (setq path nil))
141  (when (and escape path)
142  (setq path (percent-decode-string path *pchar-bitvector*)))
143  (when (and escape query)
144  (setq query
145  (percent-decode-string query
146  (if* *strict-parse*
147  then *decode-query-bitvector-strict*
148  else *decode-query-bitvector-non-strict*))))
149  (when (and escape fragment)
150  (setq fragment
151  (percent-decode-string fragment
152  (if* *strict-parse*
153  then *fragment-bitvector-strict*
154  else *fragment-bitvector-non-strict*))))
155  (if* (eq 'uri class)
156  then ;; allow the compiler to optimize the make-instance call:
157  (make-instance 'uri
158  :scheme scheme
159  :host host
160  :ipv6 ipv6
161  :zone-id zone-id
162  :userinfo userinfo
163  :port port
164  :path path
165  :query query
166  :fragment fragment
167  :escaped (when escape pct-encoded))
168  else ;; do it the slow way:
169  (make-instance class
170  :scheme scheme
171  :host host
172  :userinfo userinfo
173  :port port
174  :path path
175  :query query
176  :fragment fragment
177  :escaped (when escape pct-encoded)))))
178 
179  (defmacro gen-string-to-xri (name parser class)
180  `(defun ,name (string)
181  ;; Parse STRING as a xRI and either signal an error if it cannot be
182  ;; parsed or return the xRI object. This function differs from
183  ;; parse-uri in that the query is not decoded. The knowledge of how
184  ;; to properly decode the query is outside the bounds of RFC 3986/7.
185  (multiple-value-bind (scheme host userinfo port path query fragment
186  pct-encoded ;; non-nil if any %xx in any slot
187  ipv6 zone-id)
188  (,parser string)
189 
190  (when scheme
191  (setq scheme
192  (cond
193  ;; Ordered from most common to least, and the set of known schemes
194  ;; hardwired for efficiency.
195  ((string-equal scheme "https") :https)
196  ((string-equal scheme "http") :http)
197  ((string-equal scheme "ftp") :ftp)
198  ((string-equal scheme "file") :file)
199  ((string-equal scheme "urn") :urn)
200  ((string-equal scheme "telnet") :telnet)
201  (t
202  (intern (funcall
203  (case *print-case*
204  ((:upcase)
205  #'string-upcase)
206  ((:downcase)
207  #'string-downcase))
208  scheme)
209  (load-time-value (find-package :keyword)))))))
210 
211  (when (and scheme (eq :urn scheme))
212  (return-from ,name
213  ;; NOTE: for now, we treat URNs like parse-uri, and do no
214  ;; decoding.
215  (make-instance 'urn :scheme scheme :nid host :nss path
216  :query query :fragment fragment
217  :r-component userinfo)))
218 
219  (when (and pct-encoded host)
220  (setq host (percent-decode-string host *reg-name-bitvector*)))
221 
222  (when (and pct-encoded userinfo)
223  (setq userinfo (percent-decode-string userinfo *userinfo-bitvector*)))
224 
225  (when port
226  (when (not (numberp port)) (error "port is not a number: ~s." port))
227  (when (not (plusp port))
228  (error "port is not a positive integer: ~d." port))
229  ;; Use `eql' instead of `=' so that scheme's other than the small set
230  ;; below are possible.
231  (when (eql port (case scheme
232  (:http 80)
233  (:https 443)
234  (:ftp 21)
235  (:telnet 23)))
236  (setq port nil)))
237 
238  (when (= 0 (length path))
239  (setq path nil))
240  (when (and pct-encoded path)
241  (setq path (percent-decode-string path *pchar-bitvector*)))
242 
243  ;; query is left alone
244 
245  (when (and pct-encoded fragment)
246  (setq fragment
247  (percent-decode-string fragment
248  (if* *strict-parse*
249  then *fragment-bitvector-strict*
250  else *fragment-bitvector-non-strict*))))
251 
252  (make-instance ,class
253  :scheme scheme
254  :host host
255  :ipv6 ipv6
256  :zone-id zone-id
257  :userinfo userinfo
258  :port port
259  :path path
260  :query query
261  :fragment fragment
262  :escaped pct-encoded))))
263 
264 (gen-string-to-xri string-to-uri parse-uri-string-rfc3986 'uri)
265 (gen-string-to-xri string-to-iri parse-iri-string-rfc3987 'iri)
266 
267 (defun parse-path (path-string escape)
268  (do* ((xpath-list (uiop:split-string path-string :separator '(#\/)))
269  (path-list
270  (let (#+mswindows temp #+mswindows c)
271  (cond ((string= "" (car xpath-list))
272  (setf (car xpath-list) :absolute))
273  (t (push :relative xpath-list)))
274  xpath-list))
275  (pl (cdr path-list) (cdr pl))
276  segments)
277  ((null pl) path-list)
278 
279  (if* (symbolp (car pl))
280  then ;; Only happens on Windows when we see a path with a drive
281  ;; letter. The lack of #+mswindows doesn't matter here.
282  nil
283  elseif (cdr (setq segments
284  (if* (string= "" (car pl))
285  then '("")
286  else (uiop:split-string (car pl) :separator '(#\:)))))
287  then ;; there is a param
288  (setf (car pl)
289  (mapcar #'(lambda (s)
290  (if* escape
291  then (percent-decode-string s nil)
292  else s))
293  segments))
294  else ;; no param
295  (setf (car pl)
296  (if* escape
297  then (percent-decode-string (car segments) nil)
298  else (car segments))))))