changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/obj/uri/domain.lisp

changeset 698: 96958d3eb5b0
parent: 49c3f3d11432
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
1 ;;; obj/uri/domain.lisp --- URI Domains
2 
3 ;;
4 
5 ;;; Code:
6 (in-package :obj/uri)
7 
8 (eval-when (:compile-toplevel :load-toplevel :execute)
9  (defparameter *default-etld-names*
10  (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/psl.dat")))
11 
12  (defun load-etld-data (&optional (etld-names-file *default-etld-names*))
13  (when etld-names-file
14  (with-open-file (in etld-names-file
15  :element-type #+lispworks :default #-lispworks 'character
16  :external-format #+clisp charset:utf-8 #-clisp :utf-8)
17  (loop with special-tlds = nil
18  with normal-tlds = (make-hash-table :test 'equal)
19  with wildcard-tlds = (make-hash-table :test 'equal)
20  for line = (read-line in nil nil)
21  while line
22  unless (or (= 0 (length line))
23  (starts-with-subseq "//" line))
24  do (cond
25  ((starts-with-subseq "*" line)
26  (setf (gethash (subseq line 2) wildcard-tlds) t))
27  ((starts-with-subseq "!" line)
28  (push (subseq line 1) special-tlds))
29  (t
30  (setf (gethash line normal-tlds) t)))
31  finally (return (list normal-tlds wildcard-tlds special-tlds)))))))
32 
33 (defvar *etlds* (load-etld-data))
34 
35 (defun next-subdomain (hostname &optional (start 0))
36  (let ((pos (position #\. hostname :start start)))
37  (when pos
38  (incf pos)
39  (values (subseq hostname pos)
40  pos))))
41 
42 (defun make-subdomain-iter (hostname)
43  (let ((current-pos 0)
44  (first t))
45  (lambda ()
46  (block nil
47  (when first
48  (setq first nil)
49  (return hostname))
50  (multiple-value-bind (subdomain pos)
51  (next-subdomain hostname current-pos)
52  (when subdomain
53  (setf current-pos pos)
54  subdomain))))))
55 
56 (defun parse-domain (hostname)
57  (when *etlds*
58  (dolist (tld (third *etlds*))
59  (when (ends-with-subseq tld hostname)
60  (if (= (length tld) (length hostname))
61  (return-from parse-domain hostname)
62  (when (char= (aref hostname (- (length hostname) (length tld) 1))
63  #\.)
64  (return-from parse-domain
65  (subseq hostname
66  (- (length hostname) (length tld))))))))
67  (loop with iter = (make-subdomain-iter hostname)
68  with pre-prev-subdomain = nil
69  with prev-subdomain = nil
70  for subdomain = (funcall iter)
71  while subdomain
72  if (gethash subdomain (second *etlds*)) do
73  (return pre-prev-subdomain)
74  else if (gethash subdomain (first *etlds*)) do
75  (return (if (string= subdomain hostname)
76  nil
77  prev-subdomain))
78  do (setf pre-prev-subdomain prev-subdomain
79  prev-subdomain subdomain)
80  finally
81  (let* ((pos (position #\. hostname :from-end t))
82  (pos (and pos
83  (position #\. hostname :from-end t :end pos))))
84  (return
85  (if pos
86  (subseq hostname (1+ pos))
87  hostname))))))
88 
89 (defun uri-tld (uri)
90  (let ((host (uri-host uri)))
91  (when (and host
92  (not (ip-addr-p host)))
93  (let ((pos (position #\. host :from-end t)))
94  (if pos
95  (subseq host (1+ pos))
96  host)))))
97 
98 (defun uri-domain (uri)
99  (let ((host (uri-host uri)))
100  (when (and host
101  (not (ip-addr-p host)))
102  (parse-domain host))))
103 
104 (defun ipv4-addr-p (host)
105  (declare (optimize (speed 3) (safety 2))
106  #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
107  (check-type host string)
108  (flet ((read-byte-string (string start)
109  (declare (type fixnum start))
110  (when (<= (length string) start)
111  (return-from read-byte-string nil))
112  (let* ((end (+ start 2))
113  (endp (<= (1- (length string)) end))
114  (end (if endp
115  (1- (length string))
116  end))
117  (res 0))
118  (declare (type fixnum end res))
119  (do ((i start (1+ i)))
120  ((< end i))
121  (declare (type fixnum i))
122  (unless (char<= #\0 (aref string i) #\9)
123  (return-from read-byte-string
124  (if (= i start)
125  nil
126  (values res i nil))))
127  (setq res
128  (+ (* res 10)
129  (- (char-code (aref string i)) 48))))
130  (cond
131  (endp
132  (values res end t))
133  ((char= (aref string (1+ end)) #\.)
134  (values res (1+ end) nil))))))
135  (let ((start 0))
136  (dotimes (i 4 t)
137  (multiple-value-bind (byte pos endp)
138  (read-byte-string host start)
139  (unless (typep byte '(unsigned-byte 8))
140  (return nil))
141  (unless (xor endp (not (= i 3)))
142  (return nil))
143  (setq start (1+ pos)))))))
144 
145 (defun trim-brackets (host)
146  (if (char= (aref host 0) #\[)
147  (if (char= (aref host (1- (length host))) #\])
148  (subseq host 1 (1- (length host)))
149  nil)
150  host))
151 
152 (defun ipv6-addr-p (host)
153  (declare (optimize (speed 3) (safety 2))
154  #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
155  (check-type host string)
156  (when (= (length host) 0)
157  (return-from ipv6-addr-p nil))
158 
159  (labels ((read-section (string start &optional read-colons)
160  (declare (type string string)
161  (type fixnum start))
162  (when (<= (length string) start)
163  (return-from read-section
164  (values start read-colons t)))
165  (when (char= (aref string start) #\:)
166  (cond
167  ((<= (length string) (1+ start))
168  (return-from read-section nil))
169  ((char= (aref string (1+ start)) #\:)
170  (if read-colons
171  (return-from read-section nil)
172  (return-from read-section (read-section string (+ 2 start) t))))
173  (t (incf start))))
174  (let* ((end (+ start 4))
175  (endp (<= (length string) end))
176  (end (if endp
177  (length string)
178  end)))
179  (declare (type fixnum end))
180 
181  (do ((i start (1+ i)))
182  ((= end i))
183  (let ((ch (aref string i)))
184  (cond
185  ((char= ch #\:)
186  (return-from read-section
187  (values i read-colons nil)))
188  ((or (char<= #\0 ch #\9)
189  (char<= #\a ch #\f)
190  (char<= #\A ch #\F)))
191  (t (return-from read-section nil)))))
192 
193  (if endp
194  (values end read-colons endp)
195  (if (char= (aref string end) #\:)
196  (values end read-colons endp)
197  nil)))))
198 
199  (setq host (trim-brackets host))
200  (unless host
201  (return-from ipv6-addr-p nil))
202 
203  (let ((start 0)
204  (read-colons-p nil))
205  (dotimes (i 8 t)
206  (multiple-value-bind (e read-colons endp)
207  (read-section host start read-colons-p)
208  (unless e
209  (return-from ipv6-addr-p nil))
210  (when endp
211  (when (and (not (= i 7))
212  (not read-colons))
213  (return-from ipv6-addr-p nil))
214  (return-from ipv6-addr-p t))
215  (when (and (= i 7) (not endp))
216  (return-from ipv6-addr-p nil))
217  (setq start e
218  read-colons-p read-colons))))))
219 
220 (defun ip-addr-p (host)
221  (or (ipv4-addr-p host)
222  (ipv6-addr-p host)))
223 
224 (defun ip-addr= (ip1 ip2)
225  (flet ((parse-ipv6 (ip)
226  (setq ip (trim-brackets ip))
227  (cond
228  ((char= (aref ip 0) #\:)
229  (setq ip (concatenate 'string "0" ip)))
230  ((char= (aref ip (1- (length ip))) #\:)
231  (setq ip (concatenate 'string ip "0"))))
232  (let* ((ip-parsed (split-sequence #\: ip))
233  (len (length ip-parsed)))
234  (loop for section in ip-parsed
235  if (string= section "")
236  append (make-list (- 9 len) :initial-element 0)
237  else
238  collect (parse-integer section :radix 16)))))
239  (cond
240  ((ipv4-addr-p ip1)
241  (string= ip1 ip2))
242  ((ipv6-addr-p ip1)
243  (and (ipv6-addr-p ip2)
244  (equal (parse-ipv6 ip1)
245  (parse-ipv6 ip2)))))))
246 
247 (defun cookie-domain-p (domain cookie-domain)
248  (unless cookie-domain
249  (return-from cookie-domain-p t))
250  (if (ip-addr-p domain)
251  (ip-addr= domain cookie-domain)
252  (progn
253  ;; ignore the preceding "."
254  (when (char= (aref cookie-domain 0) #\.)
255  (setq cookie-domain (subseq cookie-domain 1)))
256  (when-let ((registered-domain (parse-domain domain)))
257  (cond
258  ((= (length registered-domain) (length cookie-domain))
259  (string= registered-domain cookie-domain))
260  ((= (length domain) (length cookie-domain))
261  (string= domain cookie-domain))
262  (t (and (ends-with-subseq domain cookie-domain)
263  (char= #\.
264  (aref cookie-domain (- (length cookie-domain)
265  (length registered-domain)))))))))))