changelog shortlog graph tags branches changeset file revisions annotate raw help

Mercurial > core / lisp/lib/obj/uri/domain.lisp

revision 359: 0e00dec3de03
child 365: 49c3f3d11432
     1.1--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2+++ b/lisp/lib/obj/uri/domain.lisp	Wed May 22 22:16:26 2024 -0400
     1.3@@ -0,0 +1,239 @@
     1.4+;;; obj/uri/domain.lisp --- URI Domains
     1.5+
     1.6+;;
     1.7+
     1.8+;;; Code:
     1.9+(in-package :obj/uri)
    1.10+
    1.11+(defun next-subdomain (hostname &optional (start 0))
    1.12+  (let ((pos (position #\. hostname :start start)))
    1.13+    (when pos
    1.14+      (incf pos)
    1.15+      (values (subseq hostname pos)
    1.16+              pos))))
    1.17+
    1.18+(defun make-subdomain-iter (hostname)
    1.19+  (let ((current-pos 0)
    1.20+        (first t))
    1.21+    (lambda ()
    1.22+      (block nil
    1.23+        (when first
    1.24+          (setq first nil)
    1.25+          (return hostname))
    1.26+        (multiple-value-bind (subdomain pos)
    1.27+            (next-subdomain hostname current-pos)
    1.28+          (when subdomain
    1.29+            (setf current-pos pos)
    1.30+            subdomain))))))
    1.31+
    1.32+(defvar *etlds* nil)
    1.33+
    1.34+(defun parse-domain (hostname)
    1.35+  (dolist (tld (third *etlds*))
    1.36+    (when (ends-with-subseq tld hostname)
    1.37+      (if (= (length tld) (length hostname))
    1.38+          (return-from parse-domain hostname)
    1.39+          (when (char= (aref hostname (- (length hostname) (length tld) 1))
    1.40+                       #\.)
    1.41+            (return-from parse-domain
    1.42+              (subseq hostname
    1.43+                      (- (length hostname) (length tld))))))))
    1.44+  (loop with iter = (make-subdomain-iter hostname)
    1.45+        with pre-prev-subdomain = nil
    1.46+        with prev-subdomain = nil
    1.47+        for subdomain = (funcall iter)
    1.48+        while subdomain
    1.49+        if (gethash subdomain (second *etlds*)) do
    1.50+          (return pre-prev-subdomain)
    1.51+        else if (gethash subdomain (first *etlds*)) do
    1.52+          (return (if (string= subdomain hostname)
    1.53+                      nil
    1.54+                      prev-subdomain))
    1.55+        do (setf pre-prev-subdomain prev-subdomain
    1.56+                 prev-subdomain subdomain)
    1.57+        finally
    1.58+           (let* ((pos (position #\. hostname :from-end t))
    1.59+                  (pos (and pos
    1.60+                            (position #\. hostname :from-end t :end pos))))
    1.61+             (return
    1.62+               (if pos
    1.63+                   (subseq hostname (1+ pos))
    1.64+                   hostname)))))
    1.65+
    1.66+(defun uri-tld (uri)
    1.67+  (let ((host (uri-host uri)))
    1.68+    (when (and host
    1.69+               (not (ip-addr-p host)))
    1.70+      (let ((pos (position #\. host :from-end t)))
    1.71+        (if pos
    1.72+            (subseq host (1+ pos))
    1.73+            host)))))
    1.74+
    1.75+(defun uri-domain (uri)
    1.76+  (let ((host (uri-host uri)))
    1.77+    (when (and host
    1.78+               (not (ip-addr-p host)))
    1.79+      (parse-domain host))))
    1.80+
    1.81+(defun ipv4-addr-p (host)
    1.82+  (declare (optimize (speed 3) (safety 2))
    1.83+           #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
    1.84+  (check-type host string)
    1.85+  (flet ((read-byte-string (string start)
    1.86+           (declare (type fixnum start))
    1.87+           (when (<= (length string) start)
    1.88+             (return-from read-byte-string nil))
    1.89+           (let* ((end (+ start 2))
    1.90+                  (endp (<= (1- (length string)) end))
    1.91+                  (end (if endp
    1.92+                           (1- (length string))
    1.93+                           end))
    1.94+                  (res 0))
    1.95+             (declare (type fixnum end res))
    1.96+             (do ((i start (1+ i)))
    1.97+                 ((< end i))
    1.98+               (declare (type fixnum i))
    1.99+               (unless (char<= #\0 (aref string i) #\9)
   1.100+                 (return-from read-byte-string
   1.101+                   (if (= i start)
   1.102+                       nil
   1.103+                       (values res i nil))))
   1.104+               (setq res
   1.105+                     (+ (* res 10)
   1.106+                        (- (char-code (aref string i)) 48))))
   1.107+             (cond
   1.108+               (endp
   1.109+                (values res end t))
   1.110+               ((char= (aref string (1+ end)) #\.)
   1.111+                (values res (1+ end) nil))))))
   1.112+    (let ((start 0))
   1.113+      (dotimes (i 4 t)
   1.114+        (multiple-value-bind (byte pos endp)
   1.115+            (read-byte-string host start)
   1.116+          (unless (typep byte '(unsigned-byte 8))
   1.117+            (return nil))
   1.118+          (unless (xor endp (not (= i 3)))
   1.119+            (return nil))
   1.120+          (setq start (1+ pos)))))))
   1.121+
   1.122+(defun trim-brackets (host)
   1.123+  (if (char= (aref host 0) #\[)
   1.124+      (if (char= (aref host (1- (length host))) #\])
   1.125+          (subseq host 1 (1- (length host)))
   1.126+          nil)
   1.127+      host))
   1.128+
   1.129+(defun ipv6-addr-p (host)
   1.130+  (declare (optimize (speed 3) (safety 2))
   1.131+           #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
   1.132+  (check-type host string)
   1.133+  (when (= (length host) 0)
   1.134+    (return-from ipv6-addr-p nil))
   1.135+
   1.136+  (labels ((read-section (string start &optional read-colons)
   1.137+             (declare (type string string)
   1.138+                      (type fixnum start))
   1.139+             (when (<= (length string) start)
   1.140+               (return-from read-section
   1.141+                 (values start read-colons t)))
   1.142+             (when (char= (aref string start) #\:)
   1.143+               (cond
   1.144+                 ((<= (length string) (1+ start))
   1.145+                  (return-from read-section nil))
   1.146+                 ((char= (aref string (1+ start)) #\:)
   1.147+                  (if read-colons
   1.148+                      (return-from read-section nil)
   1.149+                      (return-from read-section (read-section string (+ 2 start) t))))
   1.150+                 (t (incf start))))
   1.151+             (let* ((end (+ start 4))
   1.152+                    (endp (<= (length string) end))
   1.153+                    (end (if endp
   1.154+                             (length string)
   1.155+                             end)))
   1.156+               (declare (type fixnum end))
   1.157+
   1.158+               (do ((i start (1+ i)))
   1.159+                   ((= end i))
   1.160+                 (let ((ch (aref string i)))
   1.161+                   (cond
   1.162+                     ((char= ch #\:)
   1.163+                      (return-from read-section
   1.164+                        (values i read-colons nil)))
   1.165+                     ((or (char<= #\0 ch #\9)
   1.166+                          (char<= #\a ch #\f)
   1.167+                          (char<= #\A ch #\F)))
   1.168+                     (t (return-from read-section nil)))))
   1.169+
   1.170+               (if endp
   1.171+                   (values end read-colons endp)
   1.172+                   (if (char= (aref string end) #\:)
   1.173+                       (values end read-colons endp)
   1.174+                       nil)))))
   1.175+
   1.176+    (setq host (trim-brackets host))
   1.177+    (unless host
   1.178+      (return-from ipv6-addr-p nil))
   1.179+
   1.180+    (let ((start 0)
   1.181+          (read-colons-p nil))
   1.182+      (dotimes (i 8 t)
   1.183+        (multiple-value-bind (e read-colons endp)
   1.184+            (read-section host start read-colons-p)
   1.185+          (unless e
   1.186+            (return-from ipv6-addr-p nil))
   1.187+          (when endp
   1.188+            (when (and (not (= i 7))
   1.189+                       (not read-colons))
   1.190+              (return-from ipv6-addr-p nil))
   1.191+            (return-from ipv6-addr-p t))
   1.192+          (when (and (= i 7) (not endp))
   1.193+            (return-from ipv6-addr-p nil))
   1.194+          (setq start e
   1.195+                read-colons-p read-colons))))))
   1.196+
   1.197+(defun ip-addr-p (host)
   1.198+  (or (ipv4-addr-p host)
   1.199+      (ipv6-addr-p host)))
   1.200+
   1.201+(defun ip-addr= (ip1 ip2)
   1.202+  (flet ((parse-ipv6 (ip)
   1.203+           (setq ip (trim-brackets ip))
   1.204+           (cond
   1.205+             ((char= (aref ip 0) #\:)
   1.206+              (setq ip (concatenate 'string "0" ip)))
   1.207+             ((char= (aref ip (1- (length ip))) #\:)
   1.208+              (setq ip (concatenate 'string ip "0"))))
   1.209+           (let* ((ip-parsed (split-sequence #\: ip))
   1.210+                  (len (length ip-parsed)))
   1.211+             (loop for section in ip-parsed
   1.212+                   if (string= section "")
   1.213+                     append (make-list (- 9 len) :initial-element 0)
   1.214+                   else
   1.215+                     collect (parse-integer section :radix 16)))))
   1.216+    (cond
   1.217+      ((ipv4-addr-p ip1)
   1.218+       (string= ip1 ip2))
   1.219+      ((ipv6-addr-p ip1)
   1.220+       (and (ipv6-addr-p ip2)
   1.221+            (equal (parse-ipv6 ip1)
   1.222+                   (parse-ipv6 ip2)))))))
   1.223+
   1.224+(defun cookie-domain-p (domain cookie-domain)
   1.225+  (unless cookie-domain
   1.226+    (return-from cookie-domain-p t))
   1.227+  (if (ip-addr-p domain)
   1.228+      (ip-addr= domain cookie-domain)
   1.229+      (progn
   1.230+        ;; ignore the preceding "."
   1.231+        (when (char= (aref cookie-domain 0) #\.)
   1.232+          (setq cookie-domain (subseq cookie-domain 1)))
   1.233+        (when-let ((registered-domain (parse-domain domain)))
   1.234+          (cond
   1.235+            ((= (length registered-domain) (length cookie-domain))
   1.236+             (string= registered-domain cookie-domain))
   1.237+            ((= (length domain) (length cookie-domain))
   1.238+             (string= domain cookie-domain))
   1.239+            (t (and (ends-with-subseq domain cookie-domain)
   1.240+                    (char= #\.
   1.241+                           (aref cookie-domain (- (length cookie-domain)
   1.242+                                                  (length registered-domain)))))))))))