changelog shortlog graph tags branches changeset files file revisions raw help

Mercurial > core / annotate lisp/lib/net/fetch.lisp

changeset 698: 96958d3eb5b0
parent: 7ce855f76e1d
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
586
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
1
 ;;; net/fetch.lisp --- Simple HTTP Downloads
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
2
 
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
3
 ;; Provides the DOWNLOAD and FETCH functions for easily download remote files.
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
4
 
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
5
 ;;; Commentary:
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
6
 
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
7
 ;;; Code:
123
a4ed30cbe083 data testing, added ical and vcard formats
ellis <ellis@rwest.io>
parents: 122
diff changeset
8
 (in-package :net/fetch)
a4ed30cbe083 data testing, added ical and vcard formats
ellis <ellis@rwest.io>
parents: 122
diff changeset
9
 
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
10
 (define-condition invalid-path-error (error)
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
11
   ((text :initarg :text :reader text)))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
12
 
586
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
13
 (defvar *default-fetch-output-file* #P"index.html")
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
14
 
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
15
 (defun download (url &key (output (or (obj/uri:uri-path (obj/uri:uri url)) *default-fetch-output-file*))
584
35bb0d5ec95e bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
16
                           (if-exists :error) (progress nil) (connect-timeout net/req:*default-connect-timeout*)
35bb0d5ec95e bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
17
                           cookies)
586
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
18
   "Download a file from URL to OUTPUT."
584
35bb0d5ec95e bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
19
   (let ((*progress-bar-enabled* progress))
302
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
20
     (multiple-value-bind (stream status header uri)
584
35bb0d5ec95e bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
21
         (req:get url :want-stream t :force-binary t :connect-timeout connect-timeout :verbose (log:trace-p)
35bb0d5ec95e bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
22
                      :cookie-jar cookies)
549
32bd859533b3 fetch fixes
Richard Westhaver <ellis@rwest.io>
parents: 538
diff changeset
23
       (when (= status 200)
584
35bb0d5ec95e bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
24
         (log:debug! "download connect OK:" url)
35bb0d5ec95e bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
25
         (log:debug! "headers:" (hash-table-alist header))
586
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
26
         (let ((len (gethash "content-length" header))
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
27
               (buff (make-array 4096 :element-type 'octet :adjustable t)))
584
35bb0d5ec95e bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
28
           (when len (setf len (parse-integer len)))
586
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
29
           (with-progress-maybe progress (len "downloading ~a to ~a..." url output)
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
30
             (with-open-file (out output :direction :output :element-type 'octet :if-exists if-exists)
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
31
               (loop
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
32
                 (let ((end (read-sequence buff stream :end 4096)))
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
33
                   (when progress (update-progress *progress-bar* end))
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
34
                   (write-sequence buff out :end end)
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
35
                   (unless (= end 4096)
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
36
                     (return))))))))
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
37
       (values stream status uri header))))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
38
 
302
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
39
 (defun split-file-path (path)
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
40
   (let ((pos-last-slash (1+ (position #\/ path :from-end t))))
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
41
     (list (subseq path 0 pos-last-slash)
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
42
           (subseq path pos-last-slash))))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
43
 
302
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
44
 (defun split-uri-string (uri-string)
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
45
   (let ((pu (parse-uri uri-string)))
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
46
     (cons (uri-host pu) (split-file-path (uri-path pu)))))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
47
 
302
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
48
 (defun condition-path (path)
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
49
   "Abuse parse-uri to strip possible get args from path"
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
50
   (let ((p (parse-uri path))) (uri-path p)))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
51
 
302
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
52
 (defun is-file (path)
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
53
   (handler-case (probe-file path)
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
54
     (type-error (e) #+sbcl (declare (ignore e)) (error 'invalid-path-error
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
55
                                                        :text (format nil "Invalid path: ~A" path)))))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
56
 
302
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
57
 (defun %fetch (url-or-path &key (cache t)
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
58
                                 dir
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
59
                                 (flush nil))
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
60
   (cond
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
61
     ((is-file (condition-path url-or-path)) (condition-path url-or-path))
538
d84e518059be add parquet and arrow
Richard Westhaver <ellis@rwest.io>
parents: 440
diff changeset
62
     ((is-file (condition-path (concatenate 'string  dir (uri-path url-or-path))))
302
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
63
      (condition-path (concatenate 'string  dir url-or-path)))
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
64
     ((parse-uri url-or-path)
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
65
      (let* ((tmp-pathname (split-uri-string url-or-path))
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
66
             (file-pathstring (format nil "~{~A~^~}" (if dir (cons dir tmp-pathname) tmp-pathname)))
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
67
             (file-pathname (ensure-directories-exist
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
68
                             file-pathstring)))
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
69
        (if flush
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
70
            (when (is-file file-pathname) (delete-file file-pathname))
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
71
            (if (and cache (probe-file file-pathname))
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
72
                (values file-pathname 200 "OK")
549
32bd859533b3 fetch fixes
Richard Westhaver <ellis@rwest.io>
parents: 538
diff changeset
73
                (download url-or-path :output file-pathname)))))
302
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
74
     (t (values nil 404 "Not file of url"))))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
75
 
586
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
76
 (defun fetch (url
302
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
77
               &key
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
78
               (dir)
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
79
               (external-format :utf-8)
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
80
               (cache t)
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
81
               (stream nil)
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
82
               (flush nil))
586
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
83
   "Fetch file from URL if not cached in DIR
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
84
 stores the file in the location specified by dir if url or file is url the file
586
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
85
 is stored in DIR/URI-HOST/URI-PATH.
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
86
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
87
 Note that it is important to ensure that dir and subdir if used end in a /
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
88
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
89
 -return: path to file or stream if :stream parameter is passed
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
90
 -arguments:
586
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
91
   - url: <string> pathname or url string identifying file to be fetched.
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
92
   - stream: resuests that fetch returns a stream
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
93
   - cache: <T|NIL> if T looks for file in -dir and uses that as source if NIL then the a fresh copy of the file is fetched
275
78ef6145e272 return of the uri
Richard Westhaver <ellis@rwest.io>
parents: 142
diff changeset
94
   - dir: location to store fetched file.
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
95
   - flush: if T fetch does not download the file it deletes the existing file.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
96
 "
586
7ce855f76e1d net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents: 584
diff changeset
97
   (let ((fetched-path (%fetch url :dir dir :cache cache :flush flush)))
302
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
98
     (if (not fetched-path)
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
99
         nil
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
100
         (if stream
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
101
             (open fetched-path :direction :input :external-format external-format)
6ea593ae4cea net/fetch
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
102
             fetched-path))))