changeset 698: | 96958d3eb5b0 |
parent: | 7ce855f76e1d |
author: | Richard Westhaver <ellis@rwest.io> |
date: | Fri, 04 Oct 2024 22:04:59 -0400 |
permissions: | -rw-r--r-- |
description: | fixes |
586
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
1 | ;;; net/fetch.lisp --- Simple HTTP Downloads |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
2 | |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
3 | ;; Provides the DOWNLOAD and FETCH functions for easily download remote files. |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
4 | |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
5 | ;;; Commentary: |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
6 | |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
7 | ;;; Code: |
123
a4ed30cbe083
data testing, added ical and vcard formats
ellis <ellis@rwest.io>
parents:
122
diff
changeset
|
8 | (in-package :net/fetch) |
a4ed30cbe083
data testing, added ical and vcard formats
ellis <ellis@rwest.io>
parents:
122
diff
changeset
|
9 | |
122
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
10 | (define-condition invalid-path-error (error) |
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
11 | ((text :initarg :text :reader text))) |
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
12 | |
586
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
13 | (defvar *default-fetch-output-file* #P"index.html") |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
14 | |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
15 | (defun download (url &key (output (or (obj/uri:uri-path (obj/uri:uri url)) *default-fetch-output-file*)) |
584
35bb0d5ec95e
bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents:
549
diff
changeset
|
16 | (if-exists :error) (progress nil) (connect-timeout net/req:*default-connect-timeout*) |
35bb0d5ec95e
bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents:
549
diff
changeset
|
17 | cookies) |
586
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
18 | "Download a file from URL to OUTPUT." |
584
35bb0d5ec95e
bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents:
549
diff
changeset
|
19 | (let ((*progress-bar-enabled* progress)) |
302 | 20 | (multiple-value-bind (stream status header uri) |
584
35bb0d5ec95e
bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents:
549
diff
changeset
|
21 | (req:get url :want-stream t :force-binary t :connect-timeout connect-timeout :verbose (log:trace-p) |
35bb0d5ec95e
bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents:
549
diff
changeset
|
22 | :cookie-jar cookies) |
549 | 23 | (when (= status 200) |
584
35bb0d5ec95e
bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents:
549
diff
changeset
|
24 | (log:debug! "download connect OK:" url) |
35bb0d5ec95e
bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents:
549
diff
changeset
|
25 | (log:debug! "headers:" (hash-table-alist header)) |
586
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
26 | (let ((len (gethash "content-length" header)) |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
27 | (buff (make-array 4096 :element-type 'octet :adjustable t))) |
584
35bb0d5ec95e
bug fixes, added freedesktop.org.xml rule. more work on prolog/dql - considering lib/lang+forrth..
Richard Westhaver <ellis@rwest.io>
parents:
549
diff
changeset
|
28 | (when len (setf len (parse-integer len))) |
586
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
29 | (with-progress-maybe progress (len "downloading ~a to ~a..." url output) |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
30 | (with-open-file (out output :direction :output :element-type 'octet :if-exists if-exists) |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
31 | (loop |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
32 | (let ((end (read-sequence buff stream :end 4096))) |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
33 | (when progress (update-progress *progress-bar* end)) |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
34 | (write-sequence buff out :end end) |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
35 | (unless (= end 4096) |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
36 | (return)))))))) |
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
37 | (values stream status uri header)))) |
122
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
38 | |
302 | 39 | (defun split-file-path (path) |
40 | (let ((pos-last-slash (1+ (position #\/ path :from-end t)))) |
|
41 | (list (subseq path 0 pos-last-slash) |
|
42 | (subseq path pos-last-slash)))) |
|
122
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
43 | |
302 | 44 | (defun split-uri-string (uri-string) |
45 | (let ((pu (parse-uri uri-string))) |
|
46 | (cons (uri-host pu) (split-file-path (uri-path pu))))) |
|
122
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
47 | |
302 | 48 | (defun condition-path (path) |
49 | "Abuse parse-uri to strip possible get args from path" |
|
50 | (let ((p (parse-uri path))) (uri-path p))) |
|
122
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
51 | |
302 | 52 | (defun is-file (path) |
53 | (handler-case (probe-file path) |
|
54 | (type-error (e) #+sbcl (declare (ignore e)) (error 'invalid-path-error |
|
55 | :text (format nil "Invalid path: ~A" path))))) |
|
122
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
56 | |
302 | 57 | (defun %fetch (url-or-path &key (cache t) |
58 | dir |
|
59 | (flush nil)) |
|
60 | (cond |
|
61 | ((is-file (condition-path url-or-path)) (condition-path url-or-path)) |
|
538 | 62 | ((is-file (condition-path (concatenate 'string dir (uri-path url-or-path)))) |
302 | 63 | (condition-path (concatenate 'string dir url-or-path))) |
64 | ((parse-uri url-or-path) |
|
65 | (let* ((tmp-pathname (split-uri-string url-or-path)) |
|
66 | (file-pathstring (format nil "~{~A~^~}" (if dir (cons dir tmp-pathname) tmp-pathname))) |
|
67 | (file-pathname (ensure-directories-exist |
|
68 | file-pathstring))) |
|
69 | (if flush |
|
70 | (when (is-file file-pathname) (delete-file file-pathname)) |
|
71 | (if (and cache (probe-file file-pathname)) |
|
72 | (values file-pathname 200 "OK") |
|
549 | 73 | (download url-or-path :output file-pathname))))) |
302 | 74 | (t (values nil 404 "Not file of url")))) |
122
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
75 | |
586
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
76 | (defun fetch (url |
302 | 77 | &key |
78 | (dir) |
|
79 | (external-format :utf-8) |
|
80 | (cache t) |
|
81 | (stream nil) |
|
82 | (flush nil)) |
|
586
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
83 | "Fetch file from URL if not cached in DIR |
122
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
84 | stores the file in the location specified by dir if url or file is url the file |
586
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
85 | is stored in DIR/URI-HOST/URI-PATH. |
122
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
86 | |
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
87 | Note that it is important to ensure that dir and subdir if used end in a / |
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
88 | |
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
89 | -return: path to file or stream if :stream parameter is passed |
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
90 | -arguments: |
586
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
91 | - url: <string> pathname or url string identifying file to be fetched. |
122
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
92 | - stream: resuests that fetch returns a stream |
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
93 | - cache: <T|NIL> if T looks for file in -dir and uses that as source if NIL then the a fresh copy of the file is fetched |
275 | 94 | - dir: location to store fetched file. |
122
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
95 | - flush: if T fetch does not download the file it deletes the existing file. |
4ba88cac5bc7
num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff
changeset
|
96 | " |
586
7ce855f76e1d
net/fetch upgrades, fixes, net/err -> net/condition
Richard Westhaver <ellis@rwest.io>
parents:
584
diff
changeset
|
97 | (let ((fetched-path (%fetch url :dir dir :cache cache :flush flush))) |
302 | 98 | (if (not fetched-path) |
99 | nil |
|
100 | (if stream |
|
101 | (open fetched-path :direction :input :external-format external-format) |
|
102 | fetched-path)))) |