changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/std/file.lisp

changeset 646: 95fd920af398
parent: 32bd859533b3
child: 5f81d888c31f
author: Richard Westhaver <ellis@rwest.io>
date: Wed, 11 Sep 2024 18:08:29 -0400
permissions: -rw-r--r--
description: error handling methods for clap
1 ;;; std/file.lisp --- Standard File Library
2 
3 ;;
4 
5 ;;; Code:
6 (in-package :std/file)
7 
8 ;;; Sexp utils
9 ;; (reexport-from :uiop :include '(read-file-form read-file-forms slurp-stream-forms))
10 
11 (defun tmpfile (size)
12  "Create an anonymous temporary file of the given size. Returns a file descriptor."
13  (let (done fd pathname)
14  (unwind-protect
15  (progn
16  (setf (values fd pathname) (sb-posix:mkstemp "/dev/shm/tmp.XXXXXXXX"))
17  (sb-posix:unlink pathname)
18  (sb-posix:ftruncate fd size)
19  (setf done t))
20  (when (and fd (not done)) (sb-posix:close fd)))
21  fd))
22 
23 (declaim (inline octet-vector=/unsafe))
24 (defun octet-vector=/unsafe (v1 v2 start1 end1 start2 end2)
25  (declare (optimize (speed 3)
26  (safety 0)
27  (debug 0)
28  (compilation-speed 0))
29  (type octet-vector v1 v2)
30  (type array-index start1 start2)
31  (type array-length end1 end2))
32  (and (= (- end1 start1)
33  (- end2 start2))
34  (loop for i from start1 below end1
35  for j from start2 below end2
36  always (eql (aref v1 i) (aref v2 j)))))
37 
38 (defun octet-vector= (v1 v2 &key (start1 0) end1
39  (start2 0) end2)
40  "Like `string=' for octet vectors."
41  (declare (octet-vector v1 v2)
42  (array-index start1 start2)
43  ((or array-length null) end1 end2)
44  (optimize speed))
45  (let* ((len1 (length v1))
46  (len2 (length v2))
47  (end1 (or end1 len1))
48  (end2 (or end2 len2)))
49  (assert (<= start1 end1 len1))
50  (assert (<= start2 end2 len2))
51  (octet-vector=/unsafe v1 v2 start1 end1 start2 end2)))
52 
53 (defun file-size-in-octets (file)
54  (multiple-value-bind (path namestring)
55  (etypecase file
56  (string (values (pathname file)
57  file))
58  (pathname (values file
59  (sb-ext:native-namestring file))))
60  (declare (ignorable path namestring))
61  (sb-posix:stat-size (sb-posix:stat path))))
62 
63 (define-constant si-prefixes
64  '((-30 "quecto" "q")
65  (-27 "ronto" "r")
66  (-24 "yocto" "y")
67  (-21 "zepto" "z")
68  (-18 "atto" "a")
69  (-15 "femto" "f")
70  (-12 "pico" "p")
71  ( -9 "nano" "n")
72  ( -6 "micro" "μ")
73  ( -3 "milli" "m")
74  ( -2 "centi" "c")
75  ( -1 "deci" "d")
76  ( 0 "" "" )
77  ( 1 "deca" "da")
78  ( 2 "hecto" "h")
79  ( 3 "kilo" "k")
80  ( 6 "mega" "M")
81  ( 9 "giga" "G")
82  ( 12 "tera" "T")
83  ( 15 "peta" "P")
84  ( 18 "exa" "E")
85  ( 21 "zetta" "Z")
86  ( 24 "yotta" "Y")
87  ( 27 "ronna" "R")
88  ( 30 "quetta" "Q"))
89  :test #'equalp
90  :documentation "List as SI prefixes: power of ten, long form, short form.")
91 
92 (define-constant si-prefixes-base-1000
93  (loop for (pow long short) in si-prefixes
94  unless (and (not (zerop pow))
95  (< (abs pow) 3))
96  collect (list (truncate pow 3) long short))
97  :test #'equalp
98  :documentation "The SI prefixes as powers of 1000, with centi, deci, deca and hecto omitted.")
99 
100 (define-constant iec-prefixes
101  '(( 0 "" "")
102  (10 "kibi" "Ki")
103  (20 "mebi" "Mi")
104  (30 "gibi" "Gi")
105  (40 "tebi" "Ti")
106  (50 "pebi" "Pi")
107  (60 "exbi" "Ei"))
108  :test #'equalp
109  :documentation "The IEC binary prefixes, as powers of 2.")
110 
111 (eval-always
112  (defun single (seq)
113  "Is SEQ a sequence of one element?"
114  (= (length seq) 1)))
115 
116 (defmacro si-prefix-rec (n base prefixes)
117  (cond ((null prefixes) (error "No prefixes!"))
118  ((single prefixes)
119  (destructuring-bind ((power long short)) prefixes
120  `(values ,long ,short ,(expt base power))))
121  (t
122  ;; good enough
123  (let* ((halfway (ceiling (length prefixes) 2))
124  (lo (subseq prefixes 0 halfway))
125  (hi (subseq prefixes halfway))
126  (split (* (expt base (caar hi)))))
127  `(if (< ,n ,split)
128  (si-prefix-rec ,n ,base ,lo)
129  (si-prefix-rec ,n ,base ,hi))))))
130 
131 (defun si-prefix (n &key (base 1000))
132  "Given a number, return the prefix of the nearest SI unit.
133 
134 Three values are returned: the long form, the short form, and the
135 multiplying factor.
136 
137  (si-prefix 1001) => \"kilo\", \"k\", 1000d0
138 
139 BASE can be 1000, 10, 1024, or 2. 1000 is the default, and prefixes
140 start at kilo and milli. Base 10 is mostly the same, except the
141 prefixes centi, deci, deca and hecto are also used. Base 1024 uses the
142 same prefixes as 1000, but with 1024 as the base, as in vulgar file
143 sizes. Base 2 uses the IEC binary prefixes."
144  (if (zerop n) (values "" "" 1d0)
145  (let ((n (abs (coerce n 'double-float))))
146  (ecase base
147  (2 (si-prefix-rec n 2d0 #.iec-prefixes))
148  (10 (si-prefix-rec n 10d0 #.si-prefixes))
149  (1000 (si-prefix-rec n 1000d0 #.si-prefixes-base-1000))
150  (1024 (si-prefix-rec n 1024d0 #.si-prefixes-base-1000))))))
151 
152 (defun human-size-formatter (size &key (flavor :si)
153  (space (eql flavor :si)))
154  "Auxiliary function for formatting quantities human-readably.
155 Returns two values: a format control and a list of arguments.
156 
157 This can be used to integrate the human-readable printing of
158 quantities into larger format control strings using the recursive
159 processing format directive (~?):
160 
161  (multiple-value-bind (control args)
162  (human-size-formatter size)
163  (format t \"~?\" control args))"
164  (let ((size (coerce size 'double-float))
165  ;; Avoid printing exponent markers.
166  (*read-default-float-format* 'double-float)
167  (base (ecase flavor
168  (:file 1024)
169  (:si 1000)
170  (:iec 2))))
171  (multiple-value-bind (long short factor)
172  (si-prefix size :base base)
173  (declare (ignore long))
174  (let* ((size (/ size factor))
175  (int (round size))
176  (size
177  (if (> (abs (- size int))
178  0.05d0)
179  size
180  int)))
181  (values (formatter "~:[~d~;~,1f~]~:[~; ~]~a")
182  (list (floatp size) size space short))))))
183 
184 (defun format-human-size (stream size
185  &key (flavor :si)
186  (space (eql flavor :si)))
187  "Write SIZE to STREAM, in human-readable form.
188 
189 STREAM is interpreted as by `format'.
190 
191 If FLAVOR is `:si' (the default) the base is 1000 and SI prefixes are used.
192 
193 If FLAVOR is `:file', the base is 1024 and SI prefixes are used.
194 
195 If FLAVOR is `:iec', the base is 1024 bytes and IEC prefixes (Ki, Mi,
196 etc.) are used.
197 
198 If SPACE is non-nil, include a space between the number and the
199 prefix. (Defaults to T if FLAVOR is `:si'.)"
200  (if (zerop size)
201  (format stream "0")
202  (multiple-value-bind (formatter args)
203  (human-size-formatter size :flavor flavor :space space)
204  (format stream "~?" formatter args))))
205 
206 (defun format-file-size-human-readable (stream file-size
207  &key flavor
208  (space (eql flavor :si))
209  (suffix (if (eql flavor :iec) "B" "")))
210  "Write FILE-SIZE, a file size in bytes, to STREAM, in human-readable form.
211 
212 STREAM is interpreted as by `format'.
213 
214 If FLAVOR is nil, kilobytes are 1024 bytes and SI prefixes are used.
215 
216 If FLAVOR is `:si', kilobytes are 1000 bytes and SI prefixes are used.
217 
218 If FLAVOR is `:iec', kilobytes are 1024 bytes and IEC prefixes (Ki,
219 Mi, etc.) are used.
220 
221 If SPACE is non-nil, include a space between the number and the
222 prefix. (Defaults to T if FLAVOR is `:si'.)
223 
224 SUFFIX is the suffix to use; defaults to B if FLAVOR is `:iec',
225 otherwise empty."
226  (check-type file-size (integer 0 *))
227  (if (zerop file-size)
228  (format stream "0")
229  (let ((flavor (if (null flavor) :file flavor)))
230  (multiple-value-bind (formatter args)
231  (human-size-formatter file-size :flavor flavor :space space)
232  (format stream "~?~a" formatter args suffix)))))
233 
234 (defun file-size-human-readable (file &key flavor space suffix stream)
235  "Format the size of FILE (in octets) using `format-file-size-human-readable'.
236 The size of file is found by `trivial-file-size:file-size-in-octets'.
237 
238 Inspired by the function of the same name in Emacs."
239  (let ((file-size (file-size-in-octets file)))
240  (format-file-size-human-readable
241  stream
242  file-size
243  :flavor flavor
244  :suffix suffix
245  :space space)))
246 
247 (defmacro with-open-files ((&rest args) &body body)
248  "A simple macro to open one or more files providing the streams for the
249 BODY. The ARGS is a list of `(stream filespec options*)` as supplied to
250 WITH-OPEN-FILE."
251  (case (length args)
252  ((0)
253  `(progn ,@body))
254  ((1)
255  `(with-open-file ,(first args) ,@body))
256  (t `(with-open-file ,(first args)
257  (with-open-files
258  ,(rest args) ,@body)))))
259 
260 (defmacro with-open-file* ((stream filespec &key direction element-type
261  if-exists if-does-not-exist external-format)
262  &body body)
263  "Just like WITH-OPEN-FILE, but NIL values in the keyword arguments
264 mean to use the default value specified for OPEN."
265  (once-only (direction element-type if-exists if-does-not-exist external-format)
266  `(with-open-stream
267  (,stream (apply #'open ,filespec
268  (append
269  (when ,direction
270  (list :direction ,direction))
271  (list :element-type (or ,element-type
272  +default-element-type+))
273  (when ,if-exists
274  (list :if-exists ,if-exists))
275  (when ,if-does-not-exist
276  (list :if-does-not-exist ,if-does-not-exist))
277  (when ,external-format
278  (list :external-format ,external-format)))))
279  ,@body)))
280 
281 (defmacro with-input-from-file ((stream-name file-name &rest args
282  &key (direction nil direction-p)
283  &allow-other-keys)
284  &body body)
285  "Evaluate BODY with STREAM-NAME to an input stream on the file
286 FILE-NAME. ARGS is sent as is to the call to OPEN except EXTERNAL-FORMAT,
287 which is only sent to WITH-OPEN-FILE when it's not NIL."
288  (declare (ignore direction))
289  (when direction-p
290  (error "Can't specify :DIRECTION for WITH-INPUT-FROM-FILE."))
291  `(with-open-file* (,stream-name ,file-name :direction :input ,@args)
292  ,@body))
293 
294 (defmacro with-output-to-file ((stream-name file-name &rest args
295  &key (direction nil direction-p)
296  &allow-other-keys)
297  &body body)
298  "Evaluate BODY with STREAM-NAME to an output stream on the file
299 FILE-NAME. ARGS is sent as is to the call to OPEN except EXTERNAL-FORMAT,
300 which is only sent to WITH-OPEN-FILE when it's not NIL."
301  (declare (ignore direction))
302  (when direction-p
303  (error "Can't specify :DIRECTION for WITH-OUTPUT-TO-FILE."))
304  `(with-open-file* (,stream-name ,file-name :direction :output ,@args)
305  ,@body))
306 
307 (defun write-stream-into-file (stream pathname &key (if-exists :error) if-does-not-exist)
308  "Read STREAM and write the contents into PATHNAME.
309 
310 STREAM will be closed afterwards, so wrap it with
311 `make-concatenated-stream' if you want it left open."
312  (check-type pathname pathname)
313  (with-output-to-file (out pathname
314  :element-type (stream-element-type stream)
315  :if-exists if-exists
316  :if-does-not-exist if-does-not-exist)
317  (copy-stream stream out))
318 pathname)
319 
320 (defun write-file-into-stream (pathname output &key (if-does-not-exist :error)
321  (external-format :default))
322  "Write the contents of FILE into STREAM."
323  (check-type pathname pathname)
324  (with-input-from-file (input pathname
325  :element-type (stream-element-type output)
326  :if-does-not-exist if-does-not-exist
327  :external-format external-format)
328  (copy-stream input output :end (file-size-in-octets pathname))))
329 
330 (defun file= (file1 file2 &key (buffer-size 4096))
331  "Compare FILE1 and FILE2 octet by octet, \(possibly) using buffers
332 of BUFFER-SIZE."
333  (declare (ignorable buffer-size))
334  (let ((file1 (truename file1))
335  (file2 (truename file2)))
336  (or (equal file1 file2)
337  (and (= (file-size-in-octets file1)
338  (file-size-in-octets file2))
339  #+ccl (file=/mmap file1 file2)
340  #-ccl (file=/loop file1 file2 :buffer-size buffer-size)))))
341 
342 (defun file=/loop (file1 file2 &key (buffer-size 4096))
343  "Compare two files by looping over their contents using a buffer."
344  (declare
345  (type pathname file1 file2)
346  (type array-length buffer-size)
347  (optimize (safety 1) (debug 0) (compilation-speed 0)))
348  (flet ((make-buffer ()
349  (make-array buffer-size
350  :element-type 'octet
351  :initial-element 0)))
352  (declare (inline make-buffer))
353  (with-open-files ((file1 file1 :element-type 'octet :direction :input)
354  (file2 file2 :element-type 'octet :direction :input))
355  (and (= (file-length file1)
356  (file-length file2))
357  (locally (declare (optimize speed))
358  (loop with buffer1 = (make-buffer)
359  with buffer2 = (make-buffer)
360  for end1 = (read-sequence buffer1 file1)
361  for end2 = (read-sequence buffer2 file2)
362  until (or (zerop end1) (zerop end2))
363  always (and (= end1 end2)
364  (octet-vector= buffer1 buffer2
365  :end1 end1
366  :end2 end2))))))))
367 
368 (defun file-size (file &key (element-type '(unsigned-byte 8)))
369  "The size of FILE, in units of ELEMENT-TYPE (defaults to bytes).
370 
371 The size is computed by opening the file and getting the length of the
372 resulting stream.
373 
374 If all you want is to read the file's size in octets from its metadata,
375 consider FILE-SIZE-IN-OCTETS instead."
376  (check-type file (or string pathname))
377  (with-input-from-file (in file :element-type element-type)
378  (file-length in)))
379 
380 (defun file-timestamp ()
381  "Returns current timestamp as a string suitable as the name of a timestamped-file."
382  (multiple-value-bind (sec min hr day mon yr)
383  (get-decoded-time)
384  (format nil "~4d~2,'0d~2,'0d_~2,'0d~2,'0d~2,'0d" yr mon day hr min sec)))
385 
386 (defun file-date ()
387  "Returns current date as a string suitable as the name of a timestamped-file."
388  (multiple-value-bind (sec min hr day mon yr)
389  (get-decoded-time)
390  (declare (ignore sec min hr))
391  (format nil "~4d~2,'0d~2,'0d" yr mon day)))
392 
393 ;; see https://www.n16f.net/blog/counting-lines-with-common-lisp/
394 
395 (defun directory-path-p (path)
396  "Return T if PATH is a directory or NIL else."
397  (declare (type (or pathname string) path))
398  (and (not (pathname-name path))
399  (not (pathname-type path))))
400 
401 (defvar *hidden-paths* (list ".hg" ".git"))
402 
403 (defun hidden-path-p (path &optional strict)
404  "Return T if PATH is strictly a hidden file or directory or NIL else."
405  (declare (type pathname path))
406  (let ((name (if (directory-path-p path)
407  (car (last (pathname-directory path)))
408  (file-namestring path))))
409  (and (plusp (length name))
410  (if strict
411  (eq (char name 0) #\.)
412  (member name *hidden-paths* :test 'equal)))))
413 
414 (defun directory-path (path)
415  "If PATH is a directory pathname, return it as it is. If it is a file
416 pathname or a string, transform it into a directory pathname."
417  (declare (type (or pathname string) path))
418  (if (directory-path-p path)
419  path
420  (make-pathname :directory (append (or (pathname-directory path)
421  (list :relative))
422  (list (file-namestring path)))
423  :name nil :type nil :defaults path)))
424 
425 (defun find-files (path &optional (hide *hidden-paths*))
426  "Return a list of all files contained in the directory at PATH or any of its
427 subdirectories."
428  (declare (type (or pathname string) path))
429  (flet ((list-directory (path)
430  (directory
431  (make-pathname :defaults (directory-path path)
432  :type :wild :name :wild))))
433  (let ((paths nil)
434  (children (list-directory (directory-path path))))
435  (dolist (child children paths)
436  (unless (and hide (hidden-path-p child (eq t hide)))
437  (if (directory-path-p child)
438  (setf paths (append paths (find-files child)))
439  (push child paths)))))))
440 
441 (defun count-file-lines (path)
442  "Count the number of non-empty lines in the file at PATH. A line is empty if
443 it only contains space or tabulation characters."
444  (declare (type pathname path))
445  (with-open-file (stream path :element-type '(unsigned-byte 8))
446  (do ((nb-lines 0)
447  (blank-line t))
448  (nil)
449  (let ((octet (read-byte stream nil)))
450  (cond
451  ((or (null octet) (eq octet #.(char-code #\Newline)))
452  (unless blank-line
453  (incf nb-lines))
454  (when (null octet)
455  (return-from count-file-lines nb-lines))
456  (setf blank-line t))
457  ((and (/= octet #.(char-code #\Space))
458  (/= octet #.(char-code #\Tab)))
459  (setf blank-line nil)))))))