changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/std/file.lisp

changeset 342: 254cca648492
parent: a0dfde3cb3c4
child: 770f2d03efd8
author: Richard Westhaver <ellis@rwest.io>
date: Mon, 13 May 2024 21:10:33 -0400
permissions: -rw-r--r--
description: homer fixups
1 ;;; std/file.lisp --- Standard File Library
2 
3 ;;
4 
5 ;;; Code:
6 (in-package :std/file)
7 
8 ;;; Sexp utils
9 ;; (reexport-from :uiop :include '(read-file-form read-file-forms slurp-stream-forms))
10 
11 (defun tmpfile (size)
12  "Create an anonymous temporary file of the given size. Returns a file descriptor."
13  (let (done fd pathname)
14  (unwind-protect
15  (progn
16  (setf (values fd pathname) (sb-posix:mkstemp "/dev/shm/tmp.XXXXXXXX"))
17  (sb-posix:unlink pathname)
18  (sb-posix:ftruncate fd size)
19  (setf done t))
20  (when (and fd (not done)) (sb-posix:close fd)))
21  fd))
22 
23 (declaim (inline octet-vector=/unsafe))
24 (defun octet-vector=/unsafe (v1 v2 start1 end1 start2 end2)
25  (declare (optimize (speed 3)
26  (safety 0)
27  (debug 0)
28  (compilation-speed 0))
29  (type octet-vector v1 v2)
30  (type array-index start1 start2)
31  (type array-length end1 end2))
32  (and (= (- end1 start1)
33  (- end2 start2))
34  (loop for i from start1 below end1
35  for j from start2 below end2
36  always (eql (aref v1 i) (aref v2 j)))))
37 
38 (defun octet-vector= (v1 v2 &key (start1 0) end1
39  (start2 0) end2)
40  "Like `string=' for octet vectors."
41  (declare (octet-vector v1 v2)
42  (array-index start1 start2)
43  ((or array-length null) end1 end2)
44  (optimize speed))
45  (let* ((len1 (length v1))
46  (len2 (length v2))
47  (end1 (or end1 len1))
48  (end2 (or end2 len2)))
49  (assert (<= start1 end1 len1))
50  (assert (<= start2 end2 len2))
51  (octet-vector=/unsafe v1 v2 start1 end1 start2 end2)))
52 
53 (defun file-size-in-octets (file)
54  (multiple-value-bind (path namestring)
55  (etypecase file
56  (string (values (pathname file)
57  file))
58  (pathname (values file
59  (sb-ext:native-namestring file))))
60  (declare (ignorable path namestring))
61  (sb-posix:stat-size (sb-posix:stat path))))
62 
63 (define-constant si-prefixes
64  '((-30 "quecto" "q")
65  (-27 "ronto" "r")
66  (-24 "yocto" "y")
67  (-21 "zepto" "z")
68  (-18 "atto" "a")
69  (-15 "femto" "f")
70  (-12 "pico" "p")
71  ( -9 "nano" "n")
72  ( -6 "micro" "μ")
73  ( -3 "milli" "m")
74  ( -2 "centi" "c")
75  ( -1 "deci" "d")
76  ( 0 "" "" )
77  ( 1 "deca" "da")
78  ( 2 "hecto" "h")
79  ( 3 "kilo" "k")
80  ( 6 "mega" "M")
81  ( 9 "giga" "G")
82  ( 12 "tera" "T")
83  ( 15 "peta" "P")
84  ( 18 "exa" "E")
85  ( 21 "zetta" "Z")
86  ( 24 "yotta" "Y")
87  ( 27 "ronna" "R")
88  ( 30 "quetta" "Q"))
89  :test #'equalp
90  :documentation "List as SI prefixes: power of ten, long form, short form.")
91 
92 (define-constant si-prefixes-base-1000
93  (loop for (pow long short) in si-prefixes
94  unless (and (not (zerop pow))
95  (< (abs pow) 3))
96  collect (list (truncate pow 3) long short))
97  :test #'equalp
98  :documentation "The SI prefixes as powers of 1000, with centi, deci, deca and hecto omitted.")
99 
100 (define-constant iec-prefixes
101  '(( 0 "" "")
102  (10 "kibi" "Ki")
103  (20 "mebi" "Mi")
104  (30 "gibi" "Gi")
105  (40 "tebi" "Ti")
106  (50 "pebi" "Pi")
107  (60 "exbi" "Ei"))
108  :test #'equalp
109  :documentation "The IEC binary prefixes, as powers of 2.")
110 
111 (eval-always
112  (defun single (seq)
113  "Is SEQ a sequence of one element?"
114  (= (length seq) 1)))
115 
116 (defmacro si-prefix-rec (n base prefixes)
117  (cond ((null prefixes) (error "No prefixes!"))
118  ((single prefixes)
119  (destructuring-bind ((power long short)) prefixes
120  `(values ,long ,short ,(expt base power))))
121  (t
122  ;; good enough
123  (let* ((halfway (ceiling (length prefixes) 2))
124  (lo (subseq prefixes 0 halfway))
125  (hi (subseq prefixes halfway))
126  (split (* (expt base (caar hi)))))
127  `(if (< ,n ,split)
128  (si-prefix-rec ,n ,base ,lo)
129  (si-prefix-rec ,n ,base ,hi))))))
130 
131 (defun si-prefix (n &key (base 1000))
132  "Given a number, return the prefix of the nearest SI unit.
133 
134 Three values are returned: the long form, the short form, and the
135 multiplying factor.
136 
137  (si-prefix 1001) => \"kilo\", \"k\", 1000d0
138 
139 BASE can be 1000, 10, 1024, or 2. 1000 is the default, and prefixes
140 start at kilo and milli. Base 10 is mostly the same, except the
141 prefixes centi, deci, deca and hecto are also used. Base 1024 uses the
142 same prefixes as 1000, but with 1024 as the base, as in vulgar file
143 sizes. Base 2 uses the IEC binary prefixes."
144  (if (zerop n) (values "" "" 1d0)
145  (let ((n (abs (coerce n 'double-float))))
146  (ecase base
147  (2 (si-prefix-rec n 2d0 #.iec-prefixes))
148  (10 (si-prefix-rec n 10d0 #.si-prefixes))
149  (1000 (si-prefix-rec n 1000d0 #.si-prefixes-base-1000))
150  (1024 (si-prefix-rec n 1024d0 #.si-prefixes-base-1000))))))
151 
152 (defun human-size-formatter (size &key (flavor :si)
153  (space (eql flavor :si)))
154  "Auxiliary function for formatting quantities human-readably.
155 Returns two values: a format control and a list of arguments.
156 
157 This can be used to integrate the human-readable printing of
158 quantities into larger format control strings using the recursive
159 processing format directive (~?):
160 
161  (multiple-value-bind (control args)
162  (human-size-formatter size)
163  (format t \"~?\" control args))"
164  (let ((size (coerce size 'double-float))
165  ;; Avoid printing exponent markers.
166  (*read-default-float-format* 'double-float)
167  (base (ecase flavor
168  (:file 1024)
169  (:si 1000)
170  (:iec 2))))
171  (multiple-value-bind (long short factor)
172  (si-prefix size :base base)
173  (declare (ignore long))
174  (let* ((size (/ size factor))
175  (int (round size))
176  (size
177  (if (> (abs (- size int))
178  0.05d0)
179  size
180  int)))
181  (values (formatter "~:[~d~;~,1f~]~:[~; ~]~a")
182  (list (floatp size) size space short))))))
183 
184 (defun format-human-size (stream size
185  &key (flavor :si)
186  (space (eql flavor :si)))
187  "Write SIZE to STREAM, in human-readable form.
188 
189 STREAM is interpreted as by `format'.
190 
191 If FLAVOR is `:si' (the default) the base is 1000 and SI prefixes are used.
192 
193 If FLAVOR is `:file', the base is 1024 and SI prefixes are used.
194 
195 If FLAVOR is `:iec', the base is 1024 bytes and IEC prefixes (Ki, Mi,
196 etc.) are used.
197 
198 If SPACE is non-nil, include a space between the number and the
199 prefix. (Defaults to T if FLAVOR is `:si'.)"
200  (if (zerop size)
201  (format stream "0")
202  (multiple-value-bind (formatter args)
203  (human-size-formatter size :flavor flavor :space space)
204  (format stream "~?" formatter args))))
205 
206 (defun format-file-size-human-readable (stream file-size
207  &key flavor
208  (space (eql flavor :si))
209  (suffix (if (eql flavor :iec) "B" "")))
210  "Write FILE-SIZE, a file size in bytes, to STREAM, in human-readable form.
211 
212 STREAM is interpreted as by `format'.
213 
214 If FLAVOR is nil, kilobytes are 1024 bytes and SI prefixes are used.
215 
216 If FLAVOR is `:si', kilobytes are 1000 bytes and SI prefixes are used.
217 
218 If FLAVOR is `:iec', kilobytes are 1024 bytes and IEC prefixes (Ki,
219 Mi, etc.) are used.
220 
221 If SPACE is non-nil, include a space between the number and the
222 prefix. (Defaults to T if FLAVOR is `:si'.)
223 
224 SUFFIX is the suffix to use; defaults to B if FLAVOR is `:iec',
225 otherwise empty."
226  (check-type file-size (integer 0 *))
227  (if (zerop file-size)
228  (format stream "0")
229  (let ((flavor (if (null flavor) :file flavor)))
230  (multiple-value-bind (formatter args)
231  (human-size-formatter file-size :flavor flavor :space space)
232  (format stream "~?~a" formatter args suffix)))))
233 
234 (defun file-size-human-readable (file &key flavor space suffix stream)
235  "Format the size of FILE (in octets) using `format-file-size-human-readable'.
236 The size of file is found by `trivial-file-size:file-size-in-octets'.
237 
238 Inspired by the function of the same name in Emacs."
239  (let ((file-size (file-size-in-octets file)))
240  (format-file-size-human-readable
241  stream
242  file-size
243  :flavor flavor
244  :suffix suffix
245  :space space)))
246 
247 (defmacro with-open-files ((&rest args) &body body)
248  "A simple macro to open one or more files providing the streams for the BODY. The ARGS is a list of `(stream filespec options*)` as supplied to WITH-OPEN-FILE."
249  (case (length args)
250  ((0)
251  `(progn ,@body))
252  ((1)
253  `(with-open-file ,(first args) ,@body))
254  (t `(with-open-file ,(first args)
255  (with-open-files
256  ,(rest args) ,@body)))))
257 
258 (defmacro with-open-file* ((stream filespec &key direction element-type
259  if-exists if-does-not-exist external-format)
260  &body body)
261  "Just like WITH-OPEN-FILE, but NIL values in the keyword arguments
262 mean to use the default value specified for OPEN."
263  (once-only (direction element-type if-exists if-does-not-exist external-format)
264  `(with-open-stream
265  (,stream (apply #'open ,filespec
266  (append
267  (when ,direction
268  (list :direction ,direction))
269  (list :element-type (or ,element-type
270  +default-element-type+))
271  (when ,if-exists
272  (list :if-exists ,if-exists))
273  (when ,if-does-not-exist
274  (list :if-does-not-exist ,if-does-not-exist))
275  (when ,external-format
276  (list :external-format ,external-format)))))
277  ,@body)))
278 
279 (defmacro with-input-from-file ((stream-name file-name &rest args
280  &key (direction nil direction-p)
281  &allow-other-keys)
282  &body body)
283  "Evaluate BODY with STREAM-NAME to an input stream on the file
284 FILE-NAME. ARGS is sent as is to the call to OPEN except EXTERNAL-FORMAT,
285 which is only sent to WITH-OPEN-FILE when it's not NIL."
286  (declare (ignore direction))
287  (when direction-p
288  (error "Can't specify :DIRECTION for WITH-INPUT-FROM-FILE."))
289  `(with-open-file* (,stream-name ,file-name :direction :input ,@args)
290  ,@body))
291 
292 (defmacro with-output-to-file ((stream-name file-name &rest args
293  &key (direction nil direction-p)
294  &allow-other-keys)
295  &body body)
296  "Evaluate BODY with STREAM-NAME to an output stream on the file
297 FILE-NAME. ARGS is sent as is to the call to OPEN except EXTERNAL-FORMAT,
298 which is only sent to WITH-OPEN-FILE when it's not NIL."
299  (declare (ignore direction))
300  (when direction-p
301  (error "Can't specify :DIRECTION for WITH-OUTPUT-TO-FILE."))
302  `(with-open-file* (,stream-name ,file-name :direction :output ,@args)
303  ,@body))
304 
305 (defun write-stream-into-file (stream pathname &key (if-exists :error) if-does-not-exist)
306  "Read STREAM and write the contents into PATHNAME.
307 
308 STREAM will be closed afterwards, so wrap it with
309 `make-concatenated-stream' if you want it left open."
310  (check-type pathname pathname)
311  (with-open-stream (in stream)
312  (with-output-to-file (out pathname
313  :element-type (stream-element-type in)
314  :if-exists if-exists
315  :if-does-not-exist if-does-not-exist)
316  (copy-stream in out)))
317  pathname)
318 
319 (defun write-file-into-stream (pathname output &key (if-does-not-exist :error)
320  (external-format :default))
321  "Write the contents of FILE into STREAM."
322  (check-type pathname pathname)
323  (with-input-from-file (input pathname
324  :element-type (stream-element-type output)
325  :if-does-not-exist if-does-not-exist
326  :external-format external-format)
327  (copy-stream input output)))
328 
329 (defun file= (file1 file2 &key (buffer-size 4096))
330  "Compare FILE1 and FILE2 octet by octet, \(possibly) using buffers
331 of BUFFER-SIZE."
332  (declare (ignorable buffer-size))
333  (let ((file1 (truename file1))
334  (file2 (truename file2)))
335  (or (equal file1 file2)
336  (and (= (file-size-in-octets file1)
337  (file-size-in-octets file2))
338  #+ccl (file=/mmap file1 file2)
339  #-ccl (file=/loop file1 file2 :buffer-size buffer-size)))))
340 
341 (defun file=/loop (file1 file2 &key (buffer-size 4096))
342  "Compare two files by looping over their contents using a buffer."
343  (declare
344  (type pathname file1 file2)
345  (type array-length buffer-size)
346  (optimize (safety 1) (debug 0) (compilation-speed 0)))
347  (flet ((make-buffer ()
348  (make-array buffer-size
349  :element-type 'octet
350  :initial-element 0)))
351  (declare (inline make-buffer))
352  (with-open-files ((file1 file1 :element-type 'octet :direction :input)
353  (file2 file2 :element-type 'octet :direction :input))
354  (and (= (file-length file1)
355  (file-length file2))
356  (locally (declare (optimize speed))
357  (loop with buffer1 = (make-buffer)
358  with buffer2 = (make-buffer)
359  for end1 = (read-sequence buffer1 file1)
360  for end2 = (read-sequence buffer2 file2)
361  until (or (zerop end1) (zerop end2))
362  always (and (= end1 end2)
363  (octet-vector= buffer1 buffer2
364  :end1 end1
365  :end2 end2))))))))
366 
367 (defun file-size (file &key (element-type '(unsigned-byte 8)))
368  "The size of FILE, in units of ELEMENT-TYPE (defaults to bytes).
369 
370 The size is computed by opening the file and getting the length of the
371 resulting stream.
372 
373 If all you want is to read the file's size in octets from its
374 metadata, consider `trivial-file-size:file-size-in-octets' instead."
375  (check-type file (or string pathname))
376  (with-input-from-file (in file :element-type element-type)
377  (file-length in)))
378 
379 (defun file-timestamp ()
380  "Returns current timestamp as a string suitable as the name of a timestamped-file."
381  (multiple-value-bind (sec min hr day mon yr)
382  (get-decoded-time)
383  (format nil "~4d~2,'0d~2,'0d_~2,'0d~2,'0d~2,'0d" yr mon day hr min sec)))
384 
385 (defun file-date ()
386  "Returns current date as a string suitable as the name of a timestamped-file."
387  (multiple-value-bind (sec min hr day mon yr)
388  (get-decoded-time)
389  (declare (ignore sec min hr))
390  (format nil "~4d~2,'0d~2,'0d" yr mon day)))
391 
392 ;; see https://www.n16f.net/blog/counting-lines-with-common-lisp/
393 
394 (defun directory-path-p (path)
395  "Return T if PATH is a directory or NIL else."
396  (declare (type (or pathname string) path))
397  (and (not (pathname-name path))
398  (not (pathname-type path))))
399 
400 (defvar *hidden-paths* (list ".hg" ".git"))
401 
402 (defun hidden-path-p (path &optional strict)
403  "Return T if PATH is strictly a hidden file or directory or NIL else."
404  (declare (type pathname path))
405  (let ((name (if (directory-path-p path)
406  (car (last (pathname-directory path)))
407  (file-namestring path))))
408  (and (plusp (length name))
409  (if strict
410  (eq (char name 0) #\.)
411  (member name *hidden-paths* :test 'equal)))))
412 
413 (defun directory-path (path)
414  "If PATH is a directory pathname, return it as it is. If it is a file
415 pathname or a string, transform it into a directory pathname."
416  (declare (type (or pathname string) path))
417  (if (directory-path-p path)
418  path
419  (make-pathname :directory (append (or (pathname-directory path)
420  (list :relative))
421  (list (file-namestring path)))
422  :name nil :type nil :defaults path)))
423 
424 (defun find-files (path &optional (hide *hidden-paths*))
425  "Return a list of all files contained in the directory at PATH or any of its
426 subdirectories."
427  (declare (type (or pathname string) path))
428  (flet ((list-directory (path)
429  (directory
430  (make-pathname :defaults (directory-path path)
431  :type :wild :name :wild))))
432  (let ((paths nil)
433  (children (list-directory (directory-path path))))
434  (dolist (child children paths)
435  (unless (and hide (hidden-path-p child (eq t hide)))
436  (if (directory-path-p child)
437  (setf paths (append paths (find-files child)))
438  (push child paths)))))))
439 
440 (defun count-file-lines (path)
441  "Count the number of non-empty lines in the file at PATH. A line is empty if
442 it only contains space or tabulation characters."
443  (declare (type pathname path))
444  (with-open-file (stream path :element-type '(unsigned-byte 8))
445  (do ((nb-lines 0)
446  (blank-line t))
447  (nil)
448  (let ((octet (read-byte stream nil)))
449  (cond
450  ((or (null octet) (eq octet #.(char-code #\Newline)))
451  (unless blank-line
452  (incf nb-lines))
453  (when (null octet)
454  (return-from count-file-lines nb-lines))
455  (setf blank-line t))
456  ((and (/= octet #.(char-code #\Space))
457  (/= octet #.(char-code #\Tab)))
458  (setf blank-line nil)))))))