1.1--- a/lisp/lib/cli/progress.lisp Fri Apr 12 21:19:02 2024 -0400
1.2+++ b/lisp/lib/cli/progress.lisp Sun Apr 14 01:19:10 2024 -0400
1.3@@ -96,17 +96,17 @@
1.4 (defconstant +seconds-in-one-minute+ 60)
1.5
1.6 (defun time-in-seconds-minutes-hours (in-seconds)
1.7- (format t "Finished in")
1.8- (when (>= in-seconds +seconds-in-one-hour+)
1.9- (let* ((hours (floor (/ in-seconds +seconds-in-one-hour+))))
1.10- (decf in-seconds (* hours +seconds-in-one-hour+))
1.11- (format t " ~a hour~p" hours hours)))
1.12- (when (>= in-seconds +seconds-in-one-minute+)
1.13- (let* ((minutes (floor (/ in-seconds +seconds-in-one-minute+))))
1.14- (decf in-seconds (* minutes +seconds-in-one-minute+))
1.15- (format t " ~a minute~p" minutes minutes)))
1.16 (unless (zerop in-seconds)
1.17- (format t " ~$ seconds" in-seconds))
1.18+ (format t "Finished in")
1.19+ (when (>= in-seconds +seconds-in-one-hour+)
1.20+ (let* ((hours (floor (/ in-seconds +seconds-in-one-hour+))))
1.21+ (decf in-seconds (* hours +seconds-in-one-hour+))
1.22+ (format t " ~a hour~p" hours hours)))
1.23+ (when (>= in-seconds +seconds-in-one-minute+)
1.24+ (let* ((minutes (floor (/ in-seconds +seconds-in-one-minute+))))
1.25+ (decf in-seconds (* minutes +seconds-in-one-minute+))
1.26+ (format t " ~a minute~p" minutes minutes)))
1.27+ (unless (zerop in-seconds) (format t " ~$ seconds" in-seconds)))
1.28 (terpri))
1.29
1.30 (defmethod finish-progress-display (progress-bar)
2.1--- a/lisp/lib/dat/csv.lisp Fri Apr 12 21:19:02 2024 -0400
2.2+++ b/lisp/lib/dat/csv.lisp Sun Apr 14 01:19:10 2024 -0400
2.3@@ -7,16 +7,16 @@
2.4
2.5 (defun parse-number-no-error (string &optional default)
2.6 (let ((result
2.7- (ignore-errors
2.8- (parse-number string))))
2.9+ (ignore-errors
2.10+ (parse-number string))))
2.11 (if result
2.12 result
2.13- default)))
2.14+ default)))
2.15
2.16 (defparameter *csv-separator* #\,)
2.17 (defparameter *csv-quote* #\")
2.18-(defparameter *csv-print-quote-p* nil "print \" when the element is a string?")
2.19-(defparameter *csv-default-external-format* #+allegro :932 #+ccl :Windows-31j #+(or sbcl lispworks) :sjis)
2.20+(defparameter *csv-print-quote-p* t "print \" when the element is a string?")
2.21+(defparameter *csv-default-external-format* :utf-8)
2.22
2.23 (defun write-csv-line (record &key stream (delimiter *csv-separator*))
2.24 "Accept a record and print it in one line as a csv record.
2.25@@ -26,22 +26,22 @@
2.26 If stream is nil (default case), it will return a string, otherwise it will return nil.
2.27 For efficiency reason, no intermediate string will be constructed. "
2.28 (let ((result
2.29- (with-output-to-string (s)
2.30- (let ((*standard-output* s)
2.31- (record-size (length record)))
2.32- (loop for e across record
2.33- for i from 0
2.34- do (typecase e
2.35- (string (progn
2.36- (if *csv-print-quote-p*
2.37- (progn
2.38- (write-char *csv-quote*)
2.39- (write-string e)
2.40- (write-char *csv-quote*))
2.41- (write-string e))))
2.42- (t (princ e)))
2.43- (when (< i (1- record-size))
2.44- (write-char delimiter)))))))
2.45+ (with-output-to-string (s)
2.46+ (let ((*standard-output* s)
2.47+ (record-size (length record)))
2.48+ (loop for e across record
2.49+ for i from 0
2.50+ do (typecase e
2.51+ (string (progn
2.52+ (if *csv-print-quote-p*
2.53+ (progn
2.54+ (write-char *csv-quote*)
2.55+ (write-string e)
2.56+ (write-char *csv-quote*))
2.57+ (write-string e))))
2.58+ (t (princ e)))
2.59+ (when (< i (1- record-size))
2.60+ (write-char delimiter)))))))
2.61 (format stream "~&~a" result)))
2.62
2.63 (defun write-csv-stream (stream table &key (delimiter *csv-separator*))
2.64@@ -60,50 +60,56 @@
2.65 A table is a sequence of lines. A line is a sequence of elements.
2.66 Elements can be any types"
2.67 (with-open-file (f filename :direction :output
2.68- :if-does-not-exist :create
2.69- :if-exists :supersede
2.70- :external-format external-format)
2.71+ :if-does-not-exist :create
2.72+ :if-exists :supersede
2.73+ :external-format external-format)
2.74 (write-csv-stream f table :delimiter delimiter)))
2.75
2.76 (defun parse-csv-string (str &key (delimiter *csv-separator*)) ;; refer RFC4180
2.77 (coerce
2.78 ;; (regexp:split-re "," str)
2.79 (let ((q-count (count *csv-quote* str :test #'char-equal)))
2.80- (cond ((zerop q-count) (cl-ppcre:split *csv-separator* str)) ;(cl-ppcre:split *csv-separator* str)
2.81- ((evenp q-count)
2.82- (macrolet ((push-f (fld flds) `(push (coerce (reverse ,fld) 'string) ,flds)))
2.83- (loop with state = :at-first ;; :at-first | :data-nq | :data-q | :q-in-nq | q-in-q
2.84- with field with fields
2.85- for chr of-type character across str
2.86- do (cond ((eq state :at-first)
2.87- (setf field nil)
2.88- (cond ((char-equal chr *csv-quote*) (setf state :data-q))
2.89- ((char-equal chr delimiter) (push "" fields))
2.90- (t (setf state :data-nq) (push chr field))))
2.91- ((eq state :data-nq)
2.92- (cond ((char-equal chr *csv-quote*) (setf state :q-in-nq))
2.93- ((char-equal chr delimiter)
2.94- (push-f field fields)
2.95- (setf state :at-first))
2.96- (t (push chr field))))
2.97- ((eq state :q-in-nq)
2.98- (cond ((char-equal chr *csv-quote*) (error "#\" inside the non quoted field"))
2.99- ((char-equal chr delimiter)
2.100- (push-f field fields)
2.101- (setf state :at-first))
2.102- (t (setf state :data-nq) (push chr field))))
2.103- ((eq state :data-q)
2.104- (if (char-equal chr *csv-quote*) (setf state :q-in-q)
2.105- (push chr field)))
2.106- ((eq state :q-in-q)
2.107- (cond ((char-equal chr *csv-quote*) (push chr field) (setf state :data-q))
2.108- ((char-equal chr delimiter)
2.109- (push-f field fields)
2.110- (setf state :at-first))
2.111- (t (error "illegal value ( ~A ) after quotation" chr)))))
2.112- finally (return
2.113- (progn (push-f field fields) (reverse fields))))))
2.114- (t (error "odd number of \" ( ~A ) in a line." q-count))))
2.115+ (when (oddp q-count) (warn 'simple-warning :format-control "odd number of #\" in a line (~A)"
2.116+ :format-arguments (list q-count)))
2.117+ (if (zerop q-count)
2.118+ (cl-ppcre:split delimiter str) ;(cl-ppcre:split *csv-separator* str)
2.119+ (macrolet ((push-f (fld flds) `(push (coerce (reverse ,fld) 'string) ,flds)))
2.120+ (loop with state = :at-first ;; :at-first | :data-nq | :data-q | :q-in-nq | q-in-q
2.121+ with field with fields
2.122+ for chr of-type character across str
2.123+ do (cond ((eq state :at-first)
2.124+ (setf field nil)
2.125+ (cond ((char-equal chr *csv-quote*) (setf state :data-q))
2.126+ ((char-equal chr delimiter) (push "" fields))
2.127+ (t (setf state :data-nq) (push chr field))))
2.128+ ((eq state :data-nq)
2.129+ (cond ((char-equal chr *csv-quote*) (setf state :q-in-nq))
2.130+ ((char-equal chr delimiter)
2.131+ (push-f field fields)
2.132+ (setf state :at-first))
2.133+ (t (push chr field))))
2.134+ ((eq state :q-in-nq)
2.135+ (cond ((char-equal chr *csv-quote*) (setf state :q-in-q))
2.136+ ((char-equal chr delimiter)
2.137+ (push-f field fields)
2.138+ (setf state :at-first))
2.139+ (t (setf state :data-nq) (push chr field))))
2.140+ ((eq state :data-q)
2.141+ (cond ((char-equal chr *csv-quote*) (setf state :q-in-q))
2.142+ ((char-equal chr delimiter) (push-f field fields) (setf state :at-first))
2.143+ (t (push chr field))))
2.144+ ((eq state :q-in-q)
2.145+ (cond ((char-equal chr *csv-quote*) (setf state :data-q))
2.146+ ;; this should only be done conditionally - early escapes quotes
2.147+ ((char-equal chr delimiter)
2.148+ (push-f field fields)
2.149+ (setf state :at-first))
2.150+ (t
2.151+ ;; (error "illegal value ( ~A ) after quotation" chr)
2.152+ (push chr field)
2.153+ ))))
2.154+ finally (return
2.155+ (progn (push-f field fields) (reverse fields)))))))
2.156 'vector))
2.157
2.158
2.159@@ -125,9 +131,8 @@
2.160 "
2.161 (declare (type (or (simple-array function *) null) type-conv-fns map-fns))
2.162 (let* ((rline (read-line stream nil nil)))
2.163-
2.164 (when rline
2.165- (let* ((line (string-trim '(#\Space #\Tab #\Newline #\Return) rline))
2.166+ (let* ((line (string-trim '(#\Space #\Newline #\Return) rline))
2.167 (strs (parse-csv-string line :delimiter delimiter))
2.168 (strs-size (length strs)))
2.169 (when (< start 0)
2.170@@ -138,19 +143,19 @@
2.171 (when type-conv-fns
2.172 (unless (= (length strs) (length type-conv-fns))
2.173 (error "Number of type specifier (~a) does not match the number of elements (~a)."
2.174- (length strs) (length type-conv-fns))))
2.175+ (length type-conv-fns) (length strs))))
2.176 (when map-fns
2.177 (unless (= (length strs) (length map-fns))
2.178 (error "Number of mapping functions (~a) does not match the number of elements (~a)."
2.179- (length strs) (length map-fns))))
2.180+ (length map-fns) (length strs))))
2.181 (let ((result strs))
2.182 ;; strs is not needed so we simply overwrite it
2.183 (when type-conv-fns
2.184 (setf result
2.185- (map 'vector #'funcall type-conv-fns result)))
2.186+ (map 'vector #'funcall type-conv-fns result)))
2.187 (when map-fns
2.188 (setf result
2.189- (map 'vector #'funcall map-fns result)))
2.190+ (map 'vector #'funcall map-fns result)))
2.191 result)))))
2.192
2.193 (defun read-csv-stream (stream &key (header t) type-spec map-fns (delimiter *csv-separator*) (start 0) end)
2.194@@ -173,53 +178,53 @@
2.195 If start or end is negative, it counts from the end. -1 is the last element.
2.196 "
2.197 (let ((type-conv-fns
2.198- (when type-spec
2.199- (macrolet ((make-num-specifier (specifier)
2.200- `(lambda (s) (let ((s (parse-number-no-error s s)))
2.201- (if (numberp s) (funcall ,specifier s) s)))))
2.202- (map 'vector
2.203- (lambda (type)
2.204- (ecase type
2.205- ((t nil string) #'identity)
2.206- (number #'(lambda (s) (parse-number-no-error s s)))
2.207- (float (make-num-specifier #'float))
2.208- (single-float (make-num-specifier #'(lambda (s) (coerce s 'single-float))))
2.209- (double-float (make-num-specifier #'(lambda (s) (coerce s 'double-float))))
2.210- (integer (make-num-specifier #'round))
2.211- (pathname #'pathname)
2.212- (symbol #'intern)
2.213- (keyword (lambda (s) (intern s :keyword)))))
2.214- type-spec))))
2.215+ (when type-spec
2.216+ (macrolet ((make-num-specifier (specifier)
2.217+ `(lambda (s) (let ((s (parse-number-no-error s s)))
2.218+ (if (numberp s) (funcall ,specifier s) s)))))
2.219+ (map 'vector
2.220+ (lambda (type)
2.221+ (ecase type
2.222+ ((t nil string) #'identity)
2.223+ (number #'(lambda (s) (parse-number-no-error s s)))
2.224+ (float (make-num-specifier #'float))
2.225+ (single-float (make-num-specifier #'(lambda (s) (coerce s 'single-float))))
2.226+ (double-float (make-num-specifier #'(lambda (s) (coerce s 'double-float))))
2.227+ (integer (make-num-specifier #'round))
2.228+ (pathname #'pathname)
2.229+ (symbol #'intern)
2.230+ (keyword (lambda (s) (intern s :keyword)))))
2.231+ type-spec))))
2.232 (map-fns
2.233- (when map-fns
2.234- (map 'vector
2.235- (lambda (fn)
2.236- (cond ((or (eq fn t)
2.237- (eq fn nil))
2.238- #'identity)
2.239- ((functionp fn)
2.240- fn)
2.241- ((and (symbolp fn)
2.242- (not (keywordp fn)))
2.243- (symbol-function fn))
2.244- (t (error "~a is not a valid function specifier." fn))))
2.245- map-fns)))
2.246+ (when map-fns
2.247+ (map 'vector
2.248+ (lambda (fn)
2.249+ (cond ((or (eq fn t)
2.250+ (eq fn nil))
2.251+ #'identity)
2.252+ ((functionp fn)
2.253+ fn)
2.254+ ((and (symbolp fn)
2.255+ (not (keywordp fn)))
2.256+ (symbol-function fn))
2.257+ (t (error "~a is not a valid function specifier." fn))))
2.258+ map-fns)))
2.259 (header
2.260- (etypecase header
2.261- (cons (coerce header 'vector))
2.262- (boolean (when header
2.263- (read-csv-line stream :delimiter delimiter))))))
2.264+ (etypecase header
2.265+ (cons (coerce header 'vector))
2.266+ (boolean (when header
2.267+ (read-csv-line stream :delimiter delimiter))))))
2.268 (loop for rec = (read-csv-line stream :type-conv-fns type-conv-fns :map-fns map-fns :delimiter delimiter
2.269- :start start :end end)
2.270- while rec
2.271- collect rec into result
2.272- finally (return
2.273- (values
2.274- (coerce result 'vector)
2.275- header)))))
2.276+ :start start :end end)
2.277+ while rec
2.278+ collect rec into result
2.279+ finally (return
2.280+ (values
2.281+ (coerce result 'vector)
2.282+ header)))))
2.283
2.284 (defun read-csv-file (filename &key (header t) type-spec map-fns (delimiter *csv-separator*) (external-format *csv-default-external-format*)
2.285- (os :anynl-dos) (start 0) end)
2.286+ (start 0) end)
2.287 "Read from stream until eof and return a csv table.
2.288
2.289 A csv table is a vector of csv records.
2.290@@ -242,13 +247,11 @@
2.291 start and end specifies how many elements per record will be included.
2.292 If start or end is negative, it counts from the end. -1 is the last element.
2.293 "
2.294- #+sbcl (declare (ignorable os))
2.295 (with-open-file (f filename :external-format external-format)
2.296- #+allegro (setf (excl:eol-convention f) os)
2.297 (read-csv-stream f :type-spec type-spec :map-fns map-fns
2.298- :delimiter delimiter
2.299- :start start :end end
2.300- :header header)))
2.301+ :delimiter delimiter
2.302+ :start start :end end
2.303+ :header header)))
2.304
2.305
2.306 (defun read-csv-file-and-sort (filename sort-order &key (header t) (order :ascend) type-spec map-fns (delimiter *csv-separator*) (external-format *csv-default-external-format*))
2.307@@ -259,7 +262,7 @@
2.308 :delimiter delimiter
2.309 :external-format external-format)))
2.310 (loop for i in (reverse sort-order)
2.311- do (setf table
2.312- (stable-sort table (ecase order (:ascend #'string<=) (:descend #'string>=))
2.313- :key (lambda (rec) (aref rec i))))
2.314- finally (return table))))
2.315+ do (setf table
2.316+ (stable-sort table (ecase order (:ascend #'string<=) (:descend #'string>=))
2.317+ :key (lambda (rec) (aref rec i))))
2.318+ finally (return table))))
3.1--- a/lisp/lib/dat/pkg.lisp Fri Apr 12 21:19:02 2024 -0400
3.2+++ b/lisp/lib/dat/pkg.lisp Sun Apr 14 01:19:10 2024 -0400
3.3@@ -27,6 +27,7 @@
3.4 (:use :cl :std :dat/proto)
3.5 (:export
3.6 :read-csv-file
3.7+ :*csv-separator*
3.8 #:read-csv-stream
3.9 :write-csv-file
3.10 :write-csv-stream
4.1--- a/lisp/lib/log/log.lisp Fri Apr 12 21:19:02 2024 -0400
4.2+++ b/lisp/lib/log/log.lisp Sun Apr 14 01:19:10 2024 -0400
4.3@@ -52,12 +52,12 @@
4.4 (let ((%name (string-upcase name)))
4.5 `(progn
4.6 (defun ,(intern (concatenate 'string %name "!")) (&rest args)
4.7- (format t "#:~(~A~) ~A~%"
4.8+ (format t "#:~(~A~) ~A "
4.9 ',name
4.10 (if *log-timestamp*
4.11 (log-timestamp-source)
4.12 ""))
4.13- (mapc (lambda (x) (format t "~t; ~A~%" x)) args)
4.14+ (mapc (lambda (x) (format t "; ~A~%" x)) args)
4.15 (if (= 1 (length args))
4.16 (car args)
4.17 args))
5.1--- a/lisp/lib/net/fetch.lisp Fri Apr 12 21:19:02 2024 -0400
5.2+++ b/lisp/lib/net/fetch.lisp Sun Apr 14 01:19:10 2024 -0400
5.3@@ -4,23 +4,14 @@
5.4 ((text :initarg :text :reader text)))
5.5
5.6 (defun download (url &optional output)
5.7- (let ((output (if output output (file-namestring (quri:uri-path (quri:uri url))))))
5.8- (multiple-value-bind (body status header uri)
5.9- (dex:get url)
5.10- (values
5.11- (let ((val
5.12- (if (= status 200)
5.13- (with-open-file (file output
5.14- :direction :output
5.15- :if-does-not-exist :create
5.16- :if-exists :supersede)
5.17- (write-string body file)
5.18- output
5.19- )
5.20- nil)))
5.21- (or body uri header)
5.22- val)
5.23- status))))
5.24+ (let ((output (if output
5.25+ output
5.26+ (file-namestring (obj/uri:uri-path (obj/uri:uri url))))))
5.27+ (multiple-value-bind (stream status header uri)
5.28+ (dex:get url :want-stream t)
5.29+ (when (= status 200) (write-stream-into-file stream (pathname output)))
5.30+ (values (or stream uri header)
5.31+ status))))
5.32
5.33 (defun split-file-path (path)
5.34 (let ((pos-last-slash (1+ (position #\/ path :from-end t))))
5.35@@ -28,12 +19,12 @@
5.36 (subseq path pos-last-slash))))
5.37
5.38 (defun split-uri-string (uri-string)
5.39- (let ((pu (puri:parse-uri uri-string)))
5.40- (cons (puri:uri-host pu) (split-file-path (puri:uri-path pu)))))
5.41+ (let ((pu (parse-uri uri-string)))
5.42+ (cons (uri-host pu) (split-file-path (uri-path pu)))))
5.43
5.44 (defun condition-path (path)
5.45- "Abuse puri:parse-uri to strip possible get args from path"
5.46- (let ((p (puri:parse-uri path))) (puri:uri-path p)))
5.47+ "Abuse parse-uri to strip possible get args from path"
5.48+ (let ((p (parse-uri path))) (uri-path p)))
5.49
5.50 (defun is-file (path)
5.51 (handler-case (probe-file path)
5.52@@ -47,7 +38,7 @@
5.53 ((is-file (condition-path url-or-path)) (condition-path url-or-path))
5.54 ((is-file (condition-path (concatenate 'string dir url-or-path)))
5.55 (condition-path (concatenate 'string dir url-or-path)))
5.56- ((puri:parse-uri url-or-path)
5.57+ ((parse-uri url-or-path)
5.58 (let* ((tmp-pathname (split-uri-string url-or-path))
5.59 (file-pathstring (format nil "~{~A~^~}" (if dir (cons dir tmp-pathname) tmp-pathname)))
5.60 (file-pathname (ensure-directories-exist
6.1--- a/lisp/lib/net/pkg.lisp Fri Apr 12 21:19:02 2024 -0400
6.2+++ b/lisp/lib/net/pkg.lisp Sun Apr 14 01:19:10 2024 -0400
6.3@@ -154,7 +154,7 @@
6.4
6.5 (uiop:define-package :net/fetch
6.6 (:nicknames :fetch)
6.7- (:use :cl :std)
6.8+ (:use :cl :std :obj/uri)
6.9 (:export :fetch :download))
6.10
6.11 (uiop:define-package :net
7.1--- a/lisp/lib/net/proto/http.lisp Fri Apr 12 21:19:02 2024 -0400
7.2+++ b/lisp/lib/net/proto/http.lisp Sun Apr 14 01:19:10 2024 -0400
7.3@@ -1,7 +1,9 @@
7.4 ;;; TODO
7.5 ;;; lib/net/proto/http.lisp --- HTTP Support
7.6
7.7-;;
7.8+;; based on https://github.com/fukamachi/fast-http (maybe)
7.9+
7.10+;; see also: https://github.com/orthecreedence/http-parse
7.11
7.12 ;;; Code:
7.13 (in-package :net/proto/http)
8.1--- a/lisp/lib/obj/meta/fast.lisp Fri Apr 12 21:19:02 2024 -0400
8.2+++ b/lisp/lib/obj/meta/fast.lisp Sun Apr 14 01:19:10 2024 -0400
8.3@@ -717,24 +717,6 @@
8.4 ;;;
8.5 ;;; Computing the Effective Method Lambda List
8.6
8.7-(defun compute-effective-method-lambda-list (generic-function applicable-methods)
8.8- (multiple-value-bind (required optional rest-var keyword allow-other-keys)
8.9- (parse-ordinary-lambda-list (sb-mop:generic-function-lambda-list generic-function))
8.10- (let ((method-parses
8.11- (mapcar
8.12- (lambda (method)
8.13- (multiple-value-list
8.14- (parse-ordinary-lambda-list
8.15- (sb-mop:method-lambda-list method))))
8.16- applicable-methods)))
8.17- (unparse-ordinary-lambda-list
8.18- (merge-required-infos required (mapcar #'first method-parses))
8.19- (merge-optional-infos optional (mapcar #'second method-parses))
8.20- rest-var
8.21- (merge-keyword-infos keyword (mapcar #'fourth method-parses))
8.22- (merge-allow-other-keys allow-other-keys (mapcar #'fifth method-parses))
8.23- '()))))
8.24-
8.25 (defun merge-required-infos (g-required m-requireds)
8.26 (dolist (m-required m-requireds g-required)
8.27 (assert (= (length m-required)
8.28@@ -824,6 +806,24 @@
8.29 m-allow-other-keys-list
8.30 :initial-value g-allow-other-keys))
8.31
8.32+(defun compute-effective-method-lambda-list (generic-function applicable-methods)
8.33+ (multiple-value-bind (required optional rest-var keyword allow-other-keys)
8.34+ (parse-ordinary-lambda-list (sb-mop:generic-function-lambda-list generic-function))
8.35+ (let ((method-parses
8.36+ (mapcar
8.37+ (lambda (method)
8.38+ (multiple-value-list
8.39+ (parse-ordinary-lambda-list
8.40+ (sb-mop:method-lambda-list method))))
8.41+ applicable-methods)))
8.42+ (unparse-ordinary-lambda-list
8.43+ (merge-required-infos required (mapcar #'first method-parses))
8.44+ (merge-optional-infos optional (mapcar #'second method-parses))
8.45+ rest-var
8.46+ (merge-keyword-infos keyword (mapcar #'fourth method-parses))
8.47+ (merge-allow-other-keys allow-other-keys (mapcar #'fifth method-parses))
8.48+ '()))))
8.49+
8.50 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8.51 ;;;
8.52 ;;; Effective Method Lookup
9.1--- a/lisp/lib/obj/meta/pkg.lisp Fri Apr 12 21:19:02 2024 -0400
9.2+++ b/lisp/lib/obj/meta/pkg.lisp Sun Apr 14 01:19:10 2024 -0400
9.3@@ -61,7 +61,6 @@
9.4 :specializer-direct-superspecializers
9.5 :specializer-intersectionp
9.6 :specializer-subsetp
9.7-
9.8 :domain
9.9 :ensure-domain
9.10 :method-domain
9.11@@ -105,12 +104,11 @@
9.12 :potentially-sealable-method
9.13 :potentially-sealable-standard-method))
9.14
9.15-(std:defpkg :obj/meta/fast
9.16+(defpackage :obj/meta/fast
9.17 (:use :cl :std :obj/meta/sealed)
9.18 (:import-from :sb-int :gensymify)
9.19 (:import-from :sb-walker :macroexpand-all)
9.20- (:export :fast-generic-function :fast-method :inlineable)
9.21- (:use-reexport :obj/meta/sealed))
9.22+ (:export :fast-generic-function :fast-method :inlineable))
9.23
9.24 (defpackage :obj/meta/lazy
9.25 (:use :cl :std))
10.1--- a/lisp/lib/obj/meta/sealed.lisp Fri Apr 12 21:19:02 2024 -0400
10.2+++ b/lisp/lib/obj/meta/sealed.lisp Sun Apr 14 01:19:10 2024 -0400
10.3@@ -722,83 +722,6 @@
10.4 (loop for char in '(#\backspace #\tab #\newline #\linefeed #\page #\return #\space #\rubout) do
10.5 (register-class-prototype char))
10.6
10.7-;;; In this file, we compute the static call signatures of a given, sealed
10.8-;;; generic function. A static call signature consists of a list of types,
10.9-;;; and a list of prototypes. The list of types is guaranteed to be
10.10-;;; non-overlapping with the types of any other call signature. The list
10.11-;;; of prototypes is chosen such that the list of applicable methods of
10.12-;;; these prototypes is representative for all arguments of the types of
10.13-;;; the call signature.
10.14-
10.15-(defclass static-call-signature ()
10.16- ((%types
10.17- :initarg :types
10.18- :reader static-call-signature-types)
10.19- (%prototypes
10.20- :initarg :prototypes
10.21- :reader static-call-signature-prototypes)))
10.22-
10.23-(defmethod print-object ((scs static-call-signature) stream)
10.24- (print-unreadable-object (scs stream :type t :identity t)
10.25- (format stream "~S ~S"
10.26- (static-call-signature-types scs)
10.27- (static-call-signature-prototypes scs))))
10.28-
10.29-(defmethod make-load-form
10.30- ((static-call-signature static-call-signature) &optional environment)
10.31- (make-load-form-saving-slots
10.32- static-call-signature
10.33- :slot-names '(%types %prototypes)
10.34- :environment environment))
10.35-
10.36-(defmethod externalizable-object-p
10.37- ((static-call-signature static-call-signature))
10.38- (and
10.39- (every #'externalizable-object-p
10.40- (static-call-signature-types static-call-signature))
10.41- (every #'externalizable-object-p
10.42- (static-call-signature-prototypes static-call-signature))))
10.43-
10.44-(defmethod compute-static-call-signatures
10.45- ((sgf sealable-generic-function)
10.46- (domain domain))
10.47- (let* ((sealed-methods
10.48- (remove-if-not
10.49- (lambda (method)
10.50- (domain-intersectionp (method-domain method) domain))
10.51- (generic-function-methods sgf)))
10.52- (list-of-specializers
10.53- (mapcar #'method-specializers sealed-methods))
10.54- (static-call-signatures '()))
10.55- (unless (null list-of-specializers)
10.56- (map-types-and-prototypes
10.57- (lambda (types prototypes)
10.58- (push (make-instance 'static-call-signature
10.59- :types types
10.60- :prototypes prototypes)
10.61- static-call-signatures))
10.62- ;; Transpose the list of specializers so that we operate on each
10.63- ;; argument instead of on each method.
10.64- (apply #'mapcar #'list list-of-specializers)
10.65- domain))
10.66- static-call-signatures))
10.67-
10.68-(defun map-types-and-prototypes (fn specializers-list domain)
10.69- (assert (= (length specializers-list)
10.70- (domain-arity domain)))
10.71- (labels ((rec (sl specializers types prototypes)
10.72- (if (null sl)
10.73- (funcall fn (reverse types) (reverse prototypes))
10.74- (loop for (type prototype)
10.75- in (type-prototype-pairs
10.76- (first sl)
10.77- (first specializers))
10.78- do (rec (rest sl)
10.79- (rest specializers)
10.80- (cons type types)
10.81- (cons prototype prototypes))))))
10.82- (rec specializers-list (domain-specializers domain) '() '())))
10.83-
10.84 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10.85 ;;;
10.86 ;;; Reasoning About Specializer Specificity
10.87@@ -880,3 +803,80 @@
10.88 (push (list (snode-type snode) prototype)
10.89 pairs)))))
10.90 pairs)))
10.91+
10.92+;;; In this file, we compute the static call signatures of a given, sealed
10.93+;;; generic function. A static call signature consists of a list of types,
10.94+;;; and a list of prototypes. The list of types is guaranteed to be
10.95+;;; non-overlapping with the types of any other call signature. The list
10.96+;;; of prototypes is chosen such that the list of applicable methods of
10.97+;;; these prototypes is representative for all arguments of the types of
10.98+;;; the call signature.
10.99+
10.100+(defclass static-call-signature ()
10.101+ ((%types
10.102+ :initarg :types
10.103+ :reader static-call-signature-types)
10.104+ (%prototypes
10.105+ :initarg :prototypes
10.106+ :reader static-call-signature-prototypes)))
10.107+
10.108+(defmethod print-object ((scs static-call-signature) stream)
10.109+ (print-unreadable-object (scs stream :type t :identity t)
10.110+ (format stream "~S ~S"
10.111+ (static-call-signature-types scs)
10.112+ (static-call-signature-prototypes scs))))
10.113+
10.114+(defmethod make-load-form
10.115+ ((static-call-signature static-call-signature) &optional environment)
10.116+ (make-load-form-saving-slots
10.117+ static-call-signature
10.118+ :slot-names '(%types %prototypes)
10.119+ :environment environment))
10.120+
10.121+(defmethod externalizable-object-p
10.122+ ((static-call-signature static-call-signature))
10.123+ (and
10.124+ (every #'externalizable-object-p
10.125+ (static-call-signature-types static-call-signature))
10.126+ (every #'externalizable-object-p
10.127+ (static-call-signature-prototypes static-call-signature))))
10.128+
10.129+(defmethod compute-static-call-signatures
10.130+ ((sgf sealable-generic-function)
10.131+ (domain domain))
10.132+ (let* ((sealed-methods
10.133+ (remove-if-not
10.134+ (lambda (method)
10.135+ (domain-intersectionp (method-domain method) domain))
10.136+ (generic-function-methods sgf)))
10.137+ (list-of-specializers
10.138+ (mapcar #'method-specializers sealed-methods))
10.139+ (static-call-signatures '()))
10.140+ (unless (null list-of-specializers)
10.141+ (map-types-and-prototypes
10.142+ (lambda (types prototypes)
10.143+ (push (make-instance 'static-call-signature
10.144+ :types types
10.145+ :prototypes prototypes)
10.146+ static-call-signatures))
10.147+ ;; Transpose the list of specializers so that we operate on each
10.148+ ;; argument instead of on each method.
10.149+ (apply #'mapcar #'list list-of-specializers)
10.150+ domain))
10.151+ static-call-signatures))
10.152+
10.153+(defun map-types-and-prototypes (fn specializers-list domain)
10.154+ (assert (= (length specializers-list)
10.155+ (domain-arity domain)))
10.156+ (labels ((rec (sl specializers types prototypes)
10.157+ (if (null sl)
10.158+ (funcall fn (reverse types) (reverse prototypes))
10.159+ (loop for (type prototype)
10.160+ in (type-prototype-pairs
10.161+ (first sl)
10.162+ (first specializers))
10.163+ do (rec (rest sl)
10.164+ (rest specializers)
10.165+ (cons type types)
10.166+ (cons prototype prototypes))))))
10.167+ (rec specializers-list (domain-specializers domain) '() '())))
11.1--- a/lisp/lib/obj/obj.asd Fri Apr 12 21:19:02 2024 -0400
11.2+++ b/lisp/lib/obj/obj.asd Sun Apr 14 01:19:10 2024 -0400
11.3@@ -5,10 +5,10 @@
11.4 :components ((:file "pkg")
11.5 (:module "meta"
11.6 :components ((:file "pkg")
11.7+ (:file "sealed")
11.8 (:file "stealth")
11.9 (:file "typed")
11.10 (:file "filtered")
11.11- (:file "sealed")
11.12 (:file "fast")
11.13 (:file "lazy")
11.14 (:file "overloaded")))
12.1--- a/lisp/lib/obj/uuid.lisp Fri Apr 12 21:19:02 2024 -0400
12.2+++ b/lisp/lib/obj/uuid.lisp Sun Apr 14 01:19:10 2024 -0400
12.3@@ -95,29 +95,7 @@
12.4 (with-open-file (address (make-pathname :directory
12.5 `(:absolute "sys" "class" "net" ,interface)
12.6 :name "address"))
12.7- (parse-integer (remove #\: (read-line address)) :radix 16))))
12.8-
12.9- #+(and :windows :clisp)
12.10- (let ((output (ext:run-program "ipconfig"
12.11- :arguments (list "/all")
12.12- :input nil
12.13- :output :stream
12.14- :wait t)))
12.15- (loop for line = (read-line output nil) while line
12.16- when (search "Physical" line :test #'string-equal)
12.17- return (parse-integer (remove #\- (subseq line 37)) :radix 16)))
12.18-
12.19- #+(and :macosx :lispworks)
12.20- (with-open-stream (stream
12.21- (sys:run-shell-command "/sbin/ifconfig en0 ether"
12.22- :output :stream
12.23- :if-error-output-exists t
12.24- :wait nil))
12.25- (loop for line = (read-line stream nil)
12.26- while line
12.27- when (search "ether" line :test #'string-equal)
12.28- return (parse-integer (remove #\: (subseq line 7))
12.29- :radix 16)))))
12.30+ (parse-integer (remove #\: (read-line address)) :radix 16))))))
12.31 (unless node
12.32 (unless *uuid-random-state*
12.33 (setf *uuid-random-state* (make-random-state t)))
13.1--- a/lisp/lib/rdb/obj.lisp Fri Apr 12 21:19:02 2024 -0400
13.2+++ b/lisp/lib/rdb/obj.lisp Sun Apr 14 01:19:10 2024 -0400
13.3@@ -116,12 +116,12 @@
13.4 (sap nil :type (or null alien)))
13.5
13.6 ;;; column family
13.7-(defstruct (rdb-cf (:constructor make-rdb-cf (name &key kv sap)))
13.8+(defstruct (rdb-cf (:constructor make-rdb-cf (name &key #+nil kv sap)))
13.9 "RDB Column Family structure. Contains a name, a cons of (rdb-key-type
13.10 . rdb-val-type), and a system-area-pointer to the underlying
13.11 rocksdb_cf_t handle."
13.12 (name "" :type string)
13.13- (kv *default-rdb-kv* :type rdb-kv)
13.14+ ;; (kv *default-rdb-kv* :type rdb-kv)
13.15 (sap nil :type (or null alien)))
13.16
13.17 ;;; rdb-stats
13.18@@ -223,7 +223,7 @@
13.19 (backup nil :type (or null alien))
13.20 (snapshots #() :type (array alien)))
13.21
13.22-;; (defvar *default-rdb-opts* (default-rdb-opts))
13.23+(defvar *default-rdb-opts* (default-rdb-opts))
13.24
13.25 (defmethod print-object ((self rdb) stream)
13.26 (print-unreadable-object (self stream :type t :identity t)
14.1--- a/lisp/std/alien.lisp Fri Apr 12 21:19:02 2024 -0400
14.2+++ b/lisp/std/alien.lisp Sun Apr 14 01:19:10 2024 -0400
14.3@@ -125,6 +125,9 @@
14.4 (defun num-cpus ()
14.5 "Return the number of CPU threads online."
14.6 (alien-funcall (extern-alien "sysconf" (function int int)) sb-unix:sc-nprocessors-onln))
14.7+
14.8+(defvar *cpus* (num-cpus))
14.9+
14.10 ;;; C Standard
14.11
14.12 ;; types
15.1--- a/lisp/std/pkg.lisp Fri Apr 12 21:19:02 2024 -0400
15.2+++ b/lisp/std/pkg.lisp Sun Apr 14 01:19:10 2024 -0400
15.3@@ -8,6 +8,7 @@
15.4 :proper-list-of-length-p :proper-list-p :singleton-p
15.5 :with-unique-names :symbolicate :package-symbolicate :keywordicate :gensymify*)
15.6 (:export
15.7+ ;; types
15.8 ;; err
15.9 :std-error :std-error-message
15.10 :define-error-reporter
15.11@@ -46,7 +47,23 @@
15.12 :decode-float32
15.13 :encode-float64
15.14 :decode-float64
15.15- ;; str
15.16+ ;; stream
15.17+ #:wild-pathname
15.18+ #:non-wild-pathname
15.19+ #:absolute-pathname
15.20+ #:relative-pathname
15.21+ #:directory-pathname
15.22+ #:absolute-directory-pathname
15.23+ #:file-pathname
15.24+ #:with-open-files
15.25+ #:write-stream-into-file
15.26+ #:write-file-into-stream
15.27+ #:file=
15.28+ #:file-size
15.29+ :file-size-in-octets
15.30+ :+pathsep+
15.31+ :octet-vector=
15.32+ ;; string
15.33 :*omit-nulls*
15.34 :*whitespaces*
15.35 :string-designator
15.36@@ -87,8 +104,13 @@
15.37 :make-threads :with-threads :finish-threads
15.38 :timed-join-thread :kill-thread :hang
15.39 :thread-count :dump-thread
15.40- :task :job :task-queue :*task-queue*
15.41- :job-stack :task-object
15.42+ :make-oracle :make-supervisor
15.43+ :push-job :push-task :push-worker :push-result
15.44+ :start-task-pool :pause-task-pool :shutdown-task-pool
15.45+ :push-stage :designate-oracle :make-task-pool
15.46+ :task :job :task-pool :stage :task-pool-p
15.47+ :job-tasks :make-job :job-p :task-object
15.48+ :make-task :task-p :task
15.49 ;; util
15.50 :find-package* #:find-symbol* #:symbol-call
15.51 :intern* #:export* #:import* #:shadowing-import*
15.52@@ -192,6 +214,7 @@
15.53 :c-string-to-string-list
15.54 :list-all-shared-objects
15.55 :num-cpus
15.56+ :*cpus*
15.57 :loff-t
15.58 :memset
15.59 ;; os
16.1--- a/lisp/std/std.asd Fri Apr 12 21:19:02 2024 -0400
16.2+++ b/lisp/std/std.asd Sun Apr 14 01:19:10 2024 -0400
16.3@@ -11,22 +11,24 @@
16.4 :depends-on (:std/named-readtables :cl-ppcre :sb-concurrency)
16.5 :serial t
16.6 :components ((:file "pkg")
16.7- (:file "defpkg")
16.8 (:file "err")
16.9 (:file "bits")
16.10 (:module "num"
16.11 :components ((:file "float")
16.12 (:file "parse")))
16.13- (:file "str")
16.14+ (:file "string")
16.15 (:file "fmt")
16.16 (:file "sym")
16.17 (:file "list")
16.18 (:file "util")
16.19 (:file "readtable")
16.20- (:file "fu")
16.21 (:file "ana")
16.22 (:file "pan")
16.23+ (:file "fu")
16.24+ (:file "types")
16.25+ (:file "stream")
16.26 (:file "thread")
16.27+ (:file "defpkg")
16.28 (:file "alien"))
16.29 :in-order-to ((test-op (test-op "std/tests"))))
16.30
17.1--- a/lisp/std/str.lisp Fri Apr 12 21:19:02 2024 -0400
17.2+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
17.3@@ -1,528 +0,0 @@
17.4-;;; std/str.lisp --- String utilities
17.5-
17.6-;;; Code:
17.7-
17.8-;; (defvar sb-unicode-syms
17.9-;; '(words lines sentences whitespace-p uppercase lowercase titlecase
17.10-;; word-break-class line-break-class sentence-break-class char-block
17.11-;; cased-p uppercase-p lowercase-p titlecase-p casefold
17.12-;; graphemes grapheme-break-class
17.13-;; bidi-mirroring-glyph bidi-class
17.14-;; normalize-string normalized-p default-ignorable-p
17.15-;; confusable-p hex-digit-p mirrored-p alphabetic-p math-p
17.16-;; decimal-value digit-value
17.17-;; unicode< unicode> unicode= unicode-equal
17.18-;; unicode<= unicode>=))
17.19-(in-package :std)
17.20-
17.21-;; (mapc (lambda (s) (export s)) sb-unicode-syms)
17.22-;; (reexport-from
17.23-;; :sb-unicode
17.24-;; :include sb-unicode-syms)
17.25-
17.26-(defparameter *omit-nulls* nil)
17.27-(defvar *whitespaces* (list #\Backspace #\Tab #\Linefeed #\Newline #\Vt #\Page
17.28- #\Return #\Space #\Rubout
17.29- #+sbcl #\Next-Line #-sbcl (code-char 133)
17.30- #\No-break_space)
17.31- "On some implementations, linefeed and newline represent the same character (code).")
17.32-
17.33-(deftype string-designator ()
17.34- "A string designator type. A string designator is either a string, a symbol,
17.35-or a character."
17.36- `(or symbol string character))
17.37-
17.38-(defun ssplit (separator s &key (omit-nulls *omit-nulls*) limit (start 0) end)
17.39- "Split s into substring by separator (cl-ppcre takes a regex, we do not).
17.40-
17.41- `limit' limits the number of elements returned (i.e. the string is
17.42- split at most `limit' - 1 times)."
17.43- ;; cl-ppcre:split doesn't return a null string if the separator appears at the end of s.
17.44- (let* ((limit (or limit (1+ (length s))))
17.45- (res (cl-ppcre:split separator s :limit limit :start start :end end)))
17.46- (if omit-nulls
17.47- (remove-if (lambda (it) (sequence:emptyp it)) res)
17.48- res)))
17.49-
17.50-(defun collapse-whitespaces (s)
17.51- "Ensure there is only one space character between words.
17.52- Remove newlines."
17.53- (cl-ppcre:regex-replace-all "\\s+" s " "))
17.54-
17.55-(defun trim (s &key (char-bag *whitespaces*))
17.56- "Removes all characters in `char-bag` (default: whitespaces) at the beginning and end of `s`.
17.57- If supplied, char-bag has to be a sequence (e.g. string or list of characters).
17.58-
17.59- Examples: (trim \" foo \") => \"foo\"
17.60- (trim \"+-*foo-bar*-+\" :char-bag \"+-*\") => \"foo-bar\"
17.61- (trim \"afood\" :char-bag (str:concat \"a\" \"d\")) => \"foo\""
17.62- (when s
17.63- (string-trim char-bag s)))
17.64-
17.65-;;; TODO 2023-08-27: camel snake kebab
17.66-
17.67-(defun make-template-parser (start-delimiter end-delimiter &key (ignore-case nil))
17.68- "Returns a closure than can substitute variables
17.69- delimited by \"start-delimiter\" and \"end-delimiter\"
17.70- in a string, by the provided values."
17.71- (check-type start-delimiter string)
17.72- (check-type end-delimiter string)
17.73- (when (or (string= start-delimiter "")
17.74- (string= end-delimiter ""))
17.75- (error 'simple-type-error
17.76- :format-control "The empty string is not a valid delimiter."))
17.77- (let ((start-len (length start-delimiter))
17.78- (end-len (length end-delimiter))
17.79- (test (if ignore-case
17.80- #'string-equal
17.81- #'string=)))
17.82-
17.83- (lambda (string values)
17.84- (check-type string string)
17.85- (unless (listp values)
17.86- (error 'simple-type-error
17.87- :format-control "values should be an association list"))
17.88-
17.89- (with-output-to-string (stream)
17.90- (loop for prev = 0 then (+ j end-len)
17.91- for i = (search start-delimiter string)
17.92- then (search start-delimiter string :start2 j)
17.93- for j = (if i (search end-delimiter string :start2 i))
17.94- then (if i (search end-delimiter string :start2 i))
17.95- while (and i j)
17.96- do (write-string (subseq string prev i) stream)
17.97- (let ((instance (rest (assoc (subseq string (+ i start-len) j)
17.98- values
17.99- :test test))))
17.100- (if instance
17.101- (princ instance stream)
17.102- (write-string (subseq string i (+ j end-len)) stream)))
17.103-
17.104- finally (write-string (subseq string prev) stream))))))
17.105-
17.106-;;; STRING-CASE
17.107-;;; Implementing an efficient string= case in Common Lisp
17.108-;;;
17.109-;;; 2015-11-15: Defknown don't have explicit-check in SBCL 1.3.0
17.110-;;; Remove the declaration. It's never useful the way we use
17.111-;;; numeric-char=.
17.112-;;;
17.113-;;; 2015-11-15: Make this a real ASDF system for Xach
17.114-;;; I copied the system definition from Quicklisp and mangled as
17.115-;;; necessary.
17.116-;;;
17.117-;;; 2010-06-30: Tiny bugfix
17.118-;;; Widen the type declarations inside cases to allow vectors that
17.119-;;; have a length that's shorter than the total size (due to fill-
17.120-;;; pointers).
17.121-
17.122-;;;
17.123-;;;# Introduction
17.124-;;;
17.125-;;; In `<http://neverfriday.com/blog/?p=10>', OMouse asks how
17.126-;;; best to implement a `string= case' (in Scheme). I noted that
17.127-;;; naively iterating through the cases with `string=' at runtime
17.128-;;; is suboptimal. Seeing the problem as a simplistic pattern
17.129-;;; matching one makes an efficient solution obvious.
17.130-;;; Note that, unlike Haskell, both Scheme and CL have random-
17.131-;;; access on strings in O(1), something which I exploit to
17.132-;;; generate better code.
17.133-;;;
17.134-;;; This is also a pbook.el file (the pdf can be found at
17.135-;;; `<http://www.discontinuity.info/~pkhuong/string-case.pdf>' ).
17.136-;;; I'm new at this not-quite-illiterate programming thing, so
17.137-;;; please bear with me (: I'm also looking for comments on the
17.138-;;; formatting. I'm particularly iffy with the way keywords look
17.139-;;; like. It just looks really fuzzy when you're not really zoomed
17.140-;;; in (or reading it on paper).
17.141-
17.142-;;; I usually don't use packages for throw-away code, but this looks
17.143-;;; like it could be useful to someone.
17.144-
17.145-;;;# Some utility code
17.146-
17.147-(defun split-tree (list &key (test 'eql) (key 'identity))
17.148- "Splits input list into sublists of elements
17.149- whose elements are all such that (key element)
17.150- are all test.
17.151- It's assumed that test and key form an equality class.
17.152- (This is similar to groupBy)"
17.153- (when list
17.154- (let* ((lists ())
17.155- (cur-list (list (first list)))
17.156- (cur-key (funcall key (first list))))
17.157- (dolist (elt (rest list) (nreverse (cons (nreverse cur-list)
17.158- lists)))
17.159- (let ((new-key (funcall key elt)))
17.160- (if (funcall test cur-key new-key)
17.161- (push elt cur-list)
17.162- (progn
17.163- (push (nreverse cur-list) lists)
17.164- (setf cur-list (list elt)
17.165- cur-key new-key))))))))
17.166-
17.167-(defun iota (n)
17.168- (loop for i below n collect i))
17.169-
17.170-(defun hash-table->list (table &key (keep-keys t) (keep-values t))
17.171- "Saves the keys and/or values in table to a list.
17.172- As with hash table iterating functions, there is no
17.173- implicit ordering."
17.174- (let ((list ()))
17.175- (maphash (cond ((and keep-keys
17.176- keep-values)
17.177- (lambda (k v)
17.178- (push (cons k v) list)))
17.179- (keep-keys
17.180- (lambda (k v)
17.181- (declare (ignore v))
17.182- (push k list)))
17.183- (keep-values
17.184- (lambda (k v)
17.185- (declare (ignore k))
17.186- (push v list))))
17.187- table)
17.188- list))
17.189-
17.190-(defun all-equal (list &key (key 'identity) (test 'eql))
17.191- (if (or (null list)
17.192- (null (rest list)))
17.193- t
17.194- (let ((first-key (funcall key (first list))))
17.195- (every (lambda (element)
17.196- (funcall test first-key
17.197- (funcall key element)))
17.198- (rest list)))))
17.199-
17.200-(defun split-at (list n)
17.201- "Split list in k lists of n elements (or less for the last list)"
17.202- (declare (type (and fixnum (integer (0))) n))
17.203- (let ((lists '())
17.204- (cur-list '())
17.205- (counter 0))
17.206- (declare (type (and fixnum unsigned-byte) counter))
17.207- (dolist (elt list (nreverse (if cur-list
17.208- (cons (nreverse cur-list)
17.209- lists)
17.210- lists)))
17.211- (push elt cur-list)
17.212- (when (= (incf counter) n)
17.213- (push (nreverse cur-list) lists)
17.214- (setf cur-list '()
17.215- counter 0)))))
17.216-
17.217-;;;# The string matching compiler per se
17.218-;;;
17.219-;;; I use special variables here because I find that
17.220-;;; preferable to introducing noise everywhere to thread
17.221-;;; these values through all the calls, especially
17.222-;;; when `*no-match-form*' is only used at the very end.
17.223-
17.224-(defparameter *input-string* nil
17.225- "Symbol of the variable holding the input string")
17.226-
17.227-(defparameter *no-match-form* nil
17.228- "Form to insert when no match is found.")
17.229-
17.230-;;; The basic idea of the pattern matching process here is
17.231-;;; to first discriminate with the input string's length;
17.232-;;; once that is done, it is very easy to safely use random
17.233-;;; access until only one candidate string (pattern) remains.
17.234-;;; However, even if we determine that only one case might be
17.235-;;; a candidate, it might still be possible for another string
17.236-;;; (not in the set of cases) to match the criteria. So we also
17.237-;;; have to make sure that *all* the indices match. A simple
17.238-;;; way to do this would be to emit the remaining checks at the
17.239-;;; every end, when only one candidate is left. However, that
17.240-;;; can result in a lot of duplicate code, and some useless
17.241-;;; work on mismatches. Instead, the code generator always
17.242-;;; tries to find (new) indices for which all the candidates
17.243-;;; left in the branch share the same character, and then emits
17.244-;;; a guard, checking the character at that index as soon as possible.
17.245-
17.246-;;; In my experience, there are two main problems when writing
17.247-;;; pattern matchers: how to decide what to test for at each
17.248-;;; fork, and how to ensure the code won't explode exponentially.
17.249-;;; Luckily, for our rather restricted pattern language (equality
17.250-;;; on strings), patterns can't overlap, and it's possible to guarantee
17.251-;;; that no candidate will ever be possible in both branches of a
17.252-;;; fork.
17.253-
17.254-;;; Due to the the latter guarantee, we have a simple fitness
17.255-;;; measure for tests: simply maximising the number of
17.256-;;; candidates in the smallest branch will make our search tree
17.257-;;; as balanced as possible. Of course, we don't know whether
17.258-;;; the subtrees will be balanced too, but I don't think it'll
17.259-;;; be much of an issue.
17.260-
17.261-;;; Note that, if we had access, whether via annotations or profiling,
17.262-;;; to the probability of each case, the situation would be very
17.263-;;; different. In fact, on a pipelined machine where branch
17.264-;;; mispredictions are expensive, an unbalanced tree will yield
17.265-;;; better expected runtimes. There was a very interesting and rather
17.266-;;; sophisticated Google lecture on that topic on Google video
17.267-;;; (the speaker used markov chains to model dynamic predictors,
17.268-;;; for example), but I can't seem to find the URL.
17.269-
17.270-;;; TODO: Find bounds on the size of the code!
17.271-
17.272-(defun find-best-split (strings to-check)
17.273- "Iterate over all the indices left to check to find
17.274- which index (and which character) to test for equality
17.275- with, keeping the ones which result in the most balanced
17.276- split."
17.277- (flet ((evaluate-split (i char)
17.278- "Just count all the matches and mismatches"
17.279- (let ((= 0)
17.280- (/= 0))
17.281- (dolist (string strings (min = /=))
17.282- (if (eql (aref string i) char)
17.283- (incf =)
17.284- (incf /=)))))
17.285- (uniquify-chars (chars)
17.286- "Only keep one copy of each char in the list"
17.287- (mapcar 'first (split-tree (sort chars 'char<) :test #'eql))))
17.288- (let ((best-split 0) ; maximise size of smallest branch
17.289- (best-posn nil)
17.290- (best-char nil))
17.291- (dolist (i to-check (values best-posn best-char))
17.292- (dolist (char (uniquify-chars (mapcar (lambda (string)
17.293- (aref string i))
17.294- strings)))
17.295- (let ((Z (evaluate-split i char)))
17.296- (when (> Z best-split)
17.297- (setf best-split Z
17.298- best-posn i
17.299- best-char char))))))))
17.300-
17.301-;;; We sometimes have to execute sequences of checks for
17.302-;;; equality. The natural way to express this is via a
17.303-;;; sequence of checks, wrapped in an `and'. However, that
17.304-;;; translates to a sequence of conditional branches, predicated
17.305-;;; on very short computations. On (not so) modern architectures,
17.306-;;; it'll be faster to coalesce a sequence of such checks together
17.307-;;; as straightline code (e.g. via `or' of `xor'), and only branch
17.308-;;; at the very end. The code doesn't become much more complex,
17.309-;;; and benchmarks have shown it to be beneficial (giving a speed
17.310-;;; up of 2-5% for both predictable and unpredictable workloads,
17.311-;;; on a Core 2).
17.312-
17.313-;;; Benchmarks (and experience) have shown that, instead of executing
17.314-;;; a cascade of comparison/conditional branch, it's slightly
17.315-;;; faster, both for predictable and unpredictable workloads,
17.316-;;; to `or' together a bunch of comparisons (e.g. `xor'). On a Core 2
17.317-;;; processor, it seems that doing so for sequences of around 4
17.318-;;; comparisons is the sweetspot. On perfectly predictable input,
17.319-;;; aborting early (on the first check) saves as much time as
17.320-;;; the 4 test/conditional branch add, compared to a sequence of
17.321-;;; `xor' and `or'.
17.322-
17.323-;;; Numeric char= abstracts out the xor check, and, on SBCL,
17.324-;;; is replaced by a short assembly sequence when the first
17.325-;;; argument is a constant. The declared return type is then
17.326-;;; wider than strictly necessary making it fit in a machine
17.327-;;; register, but not as a fixnum ensures that the compiler
17.328-;;; won't repeatedly convert the values to fixnums, when all
17.329-;;; we'll do is `or' them together and check for zero-ness.
17.330-;;; This function is the only place where the macro isn't
17.331-;;; generic over the elements stored in the cases. It shouldn't
17.332-;;; be too hard to implement a numeric-eql, which would
17.333-;;; restore genericity to the macro, while keeping the
17.334-;;; speed-up.
17.335-
17.336-#+nil
17.337-(declaim (inline numeric-char=)
17.338- (ftype (function (character character)
17.339- (values (and unsigned-byte fixnum)))
17.340- numeric-char=))
17.341-;; FIXME 2024-04-11:
17.342-;; #+ (and sbcl (or x86 x86-64))
17.343-(defun numeric-char= (x y)
17.344- (declare (type character x y))
17.345- (logxor (char-code x)
17.346- (char-code y)))
17.347-
17.348-(eval-when (:load-toplevel :compile-toplevel)
17.349-(progn
17.350- (defknown numeric-char= (character character)
17.351- (unsigned-byte #. (1- sb-vm:n-machine-word-bits))
17.352- (movable foldable flushable))
17.353-
17.354- (define-vop (numeric-char=)
17.355- (:args (x :scs (sb-vm::character-reg sb-vm::character-stack)
17.356- :target r
17.357- :load-if (not (location= x r))))
17.358- (:info y)
17.359- (:arg-types (:constant character) character)
17.360- (:results (r :scs (sb-vm::unsigned-reg)
17.361- :load-if (not (location= x r))))
17.362- (:result-types sb-vm::unsigned-num)
17.363- (:translate numeric-char=)
17.364- (:policy :fast-safe)
17.365- (:note "inline constant numeric-char=")
17.366- (:generator 1
17.367- (move r x)
17.368- (sb-vm::inst #:xor r (char-code y))))))
17.369-
17.370-;;; At each step, we may be able to find positions for which
17.371-;;; there can only be one character. If we emit the check for
17.372-;;; these positions as soon as possible, we avoid duplicating
17.373-;;; potentially a lot of code. Since benchmarks have shown
17.374-;;; it to be useful, this function implements the checks
17.375-;;; as a series of (zerop (logior (numeric-char= ...)...)),
17.376-;;; if there is more than one such check to emit.
17.377-
17.378-(defun emit-common-checks (strings to-check)
17.379- (labels ((emit-char= (pairs)
17.380- (mapcar (lambda (pair)
17.381- (destructuring-bind (posn . char)
17.382- pair
17.383- `(numeric-char= ,char
17.384- (aref ,*input-string* ,posn))))
17.385- pairs))
17.386- (emit-checking-form (common-chars)
17.387- (when common-chars
17.388- (let ((common-chars (sort common-chars '< :key 'car)))
17.389- #+ (and) `(and ,@(mapcar
17.390- (lambda (chunk)
17.391- (if (null (rest chunk))
17.392- (destructuring-bind ((posn . char))
17.393- chunk
17.394- `(eql ,char
17.395- (aref ,*input-string* ,posn)))
17.396- `(zerop
17.397- (logior ,@(emit-char= chunk)))))
17.398- (split-at common-chars 4)))
17.399- #+ (or) `(and ,@(mapcar
17.400- (lambda (pair)
17.401- (destructuring-bind (posn . char)
17.402- pair
17.403- `(eql ,char
17.404- (aref ,*input-string* ,posn))))
17.405- common-chars))))))
17.406- (let ((common-chars ())
17.407- (left-to-check ()))
17.408- (dolist (posn to-check (values (emit-checking-form common-chars)
17.409- (nreverse left-to-check)))
17.410- (if (all-equal strings :key (lambda (string)
17.411- (aref string posn)))
17.412- (push (cons posn (aref (first strings) posn))
17.413- common-chars)
17.414- (push posn left-to-check))))))
17.415-
17.416-;;; The driving function: First, emit any test that is
17.417-;;; common to all the candidates. If there's only one
17.418-;;; candidate, then we just have to execute the body;
17.419-;;; if not, we look for the `best' test and emit the
17.420-;;; corresponding code: execute the test, and recurse
17.421-;;; on the candidates that match the test and on those
17.422-;;; that don't.
17.423-
17.424-(defun make-search-tree (strings bodies to-check)
17.425- (multiple-value-bind (guard to-check)
17.426- (emit-common-checks strings to-check)
17.427- (if (null (rest strings))
17.428- (progn
17.429- (assert (null to-check)) ; there shouldn't be anything left to check
17.430- (if guard
17.431- `(if ,guard
17.432- (progn ,@(first bodies))
17.433- ,*no-match-form*)
17.434- `(progn ,@(first bodies))))
17.435- (multiple-value-bind (posn char)
17.436- (find-best-split strings to-check)
17.437- (assert posn) ; this can only happen if all strings are equal
17.438- (let ((=strings ())
17.439- (=bodies ())
17.440- (/=strings ())
17.441- (/=bodies ()))
17.442- (loop
17.443- for string in strings
17.444- for body in bodies
17.445- do (if (eql char (aref string posn))
17.446- (progn
17.447- (push string =strings)
17.448- (push body =bodies))
17.449- (progn
17.450- (push string /=strings)
17.451- (push body /=bodies))))
17.452- (let ((tree `(if (eql ,char (aref ,*input-string* ,posn))
17.453- ,(make-search-tree =strings =bodies
17.454- (remove posn to-check))
17.455- ,(make-search-tree /=strings /=bodies
17.456- to-check))))
17.457- (if guard
17.458- `(if ,guard
17.459- ,tree
17.460- ,*no-match-form*)
17.461- tree)))))))
17.462-
17.463-;;; Finally, we can glue it all together.
17.464-;;; To recapitulate, first, dispatch on string
17.465-;;; length, then execute a search tree for the
17.466-;;; few candidates left, and finally make sure
17.467-;;; the input string actually matches the one
17.468-;;; candidate left at the leaf.
17.469-
17.470-(defun emit-string-case (cases input-var no-match)
17.471- (flet ((case-string-length (x)
17.472- (length (first x))))
17.473- (let ((*input-string* input-var)
17.474- (*no-match-form* no-match)
17.475- (cases-lists (split-tree (sort cases '<
17.476- :key #'case-string-length)
17.477- :key #'case-string-length)))
17.478- `(locally (declare (type vector ,input-var))
17.479- (case (length ,input-var)
17.480- ,@(loop for cases in cases-lists
17.481- for length = (case-string-length (first cases))
17.482- collect `((,length)
17.483- ;; arrays with fill pointers expose the total length
17.484- ;; in their type, not the position of the fill-pointer.
17.485- ;; The type below only applies to simple-arrays.
17.486- (locally (declare (type (or (not simple-array)
17.487- (simple-array * (,length)))
17.488- ,input-var))
17.489- ,(make-search-tree (mapcar 'first cases)
17.490- (mapcar 'rest cases)
17.491- (iota length)))))
17.492- (t ,no-match))))))
17.493-
17.494-;;; Just wrapping the previous function in a macro,
17.495-;;; and adding some error checking (the rest of the code
17.496-;;; just assumes there won't be duplicate patterns).
17.497-;;; Note how we use a local function instead of passing
17.498-;;; the default form directly. This can save a lot on
17.499-;;; code size, especially when the default form is
17.500-;;; large.
17.501-
17.502-(defmacro string-case ((string &key (default '(error "No match")))
17.503- &body cases)
17.504- "(string-case (string &key default)
17.505- case*)
17.506- case ::= string form*
17.507- | t form*
17.508- Where t is the default case."
17.509- (let ((cases-table (make-hash-table :test 'equal)))
17.510- "Error checking cruft"
17.511- (dolist (case cases)
17.512- (assert (typep case '(cons (or string (eql t)))))
17.513- (let ((other-case (gethash (first case) cases-table)))
17.514- (if other-case
17.515- (warn "Duplicate string-case cases: ~A -> ~A or ~A~%"
17.516- (first case)
17.517- (rest other-case)
17.518- (rest case))
17.519- (setf (gethash (first case) cases-table)
17.520- (rest case)))))
17.521- (let ((input-var (gensym "INPUT"))
17.522- (default-fn (gensym "ON-ERROR"))
17.523- (default-body (gethash t cases-table (list default))))
17.524- `(let ((,input-var ,string))
17.525- (flet ((,default-fn ()
17.526- ,@default-body))
17.527- ,(emit-string-case (progn
17.528- (remhash t cases-table)
17.529- (hash-table->list cases-table))
17.530- input-var
17.531- `(,default-fn)))))))
18.1--- /dev/null Thu Jan 01 00:00:00 1970 +0000
18.2+++ b/lisp/std/stream.lisp Sun Apr 14 01:19:10 2024 -0400
18.3@@ -0,0 +1,437 @@
18.4+;;; std/stream.lisp --- Standard Streams
18.5+
18.6+;;
18.7+
18.8+;;; Code:
18.9+(in-package :std)
18.10+(declaim (inline octet-vector=/unsafe))
18.11+(defun octet-vector=/unsafe (v1 v2 start1 end1 start2 end2)
18.12+ (declare (optimize (speed 3)
18.13+ (safety 0)
18.14+ (debug 0)
18.15+ (compilation-speed 0))
18.16+ (type octet-vector v1 v2)
18.17+ (type array-index start1 start2)
18.18+ (type array-length end1 end2))
18.19+ (and (= (- end1 start1)
18.20+ (- end2 start2))
18.21+ (loop for i from start1 below end1
18.22+ for j from start2 below end2
18.23+ always (eql (aref v1 i) (aref v2 j)))))
18.24+
18.25+(defun octet-vector= (v1 v2 &key (start1 0) end1
18.26+ (start2 0) end2)
18.27+ "Like `string=' for octet vectors."
18.28+ (declare (octet-vector v1 v2)
18.29+ (array-index start1 start2)
18.30+ ((or array-length null) end1 end2)
18.31+ (optimize speed))
18.32+ (let* ((len1 (length v1))
18.33+ (len2 (length v2))
18.34+ (end1 (or end1 len1))
18.35+ (end2 (or end2 len2)))
18.36+ (assert (<= start1 end1 len1))
18.37+ (assert (<= start2 end2 len2))
18.38+ (octet-vector=/unsafe v1 v2 start1 end1 start2 end2)))
18.39+
18.40+(defun file-size-in-octets (file)
18.41+ (multiple-value-bind (path namestring)
18.42+ (etypecase file
18.43+ (string (values (pathname file)
18.44+ file))
18.45+ (pathname (values file
18.46+ (sb-ext:native-namestring file))))
18.47+ (declare (ignorable path namestring))
18.48+ (sb-posix:stat-size (sb-posix:stat path))))
18.49+
18.50+(define-constant si-prefixes
18.51+ '((-30 "quecto" "q")
18.52+ (-27 "ronto" "r")
18.53+ (-24 "yocto" "y")
18.54+ (-21 "zepto" "z")
18.55+ (-18 "atto" "a")
18.56+ (-15 "femto" "f")
18.57+ (-12 "pico" "p")
18.58+ ( -9 "nano" "n")
18.59+ ( -6 "micro" "μ")
18.60+ ( -3 "milli" "m")
18.61+ ( -2 "centi" "c")
18.62+ ( -1 "deci" "d")
18.63+ ( 0 "" "" )
18.64+ ( 1 "deca" "da")
18.65+ ( 2 "hecto" "h")
18.66+ ( 3 "kilo" "k")
18.67+ ( 6 "mega" "M")
18.68+ ( 9 "giga" "G")
18.69+ ( 12 "tera" "T")
18.70+ ( 15 "peta" "P")
18.71+ ( 18 "exa" "E")
18.72+ ( 21 "zetta" "Z")
18.73+ ( 24 "yotta" "Y")
18.74+ ( 27 "ronna" "R")
18.75+ ( 30 "quetta" "Q"))
18.76+ :test #'equalp
18.77+ :documentation "List as SI prefixes: power of ten, long form, short form.")
18.78+
18.79+(define-constant si-prefixes-base-1000
18.80+ (loop for (pow long short) in si-prefixes
18.81+ unless (and (not (zerop pow))
18.82+ (< (abs pow) 3))
18.83+ collect (list (truncate pow 3) long short))
18.84+ :test #'equalp
18.85+ :documentation "The SI prefixes as powers of 1000, with centi, deci, deca and hecto omitted.")
18.86+
18.87+(define-constant iec-prefixes
18.88+ '(( 0 "" "")
18.89+ (10 "kibi" "Ki")
18.90+ (20 "mebi" "Mi")
18.91+ (30 "gibi" "Gi")
18.92+ (40 "tebi" "Ti")
18.93+ (50 "pebi" "Pi")
18.94+ (60 "exbi" "Ei"))
18.95+ :test #'equalp
18.96+ :documentation "The IEC binary prefixes, as powers of 2.")
18.97+
18.98+(eval-always
18.99+ (defun single (seq)
18.100+ "Is SEQ a sequence of one element?"
18.101+ (= (length seq) 1)))
18.102+
18.103+(defmacro si-prefix-rec (n base prefixes)
18.104+ (cond ((null prefixes) (error "No prefixes!"))
18.105+ ((single prefixes)
18.106+ (destructuring-bind ((power long short)) prefixes
18.107+ `(values ,long ,short ,(expt base power))))
18.108+ (t
18.109+ ;; good enough
18.110+ (let* ((halfway (ceiling (length prefixes) 2))
18.111+ (lo (subseq prefixes 0 halfway))
18.112+ (hi (subseq prefixes halfway))
18.113+ (split (* (expt base (caar hi)))))
18.114+ `(if (< ,n ,split)
18.115+ (si-prefix-rec ,n ,base ,lo)
18.116+ (si-prefix-rec ,n ,base ,hi))))))
18.117+
18.118+(defun si-prefix (n &key (base 1000))
18.119+ "Given a number, return the prefix of the nearest SI unit.
18.120+
18.121+Three values are returned: the long form, the short form, and the
18.122+multiplying factor.
18.123+
18.124+ (si-prefix 1001) => \"kilo\", \"k\", 1000d0
18.125+
18.126+BASE can be 1000, 10, 1024, or 2. 1000 is the default, and prefixes
18.127+start at kilo and milli. Base 10 is mostly the same, except the
18.128+prefixes centi, deci, deca and hecto are also used. Base 1024 uses the
18.129+same prefixes as 1000, but with 1024 as the base, as in vulgar file
18.130+sizes. Base 2 uses the IEC binary prefixes."
18.131+ (if (zerop n) (values "" "" 1d0)
18.132+ (let ((n (abs (coerce n 'double-float))))
18.133+ (ecase base
18.134+ (2 (si-prefix-rec n 2d0 #.iec-prefixes))
18.135+ (10 (si-prefix-rec n 10d0 #.si-prefixes))
18.136+ (1000 (si-prefix-rec n 1000d0 #.si-prefixes-base-1000))
18.137+ (1024 (si-prefix-rec n 1024d0 #.si-prefixes-base-1000))))))
18.138+
18.139+(defun human-size-formatter (size &key (flavor :si)
18.140+ (space (eql flavor :si)))
18.141+ "Auxiliary function for formatting quantities human-readably.
18.142+Returns two values: a format control and a list of arguments.
18.143+
18.144+This can be used to integrate the human-readable printing of
18.145+quantities into larger format control strings using the recursive
18.146+processing format directive (~?):
18.147+
18.148+ (multiple-value-bind (control args)
18.149+ (human-size-formatter size)
18.150+ (format t \"~?\" control args))"
18.151+ (let ((size (coerce size 'double-float))
18.152+ ;; Avoid printing exponent markers.
18.153+ (*read-default-float-format* 'double-float)
18.154+ (base (ecase flavor
18.155+ (:file 1024)
18.156+ (:si 1000)
18.157+ (:iec 2))))
18.158+ (multiple-value-bind (long short factor)
18.159+ (si-prefix size :base base)
18.160+ (declare (ignore long))
18.161+ (let* ((size (/ size factor))
18.162+ (int (round size))
18.163+ (size
18.164+ (if (> (abs (- size int))
18.165+ 0.05d0)
18.166+ size
18.167+ int)))
18.168+ (values (formatter "~:[~d~;~,1f~]~:[~; ~]~a")
18.169+ (list (floatp size) size space short))))))
18.170+
18.171+(defun format-human-size (stream size
18.172+ &key (flavor :si)
18.173+ (space (eql flavor :si)))
18.174+ "Write SIZE to STREAM, in human-readable form.
18.175+
18.176+STREAM is interpreted as by `format'.
18.177+
18.178+If FLAVOR is `:si' (the default) the base is 1000 and SI prefixes are used.
18.179+
18.180+If FLAVOR is `:file', the base is 1024 and SI prefixes are used.
18.181+
18.182+If FLAVOR is `:iec', the base is 1024 bytes and IEC prefixes (Ki, Mi,
18.183+etc.) are used.
18.184+
18.185+If SPACE is non-nil, include a space between the number and the
18.186+prefix. (Defaults to T if FLAVOR is `:si'.)"
18.187+ (if (zerop size)
18.188+ (format stream "0")
18.189+ (multiple-value-bind (formatter args)
18.190+ (human-size-formatter size :flavor flavor :space space)
18.191+ (format stream "~?" formatter args))))
18.192+
18.193+(defun format-file-size-human-readable (stream file-size
18.194+ &key flavor
18.195+ (space (eql flavor :si))
18.196+ (suffix (if (eql flavor :iec) "B" "")))
18.197+ "Write FILE-SIZE, a file size in bytes, to STREAM, in human-readable form.
18.198+
18.199+STREAM is interpreted as by `format'.
18.200+
18.201+If FLAVOR is nil, kilobytes are 1024 bytes and SI prefixes are used.
18.202+
18.203+If FLAVOR is `:si', kilobytes are 1000 bytes and SI prefixes are used.
18.204+
18.205+If FLAVOR is `:iec', kilobytes are 1024 bytes and IEC prefixes (Ki,
18.206+Mi, etc.) are used.
18.207+
18.208+If SPACE is non-nil, include a space between the number and the
18.209+prefix. (Defaults to T if FLAVOR is `:si'.)
18.210+
18.211+SUFFIX is the suffix to use; defaults to B if FLAVOR is `:iec',
18.212+otherwise empty."
18.213+ (check-type file-size (integer 0 *))
18.214+ (if (zerop file-size)
18.215+ (format stream "0")
18.216+ (let ((flavor (if (null flavor) :file flavor)))
18.217+ (multiple-value-bind (formatter args)
18.218+ (human-size-formatter file-size :flavor flavor :space space)
18.219+ (format stream "~?~a" formatter args suffix)))))
18.220+
18.221+(defun file-size-human-readable (file &key flavor space suffix stream)
18.222+ "Format the size of FILE (in octets) using `format-file-size-human-readable'.
18.223+The size of file is found by `trivial-file-size:file-size-in-octets'.
18.224+
18.225+Inspired by the function of the same name in Emacs."
18.226+ (let ((file-size (file-size-in-octets file)))
18.227+ (format-file-size-human-readable
18.228+ stream
18.229+ file-size
18.230+ :flavor flavor
18.231+ :suffix suffix
18.232+ :space space)))
18.233+
18.234+(deftype wild-pathname ()
18.235+ "A pathname with wild components."
18.236+ '(and pathname (satisfies wild-pathname-p)))
18.237+
18.238+(deftype non-wild-pathname ()
18.239+ "A pathname without wild components."
18.240+ '(or directory-pathname
18.241+ (and pathname (not (satisfies wild-pathname-p)))))
18.242+
18.243+(deftype absolute-pathname ()
18.244+ '(and pathname (satisfies uiop:absolute-pathname-p)))
18.245+
18.246+(deftype relative-pathname ()
18.247+ '(and pathname (satisfies uiop:relative-pathname-p)))
18.248+
18.249+(deftype directory-pathname ()
18.250+ '(and pathname (satisfies uiop:directory-pathname-p)))
18.251+
18.252+(deftype absolute-directory-pathname ()
18.253+ '(and absolute-pathname directory-pathname))
18.254+
18.255+(deftype file-pathname ()
18.256+ '(and pathname (satisfies uiop:file-pathname-p)))
18.257+
18.258+;;; logical-pathname is defined in CL.
18.259+
18.260+(defconstant +default-element-type+ 'character)
18.261+
18.262+(defmacro with-open-files ((&rest args) &body body)
18.263+ "A simple macro to open one or more files providing the streams for the BODY. The ARGS is a list of `(stream filespec options*)` as supplied to WITH-OPEN-FILE."
18.264+ (case (length args)
18.265+ ((0)
18.266+ `(progn ,@body))
18.267+ ((1)
18.268+ `(with-open-file ,(first args) ,@body))
18.269+ (t `(with-open-file ,(first args)
18.270+ (with-open-files
18.271+ ,(rest args) ,@body)))))
18.272+
18.273+(defmacro with-open-file* ((stream filespec &key direction element-type
18.274+ if-exists if-does-not-exist external-format)
18.275+ &body body)
18.276+ "Just like WITH-OPEN-FILE, but NIL values in the keyword arguments
18.277+mean to use the default value specified for OPEN."
18.278+ (once-only (direction element-type if-exists if-does-not-exist external-format)
18.279+ `(with-open-stream
18.280+ (,stream (apply #'open ,filespec
18.281+ (append
18.282+ (when ,direction
18.283+ (list :direction ,direction))
18.284+ (list :element-type (or ,element-type
18.285+ +default-element-type+))
18.286+ (when ,if-exists
18.287+ (list :if-exists ,if-exists))
18.288+ (when ,if-does-not-exist
18.289+ (list :if-does-not-exist ,if-does-not-exist))
18.290+ (when ,external-format
18.291+ (list :external-format ,external-format)))))
18.292+ ,@body)))
18.293+
18.294+(defmacro with-input-from-file ((stream-name file-name &rest args
18.295+ &key (direction nil direction-p)
18.296+ &allow-other-keys)
18.297+ &body body)
18.298+ "Evaluate BODY with STREAM-NAME to an input stream on the file
18.299+FILE-NAME. ARGS is sent as is to the call to OPEN except EXTERNAL-FORMAT,
18.300+which is only sent to WITH-OPEN-FILE when it's not NIL."
18.301+ (declare (ignore direction))
18.302+ (when direction-p
18.303+ (error "Can't specify :DIRECTION for WITH-INPUT-FROM-FILE."))
18.304+ `(with-open-file* (,stream-name ,file-name :direction :input ,@args)
18.305+ ,@body))
18.306+
18.307+(defmacro with-output-to-file ((stream-name file-name &rest args
18.308+ &key (direction nil direction-p)
18.309+ &allow-other-keys)
18.310+ &body body)
18.311+ "Evaluate BODY with STREAM-NAME to an output stream on the file
18.312+FILE-NAME. ARGS is sent as is to the call to OPEN except EXTERNAL-FORMAT,
18.313+which is only sent to WITH-OPEN-FILE when it's not NIL."
18.314+ (declare (ignore direction))
18.315+ (when direction-p
18.316+ (error "Can't specify :DIRECTION for WITH-OUTPUT-TO-FILE."))
18.317+ `(with-open-file* (,stream-name ,file-name :direction :output ,@args)
18.318+ ,@body))
18.319+
18.320+(defun copy-stream (input output &key (element-type (stream-element-type input))
18.321+ (buffer-size 4096)
18.322+ (buffer (make-array buffer-size :element-type element-type))
18.323+ (start 0) end
18.324+ finish-output)
18.325+ "Reads data from INPUT and writes it to OUTPUT. Both INPUT and OUTPUT must
18.326+be streams, they will be passed to READ-SEQUENCE and WRITE-SEQUENCE and must have
18.327+compatible element-types."
18.328+ (check-type start non-negative-integer)
18.329+ (check-type end (or null non-negative-integer))
18.330+ (check-type buffer-size positive-integer)
18.331+ (when (and end
18.332+ (< end start))
18.333+ (error "END is smaller than START in ~S" 'copy-stream))
18.334+ (let ((output-position 0)
18.335+ (input-position 0))
18.336+ (unless (zerop start)
18.337+ ;; FIXME add platform specific optimization to skip seekable streams
18.338+ (loop while (< input-position start)
18.339+ do (let ((n (read-sequence buffer input
18.340+ :end (min (length buffer)
18.341+ (- start input-position)))))
18.342+ (when (zerop n)
18.343+ (error "~@<Could not read enough bytes from the input to fulfill ~
18.344+ the :START ~S requirement in ~S.~:@>" 'copy-stream start))
18.345+ (incf input-position n))))
18.346+ (assert (= input-position start))
18.347+ (loop while (or (null end) (< input-position end))
18.348+ do (let ((n (read-sequence buffer input
18.349+ :end (when end
18.350+ (min (length buffer)
18.351+ (- end input-position))))))
18.352+ (when (zerop n)
18.353+ (if end
18.354+ (error "~@<Could not read enough bytes from the input to fulfill ~
18.355+ the :END ~S requirement in ~S.~:@>" 'copy-stream end)
18.356+ (return)))
18.357+ (incf input-position n)
18.358+ (write-sequence buffer output :end n)
18.359+ (incf output-position n)))
18.360+ (when finish-output
18.361+ (finish-output output))
18.362+ output-position))
18.363+
18.364+(defun write-stream-into-file (stream pathname &key (if-exists :error) if-does-not-exist)
18.365+ "Read STREAM and write the contents into PATHNAME.
18.366+
18.367+STREAM will be closed afterwards, so wrap it with
18.368+`make-concatenated-stream' if you want it left open."
18.369+ (check-type pathname pathname)
18.370+ (with-open-stream (in stream)
18.371+ (with-output-to-file (out pathname
18.372+ :element-type (stream-element-type in)
18.373+ :if-exists if-exists
18.374+ :if-does-not-exist if-does-not-exist)
18.375+ (copy-stream in out)))
18.376+ pathname)
18.377+
18.378+(defun write-file-into-stream (pathname output &key (if-does-not-exist :error)
18.379+ (external-format :default))
18.380+ "Write the contents of FILE into STREAM."
18.381+ (check-type pathname pathname)
18.382+ (with-input-from-file (input pathname
18.383+ :element-type (stream-element-type output)
18.384+ :if-does-not-exist if-does-not-exist
18.385+ :external-format external-format)
18.386+ (copy-stream input output)))
18.387+
18.388+(defun file= (file1 file2 &key (buffer-size 4096))
18.389+ "Compare FILE1 and FILE2 octet by octet, \(possibly) using buffers
18.390+of BUFFER-SIZE."
18.391+ (declare (ignorable buffer-size))
18.392+ (let ((file1 (truename file1))
18.393+ (file2 (truename file2)))
18.394+ (or (equal file1 file2)
18.395+ (and (= (file-size-in-octets file1)
18.396+ (file-size-in-octets file2))
18.397+ #+ccl (file=/mmap file1 file2)
18.398+ #-ccl (file=/loop file1 file2 :buffer-size buffer-size)))))
18.399+
18.400+(defun file=/loop (file1 file2 &key (buffer-size 4096))
18.401+ "Compare two files by looping over their contents using a buffer."
18.402+ (declare
18.403+ (type pathname file1 file2)
18.404+ (type array-length buffer-size)
18.405+ (optimize (safety 1) (debug 0) (compilation-speed 0)))
18.406+ (flet ((make-buffer ()
18.407+ (make-array buffer-size
18.408+ :element-type 'octet
18.409+ :initial-element 0)))
18.410+ (declare (inline make-buffer))
18.411+ (with-open-files ((file1 file1 :element-type 'octet :direction :input)
18.412+ (file2 file2 :element-type 'octet :direction :input))
18.413+ (and (= (file-length file1)
18.414+ (file-length file2))
18.415+ (locally (declare (optimize speed))
18.416+ (loop with buffer1 = (make-buffer)
18.417+ with buffer2 = (make-buffer)
18.418+ for end1 = (read-sequence buffer1 file1)
18.419+ for end2 = (read-sequence buffer2 file2)
18.420+ until (or (zerop end1) (zerop end2))
18.421+ always (and (= end1 end2)
18.422+ (octet-vector= buffer1 buffer2
18.423+ :end1 end1
18.424+ :end2 end2))))))))
18.425+
18.426+(defun file-size (file &key (element-type '(unsigned-byte 8)))
18.427+ "The size of FILE, in units of ELEMENT-TYPE (defaults to bytes).
18.428+
18.429+The size is computed by opening the file and getting the length of the
18.430+resulting stream.
18.431+
18.432+If all you want is to read the file's size in octets from its
18.433+metadata, consider `trivial-file-size:file-size-in-octets' instead."
18.434+ (check-type file (or string pathname))
18.435+ (with-input-from-file (in file :element-type element-type)
18.436+ (file-length in)))
18.437+
18.438+(defconstant +pathsep+
18.439+ (if (uiop:os-windows-p) #\; #\:)
18.440+ "Path separator for this OS.")
19.1--- /dev/null Thu Jan 01 00:00:00 1970 +0000
19.2+++ b/lisp/std/string.lisp Sun Apr 14 01:19:10 2024 -0400
19.3@@ -0,0 +1,528 @@
19.4+;;; std/str.lisp --- String utilities
19.5+
19.6+;;; Code:
19.7+
19.8+;; (defvar sb-unicode-syms
19.9+;; '(words lines sentences whitespace-p uppercase lowercase titlecase
19.10+;; word-break-class line-break-class sentence-break-class char-block
19.11+;; cased-p uppercase-p lowercase-p titlecase-p casefold
19.12+;; graphemes grapheme-break-class
19.13+;; bidi-mirroring-glyph bidi-class
19.14+;; normalize-string normalized-p default-ignorable-p
19.15+;; confusable-p hex-digit-p mirrored-p alphabetic-p math-p
19.16+;; decimal-value digit-value
19.17+;; unicode< unicode> unicode= unicode-equal
19.18+;; unicode<= unicode>=))
19.19+(in-package :std)
19.20+
19.21+;; (mapc (lambda (s) (export s)) sb-unicode-syms)
19.22+;; (reexport-from
19.23+;; :sb-unicode
19.24+;; :include sb-unicode-syms)
19.25+
19.26+(defparameter *omit-nulls* nil)
19.27+(defvar *whitespaces* (list #\Backspace #\Tab #\Linefeed #\Newline #\Vt #\Page
19.28+ #\Return #\Space #\Rubout
19.29+ #+sbcl #\Next-Line #-sbcl (code-char 133)
19.30+ #\No-break_space)
19.31+ "On some implementations, linefeed and newline represent the same character (code).")
19.32+
19.33+(deftype string-designator ()
19.34+ "A string designator type. A string designator is either a string, a symbol,
19.35+or a character."
19.36+ `(or symbol string character))
19.37+
19.38+(defun ssplit (separator s &key (omit-nulls *omit-nulls*) limit (start 0) end)
19.39+ "Split s into substring by separator (cl-ppcre takes a regex, we do not).
19.40+
19.41+ `limit' limits the number of elements returned (i.e. the string is
19.42+ split at most `limit' - 1 times)."
19.43+ ;; cl-ppcre:split doesn't return a null string if the separator appears at the end of s.
19.44+ (let* ((limit (or limit (1+ (length s))))
19.45+ (res (cl-ppcre:split separator s :limit limit :start start :end end)))
19.46+ (if omit-nulls
19.47+ (remove-if (lambda (it) (sequence:emptyp it)) res)
19.48+ res)))
19.49+
19.50+(defun collapse-whitespaces (s)
19.51+ "Ensure there is only one space character between words.
19.52+ Remove newlines."
19.53+ (cl-ppcre:regex-replace-all "\\s+" s " "))
19.54+
19.55+(defun trim (s &key (char-bag *whitespaces*))
19.56+ "Removes all characters in `char-bag` (default: whitespaces) at the beginning and end of `s`.
19.57+ If supplied, char-bag has to be a sequence (e.g. string or list of characters).
19.58+
19.59+ Examples: (trim \" foo \") => \"foo\"
19.60+ (trim \"+-*foo-bar*-+\" :char-bag \"+-*\") => \"foo-bar\"
19.61+ (trim \"afood\" :char-bag (str:concat \"a\" \"d\")) => \"foo\""
19.62+ (when s
19.63+ (string-trim char-bag s)))
19.64+
19.65+;;; TODO 2023-08-27: camel snake kebab
19.66+
19.67+(defun make-template-parser (start-delimiter end-delimiter &key (ignore-case nil))
19.68+ "Returns a closure than can substitute variables
19.69+ delimited by \"start-delimiter\" and \"end-delimiter\"
19.70+ in a string, by the provided values."
19.71+ (check-type start-delimiter string)
19.72+ (check-type end-delimiter string)
19.73+ (when (or (string= start-delimiter "")
19.74+ (string= end-delimiter ""))
19.75+ (error 'simple-type-error
19.76+ :format-control "The empty string is not a valid delimiter."))
19.77+ (let ((start-len (length start-delimiter))
19.78+ (end-len (length end-delimiter))
19.79+ (test (if ignore-case
19.80+ #'string-equal
19.81+ #'string=)))
19.82+
19.83+ (lambda (string values)
19.84+ (check-type string string)
19.85+ (unless (listp values)
19.86+ (error 'simple-type-error
19.87+ :format-control "values should be an association list"))
19.88+
19.89+ (with-output-to-string (stream)
19.90+ (loop for prev = 0 then (+ j end-len)
19.91+ for i = (search start-delimiter string)
19.92+ then (search start-delimiter string :start2 j)
19.93+ for j = (if i (search end-delimiter string :start2 i))
19.94+ then (if i (search end-delimiter string :start2 i))
19.95+ while (and i j)
19.96+ do (write-string (subseq string prev i) stream)
19.97+ (let ((instance (rest (assoc (subseq string (+ i start-len) j)
19.98+ values
19.99+ :test test))))
19.100+ (if instance
19.101+ (princ instance stream)
19.102+ (write-string (subseq string i (+ j end-len)) stream)))
19.103+
19.104+ finally (write-string (subseq string prev) stream))))))
19.105+
19.106+;;; STRING-CASE
19.107+;;; Implementing an efficient string= case in Common Lisp
19.108+;;;
19.109+;;; 2015-11-15: Defknown don't have explicit-check in SBCL 1.3.0
19.110+;;; Remove the declaration. It's never useful the way we use
19.111+;;; numeric-char=.
19.112+;;;
19.113+;;; 2015-11-15: Make this a real ASDF system for Xach
19.114+;;; I copied the system definition from Quicklisp and mangled as
19.115+;;; necessary.
19.116+;;;
19.117+;;; 2010-06-30: Tiny bugfix
19.118+;;; Widen the type declarations inside cases to allow vectors that
19.119+;;; have a length that's shorter than the total size (due to fill-
19.120+;;; pointers).
19.121+
19.122+;;;
19.123+;;;# Introduction
19.124+;;;
19.125+;;; In `<http://neverfriday.com/blog/?p=10>', OMouse asks how
19.126+;;; best to implement a `string= case' (in Scheme). I noted that
19.127+;;; naively iterating through the cases with `string=' at runtime
19.128+;;; is suboptimal. Seeing the problem as a simplistic pattern
19.129+;;; matching one makes an efficient solution obvious.
19.130+;;; Note that, unlike Haskell, both Scheme and CL have random-
19.131+;;; access on strings in O(1), something which I exploit to
19.132+;;; generate better code.
19.133+;;;
19.134+;;; This is also a pbook.el file (the pdf can be found at
19.135+;;; `<http://www.discontinuity.info/~pkhuong/string-case.pdf>' ).
19.136+;;; I'm new at this not-quite-illiterate programming thing, so
19.137+;;; please bear with me (: I'm also looking for comments on the
19.138+;;; formatting. I'm particularly iffy with the way keywords look
19.139+;;; like. It just looks really fuzzy when you're not really zoomed
19.140+;;; in (or reading it on paper).
19.141+
19.142+;;; I usually don't use packages for throw-away code, but this looks
19.143+;;; like it could be useful to someone.
19.144+
19.145+;;;# Some utility code
19.146+
19.147+(defun split-tree (list &key (test 'eql) (key 'identity))
19.148+ "Splits input list into sublists of elements
19.149+ whose elements are all such that (key element)
19.150+ are all test.
19.151+ It's assumed that test and key form an equality class.
19.152+ (This is similar to groupBy)"
19.153+ (when list
19.154+ (let* ((lists ())
19.155+ (cur-list (list (first list)))
19.156+ (cur-key (funcall key (first list))))
19.157+ (dolist (elt (rest list) (nreverse (cons (nreverse cur-list)
19.158+ lists)))
19.159+ (let ((new-key (funcall key elt)))
19.160+ (if (funcall test cur-key new-key)
19.161+ (push elt cur-list)
19.162+ (progn
19.163+ (push (nreverse cur-list) lists)
19.164+ (setf cur-list (list elt)
19.165+ cur-key new-key))))))))
19.166+
19.167+(defun iota (n)
19.168+ (loop for i below n collect i))
19.169+
19.170+(defun hash-table->list (table &key (keep-keys t) (keep-values t))
19.171+ "Saves the keys and/or values in table to a list.
19.172+ As with hash table iterating functions, there is no
19.173+ implicit ordering."
19.174+ (let ((list ()))
19.175+ (maphash (cond ((and keep-keys
19.176+ keep-values)
19.177+ (lambda (k v)
19.178+ (push (cons k v) list)))
19.179+ (keep-keys
19.180+ (lambda (k v)
19.181+ (declare (ignore v))
19.182+ (push k list)))
19.183+ (keep-values
19.184+ (lambda (k v)
19.185+ (declare (ignore k))
19.186+ (push v list))))
19.187+ table)
19.188+ list))
19.189+
19.190+(defun all-equal (list &key (key 'identity) (test 'eql))
19.191+ (if (or (null list)
19.192+ (null (rest list)))
19.193+ t
19.194+ (let ((first-key (funcall key (first list))))
19.195+ (every (lambda (element)
19.196+ (funcall test first-key
19.197+ (funcall key element)))
19.198+ (rest list)))))
19.199+
19.200+(defun split-at (list n)
19.201+ "Split list in k lists of n elements (or less for the last list)"
19.202+ (declare (type (and fixnum (integer (0))) n))
19.203+ (let ((lists '())
19.204+ (cur-list '())
19.205+ (counter 0))
19.206+ (declare (type (and fixnum unsigned-byte) counter))
19.207+ (dolist (elt list (nreverse (if cur-list
19.208+ (cons (nreverse cur-list)
19.209+ lists)
19.210+ lists)))
19.211+ (push elt cur-list)
19.212+ (when (= (incf counter) n)
19.213+ (push (nreverse cur-list) lists)
19.214+ (setf cur-list '()
19.215+ counter 0)))))
19.216+
19.217+;;;# The string matching compiler per se
19.218+;;;
19.219+;;; I use special variables here because I find that
19.220+;;; preferable to introducing noise everywhere to thread
19.221+;;; these values through all the calls, especially
19.222+;;; when `*no-match-form*' is only used at the very end.
19.223+
19.224+(defparameter *input-string* nil
19.225+ "Symbol of the variable holding the input string")
19.226+
19.227+(defparameter *no-match-form* nil
19.228+ "Form to insert when no match is found.")
19.229+
19.230+;;; The basic idea of the pattern matching process here is
19.231+;;; to first discriminate with the input string's length;
19.232+;;; once that is done, it is very easy to safely use random
19.233+;;; access until only one candidate string (pattern) remains.
19.234+;;; However, even if we determine that only one case might be
19.235+;;; a candidate, it might still be possible for another string
19.236+;;; (not in the set of cases) to match the criteria. So we also
19.237+;;; have to make sure that *all* the indices match. A simple
19.238+;;; way to do this would be to emit the remaining checks at the
19.239+;;; every end, when only one candidate is left. However, that
19.240+;;; can result in a lot of duplicate code, and some useless
19.241+;;; work on mismatches. Instead, the code generator always
19.242+;;; tries to find (new) indices for which all the candidates
19.243+;;; left in the branch share the same character, and then emits
19.244+;;; a guard, checking the character at that index as soon as possible.
19.245+
19.246+;;; In my experience, there are two main problems when writing
19.247+;;; pattern matchers: how to decide what to test for at each
19.248+;;; fork, and how to ensure the code won't explode exponentially.
19.249+;;; Luckily, for our rather restricted pattern language (equality
19.250+;;; on strings), patterns can't overlap, and it's possible to guarantee
19.251+;;; that no candidate will ever be possible in both branches of a
19.252+;;; fork.
19.253+
19.254+;;; Due to the the latter guarantee, we have a simple fitness
19.255+;;; measure for tests: simply maximising the number of
19.256+;;; candidates in the smallest branch will make our search tree
19.257+;;; as balanced as possible. Of course, we don't know whether
19.258+;;; the subtrees will be balanced too, but I don't think it'll
19.259+;;; be much of an issue.
19.260+
19.261+;;; Note that, if we had access, whether via annotations or profiling,
19.262+;;; to the probability of each case, the situation would be very
19.263+;;; different. In fact, on a pipelined machine where branch
19.264+;;; mispredictions are expensive, an unbalanced tree will yield
19.265+;;; better expected runtimes. There was a very interesting and rather
19.266+;;; sophisticated Google lecture on that topic on Google video
19.267+;;; (the speaker used markov chains to model dynamic predictors,
19.268+;;; for example), but I can't seem to find the URL.
19.269+
19.270+;;; TODO: Find bounds on the size of the code!
19.271+
19.272+(defun find-best-split (strings to-check)
19.273+ "Iterate over all the indices left to check to find
19.274+ which index (and which character) to test for equality
19.275+ with, keeping the ones which result in the most balanced
19.276+ split."
19.277+ (flet ((evaluate-split (i char)
19.278+ "Just count all the matches and mismatches"
19.279+ (let ((= 0)
19.280+ (/= 0))
19.281+ (dolist (string strings (min = /=))
19.282+ (if (eql (aref string i) char)
19.283+ (incf =)
19.284+ (incf /=)))))
19.285+ (uniquify-chars (chars)
19.286+ "Only keep one copy of each char in the list"
19.287+ (mapcar 'first (split-tree (sort chars 'char<) :test #'eql))))
19.288+ (let ((best-split 0) ; maximise size of smallest branch
19.289+ (best-posn nil)
19.290+ (best-char nil))
19.291+ (dolist (i to-check (values best-posn best-char))
19.292+ (dolist (char (uniquify-chars (mapcar (lambda (string)
19.293+ (aref string i))
19.294+ strings)))
19.295+ (let ((Z (evaluate-split i char)))
19.296+ (when (> Z best-split)
19.297+ (setf best-split Z
19.298+ best-posn i
19.299+ best-char char))))))))
19.300+
19.301+;;; We sometimes have to execute sequences of checks for
19.302+;;; equality. The natural way to express this is via a
19.303+;;; sequence of checks, wrapped in an `and'. However, that
19.304+;;; translates to a sequence of conditional branches, predicated
19.305+;;; on very short computations. On (not so) modern architectures,
19.306+;;; it'll be faster to coalesce a sequence of such checks together
19.307+;;; as straightline code (e.g. via `or' of `xor'), and only branch
19.308+;;; at the very end. The code doesn't become much more complex,
19.309+;;; and benchmarks have shown it to be beneficial (giving a speed
19.310+;;; up of 2-5% for both predictable and unpredictable workloads,
19.311+;;; on a Core 2).
19.312+
19.313+;;; Benchmarks (and experience) have shown that, instead of executing
19.314+;;; a cascade of comparison/conditional branch, it's slightly
19.315+;;; faster, both for predictable and unpredictable workloads,
19.316+;;; to `or' together a bunch of comparisons (e.g. `xor'). On a Core 2
19.317+;;; processor, it seems that doing so for sequences of around 4
19.318+;;; comparisons is the sweetspot. On perfectly predictable input,
19.319+;;; aborting early (on the first check) saves as much time as
19.320+;;; the 4 test/conditional branch add, compared to a sequence of
19.321+;;; `xor' and `or'.
19.322+
19.323+;;; Numeric char= abstracts out the xor check, and, on SBCL,
19.324+;;; is replaced by a short assembly sequence when the first
19.325+;;; argument is a constant. The declared return type is then
19.326+;;; wider than strictly necessary making it fit in a machine
19.327+;;; register, but not as a fixnum ensures that the compiler
19.328+;;; won't repeatedly convert the values to fixnums, when all
19.329+;;; we'll do is `or' them together and check for zero-ness.
19.330+;;; This function is the only place where the macro isn't
19.331+;;; generic over the elements stored in the cases. It shouldn't
19.332+;;; be too hard to implement a numeric-eql, which would
19.333+;;; restore genericity to the macro, while keeping the
19.334+;;; speed-up.
19.335+
19.336+#+nil
19.337+(declaim (inline numeric-char=)
19.338+ (ftype (function (character character)
19.339+ (values (and unsigned-byte fixnum)))
19.340+ numeric-char=))
19.341+;; FIXME 2024-04-11:
19.342+;; #+ (and sbcl (or x86 x86-64))
19.343+(defun numeric-char= (x y)
19.344+ (declare (type character x y))
19.345+ (logxor (char-code x)
19.346+ (char-code y)))
19.347+
19.348+(eval-when (:load-toplevel :compile-toplevel)
19.349+(progn
19.350+ (defknown numeric-char= (character character)
19.351+ (unsigned-byte #. (1- sb-vm:n-machine-word-bits))
19.352+ (movable foldable flushable))
19.353+
19.354+ (define-vop (numeric-char=)
19.355+ (:args (x :scs (sb-vm::character-reg sb-vm::character-stack)
19.356+ :target r
19.357+ :load-if (not (location= x r))))
19.358+ (:info y)
19.359+ (:arg-types (:constant character) character)
19.360+ (:results (r :scs (sb-vm::unsigned-reg)
19.361+ :load-if (not (location= x r))))
19.362+ (:result-types sb-vm::unsigned-num)
19.363+ (:translate numeric-char=)
19.364+ (:policy :fast-safe)
19.365+ (:note "inline constant numeric-char=")
19.366+ (:generator 1
19.367+ (move r x)
19.368+ (sb-vm::inst #:xor r (char-code y))))))
19.369+
19.370+;;; At each step, we may be able to find positions for which
19.371+;;; there can only be one character. If we emit the check for
19.372+;;; these positions as soon as possible, we avoid duplicating
19.373+;;; potentially a lot of code. Since benchmarks have shown
19.374+;;; it to be useful, this function implements the checks
19.375+;;; as a series of (zerop (logior (numeric-char= ...)...)),
19.376+;;; if there is more than one such check to emit.
19.377+
19.378+(defun emit-common-checks (strings to-check)
19.379+ (labels ((emit-char= (pairs)
19.380+ (mapcar (lambda (pair)
19.381+ (destructuring-bind (posn . char)
19.382+ pair
19.383+ `(numeric-char= ,char
19.384+ (aref ,*input-string* ,posn))))
19.385+ pairs))
19.386+ (emit-checking-form (common-chars)
19.387+ (when common-chars
19.388+ (let ((common-chars (sort common-chars '< :key 'car)))
19.389+ #+ (and) `(and ,@(mapcar
19.390+ (lambda (chunk)
19.391+ (if (null (rest chunk))
19.392+ (destructuring-bind ((posn . char))
19.393+ chunk
19.394+ `(eql ,char
19.395+ (aref ,*input-string* ,posn)))
19.396+ `(zerop
19.397+ (logior ,@(emit-char= chunk)))))
19.398+ (split-at common-chars 4)))
19.399+ #+ (or) `(and ,@(mapcar
19.400+ (lambda (pair)
19.401+ (destructuring-bind (posn . char)
19.402+ pair
19.403+ `(eql ,char
19.404+ (aref ,*input-string* ,posn))))
19.405+ common-chars))))))
19.406+ (let ((common-chars ())
19.407+ (left-to-check ()))
19.408+ (dolist (posn to-check (values (emit-checking-form common-chars)
19.409+ (nreverse left-to-check)))
19.410+ (if (all-equal strings :key (lambda (string)
19.411+ (aref string posn)))
19.412+ (push (cons posn (aref (first strings) posn))
19.413+ common-chars)
19.414+ (push posn left-to-check))))))
19.415+
19.416+;;; The driving function: First, emit any test that is
19.417+;;; common to all the candidates. If there's only one
19.418+;;; candidate, then we just have to execute the body;
19.419+;;; if not, we look for the `best' test and emit the
19.420+;;; corresponding code: execute the test, and recurse
19.421+;;; on the candidates that match the test and on those
19.422+;;; that don't.
19.423+
19.424+(defun make-search-tree (strings bodies to-check)
19.425+ (multiple-value-bind (guard to-check)
19.426+ (emit-common-checks strings to-check)
19.427+ (if (null (rest strings))
19.428+ (progn
19.429+ (assert (null to-check)) ; there shouldn't be anything left to check
19.430+ (if guard
19.431+ `(if ,guard
19.432+ (progn ,@(first bodies))
19.433+ ,*no-match-form*)
19.434+ `(progn ,@(first bodies))))
19.435+ (multiple-value-bind (posn char)
19.436+ (find-best-split strings to-check)
19.437+ (assert posn) ; this can only happen if all strings are equal
19.438+ (let ((=strings ())
19.439+ (=bodies ())
19.440+ (/=strings ())
19.441+ (/=bodies ()))
19.442+ (loop
19.443+ for string in strings
19.444+ for body in bodies
19.445+ do (if (eql char (aref string posn))
19.446+ (progn
19.447+ (push string =strings)
19.448+ (push body =bodies))
19.449+ (progn
19.450+ (push string /=strings)
19.451+ (push body /=bodies))))
19.452+ (let ((tree `(if (eql ,char (aref ,*input-string* ,posn))
19.453+ ,(make-search-tree =strings =bodies
19.454+ (remove posn to-check))
19.455+ ,(make-search-tree /=strings /=bodies
19.456+ to-check))))
19.457+ (if guard
19.458+ `(if ,guard
19.459+ ,tree
19.460+ ,*no-match-form*)
19.461+ tree)))))))
19.462+
19.463+;;; Finally, we can glue it all together.
19.464+;;; To recapitulate, first, dispatch on string
19.465+;;; length, then execute a search tree for the
19.466+;;; few candidates left, and finally make sure
19.467+;;; the input string actually matches the one
19.468+;;; candidate left at the leaf.
19.469+
19.470+(defun emit-string-case (cases input-var no-match)
19.471+ (flet ((case-string-length (x)
19.472+ (length (first x))))
19.473+ (let ((*input-string* input-var)
19.474+ (*no-match-form* no-match)
19.475+ (cases-lists (split-tree (sort cases '<
19.476+ :key #'case-string-length)
19.477+ :key #'case-string-length)))
19.478+ `(locally (declare (type vector ,input-var))
19.479+ (case (length ,input-var)
19.480+ ,@(loop for cases in cases-lists
19.481+ for length = (case-string-length (first cases))
19.482+ collect `((,length)
19.483+ ;; arrays with fill pointers expose the total length
19.484+ ;; in their type, not the position of the fill-pointer.
19.485+ ;; The type below only applies to simple-arrays.
19.486+ (locally (declare (type (or (not simple-array)
19.487+ (simple-array * (,length)))
19.488+ ,input-var))
19.489+ ,(make-search-tree (mapcar 'first cases)
19.490+ (mapcar 'rest cases)
19.491+ (iota length)))))
19.492+ (t ,no-match))))))
19.493+
19.494+;;; Just wrapping the previous function in a macro,
19.495+;;; and adding some error checking (the rest of the code
19.496+;;; just assumes there won't be duplicate patterns).
19.497+;;; Note how we use a local function instead of passing
19.498+;;; the default form directly. This can save a lot on
19.499+;;; code size, especially when the default form is
19.500+;;; large.
19.501+
19.502+(defmacro string-case ((string &key (default '(error "No match")))
19.503+ &body cases)
19.504+ "(string-case (string &key default)
19.505+ case*)
19.506+ case ::= string form*
19.507+ | t form*
19.508+ Where t is the default case."
19.509+ (let ((cases-table (make-hash-table :test 'equal)))
19.510+ "Error checking cruft"
19.511+ (dolist (case cases)
19.512+ (assert (typep case '(cons (or string (eql t)))))
19.513+ (let ((other-case (gethash (first case) cases-table)))
19.514+ (if other-case
19.515+ (warn "Duplicate string-case cases: ~A -> ~A or ~A~%"
19.516+ (first case)
19.517+ (rest other-case)
19.518+ (rest case))
19.519+ (setf (gethash (first case) cases-table)
19.520+ (rest case)))))
19.521+ (let ((input-var (gensym "INPUT"))
19.522+ (default-fn (gensym "ON-ERROR"))
19.523+ (default-body (gethash t cases-table (list default))))
19.524+ `(let ((,input-var ,string))
19.525+ (flet ((,default-fn ()
19.526+ ,@default-body))
19.527+ ,(emit-string-case (progn
19.528+ (remhash t cases-table)
19.529+ (hash-table->list cases-table))
19.530+ input-var
19.531+ `(,default-fn)))))))
20.1--- a/lisp/std/tests.lisp Fri Apr 12 21:19:02 2024 -0400
20.2+++ b/lisp/std/tests.lisp Sun Apr 14 01:19:10 2024 -0400
20.3@@ -62,7 +62,7 @@
20.4 (is (eq (make-keyword 'fizz) :fizz)))
20.5
20.6 ;;;; TODO
20.7-(deftest str ()
20.8+(deftest string ()
20.9 "Test standard string utils"
20.10 (is (typep "test" 'string-designator))
20.11 (is (typep 'test 'string-designator))
21.1--- a/lisp/std/thread.lisp Fri Apr 12 21:19:02 2024 -0400
21.2+++ b/lisp/std/thread.lisp Sun Apr 14 01:19:10 2024 -0400
21.3@@ -165,6 +165,9 @@
21.4 (defgeneric push-result (task pool))
21.5 (defgeneric push-worker (thread pool))
21.6 (defgeneric push-stage (stage pool))
21.7+(defgeneric start-task-pool (pool))
21.8+(defgeneric pause-task-pool (pool))
21.9+(defgeneric stop-task-pool (pool))
21.10
21.11 (defstruct task-pool
21.12 (oracle nil :type (or null oracle))
21.13@@ -184,7 +187,7 @@
21.14 (oracle-thread (task-pool-oracle self)))
21.15
21.16 (defmethod push-worker ((worker thread) (pool task-pool))
21.17- (vector-push-extend worker (task-pool-workers pool)))
21.18+ (vector-push worker (task-pool-workers pool)))
21.19
21.20 (defclass task ()
21.21 ((object :initarg :object :accessor task-object)))
22.1--- /dev/null Thu Jan 01 00:00:00 1970 +0000
22.2+++ b/lisp/std/types.lisp Sun Apr 14 01:19:10 2024 -0400
22.3@@ -0,0 +1,154 @@
22.4+;;; std/types.lisp --- Standard Types
22.5+
22.6+;;
22.7+
22.8+;;; Code:
22.9+(in-package :std)
22.10+(deftype array-index (&optional (length (1- array-dimension-limit)))
22.11+ "Type designator for an index into array of LENGTH: an integer between
22.12+0 (inclusive) and LENGTH (exclusive). LENGTH defaults to one less than
22.13+ARRAY-DIMENSION-LIMIT."
22.14+ `(integer 0 (,length)))
22.15+
22.16+(deftype array-length (&optional (length (1- array-dimension-limit)))
22.17+ "Type designator for a dimension of an array of LENGTH: an integer between
22.18+0 (inclusive) and LENGTH (inclusive). LENGTH defaults to one less than
22.19+ARRAY-DIMENSION-LIMIT."
22.20+ `(integer 0 ,length))
22.21+
22.22+;; This MACROLET will generate most of CDR5 (http://cdr.eurolisp.org/document/5/)
22.23+;; except the RATIO related definitions and ARRAY-INDEX.
22.24+(macrolet
22.25+ ((frob (type &optional (base-type type))
22.26+ (let ((subtype-names (list))
22.27+ (predicate-names (list)))
22.28+ (flet ((make-subtype-name (format-control)
22.29+ (let ((result (format-symbol :std format-control
22.30+ (symbol-name type))))
22.31+ (push result subtype-names)
22.32+ result))
22.33+ (make-predicate-name (sybtype-name)
22.34+ (let ((result (format-symbol :std '#:~A-p
22.35+ (symbol-name sybtype-name))))
22.36+ (push result predicate-names)
22.37+ result))
22.38+ (make-docstring (range-beg range-end range-type)
22.39+ (let ((inf (ecase range-type (:negative "-inf") (:positive "+inf"))))
22.40+ (format nil "Type specifier denoting the ~(~A~) range from ~A to ~A."
22.41+ type
22.42+ (if (equal range-beg ''*) inf (ensure-car range-beg))
22.43+ (if (equal range-end ''*) inf (ensure-car range-end))))))
22.44+ (let* ((negative-name (make-subtype-name '#:negative-~a))
22.45+ (non-positive-name (make-subtype-name '#:non-positive-~a))
22.46+ (non-negative-name (make-subtype-name '#:non-negative-~a))
22.47+ (positive-name (make-subtype-name '#:positive-~a))
22.48+ (negative-p-name (make-predicate-name negative-name))
22.49+ (non-positive-p-name (make-predicate-name non-positive-name))
22.50+ (non-negative-p-name (make-predicate-name non-negative-name))
22.51+ (positive-p-name (make-predicate-name positive-name))
22.52+ (negative-extremum)
22.53+ (positive-extremum)
22.54+ (below-zero)
22.55+ (above-zero)
22.56+ (zero))
22.57+ (setf (values negative-extremum below-zero
22.58+ above-zero positive-extremum zero)
22.59+ (ecase type
22.60+ (fixnum (values 'most-negative-fixnum -1 1 'most-positive-fixnum 0))
22.61+ (integer (values ''* -1 1 ''* 0))
22.62+ (rational (values ''* '(0) '(0) ''* 0))
22.63+ (real (values ''* '(0) '(0) ''* 0))
22.64+ (float (values ''* '(0.0E0) '(0.0E0) ''* 0.0E0))
22.65+ (short-float (values ''* '(0.0S0) '(0.0S0) ''* 0.0S0))
22.66+ (single-float (values ''* '(0.0F0) '(0.0F0) ''* 0.0F0))
22.67+ (double-float (values ''* '(0.0D0) '(0.0D0) ''* 0.0D0))
22.68+ (long-float (values ''* '(0.0L0) '(0.0L0) ''* 0.0L0))))
22.69+ `(progn
22.70+ (deftype ,negative-name ()
22.71+ ,(make-docstring negative-extremum below-zero :negative)
22.72+ `(,',base-type ,,negative-extremum ,',below-zero))
22.73+
22.74+ (deftype ,non-positive-name ()
22.75+ ,(make-docstring negative-extremum zero :negative)
22.76+ `(,',base-type ,,negative-extremum ,',zero))
22.77+
22.78+ (deftype ,non-negative-name ()
22.79+ ,(make-docstring zero positive-extremum :positive)
22.80+ `(,',base-type ,',zero ,,positive-extremum))
22.81+
22.82+ (deftype ,positive-name ()
22.83+ ,(make-docstring above-zero positive-extremum :positive)
22.84+ `(,',base-type ,',above-zero ,,positive-extremum))
22.85+
22.86+ (declaim (inline ,@predicate-names))
22.87+
22.88+ (defun ,negative-p-name (n)
22.89+ (and (typep n ',type)
22.90+ (< n ,zero)))
22.91+
22.92+ (defun ,non-positive-p-name (n)
22.93+ (and (typep n ',type)
22.94+ (<= n ,zero)))
22.95+
22.96+ (defun ,non-negative-p-name (n)
22.97+ (and (typep n ',type)
22.98+ (<= ,zero n)))
22.99+
22.100+ (defun ,positive-p-name (n)
22.101+ (and (typep n ',type)
22.102+ (< ,zero n)))))))))
22.103+ (frob fixnum integer)
22.104+ (frob integer)
22.105+ (frob rational)
22.106+ (frob real)
22.107+ (frob float)
22.108+ (frob short-float)
22.109+ (frob single-float)
22.110+ (frob double-float)
22.111+ (frob long-float))
22.112+
22.113+(defun of-type (type)
22.114+ "Returns a function of one argument, which returns true when its argument is
22.115+of TYPE."
22.116+ (lambda (thing) (typep thing type)))
22.117+
22.118+(define-compiler-macro of-type (&whole form type &environment env)
22.119+ ;; This can yeild a big benefit, but no point inlining the function
22.120+ ;; all over the place if TYPE is not constant.
22.121+ (if (constantp type env)
22.122+ (with-gensyms (thing)
22.123+ `(lambda (,thing)
22.124+ (typep ,thing ,type)))
22.125+ form))
22.126+
22.127+(declaim (inline type=))
22.128+(defun type= (type1 type2)
22.129+ "Returns a primary value of T if TYPE1 and TYPE2 are the same type,
22.130+and a secondary value that is true is the type equality could be reliably
22.131+determined: primary value of NIL and secondary value of T indicates that the
22.132+types are not equivalent."
22.133+ (multiple-value-bind (sub ok) (subtypep type1 type2)
22.134+ (cond ((and ok sub) ; type1 is known to be a subtype of type 2
22.135+ ; so type= return values come from the second invocation of subtypep
22.136+ (subtypep type2 type1))
22.137+ ;; type1 is assuredly NOT a subtype of type2,
22.138+ ;; so assuredly type1 and type2 cannot be type=
22.139+ (ok
22.140+ (values nil t))
22.141+ ;; our first result is uncertain ( ok == nil ) and it follows
22.142+ ;; from specification of SUBTYPEP that sub = ok = NIL
22.143+ (t
22.144+ (assert (not sub)) ; is the implementation correct?
22.145+ (multiple-value-bind (sub2 ok2)
22.146+ (subtypep type2 type1)
22.147+ (if (and (not sub2) ok2) ; we KNOW type2 is not a subtype of type1
22.148+ ;; so our results are certain...
22.149+ (values nil t)
22.150+ ;; otherwise, either type2 is surely a subtype of type1 (t t)
22.151+ ;; or type2 is not a subtype of type1, but we don't
22.152+ ;; know that for sure (nil nil)
22.153+ ;; In either case our result is negative but unsure
22.154+ (values nil nil)))))))
22.155+
22.156+(define-modify-macro coercef (type-spec) coerce
22.157+ "Modify-macro for COERCE.")