Mercurial > core / lisp/lib/dat/csv.lisp
changeset 580: |
571685ae64f1 |
parent: |
c0fc6b87557f
|
child: |
d3e2829521a3 |
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Mon, 05 Aug 2024 21:57:13 -0400 |
permissions: |
-rw-r--r-- |
description: |
queries, cli fixes, dat/csv, emacs org-columns |
1 ;;; lib/dat/csv.lisp --- CSV Data Format 3 ;; Comma Separated Values (or tabs or whatever) 7 ;; This package prioritizes flexibility. If you want speed, convert to 10 ;; Still, efficiency is worth pursuing here and there are some obvious gaps to 13 ;; - remove sequence functions 14 ;; - research optimized access patterns used in other langs/state of art 21 (defun parse-number-no-error (string &optional default) 24 (parse-number string)))) 29 (defparameter *csv-separator* #\,) 30 (defparameter *csv-quote* #\") 31 (defparameter *csv-print-quote-p* t "print \" when the element is a string?") 32 (defparameter *csv-default-external-format* :utf-8) 34 (defun write-csv-line (record &key stream (delimiter *csv-separator*)) 35 "Accept a record and print it in one line as a csv record. 37 A record is a sequence of element. A element can be of any type. 38 If record is nil, nothing will be printed. 39 If stream is nil (default case), it will return a string, otherwise it will return nil. 40 For efficiency reason, no intermediate string will be constructed. " 42 (with-output-to-string (s) 43 (let ((*standard-output* s) 44 (record-size (length record))) 45 (loop for e across record 49 (if *csv-print-quote-p* 51 (write-char *csv-quote*) 53 (write-char *csv-quote*)) 56 (when (< i (1- record-size)) 57 (write-char delimiter))))))) 58 (format stream "~&~a" result))) 60 (defun write-csv-stream (stream table &key (delimiter *csv-separator*)) 61 "Accept a stream and a table and output the table as csv form to the stream. 63 A table is a sequence of lines. A line is a sequence of elements. 64 Elements can be any types" 65 (loop for l across table 66 do (write-csv-line l :stream stream :delimiter delimiter)) 67 (write-char #\newline stream) 70 (defun write-csv-file (filename table &key (external-format *csv-default-external-format*) (delimiter *csv-separator*)) 71 "Accept a filename and a table and output the table as csv form to the file. 73 A table is a sequence of lines. A line is a sequence of elements. 74 Elements can be any types" 75 (with-open-file (f filename :direction :output 76 :if-does-not-exist :create 78 :external-format external-format) 79 (write-csv-stream f table :delimiter delimiter))) 81 (defun parse-csv-string (str &key (delimiter *csv-separator*)) ;; refer RFC4180 83 ;; (regexp:split-re "," str) 84 (let ((q-count (count *csv-quote* str :test #'char-equal))) 85 (when (oddp q-count) (warn 'simple-warning :format-control "odd number of #\" in a line (~A)" 86 :format-arguments (list q-count))) 88 (cl-ppcre:split delimiter str) ;(cl-ppcre:split *csv-separator* str) 89 (macrolet ((push-f (fld flds) `(push (coerce (reverse ,fld) 'string) ,flds))) 90 (loop with state = :at-first ;; :at-first | :data-nq | :data-q | :q-in-nq | q-in-q 91 with field with fields 92 for chr of-type character across str 93 do (cond ((eq state :at-first) 95 (cond ((char-equal chr *csv-quote*) (setf state :data-q)) 96 ((char-equal chr delimiter) (push "" fields)) 97 (t (setf state :data-nq) (push chr field)))) 99 (cond ((char-equal chr *csv-quote*) (setf state :q-in-nq)) 100 ((char-equal chr delimiter) 101 (push-f field fields) 102 (setf state :at-first)) 103 (t (push chr field)))) 105 (cond ((char-equal chr *csv-quote*) (setf state :q-in-q)) 106 ((char-equal chr delimiter) 107 (push-f field fields) 108 (setf state :at-first)) 109 (t (setf state :data-nq) (push chr field)))) 111 (cond ((char-equal chr *csv-quote*) (setf state :q-in-q)) 112 ((char-equal chr delimiter) (push-f field fields) (setf state :at-first)) 113 (t (push chr field)))) 115 (cond ((char-equal chr *csv-quote*) (setf state :data-q)) 116 ;; this should only be done conditionally - early escapes quotes 117 ((char-equal chr delimiter) 118 (push-f field fields) 119 (setf state :at-first)) 121 ;; (error "illegal value ( ~A ) after quotation" chr) 125 (progn (push-f field fields) (reverse fields))))))) 129 (defun read-csv-line (stream &key type-conv-fns map-fns (delimiter *csv-separator*) (start 0) end) 130 "Read one line from stream and return a csv record. 132 A CSV record is a vector of elements. 134 type-conv-fns should be a list of functions. 135 If type-conv-fns is nil (the default case), then all will be treated 138 map-fns is a list of functions of one argument and output one result. 139 each function in it will be applied to the parsed element. 140 If map-fns is nil, then nothing will be applied. 142 start and end specifies how many elements per record will be included. 143 If start or end is negative, it counts from the end. -1 is the last element. 145 (declare (type (or (simple-array function *) null) type-conv-fns map-fns)) 146 (let* ((rline (read-line stream nil nil))) 148 (let* ((line (string-trim '(#\Space #\Newline #\Return) rline)) 149 (strs (parse-csv-string line :delimiter delimiter)) 150 (strs-size (length strs))) 152 (setf start (+ start strs-size))) 153 (when (and end (< end 0)) 154 (setf end (+ end strs-size))) 155 (setf strs (subseq strs start end)) 157 (unless (= (length strs) (length type-conv-fns)) 158 (error "Number of type specifier (~a) does not match the number of elements (~a)." 159 (length type-conv-fns) (length strs)))) 161 (unless (= (length strs) (length map-fns)) 162 (error "Number of mapping functions (~a) does not match the number of elements (~a)." 163 (length map-fns) (length strs)))) 165 ;; strs is not needed so we simply overwrite it 168 (map 'vector #'funcall type-conv-fns result))) 171 (map 'vector #'funcall map-fns result))) 174 (defun read-csv-stream (stream &key (header t) type-spec map-fns (delimiter *csv-separator*) (start 0) end) 175 "Read from stream until eof and return a csv table. 177 A csv table is a vector of csv records. 178 A csv record is a vector of elements. 180 Type spec should be a list of type specifier (symbols). 181 If the type specifier is nil or t, it will be treated as string. 182 If type-spec is nil (the default case), then all will be treated 185 map-fns is a list of functions of one argument and output one result. 186 each function in it will be applied to the parsed element. 187 If any function in the list is nil or t, it equals to #'identity. 188 If map-fns is nil, then nothing will be applied. 190 start and end specifies how many elements per record will be included. 191 If start or end is negative, it counts from the end. -1 is the last element. 195 (macrolet ((make-num-specifier (specifier) 196 `(lambda (s) (let ((s (parse-number-no-error s s))) 197 (if (numberp s) (funcall ,specifier s) s))))) 201 ((t nil string) #'identity) 202 (number #'(lambda (s) (parse-number-no-error s s))) 203 (float (make-num-specifier #'float)) 204 (single-float (make-num-specifier #'(lambda (s) (coerce s 'single-float)))) 205 (double-float (make-num-specifier #'(lambda (s) (coerce s 'double-float)))) 206 (integer (make-num-specifier #'round)) 207 (pathname #'pathname) 209 (keyword (lambda (s) (intern s :keyword))))) 222 (symbol-function fn)) 223 (t (error "~a is not a valid function specifier." fn)))) 227 (cons (coerce header 'vector)) 228 (boolean (when header 229 (read-csv-line stream :delimiter delimiter)))))) 230 (loop for rec = (read-csv-line stream :type-conv-fns type-conv-fns :map-fns map-fns :delimiter delimiter 231 :start start :end end) 233 collect rec into result 236 (coerce result 'vector) 239 (defun read-csv-file (filename &key (header t) 242 (delimiter *csv-separator*) 243 (external-format *csv-default-external-format*) 246 "Read from stream until eof and return a csv table. 248 A csv table is a vector of csv records. 249 A csv record is a vector of elements. 251 Type spec should be a list of type specifier (symbols). 252 If the type specifier is nil or t, it will be treated as string. 253 If type-spec is nil (the default case), then all will be treated 256 map-fns is a list of functions of one argument and output one result. 257 each function in it will be applied to the parsed element. 258 If any function in the list is nil or t, it equals to #'identity. 259 If map-fns is nil, then nothing will be applied. 261 external-format (default is :UTF-8) 263 start and end specifies how many elements per record will be included. 264 If start or end is negative, it counts from the end. -1 is the last element. 266 (with-open-file (f filename :external-format external-format) 267 (read-csv-stream f :type-spec type-spec :map-fns map-fns 269 :start start :end end 273 (defun read-csv-file-and-sort (filename sort-order &key (header t) (order :ascend) type-spec map-fns (delimiter *csv-separator*) (external-format *csv-default-external-format*)) 274 (let ((table (read-csv-file filename 279 :external-format external-format))) 280 (loop for i in (reverse sort-order) 282 (stable-sort table (ecase order (:ascend #'string<=) (:descend #'string>=)) 283 :key (lambda (rec) (aref rec i)))) 284 finally (return table)))) 286 (defclass csv-file-data (file-data-source) ()) 289 (defmethod scan-data ((self csv-file-data) (projection sequence)) 290 (if (null projection) 291 (read-csv-file (file-data-path self))