Mercurial > core / lisp/lib/dat/csv.lisp
changeset 698: |
96958d3eb5b0 |
parent: |
d3e2829521a3
|
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Fri, 04 Oct 2024 22:04:59 -0400 |
permissions: |
-rw-r--r-- |
description: |
fixes |
1 ;;; lib/dat/csv.lisp --- CSV Data Format 3 ;; Comma Separated Values (or tabs or whatever) 7 ;; This package prioritizes flexibility. If you want speed, convert to 10 ;; Still, efficiency is worth pursuing here and there are some obvious gaps to 13 ;; - remove sequence functions 14 ;; - research optimized access patterns used in other langs/state of art 18 ;; ref: https://datatracker.ietf.org/doc/html/rfc4180 23 (defun parse-number-no-error (string &optional default) 26 (parse-number string)))) 31 (defparameter *csv-separator* #\,) 32 (defparameter *csv-quote* #\") 33 (defparameter *csv-print-quote-p* t "print \" when the element is a string?") 34 (defparameter *csv-default-external-format* :utf-8) 36 (defun write-csv-line (record &key stream (delimiter *csv-separator*)) 37 "Accept a record and print it in one line as a csv record. 39 A record is a sequence of element. A element can be of any type. 40 If record is nil, nothing will be printed. 41 If stream is nil (default case), it will return a string, otherwise it will return nil. 42 For efficiency reason, no intermediate string will be constructed. " 44 (with-output-to-string (s) 45 (let ((*standard-output* s) 46 (record-size (length record))) 47 (loop for e across record 51 (if *csv-print-quote-p* 53 (write-char *csv-quote*) 55 (write-char *csv-quote*)) 58 (when (< i (1- record-size)) 59 (write-char delimiter))))))) 60 (format stream "~&~a" result))) 62 (defun write-csv-stream (stream table &key (delimiter *csv-separator*)) 63 "Accept a stream and a table and output the table as csv form to the stream. 65 A table is a sequence of lines. A line is a sequence of elements. 66 Elements can be any types" 67 (loop for l across table 68 do (write-csv-line l :stream stream :delimiter delimiter)) 69 (write-char #\newline stream) 72 (defun write-csv-file (filename table &key (external-format *csv-default-external-format*) (delimiter *csv-separator*)) 73 "Accept a filename and a table and output the table as csv form to the file. 75 A table is a sequence of lines. A line is a sequence of elements. 76 Elements can be any types" 77 (with-open-file (f filename :direction :output 78 :if-does-not-exist :create 80 :external-format external-format) 81 (write-csv-stream f table :delimiter delimiter))) 83 (defun write-csv-string (table) 84 (with-output-to-string (str) 85 (write-csv-stream str table))) 87 (defun parse-csv-string (str &key (delimiter *csv-separator*)) ;; refer RFC4180 89 ;; (regexp:split-re "," str) 90 (let ((q-count (count *csv-quote* str :test #'char-equal))) 91 (when (oddp q-count) (warn 'simple-warning :format-control "odd number of #\" in a line (~A)" 92 :format-arguments (list q-count))) 94 (cl-ppcre:split delimiter str) ;(cl-ppcre:split *csv-separator* str) 95 (macrolet ((push-f (fld flds) `(push (coerce (reverse ,fld) 'string) ,flds))) 96 (loop with state = :at-first ;; :at-first | :data-nq | :data-q | :q-in-nq | q-in-q 97 with field with fields 98 for chr of-type character across str 99 do (cond ((eq state :at-first) 101 (cond ((char-equal chr *csv-quote*) (setf state :data-q)) 102 ((char-equal chr delimiter) (push "" fields)) 103 (t (setf state :data-nq) (push chr field)))) 105 (cond ((char-equal chr *csv-quote*) (setf state :q-in-nq)) 106 ((char-equal chr delimiter) 107 (push-f field fields) 108 (setf state :at-first)) 109 (t (push chr field)))) 111 (cond ((char-equal chr *csv-quote*) (setf state :q-in-q)) 112 ((char-equal chr delimiter) 113 (push-f field fields) 114 (setf state :at-first)) 115 (t (setf state :data-nq) (push chr field)))) 117 (cond ((char-equal chr *csv-quote*) (setf state :q-in-q)) 118 ((char-equal chr delimiter) (push-f field fields) (setf state :at-first)) 119 (t (push chr field)))) 121 (cond ((char-equal chr *csv-quote*) (setf state :data-q)) 122 ;; this should only be done conditionally - early escapes quotes 123 ((char-equal chr delimiter) 124 (push-f field fields) 125 (setf state :at-first)) 127 ;; (error "illegal value ( ~A ) after quotation" chr) 131 (progn (push-f field fields) (reverse fields))))))) 135 (defun read-csv-line (stream &key type-conv-fns map-fns (delimiter *csv-separator*) (start 0) end) 136 "Read one line from stream and return a csv record. 138 A CSV record is a vector of elements. 140 type-conv-fns should be a list of functions. 141 If type-conv-fns is nil (the default case), then all will be treated 144 map-fns is a list of functions of one argument and output one result. 145 each function in it will be applied to the parsed element. 146 If map-fns is nil, then nothing will be applied. 148 start and end specifies how many elements per record will be included. 149 If start or end is negative, it counts from the end. -1 is the last element. 151 (declare (type (or (simple-array function *) null) type-conv-fns map-fns)) 152 (let* ((rline (read-line stream nil nil))) 154 (let* ((line (string-trim '(#\Space #\Newline #\Return) rline)) 155 (strs (parse-csv-string line :delimiter delimiter)) 156 (strs-size (length strs))) 158 (setf start (+ start strs-size))) 159 (when (and end (< end 0)) 160 (setf end (+ end strs-size))) 161 (setf strs (subseq strs start end)) 163 (unless (= (length strs) (length type-conv-fns)) 164 (error "Number of type specifier (~a) does not match the number of elements (~a)." 165 (length type-conv-fns) (length strs)))) 167 (unless (= (length strs) (length map-fns)) 168 (error "Number of mapping functions (~a) does not match the number of elements (~a)." 169 (length map-fns) (length strs)))) 171 ;; strs is not needed so we simply overwrite it 174 (map 'vector #'funcall type-conv-fns result))) 177 (map 'vector #'funcall map-fns result))) 180 (defun read-csv-stream (stream &key (header t) type-spec map-fns (delimiter *csv-separator*) (start 0) end) 181 "Read from stream until eof and return a csv table. 183 A csv table is a vector of csv records. 184 A csv record is a vector of elements. 186 Type spec should be a list of type specifier (symbols). 187 If the type specifier is nil or t, it will be treated as string. 188 If type-spec is nil (the default case), then all will be treated 191 map-fns is a list of functions of one argument and output one result. 192 each function in it will be applied to the parsed element. 193 If any function in the list is nil or t, it equals to #'identity. 194 If map-fns is nil, then nothing will be applied. 196 start and end specifies how many elements per record will be included. 197 If start or end is negative, it counts from the end. -1 is the last element. 201 (macrolet ((make-num-specifier (specifier) 202 `(lambda (s) (let ((s (parse-number-no-error s s))) 203 (if (numberp s) (funcall ,specifier s) s))))) 207 ((t nil string) #'identity) 208 (number #'(lambda (s) (parse-number-no-error s s))) 209 (float (make-num-specifier #'float)) 210 (single-float (make-num-specifier #'(lambda (s) (coerce s 'single-float)))) 211 (double-float (make-num-specifier #'(lambda (s) (coerce s 'double-float)))) 212 (integer (make-num-specifier #'round)) 213 (pathname #'pathname) 215 (keyword (lambda (s) (intern s :keyword))))) 228 (symbol-function fn)) 229 (t (error "~a is not a valid function specifier." fn)))) 233 (cons (coerce header 'vector)) 234 (boolean (when header 235 (read-csv-line stream :delimiter delimiter)))))) 236 (loop for rec = (read-csv-line stream :type-conv-fns type-conv-fns :map-fns map-fns :delimiter delimiter 237 :start start :end end) 239 collect rec into result 242 (coerce result 'vector) 245 (defun read-csv-file (filename &key (header t) 248 (delimiter *csv-separator*) 249 (external-format *csv-default-external-format*) 252 "Read from stream until eof and return a csv table. 254 A csv table is a vector of csv records. 255 A csv record is a vector of elements. 257 Type spec should be a list of type specifier (symbols). 258 If the type specifier is nil or t, it will be treated as string. 259 If type-spec is nil (the default case), then all will be treated 262 map-fns is a list of functions of one argument and output one result. 263 each function in it will be applied to the parsed element. 264 If any function in the list is nil or t, it equals to #'identity. 265 If map-fns is nil, then nothing will be applied. 267 external-format (default is :UTF-8) 269 start and end specifies how many elements per record will be included. 270 If start or end is negative, it counts from the end. -1 is the last element. 272 (with-open-file (f filename :external-format external-format) 273 (read-csv-stream f :type-spec type-spec :map-fns map-fns 275 :start start :end end 279 (defun read-csv-file-and-sort (filename sort-order &key (header t) (order :ascend) type-spec map-fns (delimiter *csv-separator*) (external-format *csv-default-external-format*)) 280 (let ((table (read-csv-file filename 285 :external-format external-format))) 286 (loop for i in (reverse sort-order) 288 (stable-sort table (ecase order (:ascend #'string<=) (:descend #'string>=)) 289 :key (lambda (rec) (aref rec i)))) 290 finally (return table)))) 292 (defclass csv-file-data (file-data-source) ()) 295 (defmethod scan-data ((self csv-file-data) (projection sequence)) 296 (if (null projection) 297 (read-csv-file (file-data-path self))