Mercurial > core / lisp/lib/dat/csv.lisp
changeset 277: |
10faf95f90dd |
parent: |
78ef6145e272
|
child: |
c0fc6b87557f |
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Sun, 14 Apr 2024 01:19:10 -0400 |
permissions: |
-rw-r--r-- |
description: |
stream and basic type upgrades. fixed some bugs and improved csv parsing |
1 ;;; lib/dat/csv.lisp --- CSV Data Format 3 ;; Character Separated Values 8 (defun parse-number-no-error (string &optional default) 11 (parse-number string)))) 16 (defparameter *csv-separator* #\,) 17 (defparameter *csv-quote* #\") 18 (defparameter *csv-print-quote-p* t "print \" when the element is a string?") 19 (defparameter *csv-default-external-format* :utf-8) 21 (defun write-csv-line (record &key stream (delimiter *csv-separator*)) 22 "Accept a record and print it in one line as a csv record. 24 A record is a sequence of element. A element can be of any type. 25 If record is nil, nothing will be printed. 26 If stream is nil (default case), it will return a string, otherwise it will return nil. 27 For efficiency reason, no intermediate string will be constructed. " 29 (with-output-to-string (s) 30 (let ((*standard-output* s) 31 (record-size (length record))) 32 (loop for e across record 36 (if *csv-print-quote-p* 38 (write-char *csv-quote*) 40 (write-char *csv-quote*)) 43 (when (< i (1- record-size)) 44 (write-char delimiter))))))) 45 (format stream "~&~a" result))) 47 (defun write-csv-stream (stream table &key (delimiter *csv-separator*)) 48 "Accept a stream and a table and output the table as csv form to the stream. 50 A table is a sequence of lines. A line is a sequence of elements. 51 Elements can be any types" 52 (loop for l across table 53 do (write-csv-line l :stream stream :delimiter delimiter)) 54 (write-char #\newline stream) 57 (defun write-csv-file (filename table &key (external-format *csv-default-external-format*) (delimiter *csv-separator*)) 58 "Accept a filename and a table and output the table as csv form to the file. 60 A table is a sequence of lines. A line is a sequence of elements. 61 Elements can be any types" 62 (with-open-file (f filename :direction :output 63 :if-does-not-exist :create 65 :external-format external-format) 66 (write-csv-stream f table :delimiter delimiter))) 68 (defun parse-csv-string (str &key (delimiter *csv-separator*)) ;; refer RFC4180 70 ;; (regexp:split-re "," str) 71 (let ((q-count (count *csv-quote* str :test #'char-equal))) 72 (when (oddp q-count) (warn 'simple-warning :format-control "odd number of #\" in a line (~A)" 73 :format-arguments (list q-count))) 75 (cl-ppcre:split delimiter str) ;(cl-ppcre:split *csv-separator* str) 76 (macrolet ((push-f (fld flds) `(push (coerce (reverse ,fld) 'string) ,flds))) 77 (loop with state = :at-first ;; :at-first | :data-nq | :data-q | :q-in-nq | q-in-q 78 with field with fields 79 for chr of-type character across str 80 do (cond ((eq state :at-first) 82 (cond ((char-equal chr *csv-quote*) (setf state :data-q)) 83 ((char-equal chr delimiter) (push "" fields)) 84 (t (setf state :data-nq) (push chr field)))) 86 (cond ((char-equal chr *csv-quote*) (setf state :q-in-nq)) 87 ((char-equal chr delimiter) 89 (setf state :at-first)) 90 (t (push chr field)))) 92 (cond ((char-equal chr *csv-quote*) (setf state :q-in-q)) 93 ((char-equal chr delimiter) 95 (setf state :at-first)) 96 (t (setf state :data-nq) (push chr field)))) 98 (cond ((char-equal chr *csv-quote*) (setf state :q-in-q)) 99 ((char-equal chr delimiter) (push-f field fields) (setf state :at-first)) 100 (t (push chr field)))) 102 (cond ((char-equal chr *csv-quote*) (setf state :data-q)) 103 ;; this should only be done conditionally - early escapes quotes 104 ((char-equal chr delimiter) 105 (push-f field fields) 106 (setf state :at-first)) 108 ;; (error "illegal value ( ~A ) after quotation" chr) 112 (progn (push-f field fields) (reverse fields))))))) 116 (defun read-csv-line (stream &key type-conv-fns map-fns (delimiter *csv-separator*) (start 0) end) 117 "Read one line from stream and return a csv record. 119 A CSV record is a vector of elements. 121 type-conv-fns should be a list of functions. 122 If type-conv-fns is nil (the default case), then all will be treated 125 map-fns is a list of functions of one argument and output one result. 126 each function in it will be applied to the parsed element. 127 If map-fns is nil, then nothing will be applied. 129 start and end specifies how many elements per record will be included. 130 If start or end is negative, it counts from the end. -1 is the last element. 132 (declare (type (or (simple-array function *) null) type-conv-fns map-fns)) 133 (let* ((rline (read-line stream nil nil))) 135 (let* ((line (string-trim '(#\Space #\Newline #\Return) rline)) 136 (strs (parse-csv-string line :delimiter delimiter)) 137 (strs-size (length strs))) 139 (setf start (+ start strs-size))) 140 (when (and end (< end 0)) 141 (setf end (+ end strs-size))) 142 (setf strs (subseq strs start end)) 144 (unless (= (length strs) (length type-conv-fns)) 145 (error "Number of type specifier (~a) does not match the number of elements (~a)." 146 (length type-conv-fns) (length strs)))) 148 (unless (= (length strs) (length map-fns)) 149 (error "Number of mapping functions (~a) does not match the number of elements (~a)." 150 (length map-fns) (length strs)))) 152 ;; strs is not needed so we simply overwrite it 155 (map 'vector #'funcall type-conv-fns result))) 158 (map 'vector #'funcall map-fns result))) 161 (defun read-csv-stream (stream &key (header t) type-spec map-fns (delimiter *csv-separator*) (start 0) end) 162 "Read from stream until eof and return a csv table. 164 A csv table is a vector of csv records. 165 A csv record is a vector of elements. 167 Type spec should be a list of type specifier (symbols). 168 If the type specifier is nil or t, it will be treated as string. 169 If type-spec is nil (the default case), then all will be treated 172 map-fns is a list of functions of one argument and output one result. 173 each function in it will be applied to the parsed element. 174 If any function in the list is nil or t, it equals to #'identity. 175 If map-fns is nil, then nothing will be applied. 177 start and end specifies how many elements per record will be included. 178 If start or end is negative, it counts from the end. -1 is the last element. 182 (macrolet ((make-num-specifier (specifier) 183 `(lambda (s) (let ((s (parse-number-no-error s s))) 184 (if (numberp s) (funcall ,specifier s) s))))) 188 ((t nil string) #'identity) 189 (number #'(lambda (s) (parse-number-no-error s s))) 190 (float (make-num-specifier #'float)) 191 (single-float (make-num-specifier #'(lambda (s) (coerce s 'single-float)))) 192 (double-float (make-num-specifier #'(lambda (s) (coerce s 'double-float)))) 193 (integer (make-num-specifier #'round)) 194 (pathname #'pathname) 196 (keyword (lambda (s) (intern s :keyword))))) 209 (symbol-function fn)) 210 (t (error "~a is not a valid function specifier." fn)))) 214 (cons (coerce header 'vector)) 215 (boolean (when header 216 (read-csv-line stream :delimiter delimiter)))))) 217 (loop for rec = (read-csv-line stream :type-conv-fns type-conv-fns :map-fns map-fns :delimiter delimiter 218 :start start :end end) 220 collect rec into result 223 (coerce result 'vector) 226 (defun read-csv-file (filename &key (header t) type-spec map-fns (delimiter *csv-separator*) (external-format *csv-default-external-format*) 228 "Read from stream until eof and return a csv table. 230 A csv table is a vector of csv records. 231 A csv record is a vector of elements. 233 Type spec should be a list of type specifier (symbols). 234 If the type specifier is nil or t, it will be treated as string. 235 If type-spec is nil (the default case), then all will be treated 238 map-fns is a list of functions of one argument and output one result. 239 each function in it will be applied to the parsed element. 240 If any function in the list is nil or t, it equals to #'identity. 241 If map-fns is nil, then nothing will be applied. 242 https://cgit.gentoo.org/proj/lisp.git/tree/dev-lisp/cl-rsm-finance/cl-rsm-finance-1.1.ebuild?h=old-portage&id=e9b71910b0d4f22aeb66f14e158a2451f9955b0d 243 external-format (default is shift-jis) is a valid AllegroCL external-format type. 245 OS is a set to eol-convention of the file stream. 247 start and end specifies how many elements per record will be included. 248 If start or end is negative, it counts from the end. -1 is the last element. 250 (with-open-file (f filename :external-format external-format) 251 (read-csv-stream f :type-spec type-spec :map-fns map-fns 253 :start start :end end 257 (defun read-csv-file-and-sort (filename sort-order &key (header t) (order :ascend) type-spec map-fns (delimiter *csv-separator*) (external-format *csv-default-external-format*)) 258 (let ((table (read-csv-file filename 263 :external-format external-format))) 264 (loop for i in (reverse sort-order) 266 (stable-sort table (ecase order (:ascend #'string<=) (:descend #'string>=)) 267 :key (lambda (rec) (aref rec i)))) 268 finally (return table))))