changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/dat/csv.lisp

changeset 277: 10faf95f90dd
parent: 78ef6145e272
child: c0fc6b87557f
author: Richard Westhaver <ellis@rwest.io>
date: Sun, 14 Apr 2024 01:19:10 -0400
permissions: -rw-r--r--
description: stream and basic type upgrades. fixed some bugs and improved csv parsing
1 ;;; lib/dat/csv.lisp --- CSV Data Format
2 
3 ;; Character Separated Values
4 
5 ;;; Code:
6 (in-package :dat/csv)
7 
8 (defun parse-number-no-error (string &optional default)
9  (let ((result
10  (ignore-errors
11  (parse-number string))))
12  (if result
13  result
14  default)))
15 
16 (defparameter *csv-separator* #\,)
17 (defparameter *csv-quote* #\")
18 (defparameter *csv-print-quote-p* t "print \" when the element is a string?")
19 (defparameter *csv-default-external-format* :utf-8)
20 
21 (defun write-csv-line (record &key stream (delimiter *csv-separator*))
22  "Accept a record and print it in one line as a csv record.
23 
24 A record is a sequence of element. A element can be of any type.
25 If record is nil, nothing will be printed.
26 If stream is nil (default case), it will return a string, otherwise it will return nil.
27 For efficiency reason, no intermediate string will be constructed. "
28  (let ((result
29  (with-output-to-string (s)
30  (let ((*standard-output* s)
31  (record-size (length record)))
32  (loop for e across record
33  for i from 0
34  do (typecase e
35  (string (progn
36  (if *csv-print-quote-p*
37  (progn
38  (write-char *csv-quote*)
39  (write-string e)
40  (write-char *csv-quote*))
41  (write-string e))))
42  (t (princ e)))
43  (when (< i (1- record-size))
44  (write-char delimiter)))))))
45  (format stream "~&~a" result)))
46 
47 (defun write-csv-stream (stream table &key (delimiter *csv-separator*))
48  "Accept a stream and a table and output the table as csv form to the stream.
49 
50 A table is a sequence of lines. A line is a sequence of elements.
51 Elements can be any types"
52  (loop for l across table
53  do (write-csv-line l :stream stream :delimiter delimiter))
54  (write-char #\newline stream)
55  '(ok))
56 
57 (defun write-csv-file (filename table &key (external-format *csv-default-external-format*) (delimiter *csv-separator*))
58  "Accept a filename and a table and output the table as csv form to the file.
59 
60 A table is a sequence of lines. A line is a sequence of elements.
61 Elements can be any types"
62  (with-open-file (f filename :direction :output
63  :if-does-not-exist :create
64  :if-exists :supersede
65  :external-format external-format)
66  (write-csv-stream f table :delimiter delimiter)))
67 
68 (defun parse-csv-string (str &key (delimiter *csv-separator*)) ;; refer RFC4180
69  (coerce
70  ;; (regexp:split-re "," str)
71  (let ((q-count (count *csv-quote* str :test #'char-equal)))
72  (when (oddp q-count) (warn 'simple-warning :format-control "odd number of #\" in a line (~A)"
73  :format-arguments (list q-count)))
74  (if (zerop q-count)
75  (cl-ppcre:split delimiter str) ;(cl-ppcre:split *csv-separator* str)
76  (macrolet ((push-f (fld flds) `(push (coerce (reverse ,fld) 'string) ,flds)))
77  (loop with state = :at-first ;; :at-first | :data-nq | :data-q | :q-in-nq | q-in-q
78  with field with fields
79  for chr of-type character across str
80  do (cond ((eq state :at-first)
81  (setf field nil)
82  (cond ((char-equal chr *csv-quote*) (setf state :data-q))
83  ((char-equal chr delimiter) (push "" fields))
84  (t (setf state :data-nq) (push chr field))))
85  ((eq state :data-nq)
86  (cond ((char-equal chr *csv-quote*) (setf state :q-in-nq))
87  ((char-equal chr delimiter)
88  (push-f field fields)
89  (setf state :at-first))
90  (t (push chr field))))
91  ((eq state :q-in-nq)
92  (cond ((char-equal chr *csv-quote*) (setf state :q-in-q))
93  ((char-equal chr delimiter)
94  (push-f field fields)
95  (setf state :at-first))
96  (t (setf state :data-nq) (push chr field))))
97  ((eq state :data-q)
98  (cond ((char-equal chr *csv-quote*) (setf state :q-in-q))
99  ((char-equal chr delimiter) (push-f field fields) (setf state :at-first))
100  (t (push chr field))))
101  ((eq state :q-in-q)
102  (cond ((char-equal chr *csv-quote*) (setf state :data-q))
103  ;; this should only be done conditionally - early escapes quotes
104  ((char-equal chr delimiter)
105  (push-f field fields)
106  (setf state :at-first))
107  (t
108  ;; (error "illegal value ( ~A ) after quotation" chr)
109  (push chr field)
110  ))))
111  finally (return
112  (progn (push-f field fields) (reverse fields)))))))
113  'vector))
114 
115 
116 (defun read-csv-line (stream &key type-conv-fns map-fns (delimiter *csv-separator*) (start 0) end)
117  "Read one line from stream and return a csv record.
118 
119 A CSV record is a vector of elements.
120 
121 type-conv-fns should be a list of functions.
122 If type-conv-fns is nil (the default case), then all will be treated
123 as string.
124 
125 map-fns is a list of functions of one argument and output one result.
126 each function in it will be applied to the parsed element.
127 If map-fns is nil, then nothing will be applied.
128 
129 start and end specifies how many elements per record will be included.
130 If start or end is negative, it counts from the end. -1 is the last element.
131 "
132  (declare (type (or (simple-array function *) null) type-conv-fns map-fns))
133  (let* ((rline (read-line stream nil nil)))
134  (when rline
135  (let* ((line (string-trim '(#\Space #\Newline #\Return) rline))
136  (strs (parse-csv-string line :delimiter delimiter))
137  (strs-size (length strs)))
138  (when (< start 0)
139  (setf start (+ start strs-size)))
140  (when (and end (< end 0))
141  (setf end (+ end strs-size)))
142  (setf strs (subseq strs start end))
143  (when type-conv-fns
144  (unless (= (length strs) (length type-conv-fns))
145  (error "Number of type specifier (~a) does not match the number of elements (~a)."
146  (length type-conv-fns) (length strs))))
147  (when map-fns
148  (unless (= (length strs) (length map-fns))
149  (error "Number of mapping functions (~a) does not match the number of elements (~a)."
150  (length map-fns) (length strs))))
151  (let ((result strs))
152  ;; strs is not needed so we simply overwrite it
153  (when type-conv-fns
154  (setf result
155  (map 'vector #'funcall type-conv-fns result)))
156  (when map-fns
157  (setf result
158  (map 'vector #'funcall map-fns result)))
159  result)))))
160 
161 (defun read-csv-stream (stream &key (header t) type-spec map-fns (delimiter *csv-separator*) (start 0) end)
162  "Read from stream until eof and return a csv table.
163 
164 A csv table is a vector of csv records.
165 A csv record is a vector of elements.
166 
167 Type spec should be a list of type specifier (symbols).
168 If the type specifier is nil or t, it will be treated as string.
169 If type-spec is nil (the default case), then all will be treated
170 as string.
171 
172 map-fns is a list of functions of one argument and output one result.
173 each function in it will be applied to the parsed element.
174 If any function in the list is nil or t, it equals to #'identity.
175 If map-fns is nil, then nothing will be applied.
176 
177 start and end specifies how many elements per record will be included.
178 If start or end is negative, it counts from the end. -1 is the last element.
179 "
180  (let ((type-conv-fns
181  (when type-spec
182  (macrolet ((make-num-specifier (specifier)
183  `(lambda (s) (let ((s (parse-number-no-error s s)))
184  (if (numberp s) (funcall ,specifier s) s)))))
185  (map 'vector
186  (lambda (type)
187  (ecase type
188  ((t nil string) #'identity)
189  (number #'(lambda (s) (parse-number-no-error s s)))
190  (float (make-num-specifier #'float))
191  (single-float (make-num-specifier #'(lambda (s) (coerce s 'single-float))))
192  (double-float (make-num-specifier #'(lambda (s) (coerce s 'double-float))))
193  (integer (make-num-specifier #'round))
194  (pathname #'pathname)
195  (symbol #'intern)
196  (keyword (lambda (s) (intern s :keyword)))))
197  type-spec))))
198  (map-fns
199  (when map-fns
200  (map 'vector
201  (lambda (fn)
202  (cond ((or (eq fn t)
203  (eq fn nil))
204  #'identity)
205  ((functionp fn)
206  fn)
207  ((and (symbolp fn)
208  (not (keywordp fn)))
209  (symbol-function fn))
210  (t (error "~a is not a valid function specifier." fn))))
211  map-fns)))
212  (header
213  (etypecase header
214  (cons (coerce header 'vector))
215  (boolean (when header
216  (read-csv-line stream :delimiter delimiter))))))
217  (loop for rec = (read-csv-line stream :type-conv-fns type-conv-fns :map-fns map-fns :delimiter delimiter
218  :start start :end end)
219  while rec
220  collect rec into result
221  finally (return
222  (values
223  (coerce result 'vector)
224  header)))))
225 
226 (defun read-csv-file (filename &key (header t) type-spec map-fns (delimiter *csv-separator*) (external-format *csv-default-external-format*)
227  (start 0) end)
228  "Read from stream until eof and return a csv table.
229 
230 A csv table is a vector of csv records.
231 A csv record is a vector of elements.
232 
233 Type spec should be a list of type specifier (symbols).
234 If the type specifier is nil or t, it will be treated as string.
235 If type-spec is nil (the default case), then all will be treated
236 as string.
237 
238 map-fns is a list of functions of one argument and output one result.
239 each function in it will be applied to the parsed element.
240 If any function in the list is nil or t, it equals to #'identity.
241 If map-fns is nil, then nothing will be applied.
242 https://cgit.gentoo.org/proj/lisp.git/tree/dev-lisp/cl-rsm-finance/cl-rsm-finance-1.1.ebuild?h=old-portage&id=e9b71910b0d4f22aeb66f14e158a2451f9955b0d
243 external-format (default is shift-jis) is a valid AllegroCL external-format type.
244 
245 OS is a set to eol-convention of the file stream.
246 
247 start and end specifies how many elements per record will be included.
248 If start or end is negative, it counts from the end. -1 is the last element.
249 "
250  (with-open-file (f filename :external-format external-format)
251  (read-csv-stream f :type-spec type-spec :map-fns map-fns
252  :delimiter delimiter
253  :start start :end end
254  :header header)))
255 
256 
257 (defun read-csv-file-and-sort (filename sort-order &key (header t) (order :ascend) type-spec map-fns (delimiter *csv-separator*) (external-format *csv-default-external-format*))
258  (let ((table (read-csv-file filename
259  :header header
260  :type-spec type-spec
261  :map-fns map-fns
262  :delimiter delimiter
263  :external-format external-format)))
264  (loop for i in (reverse sort-order)
265  do (setf table
266  (stable-sort table (ecase order (:ascend #'string<=) (:descend #'string>=))
267  :key (lambda (rec) (aref rec i))))
268  finally (return table))))