changelog shortlog graph tags branches changeset file revisions annotate raw help

Mercurial > core / lisp/lib/dat/csv.lisp

revision 122: 4ba88cac5bc7
child 123: a4ed30cbe083
     1.1--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2+++ b/lisp/lib/dat/csv.lisp	Sun Dec 24 01:06:20 2023 -0500
     1.3@@ -0,0 +1,260 @@
     1.4+(in-package :dat/csv)
     1.5+
     1.6+(defun parse-number-no-error (string &optional default)
     1.7+  (let ((result
     1.8+         (ignore-errors
     1.9+          (parse-number string))))
    1.10+    (if result
    1.11+        result
    1.12+      default)))
    1.13+
    1.14+(defparameter *csv-separator* #\,)
    1.15+(defparameter *csv-quote* #\")
    1.16+(defparameter *csv-print-quote-p* nil "print \" when the element is a string?")
    1.17+(defparameter *csv-default-external-format* #+allegro :932 #+ccl :Windows-31j #+(or sbcl lispworks) :sjis)
    1.18+
    1.19+(defun write-csv-line (record &key stream (delimiter *csv-separator*))
    1.20+  "Accept a record and print it in one line as a csv record.
    1.21+
    1.22+A record is a sequence of element. A element can be of any type.
    1.23+If record is nil, nothing will be printed.
    1.24+If stream is nil (default case), it will return a string, otherwise it will return nil.
    1.25+For efficiency reason, no intermediate string will be constructed. "
    1.26+  (let ((result
    1.27+         (with-output-to-string (s)
    1.28+           (let ((*standard-output* s)
    1.29+                 (record-size (length record)))
    1.30+             (loop for e across record
    1.31+                   for i from 0
    1.32+                   do (typecase e
    1.33+                        (string (progn
    1.34+                                  (if *csv-print-quote-p*
    1.35+                                      (progn
    1.36+                                        (write-char *csv-quote*)
    1.37+                                        (write-string e)
    1.38+                                        (write-char *csv-quote*))
    1.39+                                      (write-string e))))
    1.40+                        (t (princ e)))
    1.41+                      (when (< i (1- record-size))
    1.42+                        (write-char delimiter)))))))
    1.43+    (format stream "~&~a" result)))
    1.44+
    1.45+(defun write-csv-stream (stream table &key (delimiter *csv-separator*))
    1.46+  "Accept a stream and a table and output the table as csv form to the stream.
    1.47+
    1.48+A table is a sequence of lines. A line is a sequence of elements.
    1.49+Elements can be any types"
    1.50+  (loop for l across table
    1.51+        do (write-csv-line l :stream stream :delimiter delimiter))
    1.52+  (write-char #\newline stream)
    1.53+  '(ok))
    1.54+
    1.55+(defun write-csv-file (filename table &key (external-format *csv-default-external-format*) (delimiter *csv-separator*))
    1.56+  "Accept a filename and a table and output the table as csv form to the file.
    1.57+
    1.58+A table is a sequence of lines. A line is a sequence of elements.
    1.59+Elements can be any types"
    1.60+  (with-open-file (f filename :direction :output
    1.61+                     :if-does-not-exist :create
    1.62+                     :if-exists :supersede
    1.63+                     :external-format external-format)
    1.64+    (write-csv-stream f table :delimiter delimiter)))
    1.65+
    1.66+(defun parse-csv-string (str &key (delimiter *csv-separator*)) ;; refer RFC4180
    1.67+  (coerce
    1.68+   ;; (regexp:split-re "," str)
    1.69+   (let ((q-count (count *csv-quote* str :test #'char-equal)))
    1.70+     (cond ((zerop q-count) (cl-ppcre:split *csv-separator* str)) ;(cl-ppcre:split *csv-separator* str)
    1.71+           ((evenp q-count)
    1.72+            (macrolet ((push-f (fld flds) `(push (coerce (reverse ,fld) 'string) ,flds)))
    1.73+              (loop with state = :at-first ;; :at-first | :data-nq | :data-q | :q-in-nq | q-in-q
    1.74+                  with field with fields
    1.75+                  for chr of-type character across str
    1.76+                  do (cond ((eq state :at-first)
    1.77+                            (setf field nil)
    1.78+                            (cond ((char-equal chr *csv-quote*) (setf state :data-q))
    1.79+                                  ((char-equal chr delimiter) (push "" fields))
    1.80+                                  (t (setf state :data-nq) (push chr field))))
    1.81+                           ((eq state :data-nq)
    1.82+                            (cond ((char-equal chr *csv-quote*) (setf state :q-in-nq))
    1.83+                                  ((char-equal chr delimiter)
    1.84+                                   (push-f field fields)
    1.85+                                   (setf state :at-first))
    1.86+                                  (t (push chr field))))
    1.87+                           ((eq state :q-in-nq)
    1.88+                            (cond ((char-equal chr *csv-quote*) (error "#\" inside the non quoted field"))
    1.89+                                  ((char-equal chr delimiter)
    1.90+                                   (push-f field fields)
    1.91+                                   (setf state :at-first))
    1.92+                                  (t (setf state :data-nq) (push chr field))))
    1.93+                           ((eq state :data-q)
    1.94+                            (if (char-equal chr *csv-quote*) (setf state :q-in-q)
    1.95+                              (push chr field)))
    1.96+                           ((eq state :q-in-q)
    1.97+                            (cond ((char-equal chr *csv-quote*) (push chr field) (setf state :data-q))
    1.98+                                  ((char-equal chr delimiter)
    1.99+                                   (push-f field fields)
   1.100+                                   (setf state :at-first))
   1.101+                                  (t (error "illegal value ( ~A ) after quotation" chr)))))
   1.102+                  finally (return
   1.103+                            (progn (push-f field fields) (reverse fields))))))
   1.104+           (t (error "odd number of \" ( ~A ) in a line." q-count))))
   1.105+   'vector))
   1.106+
   1.107+
   1.108+(defun read-csv-line (stream &key type-conv-fns map-fns (delimiter *csv-separator*) (start 0) end)
   1.109+  "Read one line from stream and return a csv record.
   1.110+
   1.111+A CSV record is a vector of elements.
   1.112+
   1.113+type-conv-fns should be a list of functions.
   1.114+If type-conv-fns is nil (the default case), then all will be treated
   1.115+as string.
   1.116+
   1.117+map-fns is a list of functions of one argument and output one result.
   1.118+each function in it will be applied to the parsed element.
   1.119+If map-fns is nil, then nothing will be applied.
   1.120+
   1.121+start and end specifies how many elements per record will be included.
   1.122+If start or end is negative, it counts from the end. -1 is the last element.
   1.123+"
   1.124+  (declare (type (or (simple-array function *) null) type-conv-fns map-fns))
   1.125+  (let* ((rline (read-line stream nil nil)))
   1.126+
   1.127+    (when rline
   1.128+      (let* ((line (string-trim '(#\Space #\Tab #\Newline #\Return) rline))
   1.129+             (strs (parse-csv-string line :delimiter delimiter))
   1.130+             (strs-size (length strs)))
   1.131+        (when (< start 0)
   1.132+          (setf start (+ start strs-size)))
   1.133+        (when (and end (< end 0))
   1.134+          (setf end (+ end strs-size)))
   1.135+        (setf strs (subseq strs start end))
   1.136+        (when type-conv-fns
   1.137+          (unless (= (length strs) (length type-conv-fns))
   1.138+            (error "Number of type specifier (~a) does not match the number of elements (~a)."
   1.139+                   (length strs) (length type-conv-fns))))
   1.140+        (when map-fns
   1.141+          (unless (= (length strs) (length map-fns))
   1.142+            (error "Number of mapping functions (~a) does not match the number of elements (~a)."
   1.143+                   (length strs) (length map-fns))))
   1.144+        (let ((result strs))
   1.145+          ;; strs is not needed so we simply overwrite it
   1.146+          (when type-conv-fns
   1.147+            (setf result
   1.148+              (map 'vector #'funcall type-conv-fns result)))
   1.149+          (when map-fns
   1.150+            (setf result
   1.151+              (map 'vector #'funcall map-fns result)))
   1.152+          result)))))
   1.153+
   1.154+(defun read-csv-stream (stream &key (header t) type-spec map-fns (delimiter *csv-separator*) (start 0) end)
   1.155+  "Read from stream until eof and return a csv table.
   1.156+
   1.157+A csv table is a vector of csv records.
   1.158+A csv record is a vector of elements.
   1.159+
   1.160+Type spec should be a list of type specifier (symbols).
   1.161+If the type specifier is nil or t, it will be treated as string.
   1.162+If type-spec is nil (the default case), then all will be treated
   1.163+as string.
   1.164+
   1.165+map-fns is a list of functions of one argument and output one result.
   1.166+each function in it will be applied to the parsed element.
   1.167+If any function in the list is nil or t, it equals to #'identity.
   1.168+If map-fns is nil, then nothing will be applied.
   1.169+
   1.170+start and end specifies how many elements per record will be included.
   1.171+If start or end is negative, it counts from the end. -1 is the last element.
   1.172+"
   1.173+  (let ((type-conv-fns
   1.174+         (when type-spec
   1.175+           (macrolet ((make-num-specifier (specifier)
   1.176+                        `(lambda (s) (let ((s (parse-number-no-error s s)))
   1.177+                                       (if (numberp s) (funcall ,specifier s) s)))))
   1.178+           (map 'vector
   1.179+             (lambda (type)
   1.180+               (ecase type
   1.181+                 ((t nil string) #'identity)
   1.182+                 (number #'(lambda (s) (parse-number-no-error s s)))
   1.183+                 (float (make-num-specifier #'float))
   1.184+                 (single-float (make-num-specifier #'(lambda (s) (coerce s 'single-float))))
   1.185+                 (double-float (make-num-specifier #'(lambda (s) (coerce s 'double-float))))
   1.186+                 (integer (make-num-specifier #'round))
   1.187+                 (pathname #'pathname)
   1.188+                 (symbol #'intern)
   1.189+                 (keyword (lambda (s) (intern s :keyword)))))
   1.190+             type-spec))))
   1.191+        (map-fns
   1.192+         (when map-fns
   1.193+           (map 'vector
   1.194+             (lambda (fn)
   1.195+               (cond ((or (eq fn t)
   1.196+                          (eq fn nil))
   1.197+                      #'identity)
   1.198+                     ((functionp fn)
   1.199+                      fn)
   1.200+                     ((and (symbolp fn)
   1.201+                           (not (keywordp fn)))
   1.202+                      (symbol-function fn))
   1.203+                     (t (error "~a is not a valid function specifier." fn))))
   1.204+             map-fns)))
   1.205+        (header
   1.206+         (etypecase header
   1.207+             (cons (coerce header 'vector))
   1.208+             (boolean (when header
   1.209+                        (read-csv-line stream :delimiter delimiter))))))
   1.210+    (loop for rec = (read-csv-line stream :type-conv-fns type-conv-fns :map-fns map-fns :delimiter delimiter
   1.211+                                   :start start :end end)
   1.212+        while rec
   1.213+        collect rec into result
   1.214+        finally (return
   1.215+                  (values
   1.216+                   (coerce result 'vector)
   1.217+                   header)))))
   1.218+
   1.219+(defun read-csv-file (filename &key (header t) type-spec map-fns (delimiter *csv-separator*) (external-format *csv-default-external-format*)
   1.220+                      (os :anynl-dos) (start 0) end)
   1.221+  "Read from stream until eof and return a csv table.
   1.222+
   1.223+A csv table is a vector of csv records.
   1.224+A csv record is a vector of elements.
   1.225+
   1.226+Type spec should be a list of type specifier (symbols).
   1.227+If the type specifier is nil or t, it will be treated as string.
   1.228+If type-spec is nil (the default case), then all will be treated
   1.229+as string.
   1.230+
   1.231+map-fns is a list of functions of one argument and output one result.
   1.232+each function in it will be applied to the parsed element.
   1.233+If any function in the list is nil or t, it equals to #'identity.
   1.234+If map-fns is nil, then nothing will be applied.
   1.235+https://cgit.gentoo.org/proj/lisp.git/tree/dev-lisp/cl-rsm-finance/cl-rsm-finance-1.1.ebuild?h=old-portage&id=e9b71910b0d4f22aeb66f14e158a2451f9955b0d
   1.236+external-format (default is shift-jis) is a valid AllegroCL external-format type.
   1.237+
   1.238+OS is a set to eol-convention of the file stream.
   1.239+
   1.240+start and end specifies how many elements per record will be included.
   1.241+If start or end is negative, it counts from the end. -1 is the last element.
   1.242+"
   1.243+  #+sbcl (declare (ignorable os))
   1.244+  (with-open-file (f filename :external-format external-format)
   1.245+    #+allegro (setf (excl:eol-convention f) os)
   1.246+    (read-csv-stream f :type-spec type-spec :map-fns map-fns
   1.247+                     :delimiter delimiter
   1.248+                     :start start :end end
   1.249+                     :header header)))
   1.250+
   1.251+
   1.252+(defun read-csv-file-and-sort (filename sort-order &key (header t) (order :ascend) type-spec map-fns (delimiter *csv-separator*) (external-format *csv-default-external-format*))
   1.253+  (let ((table (read-csv-file filename
   1.254+                              :header header
   1.255+                              :type-spec type-spec
   1.256+                              :map-fns map-fns
   1.257+                              :delimiter delimiter
   1.258+                              :external-format external-format)))
   1.259+    (loop for i in (reverse sort-order)
   1.260+        do (setf table
   1.261+             (stable-sort table (ecase order (:ascend #'string<=) (:descend #'string>=))
   1.262+                          :key (lambda (rec) (aref rec i))))
   1.263+        finally (return table))))