changelog shortlog graph tags branches changeset files file revisions raw help

Mercurial > core / annotate lisp/lib/dat/csv.lisp

changeset 698: 96958d3eb5b0
parent: d3e2829521a3
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
123
a4ed30cbe083 data testing, added ical and vcard formats
ellis <ellis@rwest.io>
parents: 122
diff changeset
1
 ;;; lib/dat/csv.lisp --- CSV Data Format
a4ed30cbe083 data testing, added ical and vcard formats
ellis <ellis@rwest.io>
parents: 122
diff changeset
2
 
580
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
3
 ;; Comma Separated Values (or tabs or whatever)
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
4
 
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
5
 ;;; Commentary:
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
6
 
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
7
 ;; This package prioritizes flexibility. If you want speed, convert to
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
8
 ;; parquet.
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
9
 
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
10
 ;; Still, efficiency is worth pursuing here and there are some obvious gaps to
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
11
 ;; remedy.
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
12
 
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
13
 ;; - remove sequence functions
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
14
 ;; - research optimized access patterns used in other langs/state of art
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
15
 ;; - buffered reads
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
16
 ;; - multithreading
123
a4ed30cbe083 data testing, added ical and vcard formats
ellis <ellis@rwest.io>
parents: 122
diff changeset
17
 
581
d3e2829521a3 tmux work, fuzzer
Richard Westhaver <ellis@rwest.io>
parents: 580
diff changeset
18
 ;; ref: https://datatracker.ietf.org/doc/html/rfc4180
d3e2829521a3 tmux work, fuzzer
Richard Westhaver <ellis@rwest.io>
parents: 580
diff changeset
19
 
123
a4ed30cbe083 data testing, added ical and vcard formats
ellis <ellis@rwest.io>
parents: 122
diff changeset
20
 ;;; Code:
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
21
 (in-package :dat/csv)
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
22
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
23
 (defun parse-number-no-error (string &optional default)
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
24
   (let ((result
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
25
           (ignore-errors
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
26
            (parse-number string))))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
27
     (if result
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
28
         result
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
29
         default)))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
30
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
31
 ;; Default field delimiter; used as the default :delimiter of the readers and writers in this file.
 (defparameter *csv-separator* #\,)
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
32
 ;; Quote character: written around string elements and recognized by PARSE-CSV-STRING.
 (defparameter *csv-quote* #\")
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
33
 ;; When true, WRITE-CSV-LINE wraps string elements in *CSV-QUOTE*.
 (defparameter *csv-print-quote-p* t "print \" when the element is a string?")
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
34
 ;; Default :external-format handed to WITH-OPEN-FILE by WRITE-CSV-FILE.
 (defparameter *csv-default-external-format* :utf-8)
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
35
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
36
 (defun write-csv-line (record &key stream (delimiter *csv-separator*))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
37
   "Accept a record and print it in one line as a csv record.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
38
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
39
 A record is a sequence of element. A element can be of any type.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
40
 If record is nil, nothing will be printed.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
41
 If stream is nil (default case), it will return a string, otherwise it will return nil.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
42
 For efficiency reason, no intermediate string will be constructed. "
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
43
   (let ((result
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
44
           (with-output-to-string (s)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
45
             (let ((*standard-output* s)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
46
                   (record-size (length record)))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
47
               (loop for e across record
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
48
                     for i from 0
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
49
                     do (typecase e
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
50
                          (string (progn
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
51
                                    (if *csv-print-quote-p*
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
52
                                        (progn
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
53
                                          (write-char *csv-quote*)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
54
                                          (write-string e)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
55
                                          (write-char *csv-quote*))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
56
                                        (write-string e))))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
57
                          (t (princ e)))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
58
                        (when (< i (1- record-size))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
59
                          (write-char delimiter)))))))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
60
     (format stream "~&~a" result)))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
61
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
62
 (defun write-csv-stream (stream table &key (delimiter *csv-separator*))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
63
   "Accept a stream and a table and output the table as csv form to the stream.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
64
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
65
 A table is a sequence of lines. A line is a sequence of elements.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
66
 Elements can be any types"
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
67
   (loop for l across table
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
68
         do (write-csv-line l :stream stream :delimiter delimiter))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
69
   (write-char #\newline stream)
465
c0fc6b87557f messing around
Richard Westhaver <ellis@rwest.io>
parents: 277
diff changeset
70
   nil)
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
71
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
72
 (defun write-csv-file (filename table &key (external-format *csv-default-external-format*) (delimiter *csv-separator*))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
73
   "Accept a filename and a table and output the table as csv form to the file.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
74
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
75
 A table is a sequence of lines. A line is a sequence of elements.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
76
 Elements can be any types"
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
77
   (with-open-file (f filename :direction :output
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
78
                               :if-does-not-exist :create
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
79
                               :if-exists :supersede
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
80
                               :external-format external-format)
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
81
     (write-csv-stream f table :delimiter delimiter)))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
82
 
581
d3e2829521a3 tmux work, fuzzer
Richard Westhaver <ellis@rwest.io>
parents: 580
diff changeset
83
 (defun write-csv-string (table)
d3e2829521a3 tmux work, fuzzer
Richard Westhaver <ellis@rwest.io>
parents: 580
diff changeset
84
   (with-output-to-string (str)
d3e2829521a3 tmux work, fuzzer
Richard Westhaver <ellis@rwest.io>
parents: 580
diff changeset
85
     (write-csv-stream str table)))
d3e2829521a3 tmux work, fuzzer
Richard Westhaver <ellis@rwest.io>
parents: 580
diff changeset
86
 
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
87
 (defun parse-csv-string (str &key (delimiter *csv-separator*)) ;; refer RFC4180
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
88
   (coerce
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
89
    ;; (regexp:split-re "," str)
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
90
    (let ((q-count (count *csv-quote* str :test #'char-equal)))
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
91
      (when (oddp q-count) (warn 'simple-warning :format-control "odd number of #\" in a line (~A)"
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
92
                                                 :format-arguments (list q-count)))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
93
      (if (zerop q-count)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
94
          (cl-ppcre:split delimiter str) ;(cl-ppcre:split *csv-separator* str)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
95
          (macrolet ((push-f (fld flds) `(push (coerce (reverse ,fld) 'string) ,flds)))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
96
            (loop with state = :at-first ;; :at-first | :data-nq | :data-q | :q-in-nq | q-in-q
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
97
                  with field with fields
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
98
                  for chr of-type character across str
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
99
                  do (cond ((eq state :at-first)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
100
                            (setf field nil)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
101
                            (cond ((char-equal chr *csv-quote*) (setf state :data-q))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
102
                                  ((char-equal chr delimiter) (push "" fields))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
103
                                  (t (setf state :data-nq) (push chr field))))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
104
                           ((eq state :data-nq)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
105
                            (cond ((char-equal chr *csv-quote*) (setf state :q-in-nq))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
106
                                  ((char-equal chr delimiter)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
107
                                   (push-f field fields)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
108
                                   (setf state :at-first))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
109
                                  (t (push chr field))))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
110
                           ((eq state :q-in-nq)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
111
                            (cond ((char-equal chr *csv-quote*) (setf state :q-in-q))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
112
                                  ((char-equal chr delimiter)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
113
                                   (push-f field fields)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
114
                                   (setf state :at-first))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
115
                                  (t (setf state :data-nq) (push chr field))))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
116
                           ((eq state :data-q)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
117
                            (cond ((char-equal chr *csv-quote*) (setf state :q-in-q))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
118
                                  ((char-equal chr delimiter) (push-f field fields) (setf state :at-first))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
119
                                  (t (push chr field))))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
120
                           ((eq state :q-in-q)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
121
                            (cond ((char-equal chr *csv-quote*) (setf state :data-q))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
122
                                  ;; this should only be done conditionally - early escapes quotes
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
123
                                  ((char-equal chr delimiter)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
124
                                   (push-f field fields)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
125
                                   (setf state :at-first))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
126
                                  (t
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
127
                                   ;; (error "illegal value ( ~A ) after quotation" chr)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
128
                                   (push chr field)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
129
                                   ))))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
130
                  finally (return
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
131
                            (progn (push-f field fields) (reverse fields)))))))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
132
    'vector))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
133
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
134
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
135
 (defun read-csv-line (stream &key type-conv-fns map-fns (delimiter *csv-separator*) (start 0) end)
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
136
   "Read one line from stream and return a csv record.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
137
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
138
 A CSV record is a vector of elements.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
139
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
140
 type-conv-fns should be a list of functions.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
141
 If type-conv-fns is nil (the default case), then all will be treated
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
142
 as string.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
143
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
144
 map-fns is a list of functions of one argument and output one result.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
145
 each function in it will be applied to the parsed element.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
146
 If map-fns is nil, then nothing will be applied.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
147
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
148
 start and end specifies how many elements per record will be included.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
149
 If start or end is negative, it counts from the end. -1 is the last element.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
150
 "
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
151
   (declare (type (or (simple-array function *) null) type-conv-fns map-fns))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
152
   (let* ((rline (read-line stream nil nil)))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
153
     (when rline
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
154
       (let* ((line (string-trim '(#\Space #\Newline #\Return) rline))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
155
              (strs (parse-csv-string line :delimiter delimiter))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
156
              (strs-size (length strs)))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
157
         (when (< start 0)
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
158
           (setf start (+ start strs-size)))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
159
         (when (and end (< end 0))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
160
           (setf end (+ end strs-size)))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
161
         (setf strs (subseq strs start end))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
162
         (when type-conv-fns
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
163
           (unless (= (length strs) (length type-conv-fns))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
164
             (error "Number of type specifier (~a) does not match the number of elements (~a)."
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
165
                    (length type-conv-fns) (length strs))))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
166
         (when map-fns
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
167
           (unless (= (length strs) (length map-fns))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
168
             (error "Number of mapping functions (~a) does not match the number of elements (~a)."
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
169
                    (length map-fns) (length strs))))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
170
         (let ((result strs))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
171
           ;; strs is not needed so we simply overwrite it
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
172
           (when type-conv-fns
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
173
             (setf result
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
174
                   (map 'vector #'funcall type-conv-fns result)))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
175
           (when map-fns
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
176
             (setf result
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
177
                   (map 'vector #'funcall map-fns result)))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
178
           result)))))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
179
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
180
 (defun read-csv-stream (stream &key (header t) type-spec map-fns (delimiter *csv-separator*) (start 0) end)
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
181
   "Read from stream until eof and return a csv table.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
182
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
183
 A csv table is a vector of csv records.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
184
 A csv record is a vector of elements.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
185
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
186
 Type spec should be a list of type specifier (symbols).
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
187
 If the type specifier is nil or t, it will be treated as string.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
188
 If type-spec is nil (the default case), then all will be treated
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
189
 as string.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
190
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
191
 map-fns is a list of functions of one argument and output one result.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
192
 each function in it will be applied to the parsed element.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
193
 If any function in the list is nil or t, it equals to #'identity.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
194
 If map-fns is nil, then nothing will be applied.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
195
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
196
 start and end specifies how many elements per record will be included.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
197
 If start or end is negative, it counts from the end. -1 is the last element.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
198
 "
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
199
   (let ((type-conv-fns
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
200
           (when type-spec
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
201
             (macrolet ((make-num-specifier (specifier)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
202
                          `(lambda (s) (let ((s (parse-number-no-error s s)))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
203
                                         (if (numberp s) (funcall ,specifier s) s)))))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
204
               (map 'vector
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
205
                    (lambda (type)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
206
                      (ecase type
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
207
                        ((t nil string) #'identity)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
208
                        (number #'(lambda (s) (parse-number-no-error s s)))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
209
                        (float (make-num-specifier #'float))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
210
                        (single-float (make-num-specifier #'(lambda (s) (coerce s 'single-float))))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
211
                        (double-float (make-num-specifier #'(lambda (s) (coerce s 'double-float))))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
212
                        (integer (make-num-specifier #'round))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
213
                        (pathname #'pathname)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
214
                        (symbol #'intern)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
215
                        (keyword (lambda (s) (intern s :keyword)))))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
216
                    type-spec))))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
217
         (map-fns
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
218
           (when map-fns
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
219
             (map 'vector
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
220
                  (lambda (fn)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
221
                    (cond ((or (eq fn t)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
222
                               (eq fn nil))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
223
                           #'identity)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
224
                          ((functionp fn)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
225
                           fn)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
226
                          ((and (symbolp fn)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
227
                                (not (keywordp fn)))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
228
                           (symbol-function fn))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
229
                          (t (error "~a is not a valid function specifier." fn))))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
230
                  map-fns)))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
231
         (header
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
232
           (etypecase header
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
233
             (cons (coerce header 'vector))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
234
             (boolean (when header
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
235
                        (read-csv-line stream :delimiter delimiter))))))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
236
     (loop for rec = (read-csv-line stream :type-conv-fns type-conv-fns :map-fns map-fns :delimiter delimiter
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
237
                                           :start start :end end)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
238
           while rec
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
239
           collect rec into result
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
240
           finally (return
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
241
                     (values
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
242
                      (coerce result 'vector)
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
243
                      header)))))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
244
 
580
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
245
 (defun read-csv-file (filename &key (header t)
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
246
                                     type-spec
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
247
                                     map-fns
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
248
                                     (delimiter *csv-separator*)
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
249
                                     (external-format *csv-default-external-format*)
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
250
                                     (start 0)
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
251
                                     end)
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
252
   "Read from stream until eof and return a csv table.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
253
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
254
 A csv table is a vector of csv records.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
255
 A csv record is a vector of elements.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
256
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
257
 Type spec should be a list of type specifier (symbols).
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
258
 If the type specifier is nil or t, it will be treated as string.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
259
 If type-spec is nil (the default case), then all will be treated
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
260
 as string.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
261
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
262
 map-fns is a list of functions of one argument and output one result.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
263
 each function in it will be applied to the parsed element.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
264
 If any function in the list is nil or t, it equals to #'identity.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
265
 If map-fns is nil, then nothing will be applied.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
266
 
580
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
267
 external-format (default is :UTF-8)
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
268
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
269
 start and end specifies how many elements per record will be included.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
270
 If start or end is negative, it counts from the end. -1 is the last element.
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
271
 "
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
272
   (with-open-file (f filename :external-format external-format)
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
273
     (read-csv-stream f :type-spec type-spec :map-fns map-fns
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
274
                        :delimiter delimiter
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
275
                        :start start :end end
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
276
                        :header header)))
122
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
277
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
278
 
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
279
 (defun read-csv-file-and-sort (filename sort-order &key (header t) (order :ascend) type-spec map-fns (delimiter *csv-separator*) (external-format *csv-default-external-format*))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
280
   (let ((table (read-csv-file filename
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
281
                               :header header
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
282
                               :type-spec type-spec
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
283
                               :map-fns map-fns
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
284
                               :delimiter delimiter
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
285
                               :external-format external-format)))
4ba88cac5bc7 num/parse, added DAT system, net/fetch, time/local, refactored trees
ellis <ellis@rwest.io>
parents:
diff changeset
286
     (loop for i in (reverse sort-order)
277
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
287
           do (setf table
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
288
                    (stable-sort table (ecase order (:ascend #'string<=) (:descend #'string>=))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
289
                                 :key (lambda (rec) (aref rec i))))
10faf95f90dd stream and basic type upgrades. fixed some bugs and improved csv parsing
Richard Westhaver <ellis@rwest.io>
parents: 275
diff changeset
290
           finally (return table))))
580
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
291
 
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
292
 ;; Data source class for CSV files. It inherits everything from
 ;; FILE-DATA-SOURCE and adds no slots of its own; it exists so that generic
 ;; functions can specialize on CSV input.
 (defclass csv-file-data (file-data-source) ())
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
293
 
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
294
 ;; TODO 2024-08-05: 
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
295
 (defmethod scan-data ((self csv-file-data) (projection sequence))
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
296
   (if (null projection)
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
297
       (read-csv-file (file-data-path self))
571685ae64f1 queries, cli fixes, dat/csv, emacs org-columns
Richard Westhaver <ellis@rwest.io>
parents: 465
diff changeset
298
       (nyi!)))