changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/dat/parquet/gen.lisp

changeset 542: d83b4d25d5c9
parent: 10c4bb778030
child: b88bd4b0a039
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 12 Jul 2024 22:58:59 -0400
permissions: -rw-r--r--
description: fixes
1 ;;; gen.lisp --- Parquet Lisp Code Generator
2 
3 ;;
4 
5 ;;; Code:
6 (in-package :dat/parquet/gen)
7 (defparameter *parquet-json-file*
8  (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/parquet.json")))
9 (defvar *parquet-json* nil)
10 (defun load-parquet-json (&optional (json-file *parquet-json-file*))
11  (with-open-file (file json-file)
12  (setq *parquet-json* (json-read file))))
13 
14 (defun %parquet-json-enums ()
15  (json-getf *parquet-json* "enums"))
16 
17 (defun parquet-json-enum-getf (name)
18  (json-getf
19  (find-if (lambda (x) (equal name (json-getf x "name"))) (%parquet-json-enums))
20  "members"))
21 
22 (defvar *parquet-enums* nil)
23 
24 (defmacro def-parquet-enum (sym name)
25  `(progn
26  (defun ,(symbolicate "PARQUET-JSON-" sym) ()
27  (mapcar (lambda (x) (json-getf x "name")) (parquet-json-enum-getf ,name)))
28  (defparameter ,(intern
29  (concatenate 'string "*PARQUET-" (symbol-name sym) "*")
30  :dat/parquet)
31  (,(symbolicate "PARQUET-JSON-" sym)))))
32 
33 (defun camelcase-name-to-lisp-name (string)
34  (string-upcase
35  (with-output-to-string (name)
36  (loop for i from 0 below (length string)
37  for c across string
38  when (and (upper-case-p c) (not (zerop i)))
39  do (write-char #\- name)
40  do (write-char c name)))))
41 
42 (defun snakecase-name-to-lisp-name (string)
43  (string-upcase
44  (substitute #\- #\_ string)))
45 
46 (labels ((parse-type-id (type-id)
47  (string-case (type-id :default nil)
48  ("bool" 'boolean)
49  ("byte" 'signed-byte)
50  ("i16" '(signed-byte 16))
51  ("i32" '(signed-byte 32))
52  ("i64" '(signed-byte 64))
53  ("double" 'double-float)
54  ("string" 'string)
55  ("list" 'list)
56  ("binary" 'octet-vector)
57  ("set" 'list)))
58  (parse-type (o)
59  (let ((name (string-case ((json-getf o "typeId"))
60  ("union" (json-getf o "class"))
61  ("struct" (json-getf o "class"))
62  ("enum" (json-getf o "class")))))
63  (intern
64  (cond
65  ((equal name "UUIDType") "PARQUET-UUID-TYPE")
66  (t (concatenate 'string
67  "PARQUET-"
68  (camelcase-name-to-lisp-name name))))
69  :dat/parquet))))
70  (defun convert-parquet-struct-field-type (field) ;; technically part of thrift type system
71  (let* ((type-id (parquet-struct-field-type-id field))
72  (type (parquet-struct-field-type field))
73  (required (parquet-struct-field-required field))
74  (unit-type (or (when type-id (parse-type-id type-id)) (when type (parse-type type)))))
75  (if (and (equal "optional" required) (not (equal unit-type 'list))) ;; (listp nil) = t
76  `(or null ,unit-type)
77  unit-type))))
78 
79 (defun parquet-json-enums ()
80  (list
81  (def-parquet-enum types "Type")
82  (def-parquet-enum converted-types "ConvertedType")
83  (def-parquet-enum field-repetition-types "FieldRepetitionType")
84  (def-parquet-enum encodings "Encoding")
85  (def-parquet-enum compression-codecs "CompressionCodec")
86  (def-parquet-enum page-types "PageType")
87  (def-parquet-enum boundary-orders "BoundaryOrder")))
88 
89 (defvar *parquet-structs* nil)
90 (defstruct (parquet-struct
91  (:constructor make-parquet-struct (name doc exceptionp unionp fields)))
92  name doc exceptionp unionp (fields nil :type list))
93 
94 (defstruct (parquet-struct-field
95  (:constructor make-parquet-struct-field (key name type-id type doc required)))
96  key name type-id type doc required)
97 
98 (defun parquet-destruct-field (field)
99  (list (parquet-struct-field-name field)
100  (parquet-struct-field-key field)
101  (parquet-struct-field-doc field)
102  (parquet-struct-field-type-id field)
103  (parquet-struct-field-type field)
104  (parquet-struct-field-required field)))
105 
106 (defun parquet-destruct (struct)
107  (list (parquet-struct-name struct)
108  (parquet-struct-doc struct)
109  (parquet-struct-unionp struct)
110  (parquet-struct-exceptionp struct)
111  (mapcar #'parquet-destruct-field (parquet-struct-fields struct))))
112 
113 (defun parquet-json-structs () ;; name doc isException isUnion fields
114  (mapcar
115  (lambda (s)
116  (let ((name (json-getf s "name"))
117  (doc (json-getf s "doc"))
118  (exceptionp (json-getf s "isException"))
119  (unionp (json-getf s "isUnion"))
120  (fields (loop for f in (json-getf s "fields")
121  collect
122  (let ((key (json-getf f "key"))
123  (name (json-getf f "name"))
124  (type-id (json-getf f "typeId"))
125  ;; json object - needs additional parsing
126  (type (json-getf f "type"))
127  (doc (json-getf f "doc"))
128  (required (json-getf f "required")))
129  (make-parquet-struct-field key name type-id type doc required)))))
130  (make-parquet-struct name doc exceptionp unionp fields)))
131  (json-getf *parquet-json* "structs")))
132 
133 (defun parquet-json-namespaces ()
134  (json-getf *parquet-json* "namespaces"))
135 
136 (defun init-parquet-json (&optional (file *parquet-json-file*))
137  (load-parquet-json file)
138  (setq *parquet-enums* (parquet-json-enums))
139  (setq *parquet-structs* (parquet-json-structs)))
140 
141 ;;; CLOS
142 (defclass parquet-object () ())
143 
144 ;; (defmethod print-object ((obj parquet-object) stream)
145 ;; "Output a Parquet object to a stream."
146 ;; (print-unreadable-object (obj stream :type t)))
147 
148 (defmacro define-parquet-class (name superclasses slots &rest options)
149  "Define a new subclass of PARQUET-OBJECT with NAME."
150  `(defclass ,name ,@(if-let ((s superclasses)) (list s) `((parquet-object))) ,slots ,@options))
151 
152 ;;; Codegen
153 
154 ;; 8)
155 (eval-always
156  (defun %define-parquet-structs ()
157  "Define all known values in *PARQUET-STRUCTS* using DEFINE-PARQUET-CLASS (DEFCLASS)."
158  (loop for struct in *parquet-structs*
159  unless (null struct)
160  collect (let ((name (parquet-struct-name struct))
161  (doc (parquet-struct-doc struct))
162  (fields (parquet-struct-fields struct)))
163  `(define-parquet-class ,(intern (cond
164  ((equal name "UUIDType") "PARQUET-UUID-TYPE")
165  (t (concatenate 'string
166  "PARQUET-"
167  (camelcase-name-to-lisp-name name))))
168  :dat/parquet)
169  (parquet-struct-object)
170  (,@(mapcar (lambda (f)
171  (let ((fdoc (parquet-struct-field-doc f))
172  (fname (snakecase-name-to-lisp-name
173  (parquet-struct-field-name f))))
174  `(,(intern fname :dat/parquet)
175  ,@(when fdoc `(:documentation ,fdoc))
176  :initarg ,(keywordicate fname)
177  ;; TODO 2024-07-12:
178  ,@(when (equal "optional" (parquet-struct-field-required f))
179  `(:initform nil))
180  ,@(when-let ((ty (convert-parquet-struct-field-type f)))
181  `(:type ,ty)))))
182  fields))
183  ,@(when doc `((:documentation ,doc))))))))
184 
185 (defmacro define-parquet-structs ()
186  `(list
187  ,@(%define-parquet-structs)))
188 
189 (defmacro define-parquet-type (name opts &body body)
190  "Define a parquet type with DEFTYPE which maps to LISP-TYPE."
191  `(deftype ,(intern (concatenate 'string "PARQUET-" (substitute #\- #\_ name)) :dat/parquet) ,opts ,@body))
192 
193 (defun define-parquet-types ()
194  "Define all known values in *PARQUET-TYPES* using DEFINE-PARQUET-TYPE (DEFTYPE)."
195  (list
196  (define-parquet-type "BOOLEAN" () 'boolean)
197  (define-parquet-type "INT32" () '(signed-byte 32))
198  (define-parquet-type "INT64" () '(signed-byte 64))
199  (define-parquet-type "INT96" () '(signed-byte 96))
200  (define-parquet-type "FLOAT" () 'float)
201  (define-parquet-type "DOUBLE" () 'double-float)
202  (define-parquet-type "BYTE_ARRAY" (&optional size) `(octet-vector ,size))
203  (define-parquet-type "FIXED_LEN_BYTE_ARRAY" (size) `(octet-vector ,size))))
204 
205 (defun load-parquet (&key (file *parquet-json-file*))
206  (init-parquet-json file)
207  (with-package (:dat/parquet)
208  (define-parquet-class parquet-enum-object () ())
209  (define-parquet-class parquet-struct-object () ())
210  (export (define-parquet-types))
211  (export (mapcar 'class-name (define-parquet-structs)))
212  (export *parquet-enums*)))