changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/dat/parquet/gen.lisp

changeset 544: ec1d4d544c36
parent: b88bd4b0a039
child: b57066450cfa
author: Richard Westhaver <ellis@rwest.io>
date: Sat, 13 Jul 2024 18:18:01 -0400
permissions: -rw-r--r--
description: parquet expansion, init leb128, add little-endian octet encoders
1 ;;; gen.lisp --- Parquet Lisp Code Generator
2 
3 ;;
4 
5 ;;; Code:
6 (defpackage :dat/parquet/gen ;; not public API
7  (:use :cl :std :dat/proto :dat/json)
8  (:export :load-parquet))
9 
10 (in-package :dat/parquet/gen)
11 (defparameter *parquet-json-file*
12  (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/parquet.json")))
13 (defvar *parquet-json* nil)
14 (defun load-parquet-json (&optional (json-file *parquet-json-file*))
15  (with-open-file (file json-file)
16  (setq *parquet-json* (json-read file))))
17 
18 (defun %parquet-json-enums ()
19  (json-getf *parquet-json* "enums"))
20 
21 (defun parquet-json-enum-getf (name)
22  (json-getf
23  (find-if (lambda (x) (equal name (json-getf x "name"))) (%parquet-json-enums))
24  "members"))
25 
26 (defvar *parquet-enums* nil)
27 
28 (defmacro def-parquet-enum (sym name)
29  `(progn
30  (defun ,(symbolicate "PARQUET-JSON-" sym) ()
31  (mapcar (lambda (x) (keywordicate (snakecase-name-to-lisp-name (json-getf x "name"))))
32  (parquet-json-enum-getf ,name)))
33  (defparameter ,(intern
34  (concatenate 'string "*PARQUET-" (symbol-name sym) "*")
35  :dat/parquet)
36  (,(symbolicate "PARQUET-JSON-" sym)))))
37 
38 (defun camelcase-name-to-lisp-name (string)
39  (string-upcase
40  (with-output-to-string (name)
41  (loop for i from 0 below (length string)
42  for c across string
43  when (and (upper-case-p c) (not (zerop i)))
44  do (write-char #\- name)
45  do (write-char c name)))))
46 
47 (defun snakecase-name-to-lisp-name (string)
48  (string-upcase
49  (substitute #\- #\_ string)))
50 
51 (labels ((parse-type-id (type-id)
52  (string-case (type-id :default nil)
53  ("bool" 'boolean)
54  ("byte" 'signed-byte)
55  ("i16" '(signed-byte 16))
56  ("i32" '(signed-byte 32))
57  ("i64" '(signed-byte 64))
58  ("double" 'double-float)
59  ("string" 'string)
60  ("list" 'list)
61  ("binary" 'octet-vector)
62  ("set" 'list)))
63  (parse-type (o)
64  (let ((name (string-case ((json-getf o "typeId"))
65  ("union" (json-getf o "class"))
66  ("struct" (json-getf o "class"))
67  ("enum" (json-getf o "class")))))
68  (intern
69  (cond
70  ((equal name "UUIDType") "PARQUET-UUID-TYPE")
71  (t (concatenate 'string
72  "PARQUET-"
73  (camelcase-name-to-lisp-name name))))
74  :dat/parquet))))
75  (defun convert-parquet-struct-field-type (field) ;; technically part of thrift type system
76  (let* ((type-id (parquet-struct-field-type-id field))
77  (type (parquet-struct-field-type field))
78  (required (parquet-struct-field-required field))
79  (unit-type (or (when type-id (parse-type-id type-id)) (when type (parse-type type)))))
80  (if (and (equal "optional" required) (not (equal unit-type 'list))) ;; (listp nil) = t
81  `(or null ,unit-type)
82  unit-type))))
83 
84 (defun parquet-json-enums ()
85  (list
86  (def-parquet-enum types "Type")
87  (def-parquet-enum converted-types "ConvertedType")
88  (def-parquet-enum field-repetition-types "FieldRepetitionType")
89  (def-parquet-enum encodings "Encoding")
90  (def-parquet-enum compression-codecs "CompressionCodec")
91  (def-parquet-enum page-types "PageType")
92  (def-parquet-enum boundary-orders "BoundaryOrder")))
93 
94 (defvar *parquet-structs* nil)
95 (defstruct (parquet-struct
96  (:constructor make-parquet-struct (name doc exceptionp unionp fields)))
97  name doc exceptionp unionp (fields nil :type list))
98 
99 (defstruct (parquet-struct-field
100  (:constructor make-parquet-struct-field (key name type-id type doc required)))
101  key name type-id type doc required)
102 
103 (defun parquet-destruct-field (field)
104  (list (parquet-struct-field-name field)
105  (parquet-struct-field-key field)
106  (parquet-struct-field-doc field)
107  (parquet-struct-field-type-id field)
108  (parquet-struct-field-type field)
109  (parquet-struct-field-required field)))
110 
111 (defun parquet-destruct (struct)
112  (list (parquet-struct-name struct)
113  (parquet-struct-doc struct)
114  (parquet-struct-unionp struct)
115  (parquet-struct-exceptionp struct)
116  (mapcar #'parquet-destruct-field (parquet-struct-fields struct))))
117 
118 (defun parquet-json-structs () ;; name doc isException isUnion fields
119  (mapcar
120  (lambda (s)
121  (let ((name (json-getf s "name"))
122  (doc (json-getf s "doc"))
123  (exceptionp (json-getf s "isException"))
124  (unionp (json-getf s "isUnion"))
125  (fields (loop for f in (json-getf s "fields")
126  collect
127  (let ((key (json-getf f "key"))
128  (name (json-getf f "name"))
129  (type-id (json-getf f "typeId"))
130  ;; json object - needs additional parsing
131  (type (json-getf f "type"))
132  (doc (json-getf f "doc"))
133  (required (json-getf f "required")))
134  (make-parquet-struct-field key name type-id type doc required)))))
135  (make-parquet-struct name doc exceptionp unionp fields)))
136  (json-getf *parquet-json* "structs")))
137 
138 (defun parquet-json-namespaces ()
139  (json-getf *parquet-json* "namespaces"))
140 
141 (defun init-parquet-json (&optional (file *parquet-json-file*))
142  (load-parquet-json file)
143  (setq *parquet-enums* (parquet-json-enums))
144  (setq *parquet-structs* (parquet-json-structs)))
145 
146 ;;; CLOS
147 (defclass parquet-object () ())
148 
149 ;; (defmethod print-object ((obj parquet-object) stream)
150 ;; "Output a Parquet object to a stream."
151 ;; (print-unreadable-object (obj stream :type t)))
152 
153 (defmacro define-parquet-class (name superclasses slots &rest options)
154  "Define a new subclass of PARQUET-OBJECT with NAME."
155  `(defclass ,name ,@(if-let ((s superclasses)) (list s) `((parquet-object))) ,slots ,@options))
156 
157 ;;; Codegen
158 
159 ;; 8)
160 (eval-always
161  (defun %define-parquet-structs ()
162  "Define all known values in *PARQUET-STRUCTS* using DEFINE-PARQUET-CLASS (DEFCLASS)."
163  (loop for struct in *parquet-structs*
164  unless (null struct)
165  collect (let ((name (parquet-struct-name struct))
166  (doc (parquet-struct-doc struct))
167  (fields (parquet-struct-fields struct)))
168  `(define-parquet-class ,(intern (cond
169  ((equal name "UUIDType") "PARQUET-UUID-TYPE")
170  (t (concatenate 'string
171  "PARQUET-"
172  (camelcase-name-to-lisp-name name))))
173  :dat/parquet)
174  (parquet-struct-object)
175  (,@(mapcar (lambda (f)
176  (let ((fdoc (parquet-struct-field-doc f))
177  (fname (snakecase-name-to-lisp-name
178  (parquet-struct-field-name f))))
179  `(,(intern fname :dat/parquet)
180  ,@(when fdoc `(:documentation ,fdoc))
181  :initarg ,(keywordicate fname)
182  ;; TODO 2024-07-12:
183  ,@(when (equal "optional" (parquet-struct-field-required f))
184  `(:initform nil))
185  ,@(when-let ((ty (convert-parquet-struct-field-type f)))
186  `(:type ,ty)))))
187  fields))
188  ,@(when doc `((:documentation ,doc))))))))
189 
190 (defmacro define-parquet-structs ()
191  `(list
192  ,@(%define-parquet-structs)))
193 
194 (defmacro define-parquet-type (name opts &body body)
195  "Define a parquet type with DEFTYPE which maps to LISP-TYPE."
196  `(deftype ,(intern (concatenate 'string "PARQUET-" (substitute #\- #\_ name)) :dat/parquet) ,opts ,@body))
197 
198 (defun define-parquet-types ()
199  "Define all known values in *PARQUET-TYPES* using DEFINE-PARQUET-TYPE (DEFTYPE)."
200  (list
201  (define-parquet-type "BOOLEAN" () 'boolean)
202  (define-parquet-type "INT32" () '(signed-byte 32))
203  (define-parquet-type "INT64" () '(signed-byte 64))
204  (define-parquet-type "INT96" () '(signed-byte 96))
205  (define-parquet-type "FLOAT" () 'float)
206  (define-parquet-type "DOUBLE" () 'double-float)
207  (define-parquet-type "BYTE_ARRAY" (&optional size) `(octet-vector ,size))
208  (define-parquet-type "FIXED_LEN_BYTE_ARRAY" (size) `(octet-vector ,size))))
209 
210 (defun load-parquet (&key (file *parquet-json-file*))
211  (init-parquet-json file)
212  (with-package (:dat/parquet)
213  (define-parquet-class parquet-struct-object () ())
214  (let ((types (define-parquet-types)))
215  (export types)
216  (deftype dat/parquet::parquet-type (&optional (designator octet-vector) optional)
217  (if optional
218  (if (eql designator 'list)
219  list
220  `(or null ,designator))
221  designator)))
222  (export (mapcar 'class-name (define-parquet-structs)))
223  (export *parquet-enums*)))