changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/dat/parquet/gen.lisp

changeset 550: 4d34907c69eb
parent: 32bd859533b3
child: 849f72b72b41
author: Richard Westhaver <ellis@rwest.io>
date: Tue, 16 Jul 2024 21:52:09 -0400
permissions: -rw-r--r--
description: more work on tcompact/thrift, fixed type info in parquet-struct-objects
1 ;;; gen.lisp --- Parquet Lisp Code Generator
2 
3 ;;
4 
5 ;;; Code:
6 (defpackage :dat/parquet/gen ;; not public API
7  (:use :cl :std :dat/proto :dat/json)
8  (:export :load-parquet))
9 
10 (in-package :dat/parquet/gen)
11 (defparameter *parquet-json-file*
12  (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/parquet.json")))
13 (defvar *parquet-json* nil)
14 (defun load-parquet-json (&optional (json-file *parquet-json-file*))
15  (with-open-file (file json-file)
16  (setq *parquet-json* (json-read file))))
17 
18 (defun %parquet-json-enums ()
19  (json-getf *parquet-json* "enums"))
20 
21 (defun parquet-json-enum-getf (name)
22  (json-getf
23  (find-if (lambda (x) (equal name (json-getf x "name"))) (%parquet-json-enums))
24  "members"))
25 
26 (defvar *parquet-enums* nil)
27 
28 (defmacro def-parquet-enum (sym name)
29  `(progn
30  (defun ,(symbolicate "PARQUET-JSON-" sym) ()
31  (mapcar (lambda (x) (keywordicate (snakecase-name-to-lisp-name (json-getf x "name"))))
32  (parquet-json-enum-getf ,name)))
33  (defparameter ,(intern
34  (concatenate 'string "*PARQUET-" (symbol-name sym) "*")
35  :dat/parquet)
36  (,(symbolicate "PARQUET-JSON-" sym)))))
37 
38 (defun camelcase-name-to-lisp-name (string)
39  (string-upcase
40  (with-output-to-string (name)
41  (loop for i from 0 below (length string)
42  for c across string
43  when (and (upper-case-p c) (not (zerop i)))
44  do (write-char #\- name)
45  do (write-char c name)))))
46 
47 (defun snakecase-name-to-lisp-name (string)
48  (string-upcase
49  (substitute #\- #\_ string)))
50 
51 (labels ((parse-type-id (type-id)
52  (when type-id
53  (string-case (type-id :default nil)
54  ("bool" 'boolean)
55  ("byte" 'signed-byte)
56  ("i16" '(signed-byte 16))
57  ("i32" '(signed-byte 32))
58  ("i64" '(signed-byte 64))
59  ("double" 'double-float)
60  ("string" 'string)
61  ("list" 'vector)
62  ("binary" 'octet-vector)
63  ("set" 'vector)
64  ("enum" '(signed-byte 32))
65  ("union" 'union)
66  ("struct" 'struct))))
67  (%intern (name)
68  (if (stringp name)
69  (intern
70  (cond
71  ((equal name "UUIDType") "PARQUET-UUID-TYPE")
72  (t (concatenate 'string
73  "PARQUET-"
74  (camelcase-name-to-lisp-name name))))
75  :dat/parquet)
76  name))
77  (parse-type (o)
78  (when o
79  (string-case ((json-getf o "typeId"))
80  ("union" (%intern (json-getf o "class")))
81  ("list"
82  (if-let ((elt (json-getf o "elemType" nil)))
83  (%intern (parse-type elt))
84  (parse-type-id (json-getf o "elemTypeId"))))
85  ("set"
86  (if-let ((elt (json-getf o "elemType" nil)))
87  (%intern (parse-type elt))
88  (parse-type-id (json-getf o "elemTypeId"))))
89  ("struct" (%intern (json-getf o "class")))
90  ("enum" (%intern (json-getf o "class")))))))
91  (defun convert-parquet-struct-field-type (field) ;; technically part of thrift type system
92  (let* ((type-id (parse-type-id (parquet-struct-field-type-id field)))
93  (type (parse-type (parquet-struct-field-type field)))
94  (required (parquet-struct-field-required field)))
95  (let ((ret (cond
96  ((eql 'vector type-id) `(vector ,type))
97  (t (or type type-id)))))
98  (if (equal "optional" required)
99  `(or null ,ret)
100  ret)))))
101 
102 (defun parquet-json-enums ()
103  (list
104  (def-parquet-enum types "Type")
105  (def-parquet-enum converted-types "ConvertedType")
106  (def-parquet-enum field-repetition-types "FieldRepetitionType")
107  (def-parquet-enum encodings "Encoding")
108  (def-parquet-enum compression-codecs "CompressionCodec")
109  (def-parquet-enum page-types "PageType")
110  (def-parquet-enum boundary-orders "BoundaryOrder")))
111 
112 (eval-always
113  (defvar *parquet-structs* nil))
114 (defstruct (parquet-struct
115  (:constructor make-parquet-struct (name doc exceptionp unionp fields)))
116  name doc exceptionp unionp (fields nil :type list))
117 
118 (defstruct (parquet-struct-field
119  (:constructor make-parquet-struct-field (key name type-id type doc required)))
120  key name type-id type doc required)
121 
122 (defun parquet-json-structs () ;; name doc isException isUnion fields
123  (mapcar
124  (lambda (s)
125  (let ((name (json-getf s "name"))
126  (doc (json-getf s "doc"))
127  (exceptionp (json-getf s "isException"))
128  (unionp (json-getf s "isUnion"))
129  (fields (loop for f in (json-getf s "fields")
130  collect
131  (let ((key (json-getf f "key"))
132  (name (json-getf f "name"))
133  (type-id (json-getf f "typeId"))
134  ;; json object - needs additional parsing
135  (type (json-getf f "type"))
136  (doc (json-getf f "doc"))
137  (required (json-getf f "required")))
138  (make-parquet-struct-field key name type-id type doc required)))))
139  (make-parquet-struct name doc exceptionp unionp fields)))
140  (json-getf *parquet-json* "structs")))
141 
142 (defun parquet-json-namespaces ()
143  (json-getf *parquet-json* "namespaces"))
144 
145 (defun init-parquet-json (&optional (file *parquet-json-file*))
146  (load-parquet-json file)
147  (setq *parquet-enums* (parquet-json-enums))
148  (setq *parquet-structs* (parquet-json-structs)))
149 
150 ;;; CLOS
151 
152 ;; (defmethod print-object ((obj parquet-object) stream)
153 ;; "Output a Parquet object to a stream."
154 ;; (print-unreadable-object (obj stream :type t)))
155 
156 (defmacro define-parquet-class (name superclasses slots &rest options)
157  "Define a new subclass of PARQUET-OBJECT with NAME."
158  `(defclass ,name ,@(if-let ((s superclasses)) (list s) `((dat/parquet::parquet-object))) ,slots ,@options))
159 
160 ;;; Codegen
161 
162 ;; 8)
163 (eval-always
164  (defun %define-parquet-structs ()
165  "Define all known values in *PARQUET-STRUCTS* using DEFINE-PARQUET-CLASS (DEFCLASS)."
166  (loop for struct in *parquet-structs*
167  unless (null struct)
168  collect (let* ((name (parquet-struct-name struct))
169  (doc (parquet-struct-doc struct))
170  (fields (parquet-struct-fields struct))
171  (class-name (intern (cond
172  ((equal name "UUIDType") "PARQUET-UUID-TYPE")
173  (t (concatenate 'string
174  "PARQUET-"
175  (camelcase-name-to-lisp-name name))))
176  :dat/parquet)))
177  `(progn
178  (define-parquet-class ,class-name
179  (dat/parquet::parquet-struct-object)
180  (,@(mapcar (lambda (f)
181  (let ((fdoc (parquet-struct-field-doc f))
182  (fname (snakecase-name-to-lisp-name
183  (parquet-struct-field-name f))))
184  `(,(intern fname :dat/parquet)
185  ,@(when fdoc `(:documentation ,fdoc))
186  :initarg ,(keywordicate fname)
187  ;; TODO 2024-07-12:
188  ,@(when (equal "optional" (parquet-struct-field-required f))
189  `(:initform nil))
190  ,@(when-let ((ty (convert-parquet-struct-field-type f)))
191  `(:type ,ty)))))
192  fields))
193  ,@(when doc `((:documentation ,doc))))
194  ',class-name)))))
195 
196 (defmacro define-parquet-structs ()
197  `(list
198  ,@(%define-parquet-structs)))
199 
200 (defmacro define-parquet-type (name opts &body body)
201  "Define a parquet type with DEFTYPE which maps to LISP-TYPE."
202  `(deftype ,(intern (concatenate 'string "PARQUET-" (substitute #\- #\_ name)) :dat/parquet) ,opts ,@body))
203 
204 (defun define-parquet-types ()
205  "Define all known values in *PARQUET-TYPES* using DEFINE-PARQUET-TYPE (DEFTYPE)."
206  (list
207  (define-parquet-type "BOOLEAN" () 'boolean)
208  (define-parquet-type "INT32" () '(signed-byte 32))
209  (define-parquet-type "INT64" () '(signed-byte 64))
210  (define-parquet-type "INT96" () '(signed-byte 96))
211  (define-parquet-type "FLOAT" () 'float)
212  (define-parquet-type "DOUBLE" () 'double-float)
213  (define-parquet-type "BYTE_ARRAY" (&optional size) `(octet-vector ,size))
214  (define-parquet-type "FIXED_LEN_BYTE_ARRAY" (size) `(octet-vector ,size))))
215 
216 (defun load-parquet (&key (file *parquet-json-file*))
217  (init-parquet-json file)
218  (with-package (:dat/parquet)
219  (let ((types (define-parquet-types)))
220  (export types))
221  (export (define-parquet-structs))
222  (export *parquet-enums*)))