changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/dat/parquet/gen.lisp

changeset 637: b88bf15f60d0
parent: 849f72b72b41
child: 642b3b82b20d
author: Richard Westhaver <ellis@rwest.io>
date: Wed, 04 Sep 2024 22:02:21 -0400
permissions: -rw-r--r--
description: parquet tweaks, import ox-man
1 ;;; gen.lisp --- Parquet Lisp Code Generator
2 
3 ;;
4 
5 ;;; Code:
6 (defpackage :dat/parquet/gen ;; not public API
7  (:use :cl :std :dat/proto :dat/json)
8  (:export :load-parquet))
9 
10 (in-package :dat/parquet/gen)
11 
12 (defparameter *parquet-json-file*
13  (or (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/parquet.json"))
14  (warn "*PARQUET-JSON-FILE* not found")))
15 
16 (defparameter *parquet-output-file*
17  #.(asdf:system-relative-pathname :dat #P"parquet/thrift.lisp"))
18 
19 (defvar *parquet-json* nil)
20 
21 (defun %parquet-json-enums ()
22  (json-getf *parquet-json* "enums"))
23 
24 (defun dat/parquet::parquet-json-enum-getf (name)
25  (json-getf
26  (find-if (lambda (x) (equal name (json-getf x "name"))) (%parquet-json-enums))
27  "members"))
28 
29 (defun dat/parquet::snakecase-name-to-lisp-name (string)
30  (string-upcase
31  (substitute #\- #\_ string)))
32 
33 (defun dat/parquet::camelcase-name-to-lisp-name (string)
34  (string-upcase
35  (with-output-to-string (name)
36  (loop for i from 0 below (length string)
37  for c across string
38  when (and (upper-case-p c) (not (zerop i)))
39  do (write-char #\- name)
40  do (write-char c name)))))
41 
42 (defvar *parquet-enums* nil)
43 
44 (defmacro define-parquet-enum (sym name)
45  `(progn
46  (defun ,(symbolicate "PARQUET-JSON-" sym) ()
47  (mapcar (lambda (x) (keywordicate (snakecase-name-to-lisp-name (json-getf x "name"))))
48  (parquet-json-enum-getf ,name)))
49  (defparameter ,(symbolicate
50  (concatenate 'string "*PARQUET-" (symbol-name sym) "*"))
51  (,(symbolicate "PARQUET-JSON-" sym)))))
52 
53 (labels ((parse-type-id (type-id)
54  (when type-id
55  (string-case (type-id :default nil)
56  ("bool" 'boolean)
57  ("byte" 'signed-byte)
58  ("i16" '(signed-byte 16))
59  ("i32" '(signed-byte 32))
60  ("i64" '(signed-byte 64))
61  ("double" 'double-float)
62  ("string" 'string)
63  ("list" 'vector)
64  ("binary" 'octet-vector)
65  ("set" 'vector)
66  ("enum" '(signed-byte 32))
67  ("union" 'union)
68  ("struct" 'struct))))
69  (%intern (name)
70  (if (stringp name)
71  (symbolicate
72  (cond
73  ((equal name "UUIDType") "PARQUET-UUID-TYPE")
74  (t (concatenate 'string
75  "PARQUET-"
76  (camelcase-name-to-lisp-name name)))))
77  name))
78  (parse-type (o)
79  (when o
80  (string-case ((json-getf o "typeId"))
81  ("union" (%intern (json-getf o "class")))
82  ("list"
83  (if-let ((elt (json-getf o "elemType" nil)))
84  (%intern (parse-type elt))
85  (parse-type-id (json-getf o "elemTypeId"))))
86  ("set"
87  (if-let ((elt (json-getf o "elemType" nil)))
88  (%intern (parse-type elt))
89  (parse-type-id (json-getf o "elemTypeId"))))
90  ("struct" (%intern (json-getf o "class")))
91  ("enum" (%intern (json-getf o "class")))))))
92  (defun convert-parquet-struct-field-type (field) ;; technically part of thrift type system
93  (let* ((type-id (parse-type-id (parquet-struct-field-type-id field)))
94  (type (parse-type (parquet-struct-field-type field)))
95  (required (parquet-struct-field-required field)))
96  (let ((ret (cond
97  ((eql 'vector type-id) `(vector ,type))
98  (t (or type type-id)))))
99  (if (equal "optional" required)
100  `(or null ,ret)
101  ret)))))
102 
103 (defparameter *parquet-structs* nil)
104 
105 (defstruct (parquet-struct
106  (:constructor make-parquet-struct (name doc exceptionp unionp fields)))
107  name doc exceptionp unionp (fields nil :type list))
108 
109 (defstruct (parquet-struct-field
110  (:constructor make-parquet-struct-field (key name type-id type doc required)))
111  key name type-id type doc required)
112 
113 (defun parquet-json-structs () ;; name doc isException isUnion fields
114  (mapcar
115  (lambda (s)
116  (let ((name (json-getf s "name"))
117  (doc (json-getf s "doc"))
118  (exceptionp (json-getf s "isException"))
119  (unionp (json-getf s "isUnion"))
120  (fields (loop for f in (json-getf s "fields")
121  collect
122  (let ((key (json-getf f "key"))
123  (name (json-getf f "name"))
124  (type-id (json-getf f "typeId"))
125  ;; json object - needs additional parsing
126  (type (json-getf f "type"))
127  (doc (json-getf f "doc"))
128  (required (json-getf f "required")))
129  (make-parquet-struct-field key name type-id type doc required)))))
130  (make-parquet-struct name doc exceptionp unionp fields)))
131  (json-getf *parquet-json* "structs")))
132 
133 (defun parquet-json-namespaces ()
134  (json-getf *parquet-json* "namespaces"))
135 
136 (eval-always
137  (defun init-parquet-json (&optional (file *parquet-json-file*))
138  (with-open-file (file file)
139  (setq *parquet-json* (json-read file)))
140  (setq *parquet-enums* (%parquet-json-enums))
141  (setq *parquet-structs* (parquet-json-structs))))
142 
143 ;;; CLOS
144 
145 ;; (defmethod print-object ((obj parquet-object) stream)
146 ;; "Output a Parquet object to a stream."
147 ;; (print-unreadable-object (obj stream :type t)))
148 
149 (defmacro define-parquet-class (name superclasses slots &rest options)
150  "Define a new subclass of PARQUET-OBJECT with NAME."
151  `(defclass ,name ,@(if-let ((s superclasses)) (list s) `((dat/parquet::parquet-object))) ,slots ,@options))
152 
153 ;;; Codegen
154 
155 ;; 8)
156 (eval-always
157  (defun %define-parquet-structs ()
158  "Define all known values in *PARQUET-STRUCTS* using DEFINE-PARQUET-CLASS (DEFCLASS)."
159  (loop for struct in *parquet-structs*
160  unless (null struct)
161  collect (let* ((name (parquet-struct-name struct))
162  (doc (parquet-struct-doc struct))
163  (fields (parquet-struct-fields struct))
164  (class-name (symbolicate (cond
165  ((equal name "UUIDType") "PARQUET-UUID-TYPE")
166  (t (concatenate 'string
167  "PARQUET-"
168  (camelcase-name-to-lisp-name name)))))))
169  `(progn
170  (defclass ,class-name (dat/parquet::parquet-object)
171  (,@(mapcar (lambda (f)
172  (let ((fdoc (parquet-struct-field-doc f))
173  (fname (snakecase-name-to-lisp-name
174  (parquet-struct-field-name f))))
175  `(,(symbolicate fname)
176  ,@(when fdoc `(:documentation ,fdoc))
177  :initarg ,(keywordicate fname)
178  ;; TODO 2024-07-12:
179  ,@(when (equal "optional" (parquet-struct-field-required f))
180  `(:initform nil))
181  ,@(when-let ((ty (convert-parquet-struct-field-type f)))
182  `(:type ,ty)))))
183  fields))
184  ,@(when doc `((:documentation ,doc)))))))))
185 
186 (defmacro define-parquet-type (name opts &body body)
187  "Define a parquet type with DEFTYPE which maps to LISP-TYPE."
188  `(deftype ,(symbolicate "PARQUET-" (substitute #\- #\_ name)) ,opts ,@body))
189 
190 (defun parse-parquet-thrift-definitions (&key (input *parquet-json-file*)
191  (output #.(asdf:system-relative-pathname :dat "parquet/thrift.lisp")))
192  (init-parquet-json input)
193  (with-open-file (defs output :direction :output :if-exists :supersede :if-does-not-exist :create)
194  (format defs ";;; ~a --- Parquet Thrift Definitions -*- buffer-read-only:t -*-
195 
196 ;; input = ~a
197 
198 ;; This file was generated automatically by
199 ;; DAT/PARQUET/GEN:PARSE-PARQUET-THRIFT-DEFINITIONS
200 
201 ;; Do not modify.
202 
203 ;;; Code:
204 (in-package :dat/parquet)" output input)
205  (format defs "~2%")
206  (let ((enums '((define-parquet-enum types "Type")
207  (define-parquet-enum converted-types "ConvertedType")
208  (define-parquet-enum field-repetition-types "FieldRepetitionType")
209  (define-parquet-enum encodings "Encoding")
210  (define-parquet-enum compression-codecs "CompressionCodec")
211  (define-parquet-enum page-types "PageType")
212  (define-parquet-enum boundary-orders "BoundaryOrder")))
213  (types '((define-parquet-type "BOOLEAN" () 'boolean)
214  (define-parquet-type "INT32" () '(signed-byte 32))
215  (define-parquet-type "INT64" () '(signed-byte 64))
216  (define-parquet-type "INT96" () '(signed-byte 96))
217  (define-parquet-type "FLOAT" () 'float)
218  (define-parquet-type "DOUBLE" () 'double-float)
219  (define-parquet-type "BYTE_ARRAY" (&optional size) `(octet-vector ,size))
220  (define-parquet-type "FIXED_LEN_BYTE_ARRAY" (size) `(octet-vector ,size))))
221  (structs (mapcar #'macroexpand-1 (%define-parquet-structs))))
222  ;; expands to a progn, so we just take the cdr
223  (dolist (en enums)
224  (dolist (f (cdr (macroexpand en)))
225  (write f :stream defs :case :downcase :readably t)
226  (terpri defs)))
227  (dolist (ty types)
228  (dolist (f (cdr (macroexpand ty)))
229  (write f :stream defs :case :downcase :readably t)
230  (terpri defs)))
231  (dolist (st structs)
232  (dolist (f (cdr (macroexpand st)))
233  (write f :stream defs :case :downcase :readably t)
234  (terpri defs))))))