changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/dat/parquet/gen.lisp

changeset 640: 642b3b82b20d
parent: b88bf15f60d0
author: Richard Westhaver <ellis@rwest.io>
date: Sun, 08 Sep 2024 17:35:03 -0400
permissions: -rw-r--r--
description: thrift fixes, org-get-with-inheritance init
1 ;;; gen.lisp --- Parquet Lisp Code Generator
2 
3 ;;
4 
5 ;;; Code:
6 (defpackage :dat/parquet/gen ;; not public API
7  (:use :cl :std :dat/proto :dat/json)
8  (:export :load-parquet))
9 
10 (in-package :dat/parquet/gen)
11 
12 (defparameter *parquet-json-file*
13  (or (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/parquet.json"))
14  (warn "*PARQUET-JSON-FILE* not found")))
15 
16 (defparameter *parquet-output-file*
17  #.(asdf:system-relative-pathname :dat #P"parquet/thrift.lisp"))
18 
19 (defvar *parquet-json* nil)
20 
21 (eval-always
22  (defun %parquet-json-enums ()
23  (json-getf *parquet-json* "enums"))
24 
25  (defun dat/parquet::parquet-json-enum-getf (name)
26  (json-getf
27  (find-if (lambda (x) (equal name (json-getf x "name"))) (%parquet-json-enums))
28  "members"))
29 
30  (defun dat/parquet::snakecase-name-to-lisp-name (string)
31  (string-upcase
32  (substitute #\- #\_ string)))
33 
34  (defun dat/parquet::camelcase-name-to-lisp-name (string)
35  (string-upcase
36  (with-output-to-string (name)
37  (loop for i from 0 below (length string)
38  for c across string
39  when (and (upper-case-p c) (not (zerop i)))
40  do (write-char #\- name)
41  do (write-char c name))))))
42 
43 (defvar *parquet-enums* nil)
44 
45 (defmacro define-parquet-enum (sym name)
46  `(progn
47  (defvar ,(symbolicate "*PARQUET-JSON-" sym "*")
48  ',(mapcar (lambda (x) (keywordicate (dat/parquet::snakecase-name-to-lisp-name (json-getf x "name"))))
49  (dat/parquet::parquet-json-enum-getf name)))))
50 
51 (labels ((parse-type-id (type-id)
52  (when type-id
53  (string-case (type-id :default nil)
54  ("bool" 'boolean)
55  ("byte" 'signed-byte)
56  ("i16" '(signed-byte 16))
57  ("i32" '(signed-byte 32))
58  ("i64" '(signed-byte 64))
59  ("double" 'double-float)
60  ("string" 'string)
61  ("list" 'vector)
62  ("binary" 'octet-vector)
63  ("set" 'vector)
64  ("enum" '(signed-byte 32))
65  ("union" 'union)
66  ("struct" 'struct))))
67  (%intern (name)
68  (if (stringp name)
69  (symbolicate
70  (cond
71  ((equal name "UUIDType") "PARQUET-UUID-TYPE")
72  (t (concatenate 'string
73  "PARQUET-"
74  (dat/parquet::camelcase-name-to-lisp-name name)))))
75  name))
76  (parse-type (o)
77  (when o
78  (string-case ((json-getf o "typeId"))
79  ("union" (%intern (json-getf o "class")))
80  ("list"
81  (if-let ((elt (json-getf o "elemType" nil)))
82  (%intern (parse-type elt))
83  (parse-type-id (json-getf o "elemTypeId"))))
84  ("set"
85  (if-let ((elt (json-getf o "elemType" nil)))
86  (%intern (parse-type elt))
87  (parse-type-id (json-getf o "elemTypeId"))))
88  ("struct" (%intern (json-getf o "class")))
89  ("enum" (%intern (json-getf o "class")))))))
90  (defun convert-parquet-struct-field-type (field) ;; technically part of thrift type system
91  (let* ((type-id (parse-type-id (parquet-struct-field-type-id field)))
92  (type (parse-type (parquet-struct-field-type field)))
93  (required (parquet-struct-field-required field)))
94  (let ((ret (cond
95  ((eql 'vector type-id) `(vector ,type))
96  (t (or type type-id)))))
97  (if (equal "optional" required)
98  `(or null ,ret)
99  ret)))))
100 
101 (defparameter *parquet-structs* nil)
102 
103 (defstruct (parquet-struct
104  (:constructor make-parquet-struct (name doc exceptionp unionp fields)))
105  name doc exceptionp unionp (fields nil :type list))
106 
107 (defstruct (parquet-struct-field
108  (:constructor make-parquet-struct-field (key name type-id type doc required)))
109  key name type-id type doc required)
110 
111 (defun parquet-json-structs () ;; name doc isException isUnion fields
112  (mapcar
113  (lambda (s)
114  (let ((name (json-getf s "name"))
115  (doc (json-getf s "doc"))
116  (exceptionp (json-getf s "isException"))
117  (unionp (json-getf s "isUnion"))
118  (fields (loop for f in (json-getf s "fields")
119  collect
120  (let ((key (json-getf f "key"))
121  (name (json-getf f "name"))
122  (type-id (json-getf f "typeId"))
123  ;; json object - needs additional parsing
124  (type (json-getf f "type"))
125  (doc (json-getf f "doc"))
126  (required (json-getf f "required")))
127  (make-parquet-struct-field key name type-id type doc required)))))
128  (make-parquet-struct name doc exceptionp unionp fields)))
129  (json-getf *parquet-json* "structs")))
130 
131 (defun parquet-json-namespaces ()
132  (json-getf *parquet-json* "namespaces"))
133 
134 (eval-always
135  (defun init-parquet-json (&optional (file *parquet-json-file*))
136  (with-open-file (file file)
137  (setq *parquet-json* (json-read file)))
138  (setq *parquet-enums* (%parquet-json-enums))
139  (setq *parquet-structs* (parquet-json-structs))))
140 
141 ;;; CLOS
142 
143 ;; (defmethod print-object ((obj parquet-object) stream)
144 ;; "Output a Parquet object to a stream."
145 ;; (print-unreadable-object (obj stream :type t)))
146 
147 (defmacro define-parquet-class (name superclasses slots &rest options)
148  "Define a new subclass of PARQUET-OBJECT with NAME."
149  `(defclass ,name ,@(if-let ((s superclasses)) (list s) `((dat/parquet::parquet-object))) ,slots ,@options))
150 
151 ;;; Codegen
152 
153 ;; 8)
154 (eval-always
155  (defun %define-parquet-structs ()
156  "Define all known values in *PARQUET-STRUCTS* using DEFINE-PARQUET-CLASS (DEFCLASS)."
157  (loop for struct in *parquet-structs*
158  unless (null struct)
159  collect (let* ((name (parquet-struct-name struct))
160  (doc (parquet-struct-doc struct))
161  (fields (parquet-struct-fields struct))
162  (class-name (symbolicate (cond
163  ((equal name "UUIDType") "PARQUET-UUID-TYPE")
164  (t (concatenate 'string
165  "PARQUET-"
166  (dat/parquet::camelcase-name-to-lisp-name name)))))))
167  `(progn
168  (defclass ,class-name (dat/parquet::parquet-object)
169  (,@(mapcar (lambda (f)
170  (let ((fdoc (parquet-struct-field-doc f))
171  (fname (dat/parquet::snakecase-name-to-lisp-name
172  (parquet-struct-field-name f))))
173  `(,(symbolicate fname)
174  ,@(when fdoc `(:documentation ,fdoc))
175  :initarg ,(keywordicate fname)
176  ;; TODO 2024-07-12:
177  ,@(when (equal "optional" (parquet-struct-field-required f))
178  `(:initform nil))
179  ,@(when-let ((ty (convert-parquet-struct-field-type f)))
180  `(:type ,ty)))))
181  fields))
182  ,@(when doc `((:documentation ,doc)))))))))
183 
184 (defmacro define-parquet-type (name opts &body body)
185  "Define a parquet type with DEFTYPE which maps to LISP-TYPE."
186  `(progn (deftype ,(symbolicate "PARQUET-" (substitute #\- #\_ name)) ,opts ,@body)))
187 
188 (defun parse-parquet-thrift-definitions (&key (input *parquet-json-file*)
189  (output #.(asdf:system-relative-pathname :dat "parquet/thrift.lisp")))
190  (init-parquet-json input)
191  (with-open-file (defs output :direction :output :if-exists :supersede :if-does-not-exist :create)
192  (format defs ";;; ~a --- Parquet Thrift Definitions -*- buffer-read-only:t -*-
193 
194 ;; input = ~a
195 
196 ;; This file was generated automatically by
197 ;; DAT/PARQUET/GEN:PARSE-PARQUET-THRIFT-DEFINITIONS
198 
199 ;; Do not modify.
200 
201 ;;; Code:
202 (in-package :dat/parquet)" output input)
203  (format defs "~2%")
204  (let ((enums '((define-parquet-enum types "Type")
205  (define-parquet-enum converted-types "ConvertedType")
206  (define-parquet-enum field-repetition-types "FieldRepetitionType")
207  (define-parquet-enum encodings "Encoding")
208  (define-parquet-enum compression-codecs "CompressionCodec")
209  (define-parquet-enum page-types "PageType")
210  (define-parquet-enum boundary-orders "BoundaryOrder")))
211  (types '((define-parquet-type "BOOLEAN" () 'boolean)
212  (define-parquet-type "INT32" () '(signed-byte 32))
213  (define-parquet-type "INT64" () '(signed-byte 64))
214  (define-parquet-type "INT96" () '(signed-byte 96))
215  (define-parquet-type "FLOAT" () 'float)
216  (define-parquet-type "DOUBLE" () 'double-float)
217  (define-parquet-type "BYTE_ARRAY" (&optional size) `(octet-vector ,size))
218  (define-parquet-type "FIXED_LEN_BYTE_ARRAY" (size) `(octet-vector ,size))))
219  (structs (mapcar #'macroexpand-1 (%define-parquet-structs))))
220  ;; expands to a progn, so we just take the cdr
221  (dolist (en enums)
222  (dolist (f (cdr (macroexpand en)))
223  (write f :stream defs :case :downcase :readably t)
224  (terpri defs)))
225  (dolist (ty types)
226  (dolist (f (cdr (macroexpand ty)))
227  (write f :stream defs :case :downcase :readably t)
228  (terpri defs)))
229  (dolist (st structs)
230  (dolist (f (cdr (macroexpand st)))
231  (write f :stream defs :case :downcase :readably t)
232  (terpri defs))))))