Mercurial > core / lisp/lib/dat/parquet/gen.lisp
changeset 542: |
d83b4d25d5c9 |
parent: |
10c4bb778030
|
child: |
b88bd4b0a039 |
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Fri, 12 Jul 2024 22:58:59 -0400 |
permissions: |
-rw-r--r-- |
description: |
fixes |
1 ;;; gen.lisp --- Parquet Lisp Code Generator 6 (in-package :dat/parquet/gen) 7 (defparameter *parquet-json-file* 8 (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/parquet.json"))) 9 (defvar *parquet-json* nil) 10 (defun load-parquet-json (&optional (json-file *parquet-json-file*)) 11 (with-open-file (file json-file) 12 (setq *parquet-json* (json-read file)))) 14 (defun %parquet-json-enums () 15 (json-getf *parquet-json* "enums")) 17 (defun parquet-json-enum-getf (name) 19 (find-if (lambda (x) (equal name (json-getf x "name"))) (%parquet-json-enums)) 22 (defvar *parquet-enums* nil) 24 (defmacro def-parquet-enum (sym name) 26 (defun ,(symbolicate "PARQUET-JSON-" sym) () 27 (mapcar (lambda (x) (json-getf x "name")) (parquet-json-enum-getf ,name))) 28 (defparameter ,(intern 29 (concatenate 'string "*PARQUET-" (symbol-name sym) "*") 31 (,(symbolicate "PARQUET-JSON-" sym))))) 33 (defun camelcase-name-to-lisp-name (string) 35 (with-output-to-string (name) 36 (loop for i from 0 below (length string) 38 when (and (upper-case-p c) (not (zerop i))) 39 do (write-char #\- name) 40 do (write-char c name))))) 42 (defun snakecase-name-to-lisp-name (string) 44 (substitute #\- #\_ string))) 46 (labels ((parse-type-id (type-id) 47 (string-case (type-id :default nil) 50 ("i16" '(signed-byte 16)) 51 ("i32" '(signed-byte 32)) 52 ("i64" '(signed-byte 64)) 53 ("double" 'double-float) 56 ("binary" 'octet-vector) 59 (let ((name (string-case ((json-getf o "typeId")) 60 ("union" (json-getf o "class")) 61 ("struct" (json-getf o "class")) 62 ("enum" (json-getf o "class"))))) 65 ((equal name "UUIDType") "PARQUET-UUID-TYPE") 66 (t (concatenate 'string 68 (camelcase-name-to-lisp-name name)))) 70 (defun convert-parquet-struct-field-type (field) ;; technically part of thrift type system 71 (let* ((type-id (parquet-struct-field-type-id field)) 72 (type (parquet-struct-field-type field)) 73 (required (parquet-struct-field-required field)) 74 (unit-type (or (when type-id (parse-type-id type-id)) (when type (parse-type type))))) 75 (if (and (equal "optional" required) (not (equal unit-type 'list))) ;; (listp nil) = t 79 (defun parquet-json-enums () 81 (def-parquet-enum types "Type") 82 (def-parquet-enum converted-types "ConvertedType") 83 (def-parquet-enum field-repetition-types "FieldRepetitionType") 84 (def-parquet-enum encodings "Encoding") 85 (def-parquet-enum compression-codecs "CompressionCodec") 86 (def-parquet-enum page-types "PageType") 87 (def-parquet-enum boundary-orders "BoundaryOrder"))) 89 (defvar *parquet-structs* nil) 90 (defstruct (parquet-struct 91 (:constructor make-parquet-struct (name doc exceptionp unionp fields))) 92 name doc exceptionp unionp (fields nil :type list)) 94 (defstruct (parquet-struct-field 95 (:constructor make-parquet-struct-field (key name type-id type doc required))) 96 key name type-id type doc required) 98 (defun parquet-destruct-field (field) 99 (list (parquet-struct-field-name field) 100 (parquet-struct-field-key field) 101 (parquet-struct-field-doc field) 102 (parquet-struct-field-type-id field) 103 (parquet-struct-field-type field) 104 (parquet-struct-field-required field))) 106 (defun parquet-destruct (struct) 107 (list (parquet-struct-name struct) 108 (parquet-struct-doc struct) 109 (parquet-struct-unionp struct) 110 (parquet-struct-exceptionp struct) 111 (mapcar #'parquet-destruct-field (parquet-struct-fields struct)))) 113 (defun parquet-json-structs () ;; name doc isException isUnion fields 116 (let ((name (json-getf s "name")) 117 (doc (json-getf s "doc")) 118 (exceptionp (json-getf s "isException")) 119 (unionp (json-getf s "isUnion")) 120 (fields (loop for f in (json-getf s "fields") 122 (let ((key (json-getf f "key")) 123 (name (json-getf f "name")) 124 (type-id (json-getf f "typeId")) 125 ;; json object - needs additional parsing 126 (type (json-getf f "type")) 127 (doc (json-getf f "doc")) 128 (required (json-getf f "required"))) 129 (make-parquet-struct-field key name type-id type doc required))))) 130 (make-parquet-struct name doc exceptionp unionp fields))) 131 (json-getf *parquet-json* "structs"))) 133 (defun parquet-json-namespaces () 134 (json-getf *parquet-json* "namespaces")) 136 (defun init-parquet-json (&optional (file *parquet-json-file*)) 137 (load-parquet-json file) 138 (setq *parquet-enums* (parquet-json-enums)) 139 (setq *parquet-structs* (parquet-json-structs))) 142 (defclass parquet-object () ()) 144 ;; (defmethod print-object ((obj parquet-object) stream) 145 ;; "Output a Parquet object to a stream." 146 ;; (print-unreadable-object (obj stream :type t))) 148 (defmacro define-parquet-class (name superclasses slots &rest options) 149 "Define a new subclass of PARQUET-OBJECT with NAME." 150 `(defclass ,name ,@(if-let ((s superclasses)) (list s) `((parquet-object))) ,slots ,@options)) 156 (defun %define-parquet-structs () 157 "Define all known values in *PARQUET-STRUCTS* using DEFINE-PARQUET-CLASS (DEFCLASS)." 158 (loop for struct in *parquet-structs* 160 collect (let ((name (parquet-struct-name struct)) 161 (doc (parquet-struct-doc struct)) 162 (fields (parquet-struct-fields struct))) 163 `(define-parquet-class ,(intern (cond 164 ((equal name "UUIDType") "PARQUET-UUID-TYPE") 165 (t (concatenate 'string 167 (camelcase-name-to-lisp-name name)))) 169 (parquet-struct-object) 170 (,@(mapcar (lambda (f) 171 (let ((fdoc (parquet-struct-field-doc f)) 172 (fname (snakecase-name-to-lisp-name 173 (parquet-struct-field-name f)))) 174 `(,(intern fname :dat/parquet) 175 ,@(when fdoc `(:documentation ,fdoc)) 176 :initarg ,(keywordicate fname) 178 ,@(when (equal "optional" (parquet-struct-field-required f)) 180 ,@(when-let ((ty (convert-parquet-struct-field-type f))) 183 ,@(when doc `((:documentation ,doc)))))))) 185 (defmacro define-parquet-structs () 187 ,@(%define-parquet-structs))) 189 (defmacro define-parquet-type (name opts &body body) 190 "Define a parquet type with DEFTYPE which maps to LISP-TYPE." 191 `(deftype ,(intern (concatenate 'string "PARQUET-" (substitute #\- #\_ name)) :dat/parquet) ,opts ,@body)) 193 (defun define-parquet-types () 194 "Define all known values in *PARQUET-TYPES* using DEFINE-PARQUET-TYPE (DEFTYPE)." 196 (define-parquet-type "BOOLEAN" () 'boolean) 197 (define-parquet-type "INT32" () '(signed-byte 32)) 198 (define-parquet-type "INT64" () '(signed-byte 64)) 199 (define-parquet-type "INT96" () '(signed-byte 96)) 200 (define-parquet-type "FLOAT" () 'float) 201 (define-parquet-type "DOUBLE" () 'double-float) 202 (define-parquet-type "BYTE_ARRAY" (&optional size) `(octet-vector ,size)) 203 (define-parquet-type "FIXED_LEN_BYTE_ARRAY" (size) `(octet-vector ,size)))) 205 (defun load-parquet (&key (file *parquet-json-file*)) 206 (init-parquet-json file) 207 (with-package (:dat/parquet) 208 (define-parquet-class parquet-enum-object () ()) 209 (define-parquet-class parquet-struct-object () ()) 210 (export (define-parquet-types)) 211 (export (mapcar 'class-name (define-parquet-structs))) 212 (export *parquet-enums*)))