Mercurial > core / lisp/lib/dat/parquet/gen.lisp
changeset 544: |
ec1d4d544c36 |
parent: |
b88bd4b0a039
|
child: |
b57066450cfa |
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Sat, 13 Jul 2024 18:18:01 -0400 |
permissions: |
-rw-r--r-- |
description: |
parquet expansion, init leb128, add little-endian octet encoders |
1 ;;; gen.lisp --- Parquet Lisp Code Generator 6 (defpackage :dat/parquet/gen ;; not public API 7 (:use :cl :std :dat/proto :dat/json) 8 (:export :load-parquet)) 10 (in-package :dat/parquet/gen) 11 (defparameter *parquet-json-file* 12 (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/parquet.json"))) 13 (defvar *parquet-json* nil) 14 (defun load-parquet-json (&optional (json-file *parquet-json-file*)) 15 (with-open-file (file json-file) 16 (setq *parquet-json* (json-read file)))) 18 (defun %parquet-json-enums () 19 (json-getf *parquet-json* "enums")) 21 (defun parquet-json-enum-getf (name) 23 (find-if (lambda (x) (equal name (json-getf x "name"))) (%parquet-json-enums)) 26 (defvar *parquet-enums* nil) 28 (defmacro def-parquet-enum (sym name) 30 (defun ,(symbolicate "PARQUET-JSON-" sym) () 31 (mapcar (lambda (x) (keywordicate (snakecase-name-to-lisp-name (json-getf x "name")))) 32 (parquet-json-enum-getf ,name))) 33 (defparameter ,(intern 34 (concatenate 'string "*PARQUET-" (symbol-name sym) "*") 36 (,(symbolicate "PARQUET-JSON-" sym))))) 38 (defun camelcase-name-to-lisp-name (string) 40 (with-output-to-string (name) 41 (loop for i from 0 below (length string) 43 when (and (upper-case-p c) (not (zerop i))) 44 do (write-char #\- name) 45 do (write-char c name))))) 47 (defun snakecase-name-to-lisp-name (string) 49 (substitute #\- #\_ string))) 51 (labels ((parse-type-id (type-id) 52 (string-case (type-id :default nil) 55 ("i16" '(signed-byte 16)) 56 ("i32" '(signed-byte 32)) 57 ("i64" '(signed-byte 64)) 58 ("double" 'double-float) 61 ("binary" 'octet-vector) 64 (let ((name (string-case ((json-getf o "typeId")) 65 ("union" (json-getf o "class")) 66 ("struct" (json-getf o "class")) 67 ("enum" (json-getf o "class"))))) 70 ((equal name "UUIDType") "PARQUET-UUID-TYPE") 71 (t (concatenate 'string 73 (camelcase-name-to-lisp-name name)))) 75 (defun convert-parquet-struct-field-type (field) ;; technically part of thrift type system 76 (let* ((type-id (parquet-struct-field-type-id field)) 77 (type (parquet-struct-field-type field)) 78 (required (parquet-struct-field-required field)) 79 (unit-type (or (when type-id (parse-type-id type-id)) (when type (parse-type type))))) 80 (if (and (equal "optional" required) (not (equal unit-type 'list))) ;; (listp nil) = t 84 (defun parquet-json-enums () 86 (def-parquet-enum types "Type") 87 (def-parquet-enum converted-types "ConvertedType") 88 (def-parquet-enum field-repetition-types "FieldRepetitionType") 89 (def-parquet-enum encodings "Encoding") 90 (def-parquet-enum compression-codecs "CompressionCodec") 91 (def-parquet-enum page-types "PageType") 92 (def-parquet-enum boundary-orders "BoundaryOrder"))) 94 (defvar *parquet-structs* nil) 95 (defstruct (parquet-struct 96 (:constructor make-parquet-struct (name doc exceptionp unionp fields))) 97 name doc exceptionp unionp (fields nil :type list)) 99 (defstruct (parquet-struct-field 100 (:constructor make-parquet-struct-field (key name type-id type doc required))) 101 key name type-id type doc required) 103 (defun parquet-destruct-field (field) 104 (list (parquet-struct-field-name field) 105 (parquet-struct-field-key field) 106 (parquet-struct-field-doc field) 107 (parquet-struct-field-type-id field) 108 (parquet-struct-field-type field) 109 (parquet-struct-field-required field))) 111 (defun parquet-destruct (struct) 112 (list (parquet-struct-name struct) 113 (parquet-struct-doc struct) 114 (parquet-struct-unionp struct) 115 (parquet-struct-exceptionp struct) 116 (mapcar #'parquet-destruct-field (parquet-struct-fields struct)))) 118 (defun parquet-json-structs () ;; name doc isException isUnion fields 121 (let ((name (json-getf s "name")) 122 (doc (json-getf s "doc")) 123 (exceptionp (json-getf s "isException")) 124 (unionp (json-getf s "isUnion")) 125 (fields (loop for f in (json-getf s "fields") 127 (let ((key (json-getf f "key")) 128 (name (json-getf f "name")) 129 (type-id (json-getf f "typeId")) 130 ;; json object - needs additional parsing 131 (type (json-getf f "type")) 132 (doc (json-getf f "doc")) 133 (required (json-getf f "required"))) 134 (make-parquet-struct-field key name type-id type doc required))))) 135 (make-parquet-struct name doc exceptionp unionp fields))) 136 (json-getf *parquet-json* "structs"))) 138 (defun parquet-json-namespaces () 139 (json-getf *parquet-json* "namespaces")) 141 (defun init-parquet-json (&optional (file *parquet-json-file*)) 142 (load-parquet-json file) 143 (setq *parquet-enums* (parquet-json-enums)) 144 (setq *parquet-structs* (parquet-json-structs))) 147 (defclass parquet-object () ()) 149 ;; (defmethod print-object ((obj parquet-object) stream) 150 ;; "Output a Parquet object to a stream." 151 ;; (print-unreadable-object (obj stream :type t))) 153 (defmacro define-parquet-class (name superclasses slots &rest options) 154 "Define a new subclass of PARQUET-OBJECT with NAME." 155 `(defclass ,name ,@(if-let ((s superclasses)) (list s) `((parquet-object))) ,slots ,@options)) 161 (defun %define-parquet-structs () 162 "Define all known values in *PARQUET-STRUCTS* using DEFINE-PARQUET-CLASS (DEFCLASS)." 163 (loop for struct in *parquet-structs* 165 collect (let ((name (parquet-struct-name struct)) 166 (doc (parquet-struct-doc struct)) 167 (fields (parquet-struct-fields struct))) 168 `(define-parquet-class ,(intern (cond 169 ((equal name "UUIDType") "PARQUET-UUID-TYPE") 170 (t (concatenate 'string 172 (camelcase-name-to-lisp-name name)))) 174 (parquet-struct-object) 175 (,@(mapcar (lambda (f) 176 (let ((fdoc (parquet-struct-field-doc f)) 177 (fname (snakecase-name-to-lisp-name 178 (parquet-struct-field-name f)))) 179 `(,(intern fname :dat/parquet) 180 ,@(when fdoc `(:documentation ,fdoc)) 181 :initarg ,(keywordicate fname) 183 ,@(when (equal "optional" (parquet-struct-field-required f)) 185 ,@(when-let ((ty (convert-parquet-struct-field-type f))) 188 ,@(when doc `((:documentation ,doc)))))))) 190 (defmacro define-parquet-structs () 192 ,@(%define-parquet-structs))) 194 (defmacro define-parquet-type (name opts &body body) 195 "Define a parquet type with DEFTYPE which maps to LISP-TYPE." 196 `(deftype ,(intern (concatenate 'string "PARQUET-" (substitute #\- #\_ name)) :dat/parquet) ,opts ,@body)) 198 (defun define-parquet-types () 199 "Define all known values in *PARQUET-TYPES* using DEFINE-PARQUET-TYPE (DEFTYPE)." 201 (define-parquet-type "BOOLEAN" () 'boolean) 202 (define-parquet-type "INT32" () '(signed-byte 32)) 203 (define-parquet-type "INT64" () '(signed-byte 64)) 204 (define-parquet-type "INT96" () '(signed-byte 96)) 205 (define-parquet-type "FLOAT" () 'float) 206 (define-parquet-type "DOUBLE" () 'double-float) 207 (define-parquet-type "BYTE_ARRAY" (&optional size) `(octet-vector ,size)) 208 (define-parquet-type "FIXED_LEN_BYTE_ARRAY" (size) `(octet-vector ,size)))) 210 (defun load-parquet (&key (file *parquet-json-file*)) 211 (init-parquet-json file) 212 (with-package (:dat/parquet) 213 (define-parquet-class parquet-struct-object () ()) 214 (let ((types (define-parquet-types))) 216 (deftype dat/parquet::parquet-type (&optional (designator octet-vector) optional) 218 (if (eql designator 'list) 220 `(or null ,designator)) 222 (export (mapcar 'class-name (define-parquet-structs))) 223 (export *parquet-enums*)))