Mercurial > core / lisp/lib/dat/parquet/gen.lisp
changeset 550: |
4d34907c69eb |
parent: |
32bd859533b3
|
child: |
849f72b72b41 |
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Tue, 16 Jul 2024 21:52:09 -0400 |
permissions: |
-rw-r--r-- |
description: |
more work on tcompact/thrift, fixed type info in parquet-struct-objects |
1 ;;; gen.lisp --- Parquet Lisp Code Generator 6 (defpackage :dat/parquet/gen ;; not public API 7 (:use :cl :std :dat/proto :dat/json) 8 (:export :load-parquet)) 10 (in-package :dat/parquet/gen) 11 (defparameter *parquet-json-file* 12 (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/parquet.json"))) 13 (defvar *parquet-json* nil) 14 (defun load-parquet-json (&optional (json-file *parquet-json-file*)) 15 (with-open-file (file json-file) 16 (setq *parquet-json* (json-read file)))) 18 (defun %parquet-json-enums () 19 (json-getf *parquet-json* "enums")) 21 (defun parquet-json-enum-getf (name) 23 (find-if (lambda (x) (equal name (json-getf x "name"))) (%parquet-json-enums)) 26 (defvar *parquet-enums* nil) 28 (defmacro def-parquet-enum (sym name) 30 (defun ,(symbolicate "PARQUET-JSON-" sym) () 31 (mapcar (lambda (x) (keywordicate (snakecase-name-to-lisp-name (json-getf x "name")))) 32 (parquet-json-enum-getf ,name))) 33 (defparameter ,(intern 34 (concatenate 'string "*PARQUET-" (symbol-name sym) "*") 36 (,(symbolicate "PARQUET-JSON-" sym))))) 38 (defun camelcase-name-to-lisp-name (string) 40 (with-output-to-string (name) 41 (loop for i from 0 below (length string) 43 when (and (upper-case-p c) (not (zerop i))) 44 do (write-char #\- name) 45 do (write-char c name))))) 47 (defun snakecase-name-to-lisp-name (string) 49 (substitute #\- #\_ string))) 51 (labels ((parse-type-id (type-id) 53 (string-case (type-id :default nil) 56 ("i16" '(signed-byte 16)) 57 ("i32" '(signed-byte 32)) 58 ("i64" '(signed-byte 64)) 59 ("double" 'double-float) 62 ("binary" 'octet-vector) 64 ("enum" '(signed-byte 32)) 71 ((equal name "UUIDType") "PARQUET-UUID-TYPE") 72 (t (concatenate 'string 74 (camelcase-name-to-lisp-name name)))) 79 (string-case ((json-getf o "typeId")) 80 ("union" (%intern (json-getf o "class"))) 82 (if-let ((elt (json-getf o "elemType" nil))) 83 (%intern (parse-type elt)) 84 (parse-type-id (json-getf o "elemTypeId")))) 86 (if-let ((elt (json-getf o "elemType" nil))) 87 (%intern (parse-type elt)) 88 (parse-type-id (json-getf o "elemTypeId")))) 89 ("struct" (%intern (json-getf o "class"))) 90 ("enum" (%intern (json-getf o "class"))))))) 91 (defun convert-parquet-struct-field-type (field) ;; technically part of thrift type system 92 (let* ((type-id (parse-type-id (parquet-struct-field-type-id field))) 93 (type (parse-type (parquet-struct-field-type field))) 94 (required (parquet-struct-field-required field))) 96 ((eql 'vector type-id) `(vector ,type)) 97 (t (or type type-id))))) 98 (if (equal "optional" required) 102 (defun parquet-json-enums () 104 (def-parquet-enum types "Type") 105 (def-parquet-enum converted-types "ConvertedType") 106 (def-parquet-enum field-repetition-types "FieldRepetitionType") 107 (def-parquet-enum encodings "Encoding") 108 (def-parquet-enum compression-codecs "CompressionCodec") 109 (def-parquet-enum page-types "PageType") 110 (def-parquet-enum boundary-orders "BoundaryOrder"))) 113 (defvar *parquet-structs* nil)) 114 (defstruct (parquet-struct 115 (:constructor make-parquet-struct (name doc exceptionp unionp fields))) 116 name doc exceptionp unionp (fields nil :type list)) 118 (defstruct (parquet-struct-field 119 (:constructor make-parquet-struct-field (key name type-id type doc required))) 120 key name type-id type doc required) 122 (defun parquet-json-structs () ;; name doc isException isUnion fields 125 (let ((name (json-getf s "name")) 126 (doc (json-getf s "doc")) 127 (exceptionp (json-getf s "isException")) 128 (unionp (json-getf s "isUnion")) 129 (fields (loop for f in (json-getf s "fields") 131 (let ((key (json-getf f "key")) 132 (name (json-getf f "name")) 133 (type-id (json-getf f "typeId")) 134 ;; json object - needs additional parsing 135 (type (json-getf f "type")) 136 (doc (json-getf f "doc")) 137 (required (json-getf f "required"))) 138 (make-parquet-struct-field key name type-id type doc required))))) 139 (make-parquet-struct name doc exceptionp unionp fields))) 140 (json-getf *parquet-json* "structs"))) 142 (defun parquet-json-namespaces () 143 (json-getf *parquet-json* "namespaces")) 145 (defun init-parquet-json (&optional (file *parquet-json-file*)) 146 (load-parquet-json file) 147 (setq *parquet-enums* (parquet-json-enums)) 148 (setq *parquet-structs* (parquet-json-structs))) 152 ;; (defmethod print-object ((obj parquet-object) stream) 153 ;; "Output a Parquet object to a stream." 154 ;; (print-unreadable-object (obj stream :type t))) 156 (defmacro define-parquet-class (name superclasses slots &rest options) 157 "Define a new subclass of PARQUET-OBJECT with NAME." 158 `(defclass ,name ,@(if-let ((s superclasses)) (list s) `((dat/parquet::parquet-object))) ,slots ,@options)) 164 (defun %define-parquet-structs () 165 "Define all known values in *PARQUET-STRUCTS* using DEFINE-PARQUET-CLASS (DEFCLASS)." 166 (loop for struct in *parquet-structs* 168 collect (let* ((name (parquet-struct-name struct)) 169 (doc (parquet-struct-doc struct)) 170 (fields (parquet-struct-fields struct)) 171 (class-name (intern (cond 172 ((equal name "UUIDType") "PARQUET-UUID-TYPE") 173 (t (concatenate 'string 175 (camelcase-name-to-lisp-name name)))) 178 (define-parquet-class ,class-name 179 (dat/parquet::parquet-struct-object) 180 (,@(mapcar (lambda (f) 181 (let ((fdoc (parquet-struct-field-doc f)) 182 (fname (snakecase-name-to-lisp-name 183 (parquet-struct-field-name f)))) 184 `(,(intern fname :dat/parquet) 185 ,@(when fdoc `(:documentation ,fdoc)) 186 :initarg ,(keywordicate fname) 188 ,@(when (equal "optional" (parquet-struct-field-required f)) 190 ,@(when-let ((ty (convert-parquet-struct-field-type f))) 193 ,@(when doc `((:documentation ,doc)))) 196 (defmacro define-parquet-structs () 198 ,@(%define-parquet-structs))) 200 (defmacro define-parquet-type (name opts &body body) 201 "Define a parquet type with DEFTYPE which maps to LISP-TYPE." 202 `(deftype ,(intern (concatenate 'string "PARQUET-" (substitute #\- #\_ name)) :dat/parquet) ,opts ,@body)) 204 (defun define-parquet-types () 205 "Define all known values in *PARQUET-TYPES* using DEFINE-PARQUET-TYPE (DEFTYPE)." 207 (define-parquet-type "BOOLEAN" () 'boolean) 208 (define-parquet-type "INT32" () '(signed-byte 32)) 209 (define-parquet-type "INT64" () '(signed-byte 64)) 210 (define-parquet-type "INT96" () '(signed-byte 96)) 211 (define-parquet-type "FLOAT" () 'float) 212 (define-parquet-type "DOUBLE" () 'double-float) 213 (define-parquet-type "BYTE_ARRAY" (&optional size) `(octet-vector ,size)) 214 (define-parquet-type "FIXED_LEN_BYTE_ARRAY" (size) `(octet-vector ,size)))) 216 (defun load-parquet (&key (file *parquet-json-file*)) 217 (init-parquet-json file) 218 (with-package (:dat/parquet) 219 (let ((types (define-parquet-types))) 221 (export (define-parquet-structs)) 222 (export *parquet-enums*)))