Mercurial > core / lisp/lib/dat/parquet/gen.lisp
changeset 637: |
b88bf15f60d0 |
parent: |
849f72b72b41
|
child: |
642b3b82b20d |
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Wed, 04 Sep 2024 22:02:21 -0400 |
permissions: |
-rw-r--r-- |
description: |
parquet tweaks, import ox-man |
1 ;;; gen.lisp --- Parquet Lisp Code Generator 6 (defpackage :dat/parquet/gen ;; not public API 7 (:use :cl :std :dat/proto :dat/json) 8 (:export :load-parquet)) 10 (in-package :dat/parquet/gen) 12 (defparameter *parquet-json-file* 13 (or (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/parquet.json")) 14 (warn "*PARQUET-JSON-FILE* not found"))) 16 (defparameter *parquet-output-file* 17 #.(asdf:system-relative-pathname :dat #P"parquet/thrift.lisp")) 19 (defvar *parquet-json* nil) 21 (defun %parquet-json-enums () 22 (json-getf *parquet-json* "enums")) 24 (defun dat/parquet::parquet-json-enum-getf (name) 26 (find-if (lambda (x) (equal name (json-getf x "name"))) (%parquet-json-enums)) 29 (defun dat/parquet::snakecase-name-to-lisp-name (string) 31 (substitute #\- #\_ string))) 33 (defun dat/parquet::camelcase-name-to-lisp-name (string) 35 (with-output-to-string (name) 36 (loop for i from 0 below (length string) 38 when (and (upper-case-p c) (not (zerop i))) 39 do (write-char #\- name) 40 do (write-char c name))))) 42 (defvar *parquet-enums* nil) 44 (defmacro define-parquet-enum (sym name) 46 (defun ,(symbolicate "PARQUET-JSON-" sym) () 47 (mapcar (lambda (x) (keywordicate (snakecase-name-to-lisp-name (json-getf x "name")))) 48 (parquet-json-enum-getf ,name))) 49 (defparameter ,(symbolicate 50 (concatenate 'string "*PARQUET-" (symbol-name sym) "*")) 51 (,(symbolicate "PARQUET-JSON-" sym))))) 53 (labels ((parse-type-id (type-id) 55 (string-case (type-id :default nil) 58 ("i16" '(signed-byte 16)) 59 ("i32" '(signed-byte 32)) 60 ("i64" '(signed-byte 64)) 61 ("double" 'double-float) 64 ("binary" 'octet-vector) 66 ("enum" '(signed-byte 32)) 73 ((equal name "UUIDType") "PARQUET-UUID-TYPE") 74 (t (concatenate 'string 76 (camelcase-name-to-lisp-name name))))) 80 (string-case ((json-getf o "typeId")) 81 ("union" (%intern (json-getf o "class"))) 83 (if-let ((elt (json-getf o "elemType" nil))) 84 (%intern (parse-type elt)) 85 (parse-type-id (json-getf o "elemTypeId")))) 87 (if-let ((elt (json-getf o "elemType" nil))) 88 (%intern (parse-type elt)) 89 (parse-type-id (json-getf o "elemTypeId")))) 90 ("struct" (%intern (json-getf o "class"))) 91 ("enum" (%intern (json-getf o "class"))))))) 92 (defun convert-parquet-struct-field-type (field) ;; technically part of thrift type system 93 (let* ((type-id (parse-type-id (parquet-struct-field-type-id field))) 94 (type (parse-type (parquet-struct-field-type field))) 95 (required (parquet-struct-field-required field))) 97 ((eql 'vector type-id) `(vector ,type)) 98 (t (or type type-id))))) 99 (if (equal "optional" required) 103 (defparameter *parquet-structs* nil) 105 (defstruct (parquet-struct 106 (:constructor make-parquet-struct (name doc exceptionp unionp fields))) 107 name doc exceptionp unionp (fields nil :type list)) 109 (defstruct (parquet-struct-field 110 (:constructor make-parquet-struct-field (key name type-id type doc required))) 111 key name type-id type doc required) 113 (defun parquet-json-structs () ;; name doc isException isUnion fields 116 (let ((name (json-getf s "name")) 117 (doc (json-getf s "doc")) 118 (exceptionp (json-getf s "isException")) 119 (unionp (json-getf s "isUnion")) 120 (fields (loop for f in (json-getf s "fields") 122 (let ((key (json-getf f "key")) 123 (name (json-getf f "name")) 124 (type-id (json-getf f "typeId")) 125 ;; json object - needs additional parsing 126 (type (json-getf f "type")) 127 (doc (json-getf f "doc")) 128 (required (json-getf f "required"))) 129 (make-parquet-struct-field key name type-id type doc required))))) 130 (make-parquet-struct name doc exceptionp unionp fields))) 131 (json-getf *parquet-json* "structs"))) 133 (defun parquet-json-namespaces () 134 (json-getf *parquet-json* "namespaces")) 137 (defun init-parquet-json (&optional (file *parquet-json-file*)) 138 (with-open-file (file file) 139 (setq *parquet-json* (json-read file))) 140 (setq *parquet-enums* (%parquet-json-enums)) 141 (setq *parquet-structs* (parquet-json-structs)))) 145 ;; (defmethod print-object ((obj parquet-object) stream) 146 ;; "Output a Parquet object to a stream." 147 ;; (print-unreadable-object (obj stream :type t))) 149 (defmacro define-parquet-class (name superclasses slots &rest options) 150 "Define a new subclass of PARQUET-OBJECT with NAME." 151 `(defclass ,name ,@(if-let ((s superclasses)) (list s) `((dat/parquet::parquet-object))) ,slots ,@options)) 157 (defun %define-parquet-structs () 158 "Define all known values in *PARQUET-STRUCTS* using DEFINE-PARQUET-CLASS (DEFCLASS)." 159 (loop for struct in *parquet-structs* 161 collect (let* ((name (parquet-struct-name struct)) 162 (doc (parquet-struct-doc struct)) 163 (fields (parquet-struct-fields struct)) 164 (class-name (symbolicate (cond 165 ((equal name "UUIDType") "PARQUET-UUID-TYPE") 166 (t (concatenate 'string 168 (camelcase-name-to-lisp-name name))))))) 170 (defclass ,class-name (dat/parquet::parquet-object) 171 (,@(mapcar (lambda (f) 172 (let ((fdoc (parquet-struct-field-doc f)) 173 (fname (snakecase-name-to-lisp-name 174 (parquet-struct-field-name f)))) 175 `(,(symbolicate fname) 176 ,@(when fdoc `(:documentation ,fdoc)) 177 :initarg ,(keywordicate fname) 179 ,@(when (equal "optional" (parquet-struct-field-required f)) 181 ,@(when-let ((ty (convert-parquet-struct-field-type f))) 184 ,@(when doc `((:documentation ,doc))))))))) 186 (defmacro define-parquet-type (name opts &body body) 187 "Define a parquet type with DEFTYPE which maps to LISP-TYPE." 188 `(deftype ,(symbolicate "PARQUET-" (substitute #\- #\_ name)) ,opts ,@body)) 190 (defun parse-parquet-thrift-definitions (&key (input *parquet-json-file*) 191 (output #.(asdf:system-relative-pathname :dat "parquet/thrift.lisp"))) 192 (init-parquet-json input) 193 (with-open-file (defs output :direction :output :if-exists :supersede :if-does-not-exist :create) 194 (format defs ";;; ~a --- Parquet Thrift Definitions -*- buffer-read-only:t -*- 198 ;; This file was generated automatically by 199 ;; DAT/PARQUET/GEN:PARSE-PARQUET-THRIFT-DEFINITIONS 204 (in-package :dat/parquet)" output input) 206 (let ((enums '((define-parquet-enum types "Type") 207 (define-parquet-enum converted-types "ConvertedType") 208 (define-parquet-enum field-repetition-types "FieldRepetitionType") 209 (define-parquet-enum encodings "Encoding") 210 (define-parquet-enum compression-codecs "CompressionCodec") 211 (define-parquet-enum page-types "PageType") 212 (define-parquet-enum boundary-orders "BoundaryOrder"))) 213 (types '((define-parquet-type "BOOLEAN" () 'boolean) 214 (define-parquet-type "INT32" () '(signed-byte 32)) 215 (define-parquet-type "INT64" () '(signed-byte 64)) 216 (define-parquet-type "INT96" () '(signed-byte 96)) 217 (define-parquet-type "FLOAT" () 'float) 218 (define-parquet-type "DOUBLE" () 'double-float) 219 (define-parquet-type "BYTE_ARRAY" (&optional size) `(octet-vector ,size)) 220 (define-parquet-type "FIXED_LEN_BYTE_ARRAY" (size) `(octet-vector ,size)))) 221 (structs (mapcar #'macroexpand-1 (%define-parquet-structs)))) 222 ;; expands to a progn, so we just take the cdr 224 (dolist (f (cdr (macroexpand en))) 225 (write f :stream defs :case :downcase :readably t) 228 (dolist (f (cdr (macroexpand ty))) 229 (write f :stream defs :case :downcase :readably t) 232 (dolist (f (cdr (macroexpand st))) 233 (write f :stream defs :case :downcase :readably t)