Mercurial > core / lisp/lib/dat/parquet/gen.lisp
changeset 635: |
849f72b72b41 |
parent: |
4d34907c69eb
|
child: |
b88bf15f60d0 |
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Mon, 02 Sep 2024 18:31:19 -0400 |
permissions: |
-rw-r--r-- |
description: |
add back fuzz.lisp and proper codegen for parquet.json thrift definitions |
1 ;;; gen.lisp --- Parquet Lisp Code Generator 6 (defpackage :dat/parquet/gen ;; not public API 7 (:use :cl :std :dat/proto :dat/json) 8 (:export :load-parquet)) 10 (in-package :dat/parquet/gen) 12 (defparameter *parquet-json-file* 13 (or (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/parquet.json")) 14 (warn "*PARQUET-JSON-FILE* not found"))) 16 (defparameter *parquet-output-file* 17 #.(asdf:system-relative-pathname :dat #P"parquet/thrift.lisp")) 19 (defvar *parquet-json* nil) 21 (defun %parquet-json-enums () 22 (json-getf *parquet-json* "enums")) 24 (defun parquet-json-enum-getf (name) 26 (find-if (lambda (x) (equal name (json-getf x "name"))) (%parquet-json-enums)) 29 (defvar *parquet-enums* nil) 31 (defmacro define-parquet-enum (sym name) 33 (defun ,(symbolicate "PARQUET-JSON-" sym) () 34 (mapcar (lambda (x) (keywordicate (snakecase-name-to-lisp-name (json-getf x "name")))) 35 (parquet-json-enum-getf ,name))) 36 (defparameter ,(intern 37 (concatenate 'string "*PARQUET-" (symbol-name sym) "*") 39 (,(symbolicate "PARQUET-JSON-" sym))))) 41 (defun camelcase-name-to-lisp-name (string) 43 (with-output-to-string (name) 44 (loop for i from 0 below (length string) 46 when (and (upper-case-p c) (not (zerop i))) 47 do (write-char #\- name) 48 do (write-char c name))))) 50 (defun snakecase-name-to-lisp-name (string) 52 (substitute #\- #\_ string))) 54 (labels ((parse-type-id (type-id) 56 (string-case (type-id :default nil) 59 ("i16" '(signed-byte 16)) 60 ("i32" '(signed-byte 32)) 61 ("i64" '(signed-byte 64)) 62 ("double" 'double-float) 65 ("binary" 'octet-vector) 67 ("enum" '(signed-byte 32)) 74 ((equal name "UUIDType") "PARQUET-UUID-TYPE") 75 (t (concatenate 'string 77 (camelcase-name-to-lisp-name name)))) 82 (string-case ((json-getf o "typeId")) 83 ("union" (%intern (json-getf o "class"))) 85 (if-let ((elt (json-getf o "elemType" nil))) 86 (%intern (parse-type elt)) 87 (parse-type-id (json-getf o "elemTypeId")))) 89 (if-let ((elt (json-getf o "elemType" nil))) 90 (%intern (parse-type elt)) 91 (parse-type-id (json-getf o "elemTypeId")))) 92 ("struct" (%intern (json-getf o "class"))) 93 ("enum" (%intern (json-getf o "class"))))))) 94 (defun convert-parquet-struct-field-type (field) ;; technically part of thrift type system 95 (let* ((type-id (parse-type-id (parquet-struct-field-type-id field))) 96 (type (parse-type (parquet-struct-field-type field))) 97 (required (parquet-struct-field-required field))) 99 ((eql 'vector type-id) `(vector ,type)) 100 (t (or type type-id))))) 101 (if (equal "optional" required) 105 (defparameter *parquet-structs* nil) 107 (defstruct (parquet-struct 108 (:constructor make-parquet-struct (name doc exceptionp unionp fields))) 109 name doc exceptionp unionp (fields nil :type list)) 111 (defstruct (parquet-struct-field 112 (:constructor make-parquet-struct-field (key name type-id type doc required))) 113 key name type-id type doc required) 115 (defun parquet-json-structs () ;; name doc isException isUnion fields 118 (let ((name (json-getf s "name")) 119 (doc (json-getf s "doc")) 120 (exceptionp (json-getf s "isException")) 121 (unionp (json-getf s "isUnion")) 122 (fields (loop for f in (json-getf s "fields") 124 (let ((key (json-getf f "key")) 125 (name (json-getf f "name")) 126 (type-id (json-getf f "typeId")) 127 ;; json object - needs additional parsing 128 (type (json-getf f "type")) 129 (doc (json-getf f "doc")) 130 (required (json-getf f "required"))) 131 (make-parquet-struct-field key name type-id type doc required))))) 132 (make-parquet-struct name doc exceptionp unionp fields))) 133 (json-getf *parquet-json* "structs"))) 135 (defun parquet-json-namespaces () 136 (json-getf *parquet-json* "namespaces")) 139 (defun init-parquet-json (&optional (file *parquet-json-file*)) 140 (with-open-file (file file) 141 (setq *parquet-json* (json-read file))) 142 (setq *parquet-enums* (parquet-json-enums)) 143 (setq *parquet-structs* (parquet-json-structs)))) 147 ;; (defmethod print-object ((obj parquet-object) stream) 148 ;; "Output a Parquet object to a stream." 149 ;; (print-unreadable-object (obj stream :type t))) 151 (defmacro define-parquet-class (name superclasses slots &rest options) 152 "Define a new subclass of PARQUET-OBJECT with NAME." 153 `(defclass ,name ,@(if-let ((s superclasses)) (list s) `((dat/parquet::parquet-object))) ,slots ,@options)) 159 (defun %define-parquet-structs () 160 "Define all known values in *PARQUET-STRUCTS* using DEFINE-PARQUET-CLASS (DEFCLASS)." 161 (loop for struct in *parquet-structs* 163 collect (let* ((name (parquet-struct-name struct)) 164 (doc (parquet-struct-doc struct)) 165 (fields (parquet-struct-fields struct)) 166 (class-name (intern (cond 167 ((equal name "UUIDType") "PARQUET-UUID-TYPE") 168 (t (concatenate 'string 170 (camelcase-name-to-lisp-name name)))) 173 (defclass ,class-name (dat/parquet::parquet-object) 174 (,@(mapcar (lambda (f) 175 (let ((fdoc (parquet-struct-field-doc f)) 176 (fname (snakecase-name-to-lisp-name 177 (parquet-struct-field-name f)))) 178 `(,(intern fname :dat/parquet) 179 ,@(when fdoc `(:documentation ,fdoc)) 180 :initarg ,(keywordicate fname) 182 ,@(when (equal "optional" (parquet-struct-field-required f)) 184 ,@(when-let ((ty (convert-parquet-struct-field-type f))) 187 ,@(when doc `((:documentation ,doc))))))))) 189 (defmacro define-parquet-type (name opts &body body) 190 "Define a parquet type with DEFTYPE which maps to LISP-TYPE." 191 `(deftype ,(intern (concatenate 'string "PARQUET-" (substitute #\- #\_ name)) :dat/parquet) ,opts ,@body)) 193 (defun parse-parquet-thrift-definitions (&key (input *parquet-json-file*) 194 (output #.(asdf:system-relative-pathname :dat "parquet/thrift.lisp"))) 195 (init-parquet-json input) 196 (with-open-file (defs output :direction :output :if-exists :supersede :if-does-not-exist :create) 197 (format defs ";;; ~a --- Parquet Thrift Definitions -*- buffer-read-only:t -*- 201 ;; This file was generated automatically by 202 ;; DAT/PARQUET/GEN:PARSE-PARQUET-THRIFT-DEFINITIONS 207 (in-package :dat/parquet)" output input) 209 (let ((enums '((define-parquet-enum types "Type") 210 (define-parquet-enum converted-types "ConvertedType") 211 (define-parquet-enum field-repetition-types "FieldRepetitionType") 212 (define-parquet-enum encodings "Encoding") 213 (define-parquet-enum compression-codecs "CompressionCodec") 214 (define-parquet-enum page-types "PageType") 215 (define-parquet-enum boundary-orders "BoundaryOrder"))) 216 (types '((define-parquet-type "BOOLEAN" () 'boolean) 217 (define-parquet-type "INT32" () '(signed-byte 32)) 218 (define-parquet-type "INT64" () '(signed-byte 64)) 219 (define-parquet-type "INT96" () '(signed-byte 96)) 220 (define-parquet-type "FLOAT" () 'float) 221 (define-parquet-type "DOUBLE" () 'double-float) 222 (define-parquet-type "BYTE_ARRAY" (&optional size) `(octet-vector ,size)) 223 (define-parquet-type "FIXED_LEN_BYTE_ARRAY" (size) `(octet-vector ,size)))) 224 (structs (mapcar #'macroexpand-1 (%define-parquet-structs)))) 225 ;; expands to a progn, so we just take the cdr 227 (dolist (f (cdr (macroexpand en))) 228 (write f :stream defs :case :downcase :readably t) 231 (dolist (f (cdr (macroexpand ty))) 232 (write f :stream defs :case :downcase :readably t) 235 (dolist (f (cdr (macroexpand st))) 236 (write f :stream defs :case :downcase :readably t)