Mercurial > core / lisp/lib/dat/parquet/gen.lisp
changeset 640: |
642b3b82b20d |
parent: |
b88bf15f60d0
|
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Sun, 08 Sep 2024 17:35:03 -0400 |
permissions: |
-rw-r--r-- |
description: |
thrift fixes, org-get-with-inheritance init |
1 ;;; gen.lisp --- Parquet Lisp Code Generator 6 (defpackage :dat/parquet/gen ;; not public API 7 (:use :cl :std :dat/proto :dat/json) 8 (:export :load-parquet)) 10 (in-package :dat/parquet/gen) 12 (defparameter *parquet-json-file* 13 (or (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/parquet.json")) 14 (warn "*PARQUET-JSON-FILE* not found"))) 16 (defparameter *parquet-output-file* 17 #.(asdf:system-relative-pathname :dat #P"parquet/thrift.lisp")) 19 (defvar *parquet-json* nil) 22 (defun %parquet-json-enums () 23 (json-getf *parquet-json* "enums")) 25 (defun dat/parquet::parquet-json-enum-getf (name) 27 (find-if (lambda (x) (equal name (json-getf x "name"))) (%parquet-json-enums)) 30 (defun dat/parquet::snakecase-name-to-lisp-name (string) 32 (substitute #\- #\_ string))) 34 (defun dat/parquet::camelcase-name-to-lisp-name (string) 36 (with-output-to-string (name) 37 (loop for i from 0 below (length string) 39 when (and (upper-case-p c) (not (zerop i))) 40 do (write-char #\- name) 41 do (write-char c name)))))) 43 (defvar *parquet-enums* nil) 45 (defmacro define-parquet-enum (sym name) 47 (defvar ,(symbolicate "*PARQUET-JSON-" sym "*") 48 ',(mapcar (lambda (x) (keywordicate (dat/parquet::snakecase-name-to-lisp-name (json-getf x "name")))) 49 (dat/parquet::parquet-json-enum-getf name))))) 51 (labels ((parse-type-id (type-id) 53 (string-case (type-id :default nil) 56 ("i16" '(signed-byte 16)) 57 ("i32" '(signed-byte 32)) 58 ("i64" '(signed-byte 64)) 59 ("double" 'double-float) 62 ("binary" 'octet-vector) 64 ("enum" '(signed-byte 32)) 71 ((equal name "UUIDType") "PARQUET-UUID-TYPE") 72 (t (concatenate 'string 74 (dat/parquet::camelcase-name-to-lisp-name name))))) 78 (string-case ((json-getf o "typeId")) 79 ("union" (%intern (json-getf o "class"))) 81 (if-let ((elt (json-getf o "elemType" nil))) 82 (%intern (parse-type elt)) 83 (parse-type-id (json-getf o "elemTypeId")))) 85 (if-let ((elt (json-getf o "elemType" nil))) 86 (%intern (parse-type elt)) 87 (parse-type-id (json-getf o "elemTypeId")))) 88 ("struct" (%intern (json-getf o "class"))) 89 ("enum" (%intern (json-getf o "class"))))))) 90 (defun convert-parquet-struct-field-type (field) ;; technically part of thrift type system 91 (let* ((type-id (parse-type-id (parquet-struct-field-type-id field))) 92 (type (parse-type (parquet-struct-field-type field))) 93 (required (parquet-struct-field-required field))) 95 ((eql 'vector type-id) `(vector ,type)) 96 (t (or type type-id))))) 97 (if (equal "optional" required) 101 (defparameter *parquet-structs* nil) 103 (defstruct (parquet-struct 104 (:constructor make-parquet-struct (name doc exceptionp unionp fields))) 105 name doc exceptionp unionp (fields nil :type list)) 107 (defstruct (parquet-struct-field 108 (:constructor make-parquet-struct-field (key name type-id type doc required))) 109 key name type-id type doc required) 111 (defun parquet-json-structs () ;; name doc isException isUnion fields 114 (let ((name (json-getf s "name")) 115 (doc (json-getf s "doc")) 116 (exceptionp (json-getf s "isException")) 117 (unionp (json-getf s "isUnion")) 118 (fields (loop for f in (json-getf s "fields") 120 (let ((key (json-getf f "key")) 121 (name (json-getf f "name")) 122 (type-id (json-getf f "typeId")) 123 ;; json object - needs additional parsing 124 (type (json-getf f "type")) 125 (doc (json-getf f "doc")) 126 (required (json-getf f "required"))) 127 (make-parquet-struct-field key name type-id type doc required))))) 128 (make-parquet-struct name doc exceptionp unionp fields))) 129 (json-getf *parquet-json* "structs"))) 131 (defun parquet-json-namespaces () 132 (json-getf *parquet-json* "namespaces")) 135 (defun init-parquet-json (&optional (file *parquet-json-file*)) 136 (with-open-file (file file) 137 (setq *parquet-json* (json-read file))) 138 (setq *parquet-enums* (%parquet-json-enums)) 139 (setq *parquet-structs* (parquet-json-structs)))) 143 ;; (defmethod print-object ((obj parquet-object) stream) 144 ;; "Output a Parquet object to a stream." 145 ;; (print-unreadable-object (obj stream :type t))) 147 (defmacro define-parquet-class (name superclasses slots &rest options) 148 "Define a new subclass of PARQUET-OBJECT with NAME." 149 `(defclass ,name ,@(if-let ((s superclasses)) (list s) `((dat/parquet::parquet-object))) ,slots ,@options)) 155 (defun %define-parquet-structs () 156 "Define all known values in *PARQUET-STRUCTS* using DEFINE-PARQUET-CLASS (DEFCLASS)." 157 (loop for struct in *parquet-structs* 159 collect (let* ((name (parquet-struct-name struct)) 160 (doc (parquet-struct-doc struct)) 161 (fields (parquet-struct-fields struct)) 162 (class-name (symbolicate (cond 163 ((equal name "UUIDType") "PARQUET-UUID-TYPE") 164 (t (concatenate 'string 166 (dat/parquet::camelcase-name-to-lisp-name name))))))) 168 (defclass ,class-name (dat/parquet::parquet-object) 169 (,@(mapcar (lambda (f) 170 (let ((fdoc (parquet-struct-field-doc f)) 171 (fname (dat/parquet::snakecase-name-to-lisp-name 172 (parquet-struct-field-name f)))) 173 `(,(symbolicate fname) 174 ,@(when fdoc `(:documentation ,fdoc)) 175 :initarg ,(keywordicate fname) 177 ,@(when (equal "optional" (parquet-struct-field-required f)) 179 ,@(when-let ((ty (convert-parquet-struct-field-type f))) 182 ,@(when doc `((:documentation ,doc))))))))) 184 (defmacro define-parquet-type (name opts &body body) 185 "Define a parquet type with DEFTYPE which maps to LISP-TYPE." 186 `(progn (deftype ,(symbolicate "PARQUET-" (substitute #\- #\_ name)) ,opts ,@body))) 188 (defun parse-parquet-thrift-definitions (&key (input *parquet-json-file*) 189 (output #.(asdf:system-relative-pathname :dat "parquet/thrift.lisp"))) 190 (init-parquet-json input) 191 (with-open-file (defs output :direction :output :if-exists :supersede :if-does-not-exist :create) 192 (format defs ";;; ~a --- Parquet Thrift Definitions -*- buffer-read-only:t -*- 196 ;; This file was generated automatically by 197 ;; DAT/PARQUET/GEN:PARSE-PARQUET-THRIFT-DEFINITIONS 202 (in-package :dat/parquet)" output input) 204 (let ((enums '((define-parquet-enum types "Type") 205 (define-parquet-enum converted-types "ConvertedType") 206 (define-parquet-enum field-repetition-types "FieldRepetitionType") 207 (define-parquet-enum encodings "Encoding") 208 (define-parquet-enum compression-codecs "CompressionCodec") 209 (define-parquet-enum page-types "PageType") 210 (define-parquet-enum boundary-orders "BoundaryOrder"))) 211 (types '((define-parquet-type "BOOLEAN" () 'boolean) 212 (define-parquet-type "INT32" () '(signed-byte 32)) 213 (define-parquet-type "INT64" () '(signed-byte 64)) 214 (define-parquet-type "INT96" () '(signed-byte 96)) 215 (define-parquet-type "FLOAT" () 'float) 216 (define-parquet-type "DOUBLE" () 'double-float) 217 (define-parquet-type "BYTE_ARRAY" (&optional size) `(octet-vector ,size)) 218 (define-parquet-type "FIXED_LEN_BYTE_ARRAY" (size) `(octet-vector ,size)))) 219 (structs (mapcar #'macroexpand-1 (%define-parquet-structs)))) 220 ;; expands to a progn, so we just take the cdr 222 (dolist (f (cdr (macroexpand en))) 223 (write f :stream defs :case :downcase :readably t) 226 (dolist (f (cdr (macroexpand ty))) 227 (write f :stream defs :case :downcase :readably t) 230 (dolist (f (cdr (macroexpand st))) 231 (write f :stream defs :case :downcase :readably t)