1.1--- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2+++ b/lisp/lib/dat/parquet/gen.lisp Fri Jul 12 19:57:18 2024 -0400
1.3@@ -0,0 +1,183 @@
1.4+;;; gen.lisp --- Parquet Lisp Code Generator
1.5+
1.6+;;
1.7+
1.8+;;; Code:
1.9+(in-package :dat/parquet/gen)
1.10+(defparameter *parquet-json-file*
1.11+ (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/parquet.json")))
1.12+(defvar *parquet-json* nil)
1.13+(defun load-parquet-json (&optional (json-file *parquet-json-file*))
1.14+ (with-open-file (file json-file)
1.15+ (setq *parquet-json* (json-read file))))
1.16+
1.17+(defun %parquet-json-enums ()
1.18+ (json-getf *parquet-json* "enums"))
1.19+
1.20+(defun parquet-json-enum-getf (name)
1.21+ (json-getf
1.22+ (find-if (lambda (x) (equal name (json-getf x "name"))) (%parquet-json-enums))
1.23+ "members"))
1.24+
1.25+(defvar *parquet-enums* nil)
1.26+
1.27+(defmacro def-parquet-enum (sym name)
1.28+ `(progn
1.29+ (defun ,(symbolicate 'parquet-json- sym) ()
1.30+ (mapcar (lambda (x) (json-getf x "name")) (parquet-json-enum-getf ,name)))
1.31+ (defparameter ,(intern
1.32+ (concatenate 'string "*PARQUET-" (symbol-name sym) "*")
1.33+ :dat/parquet)
1.34+ (,(symbolicate 'parquet-json- sym)))))
1.35+
1.36+(defun camelcase-name-to-lisp-name (string)
1.37+ (string-upcase
1.38+ (with-output-to-string (name)
1.39+ (loop for i from 0 below (length string)
1.40+ for c across string
1.41+ when (and (upper-case-p c) (not (zerop i)))
1.42+ do (write-char #\- name)
1.43+ do (write-char c name)))))
1.44+
1.45+(defun snakecase-name-to-lisp-name (string)
1.46+ (string-upcase
1.47+ (substitute #\- #\_ string)))
1.48+
1.49+(defun parquet-json-enums ()
1.50+ (list
1.51+ (def-parquet-enum types "Type")
1.52+ (def-parquet-enum converted-types "ConvertedType")
1.53+ (def-parquet-enum field-repetition-types "FieldRepetitionType")
1.54+ (def-parquet-enum encodings "Encoding")
1.55+ (def-parquet-enum compression-codecs "CompressionCodec")
1.56+ (def-parquet-enum page-types "PageType")
1.57+ (def-parquet-enum boundary-orders "BoundaryOrder")))
1.58+
1.59+(defvar *parquet-structs* nil)
1.60+(defstruct (parquet-struct
1.61+ (:constructor make-parquet-struct (name doc exceptionp unionp fields)))
1.62+ name doc exceptionp unionp (fields nil :type list))
1.63+
1.64+(defstruct (parquet-struct-field
1.65+ (:constructor make-parquet-struct-field (key name type-id type doc required)))
1.66+ key name type-id type doc required)
1.67+
1.68+(defun parquet-destruct-field (field)
1.69+ (list (parquet-struct-field-name field)
1.70+ (parquet-struct-field-key field)
1.71+ (parquet-struct-field-doc field)
1.72+ (parquet-struct-field-type-id field)
1.73+ (parquet-struct-field-type field)
1.74+ (parquet-struct-field-required field)))
1.75+
1.76+(defun parquet-destruct (struct)
1.77+ (list (parquet-struct-name struct)
1.78+ (parquet-struct-doc struct)
1.79+ (parquet-struct-unionp struct)
1.80+ (parquet-struct-exceptionp struct)
1.81+ (mapcar #'parquet-destruct-field (parquet-struct-fields struct))))
1.82+
1.83+(flet ((pq-type-parse (o) (let ((id (json-getf o "typeId")))
1.84+ (string-case (id :default (warn 'simple-warning :format-control "unknown typeId: ~A"
1.85+ :format-arguments (list id)))
1.86+ ("list" (cons id (json-getf o "elemTypeId")))
1.87+ ("union" (cons id (json-getf o "class")))
1.88+ ("struct" (cons id (json-getf o "class")))
1.89+ ("enum" (cons id (json-getf o "class")))))))
1.90+ (defun parquet-json-structs () ;; name doc isException isUnion fields
1.91+ (mapcar
1.92+ (lambda (s)
1.93+ (let ((name (json-getf s "name"))
1.94+ (doc (json-getf s "doc"))
1.95+ (exceptionp (json-getf s "isException"))
1.96+ (unionp (json-getf s "isUnion"))
1.97+ (fields (loop for f in (json-getf s "fields")
1.98+ collect
1.99+ (let ((key (json-getf f "key"))
1.100+ (name (json-getf f "name"))
1.101+ (type-id (json-getf f "typeId"))
1.102+ ;; json object - needs additional parsing
1.103+ (type (when-let ((ty (json-getf f "type")))
1.104+ (pq-type-parse ty)))
1.105+ (doc (json-getf f "doc"))
1.106+ (required (json-getf f "required")))
1.107+ (make-parquet-struct-field key name type-id type doc required)))))
1.108+ (make-parquet-struct name doc exceptionp unionp fields)))
1.109+ (json-getf *parquet-json* "structs"))))
1.110+
1.111+(defun parquet-json-namespaces ()
1.112+ (json-getf *parquet-json* "namespaces"))
1.113+
1.114+(defun init-parquet-json (&optional (file *parquet-json-file*))
1.115+ (load-parquet-json file)
1.116+ (setq *parquet-enums* (parquet-json-enums))
1.117+ (setq *parquet-structs* (parquet-json-structs)))
1.118+
1.119+;;; CLOS
1.120+(defclass parquet-object () ())
1.121+
1.122+;; (defmethod print-object ((obj parquet-object) stream)
1.123+;; "Output a Parquet object to a stream."
1.124+;; (print-unreadable-object (obj stream :type t)))
1.125+
1.126+(defmacro define-parquet-class (name superclasses slots &rest options)
1.127+ "Define a new subclass of PARQUET-OBJECT with NAME."
1.128+ `(defclass ,name ,@(if-let ((s superclasses)) (list s) `((parquet-object))) ,slots ,@options))
1.129+
1.130+(define-parquet-class dat/parquet:parquet-enum-object () ())
1.131+(define-parquet-class dat/parquet:parquet-struct-object () ())
1.132+
1.133+;;; Codegen
1.134+
1.135+;; 8)
1.136+(defun %define-parquet-structs ()
1.137+ "Define all known values in *PARQUET-STRUCTS* using DEFINE-PARQUET-CLASS (DEFCLASS)."
1.138+ (loop for struct in *parquet-structs*
1.139+ unless (null struct)
1.140+ collect (let ((name (parquet-struct-name struct))
1.141+ (doc (parquet-struct-doc struct))
1.142+ (fields (parquet-struct-fields struct)))
1.143+ `(define-parquet-class ,(intern (concatenate 'string
1.144+ "PARQUET-"
1.145+ (camelcase-name-to-lisp-name name))
1.146+ :dat/parquet)
1.147+ (parquet-struct-object)
1.148+ (,@(mapcar (lambda (f)
1.149+ (let ((fdoc (parquet-struct-field-doc f))
1.150+ (fname (snakecase-name-to-lisp-name
1.151+ (parquet-struct-field-name f))))
1.152+ `(,(symbolicate fname)
1.153+ ,@(when fdoc `(:documentation ,fdoc))
1.154+ :initarg ,(keywordicate fname)
1.155+ ;; TODO 2024-07-12:
1.156+ ,@(when (equal "optional" (parquet-struct-field-required f))
1.157+ `(:initform nil)))))
1.158+ fields))
1.159+ ,@(when doc `((:documentation ,doc)))))))
1.160+
1.161+(defmacro define-parquet-structs ()
1.162+ `(list
1.163+ ,@(%define-parquet-structs)))
1.164+
1.165+(defmacro define-parquet-type (name opts &body body)
1.166+ "Define a parquet type with DEFTYPE which maps to LISP-TYPE."
1.167+ `(deftype ,(intern (concatenate 'string "PARQUET-" (substitute #\- #\_ name)) :dat/parquet) ,opts ,@body))
1.168+
1.169+(defun define-parquet-types ()
1.170+ "Define all known values in *PARQUET-TYPES* using DEFINE-PARQUET-TYPE (DEFTYPE)."
1.171+ (list
1.172+ (define-parquet-type "BOOLEAN" () 'boolean)
1.173+ (define-parquet-type "INT32" () '(signed-byte 32))
1.174+ (define-parquet-type "INT64" () '(signed-byte 64))
1.175+ (define-parquet-type "INT96" () '(signed-byte 96))
1.176+ (define-parquet-type "FLOAT" () 'float)
1.177+ (define-parquet-type "DOUBLE" () 'double-float)
1.178+ (define-parquet-type "BYTE_ARRAY" (&optional size) `(octet-vector ,size))
1.179+ (define-parquet-type "FIXED_LEN_BYTE_ARRAY" (size) `(octet-vector ,size))))
1.180+
1.181+(defun load-parquet (&key (file *parquet-json-file*))
1.182+ (init-parquet-json file)
1.183+ (with-package (:dat/parquet)
1.184+ (export (define-parquet-types))
1.185+ (export (mapcar 'class-name (define-parquet-structs)))
1.186+ (export *parquet-enums*)))