changelog shortlog graph tags branches changeset file revisions annotate raw help

Mercurial > core / lisp/lib/dat/parquet/gen.lisp

revision 540: bd49b7e2c623
child 541: 10c4bb778030
     1.1--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2+++ b/lisp/lib/dat/parquet/gen.lisp	Fri Jul 12 19:57:18 2024 -0400
     1.3@@ -0,0 +1,183 @@
     1.4+;;; gen.lisp --- Parquet Lisp Code Generator
     1.5+
     1.6+;; 
     1.7+
     1.8+;;; Code:
     1.9+(in-package :dat/parquet/gen)
    1.10+(defparameter *parquet-json-file*
    1.11+  (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/parquet.json")))
    1.12+(defvar *parquet-json* nil)
    1.13+(defun load-parquet-json (&optional (json-file *parquet-json-file*))
    1.14+  (with-open-file (file json-file)
    1.15+    (setq *parquet-json* (json-read file))))
    1.16+
    1.17+(defun %parquet-json-enums ()
    1.18+  (json-getf *parquet-json* "enums"))  
    1.19+
    1.20+(defun parquet-json-enum-getf (name)
    1.21+  (json-getf
    1.22+   (find-if (lambda (x) (equal name (json-getf x "name"))) (%parquet-json-enums))
    1.23+   "members"))
    1.24+
    1.25+(defvar *parquet-enums* nil)
    1.26+
    1.27+(defmacro def-parquet-enum (sym name)
    1.28+  `(progn
    1.29+     (defun ,(symbolicate 'parquet-json- sym) ()
    1.30+       (mapcar (lambda (x) (json-getf x "name")) (parquet-json-enum-getf ,name)))
    1.31+     (defparameter ,(intern
    1.32+                     (concatenate 'string "*PARQUET-" (symbol-name sym) "*")
    1.33+                     :dat/parquet)
    1.34+       (,(symbolicate 'parquet-json- sym)))))
    1.35+
    1.36+(defun camelcase-name-to-lisp-name (string)
    1.37+  (string-upcase
    1.38+   (with-output-to-string (name)
    1.39+     (loop for i from 0 below (length string)
    1.40+           for c across string
    1.41+           when (and (upper-case-p c) (not (zerop i)))
    1.42+           do (write-char #\- name)
    1.43+           do (write-char c name)))))
    1.44+
    1.45+(defun snakecase-name-to-lisp-name (string)
    1.46+  (string-upcase
    1.47+   (substitute #\- #\_ string)))
    1.48+
    1.49+(defun parquet-json-enums ()
    1.50+  (list
    1.51+   (def-parquet-enum types "Type")
    1.52+   (def-parquet-enum converted-types "ConvertedType")
    1.53+   (def-parquet-enum field-repetition-types "FieldRepetitionType")
    1.54+   (def-parquet-enum encodings "Encoding")
    1.55+   (def-parquet-enum compression-codecs "CompressionCodec")
    1.56+   (def-parquet-enum page-types "PageType")
    1.57+   (def-parquet-enum boundary-orders "BoundaryOrder")))
    1.58+
    1.59+(defvar *parquet-structs* nil)
    1.60+(defstruct (parquet-struct
    1.61+            (:constructor make-parquet-struct (name doc exceptionp unionp fields)))
    1.62+  name doc exceptionp unionp (fields nil :type list))
    1.63+
    1.64+(defstruct (parquet-struct-field
    1.65+            (:constructor make-parquet-struct-field (key name type-id type doc required)))
    1.66+  key name type-id type doc required)
    1.67+
    1.68+(defun parquet-destruct-field (field)
    1.69+  (list (parquet-struct-field-name field)
    1.70+        (parquet-struct-field-key field)
    1.71+        (parquet-struct-field-doc field)
    1.72+        (parquet-struct-field-type-id field)
    1.73+        (parquet-struct-field-type field)
    1.74+        (parquet-struct-field-required field)))
    1.75+
    1.76+(defun parquet-destruct (struct)
    1.77+  (list (parquet-struct-name struct)
    1.78+        (parquet-struct-doc struct)
    1.79+        (parquet-struct-unionp struct)
    1.80+        (parquet-struct-exceptionp struct)
    1.81+        (mapcar #'parquet-destruct-field (parquet-struct-fields struct))))
    1.82+
    1.83+(flet ((pq-type-parse (o) (let ((id (json-getf o "typeId")))
    1.84+                            (string-case (id :default (warn 'simple-warning :format-control "unknown typeId: ~A"
    1.85+                                                                            :format-arguments (list id)))
    1.86+                              ("list" (cons id (json-getf o "elemTypeId")))
    1.87+                              ("union" (cons id (json-getf o "class")))
    1.88+                              ("struct" (cons id (json-getf o "class")))
    1.89+                              ("enum" (cons id (json-getf o "class")))))))
    1.90+  (defun parquet-json-structs () ;; name doc isException isUnion fields
    1.91+  (mapcar
    1.92+   (lambda (s)
    1.93+     (let ((name (json-getf s "name"))
    1.94+           (doc (json-getf s "doc"))
    1.95+           (exceptionp (json-getf s "isException"))
    1.96+           (unionp (json-getf s "isUnion"))
    1.97+           (fields (loop for f in (json-getf s "fields")
    1.98+                         collect
    1.99+                            (let ((key (json-getf f "key"))
   1.100+                                  (name (json-getf f "name"))
   1.101+                                  (type-id (json-getf f "typeId"))
   1.102+                                  ;; json object - needs additional parsing
   1.103+                                  (type (when-let ((ty (json-getf f "type")))
   1.104+                                          (pq-type-parse ty)))
   1.105+                                  (doc (json-getf f "doc"))
   1.106+                                  (required (json-getf f "required")))
   1.107+                              (make-parquet-struct-field key name type-id type doc required)))))
   1.108+       (make-parquet-struct name doc exceptionp unionp fields)))
   1.109+   (json-getf *parquet-json* "structs"))))
   1.110+
   1.111+(defun parquet-json-namespaces ()
   1.112+  (json-getf *parquet-json* "namespaces"))
   1.113+
   1.114+(defun init-parquet-json (&optional (file *parquet-json-file*))
   1.115+  (load-parquet-json file)
   1.116+  (setq *parquet-enums* (parquet-json-enums))
   1.117+  (setq *parquet-structs* (parquet-json-structs)))
   1.118+
   1.119+;;; CLOS
   1.120+(defclass parquet-object () ())
   1.121+
   1.122+;; (defmethod print-object ((obj parquet-object) stream)
   1.123+;;   "Output a Parquet object to a stream."
   1.124+;;   (print-unreadable-object (obj stream :type t)))
   1.125+
   1.126+(defmacro define-parquet-class (name superclasses slots &rest options)
   1.127+  "Define a new subclass of PARQUET-OBJECT with NAME."
   1.128+  `(defclass ,name ,@(if-let ((s superclasses)) (list s) `((parquet-object))) ,slots ,@options))
   1.129+
   1.130+(define-parquet-class dat/parquet:parquet-enum-object () ())
   1.131+(define-parquet-class dat/parquet:parquet-struct-object () ())
   1.132+
   1.133+;;; Codegen
   1.134+
   1.135+;; 8)
   1.136+(defun %define-parquet-structs ()
   1.137+  "Define all known values in *PARQUET-STRUCTS* using DEFINE-PARQUET-CLASS (DEFCLASS)."
   1.138+      (loop for struct in *parquet-structs*
   1.139+            unless (null struct)
   1.140+            collect (let ((name (parquet-struct-name struct))
   1.141+                          (doc (parquet-struct-doc struct))
   1.142+                          (fields (parquet-struct-fields struct)))
   1.143+                      `(define-parquet-class ,(intern (concatenate 'string
   1.144+                                                                   "PARQUET-"
   1.145+                                                                   (camelcase-name-to-lisp-name name))
   1.146+                                                      :dat/parquet)
   1.147+                           (parquet-struct-object)
   1.148+                         (,@(mapcar (lambda (f)
   1.149+                                      (let ((fdoc (parquet-struct-field-doc f))
   1.150+                                            (fname (snakecase-name-to-lisp-name
   1.151+                                                    (parquet-struct-field-name f))))
   1.152+                                        `(,(symbolicate fname)
   1.153+                                          ,@(when fdoc `(:documentation ,fdoc))
   1.154+                                          :initarg ,(keywordicate fname)
   1.155+                                          ;; TODO 2024-07-12: 
   1.156+                                          ,@(when (equal "optional" (parquet-struct-field-required f))
   1.157+                                              `(:initform nil)))))
   1.158+                                    fields))
   1.159+                         ,@(when doc `((:documentation ,doc)))))))
   1.160+
   1.161+(defmacro define-parquet-structs ()
   1.162+  `(list
   1.163+     ,@(%define-parquet-structs)))
   1.164+
   1.165+(defmacro define-parquet-type (name opts &body body)
   1.166+  "Define a parquet type with DEFTYPE which maps to LISP-TYPE."
   1.167+  `(deftype ,(intern (concatenate 'string "PARQUET-" (substitute #\- #\_ name)) :dat/parquet) ,opts ,@body))
   1.168+
   1.169+(defun define-parquet-types ()
   1.170+  "Define all known values in *PARQUET-TYPES* using DEFINE-PARQUET-TYPE (DEFTYPE)."
   1.171+  (list
   1.172+   (define-parquet-type "BOOLEAN" () 'boolean)
   1.173+   (define-parquet-type "INT32" () '(signed-byte 32))
   1.174+   (define-parquet-type "INT64" () '(signed-byte 64))
   1.175+   (define-parquet-type "INT96" () '(signed-byte 96))
   1.176+   (define-parquet-type "FLOAT" () 'float)
   1.177+   (define-parquet-type "DOUBLE" () 'double-float)
   1.178+   (define-parquet-type "BYTE_ARRAY" (&optional size) `(octet-vector ,size))
   1.179+   (define-parquet-type "FIXED_LEN_BYTE_ARRAY" (size) `(octet-vector ,size))))
   1.180+
   1.181+(defun load-parquet (&key (file *parquet-json-file*))
   1.182+  (init-parquet-json file)
   1.183+  (with-package (:dat/parquet)
   1.184+    (export (define-parquet-types))
   1.185+    (export (mapcar 'class-name (define-parquet-structs)))
   1.186+    (export *parquet-enums*)))