changelog shortlog graph tags branches files raw help

Mercurial > core / changeset: parquet+arrow

changeset 539: cf0c1933289f
parent 538: d84e518059be
child 540: bd49b7e2c623
author: Richard Westhaver <ellis@rwest.io>
date: Thu, 11 Jul 2024 21:40:29 -0400
files: lisp/ffi/arrow/arrow.asd lisp/ffi/arrow/pkg.lisp lisp/ffi/arrow/tests.lisp lisp/lib/dat/parquet.lisp
description: parquet+arrow
     1.1--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2+++ b/lisp/ffi/arrow/arrow.asd	Thu Jul 11 21:40:29 2024 -0400
     1.3@@ -0,0 +1,11 @@
     1.4+;;; arrow.asd --- Apache Arrow System Definitions
     1.5+(defsystem :arrow
     1.6+  :depends-on (:std :log)
     1.7+  :description "A thin FFI wrapper for Arrow."
     1.8+  :components ((:file "pkg"))
     1.9+  :in-order-to ((test-op (test-op "arrow/tests"))))
    1.10+
    1.11+(defsystem :arrow/tests
    1.12+  :depends-on (:std :log :rt :arrow)
    1.13+  :components ((:file "tests"))
    1.14+  :perform (test-op (o c) (symbol-call :rt :do-tests :arrow)))
     2.1--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2+++ b/lisp/ffi/arrow/pkg.lisp	Thu Jul 11 21:40:29 2024 -0400
     2.3@@ -0,0 +1,40 @@
     2.4+;;; pkg.lisp --- Apache Arrow FFI
     2.5+
     2.6+;; 
     2.7+
     2.8+;;; Code:
     2.9+(defpackage :arrow
    2.10+  (:use :cl :std :sb-alien)
    2.11+  (:export))
    2.12+
    2.13+(in-package :arrow)
    2.14+
    2.15+(define-alien-loader "arrow" t "/usr/lib/")
    2.16+
    2.17+(define-alien-type arrow-release-function (function void (* (struct nil))))
    2.18+
    2.19+(define-alien-type arrow-schema ; mirrors `struct ArrowSchema' from the Arrow C data interface
    2.20+  (struct arrow-schema
    2.21+          (format c-string)
    2.22+          (name c-string)
    2.23+          (metadata c-string) ; NOTE(review): binary-encoded in the C spec, may contain NULs -- confirm c-string is OK
    2.24+          (flags (signed 64)) ; int64_t in the C struct, independent of the platform's `long'
    2.25+          (n-children (signed 64)) ; int64_t
    2.26+          (children (* (* (struct arrow-schema)))) ; struct ArrowSchema**: a pointer slot, not an inline array
    2.27+          (dictionary (* (struct arrow-schema)))
    2.28+          (release (* arrow-release-function))
    2.29+          (private-data (* t))))
    2.30+
    2.31+
    2.32+(define-alien-type arrow-array ; mirrors `struct ArrowArray' from the Arrow C data interface
    2.33+  (struct arrow-array
    2.34+          (length (signed 64)) ; the five counters below are int64_t in the C struct
    2.35+          (null-count (signed 64))
    2.36+          (offset (signed 64))
    2.37+          (n-buffers (signed 64))
    2.38+          (n-children (signed 64))
    2.39+          (buffers (* (* t))) ; const void**: a pointer slot, not an inline array
    2.40+          (children (* (* (struct arrow-array)))) ; struct ArrowArray**
    2.41+          (dictionary (* (struct arrow-array)))
    2.42+          (release (* arrow-release-function))
    2.43+          (private-data (* t))))
     3.1--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2+++ b/lisp/ffi/arrow/tests.lisp	Thu Jul 11 21:40:29 2024 -0400
     3.3@@ -0,0 +1,12 @@
     3.4+;;; tests.lisp --- Apache Arrow FFI Tests
     3.5+
     3.6+;; 
     3.7+
     3.8+;;; Code:
     3.9+(defpackage :arrow/tests
    3.10+  (:use :cl :std :sb-alien :rt :arrow))
    3.11+(in-package :arrow/tests)
    3.12+(defsuite :arrow)
    3.13+(in-suite :arrow)
    3.14+(load-arrow)
    3.15+(deftest sanity ())
     4.1--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2+++ b/lisp/lib/dat/parquet.lisp	Thu Jul 11 21:40:29 2024 -0400
     4.3@@ -0,0 +1,119 @@
     4.4+;;; parquet.lisp --- Apache Parquet
     4.5+
     4.6+;; Common Lisp implementation of Apache Parquet
     4.7+
     4.8+;;; Commentary:
     4.9+
    4.10+#|
    4.11+https://github.com/apache/parquet-format
    4.12+https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift
    4.13+https://github.com/apache/parquet-testing
    4.14+https://github.com/apache/parquet-java
    4.15+https://github.com/apache/arrow-rs
    4.16+|#
    4.17+
    4.18+;; In this package we're being as lazy as possible. To generate our own
    4.19+;; encoder/decoder methods we depend on the file parquet.thrift in the
    4.20+;; parquet-format repo above. The core skelfile includes a script to download
    4.21+;; it and convert it to parquet.json (requires the thrift cli tool). We then
    4.22+;; decode it with DAT/JSON and visit all elements recursively, generating lisp
    4.23+;; code using pre-compiled macros.
    4.24+
    4.25+;; 
    4.26+;;; Code:
    4.27+(in-package :dat/parquet)
    4.28+
    4.29+(eval-always
    4.30+  (defparameter *default-parquet-json-file*
    4.31+    (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/parquet.json")))
    4.32+  (defvar *parquet-json* nil)
    4.33+  (defun load-parquet-json (&optional (json-file *default-parquet-json-file*))
    4.34+    (with-open-file (file json-file)
    4.35+      (setq *parquet-json* (json-read file))))
    4.36+  (defun parquet-json-enums ()
    4.37+    (json-getf *parquet-json* "enums"))  
    4.38+
    4.39+  (defun parquet-json-enum-getf (name)
    4.40+    (json-getf
    4.41+     (find-if (lambda (x) (equal name (json-getf x "name"))) (parquet-json-enums))
    4.42+     "members"))
    4.43+
    4.44+  (defmacro def-parquet-enum (sym name)
    4.45+    `(progn
    4.46+       (defvar ,(symbolicate '*parquet- sym '*) nil)
    4.47+       (defun ,(symbolicate 'parquet-json- sym) ()
    4.48+         (mapcar (lambda (x) (json-getf x "name")) (parquet-json-enum-getf ,name)))))
    4.49+
    4.50+  (defvar *parquet-structs* nil)
    4.51+  (defstruct (parquet-struct
    4.52+              (:constructor make-parquet-struct (name doc exceptionp unionp fields)))
    4.53+    name doc exceptionp unionp (fields nil :type list))
    4.54+  (defstruct (parquet-struct-field
    4.55+              (:constructor make-parquet-struct-field (key name type-id type doc required)))
    4.56+    key name type-id type doc required)
    4.57+  (defun parquet-json-structs () ;; name doc isException isUnion fields
    4.58+    "Return a list with one PARQUET-STRUCT per entry under \"structs\" in *PARQUET-JSON*."
    4.59+    (mapcar
    4.60+     (lambda (s)
    4.61+       (let ((name (json-getf s "name"))
    4.62+             (doc (json-getf s "doc"))
    4.63+             (exceptionp (json-getf s "isException"))
    4.64+             (unionp (json-getf s "isUnion"))
    4.65+             (fields (loop for f in (json-getf s "fields")
    4.66+                           collect (let ((key (json-getf f "key"))
    4.67+                                         (name (json-getf f "name"))
    4.68+                                         (type-id (json-getf f "typeId"))
    4.69+                                         ;; json object - needs additional parsing
    4.70+                                         (type (json-getf f "type"))
    4.71+                                         (doc (json-getf f "doc"))
    4.72+                                         (required (json-getf f "required")))
    4.73+                                     (make-parquet-struct-field key name type-id type doc required)))))
    4.74+         (make-parquet-struct name doc exceptionp unionp fields))) ; last form, so MAPCAR collects the struct
    4.75+     (json-getf *parquet-json* "structs")))
    4.76+
    4.77+  (defun parquet-json-namespaces ()
    4.78+    (json-getf *parquet-json* "namespaces"))
    4.79+
    4.80+  (defun init-parquet-json ()
    4.81+    (load-parquet-json)
    4.82+    (def-parquet-enum types "Type")
    4.83+    (def-parquet-enum converted-types "ConvertedType")
    4.84+    (def-parquet-enum field-repetition-types "FieldRepetitionType")
    4.85+    (def-parquet-enum encodings "Encoding")
    4.86+    (def-parquet-enum compression-codecs "CompressionCodec")
    4.87+    (def-parquet-enum page-types "PageType")
    4.88+    (def-parquet-enum boundary-orders "BoundaryOrder")
    4.89+    (setq *parquet-structs* (parquet-json-structs))))
    4.90+
    4.91+(eval-when (:compile-toplevel)
    4.92+  (init-parquet-json))
    4.93+
    4.94+(defclass parquet-object () ())
    4.95+
    4.96+(defmethod print-object ((obj parquet-object) stream)
    4.97+  "Output a Parquet object to a stream."
    4.98+  (print-unreadable-object (obj stream :type t)
    4.99+    (parquet-encode obj stream)))
   4.100+
   4.101+(defmacro define-parquet-class (name superclasses slots &rest options)
   4.102+  "Define class NAME whose direct superclasses are SUPERCLASSES followed by PARQUET-OBJECT (kept last so its subclasses may be listed)."
   4.103+  `(defclass ,name ,(append superclasses (list 'parquet-object)) ,slots ,@options))
   4.104+
   4.105+(define-parquet-class logical-parquet-object () ())
   4.106+
   4.107+(defgeneric parquet-read (value &optional stream))
   4.108+(defgeneric parquet-write (value &optional stream))
   4.109+
   4.110+(defmethod parquet-write ((value (eql t)) &optional stream)
   4.111+  "Encode a parquet boolean true value."
   4.112+  (declare (ignore value))
   4.113+  (write-byte 1 stream))
   4.114+
   4.115+(defmethod parquet-write ((value (eql nil)) &optional stream)
   4.116+  "Encode a parquet boolean false value."
   4.117+  (declare (ignore value))
   4.118+  (write-byte 0 stream))
   4.119+
   4.120+(defun parquet-encode (value &optional stream)
   4.121+  "Encode a Lisp value and write it to a parquet stream."
   4.122+  (parquet-write value stream))