Mercurial > core / lisp/lib/dat/parquet/parquet.lisp
changeset 543: |
b88bd4b0a039 |
parent: |
10c4bb778030
|
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Sat, 13 Jul 2024 00:03:13 -0400 |
permissions: |
-rw-r--r-- |
description: |
tweaks |
1 ;;; parquet.lisp --- Apache Parquet 3 ;; Common Lisp implementation of Apache Parquet 8 https://github.com/apache/parquet-format 9 https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift 10 https://github.com/apache/parquet-testing 11 https://github.com/apache/parquet-java 12 https://github.com/apache/arrow-rs 14 https://thrift.apache.org/docs/types 18 4-byte magic number "PAR1" 33 4-byte length in bytes of file metadata (little endian) 34 4-byte magic number "PAR1" 37 ;; In this package we're being as lazy as possible. To generate our own 38 ;; encoder/decoder methods we depend on the file parquet.thrift in the 39 ;; parquet-format repo above. The core skelfile includes a script to download 40 ;; it and convert it to parquet.json (requires the thrift cli tool). We then 41 ;; decode it with DAT/JSON and generate lisp classes, and types. 45 (in-package :dat/parquet) 47 (dat/parquet/gen::load-parquet)) 49 (defgeneric parquet-read (value &optional stream)) 50 (defgeneric parquet-write (value &optional stream)) 53 (define-bitfield parquet-compression-codec 54 (uncompressed boolean) 64 (define-constant +parquet-magic-number+ "PAR1" :test 'equal) 66 (defconstant +default-parquet-page-size+ (* 8 1024)) ;; 8kb 67 (defconstant +default-parquet-row-group-size (expt 1024 3)) ;; 1gb 69 (defvar *parquet-creator* "parquet-cl version 0.1.0") 71 (defun parquet-write-magic (stream) 72 (write-string +parquet-magic-number+ stream)) 74 (defun parquet-read-magic (stream) 75 (assert (char= #.(char +parquet-magic-number+ 0) (read-char stream))) 76 (assert (char= #.(char +parquet-magic-number+ 1) (read-char stream))) 77 (assert (char= #.(char +parquet-magic-number+ 2) (read-char stream))) 78 (assert (char= #.(char +parquet-magic-number+ 3) (read-char stream))) 81 (defmethod parquet-write ((value (eql t)) &optional stream) 82 "Encode a parquet boolean true value." 
83 (declare (ignore value)) 84 (write-byte 1 stream)) 86 (defmethod parquet-write ((value (eql nil)) &optional stream) 87 "Encode a parquet boolean false value." 88 (declare (ignore value)) 89 (write-byte 0 stream)) 91 (defmethod parquet-write ((value string) &optional stream)) 94 (defun parquet-encode (value &optional stream) 95 "Encode a Lisp value and write it to a parquet stream." 96 (parquet-write value stream)) 98 (defun parquet-decode (string &key (start 0) end) 99 "Convert a PARQUET string into a Lisp object." 100 (with-input-from-string (stream string :start start :end end) 101 (values (parquet-read stream) 102 (file-position stream))))