changelog shortlog graph tags branches changeset files file revisions raw help

Mercurial > core / annotate lisp/lib/dat/parquet/gen.lisp

changeset 640: 642b3b82b20d
parent: b88bf15f60d0
author: Richard Westhaver <ellis@rwest.io>
date: Sun, 08 Sep 2024 17:35:03 -0400
permissions: -rw-r--r--
description: thrift fixes, org-get-with-inheritance init
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1
 ;;; gen.lisp --- Parquet Lisp Code Generator
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2
 
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3
 ;; 
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4
 
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5
 ;;; Code:
544
ec1d4d544c36 parquet expansion, init leb128, add little-endian octet encoders
Richard Westhaver <ellis@rwest.io>
parents: 543
diff changeset
6
 (defpackage :dat/parquet/gen ;; not public API
ec1d4d544c36 parquet expansion, init leb128, add little-endian octet encoders
Richard Westhaver <ellis@rwest.io>
parents: 543
diff changeset
7
   (:use :cl :std :dat/proto :dat/json)
ec1d4d544c36 parquet expansion, init leb128, add little-endian octet encoders
Richard Westhaver <ellis@rwest.io>
parents: 543
diff changeset
8
   (:export :load-parquet))
ec1d4d544c36 parquet expansion, init leb128, add little-endian octet encoders
Richard Westhaver <ellis@rwest.io>
parents: 543
diff changeset
9
 
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
10
 (in-package :dat/parquet/gen)
635
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
11
 
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
12
 (defparameter *parquet-json-file*
635
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
13
   (or (probe-file #.(asdf:system-relative-pathname :prelude #P"../.stash/parquet.json"))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
14
       (warn "*PARQUET-JSON-FILE* not found")))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
15
 
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
16
 (defparameter *parquet-output-file*
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
17
   #.(asdf:system-relative-pathname :dat #P"parquet/thrift.lisp"))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
18
 
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
19
 ;; Parsed JSON document; NIL until INIT-PARQUET-JSON stores the result of
 ;; JSON-READ here. The enum/struct/namespace readers in this file look up
 ;; their top-level keys in this value.
 (defvar *parquet-json* nil)
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
20
 
640
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
21
 (eval-always
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
22
   (defun %parquet-json-enums ()
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
23
     (json-getf *parquet-json* "enums"))
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
24
 
640
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
25
   (defun dat/parquet::parquet-json-enum-getf (name)
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
26
     (json-getf
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
27
      (find-if (lambda (x) (equal name (json-getf x "name"))) (%parquet-json-enums))
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
28
      "members"))
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
29
 
640
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
30
   (defun dat/parquet::snakecase-name-to-lisp-name (string)
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
31
     (string-upcase
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
32
      (substitute #\- #\_ string)))
637
b88bf15f60d0 parquet tweaks, import ox-man
Richard Westhaver <ellis@rwest.io>
parents: 635
diff changeset
33
 
640
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
34
   (defun dat/parquet::camelcase-name-to-lisp-name (string)
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
35
     (string-upcase
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
36
      (with-output-to-string (name)
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
37
        (loop for i from 0 below (length string)
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
38
              for c across string
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
39
              when (and (upper-case-p c) (not (zerop i)))
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
40
                do (write-char #\- name)
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
41
              do (write-char c name))))))
637
b88bf15f60d0 parquet tweaks, import ox-man
Richard Westhaver <ellis@rwest.io>
parents: 635
diff changeset
42
 
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
43
 ;; Enum definitions from the loaded JSON; INIT-PARQUET-JSON sets this to the
 ;; result of %PARQUET-JSON-ENUMS. NIL until then.
 (defvar *parquet-enums* nil)
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
44
 
635
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
45
 (defmacro define-parquet-enum (sym name)
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
46
   `(progn
640
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
47
      (defvar ,(symbolicate "*PARQUET-JSON-" sym "*")
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
48
        ',(mapcar (lambda (x) (keywordicate (dat/parquet::snakecase-name-to-lisp-name (json-getf x "name"))))
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
49
                  (dat/parquet::parquet-json-enum-getf name)))))
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
50
 
541
10c4bb778030 bit of parquet refactoring, properly generate slot types
Richard Westhaver <ellis@rwest.io>
parents: 540
diff changeset
51
 (labels ((parse-type-id (type-id)
550
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
52
            (when type-id
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
53
              (string-case (type-id :default nil)
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
54
                ("bool" 'boolean)
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
55
                ("byte" 'signed-byte)
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
56
                ("i16" '(signed-byte 16))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
57
                ("i32" '(signed-byte 32))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
58
                ("i64" '(signed-byte 64))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
59
                ("double" 'double-float)
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
60
                ("string" 'string)
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
61
                ("list" 'vector)
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
62
                ("binary" 'octet-vector)
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
63
                ("set" 'vector)
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
64
                ("enum" '(signed-byte 32))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
65
                ("union" 'union)
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
66
                ("struct" 'struct))))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
67
          (%intern (name)
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
68
            (if (stringp name)
637
b88bf15f60d0 parquet tweaks, import ox-man
Richard Westhaver <ellis@rwest.io>
parents: 635
diff changeset
69
                (symbolicate
550
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
70
                 (cond 
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
71
                   ((equal name "UUIDType") "PARQUET-UUID-TYPE")
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
72
                   (t (concatenate 'string
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
73
                                   "PARQUET-"
640
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
74
                                   (dat/parquet::camelcase-name-to-lisp-name name)))))
550
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
75
                name))
541
10c4bb778030 bit of parquet refactoring, properly generate slot types
Richard Westhaver <ellis@rwest.io>
parents: 540
diff changeset
76
          (parse-type (o)
550
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
77
            (when o
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
78
              (string-case ((json-getf o "typeId"))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
79
                ("union" (%intern (json-getf o "class")))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
80
                ("list"
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
81
                 (if-let ((elt (json-getf o "elemType" nil)))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
82
                   (%intern (parse-type elt))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
83
                   (parse-type-id (json-getf o "elemTypeId"))))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
84
                ("set"
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
85
                 (if-let ((elt (json-getf o "elemType" nil)))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
86
                   (%intern (parse-type elt))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
87
                   (parse-type-id (json-getf o "elemTypeId"))))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
88
                ("struct" (%intern (json-getf o "class")))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
89
                ("enum" (%intern (json-getf o "class")))))))
541
10c4bb778030 bit of parquet refactoring, properly generate slot types
Richard Westhaver <ellis@rwest.io>
parents: 540
diff changeset
90
   (defun convert-parquet-struct-field-type (field) ;; technically part of thrift type system
550
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
91
     (let* ((type-id (parse-type-id (parquet-struct-field-type-id field)))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
92
            (type (parse-type (parquet-struct-field-type field)))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
93
            (required (parquet-struct-field-required field)))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
94
           (let ((ret (cond
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
95
                        ((eql 'vector type-id) `(vector ,type))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
96
                        (t (or type type-id)))))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
97
             (if (equal "optional" required)
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
98
                 `(or null ,ret)
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
99
                 ret)))))
541
10c4bb778030 bit of parquet refactoring, properly generate slot types
Richard Westhaver <ellis@rwest.io>
parents: 540
diff changeset
100
 
635
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
101
 ;; List of PARQUET-STRUCT objects; INIT-PARQUET-JSON sets it to the result of
 ;; PARQUET-JSON-STRUCTS and %DEFINE-PARQUET-STRUCTS iterates over it. Being a
 ;; DEFPARAMETER, it is reset to NIL whenever this form is re-evaluated.
 (defparameter *parquet-structs* nil)
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
102
 
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
103
 (defstruct (parquet-struct
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
104
             (:constructor make-parquet-struct (name doc exceptionp unionp fields)))
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
105
   name doc exceptionp unionp (fields nil :type list))
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
106
 
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
107
 (defstruct (parquet-struct-field
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
108
             (:constructor make-parquet-struct-field (key name type-id type doc required)))
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
109
   key name type-id type doc required)
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
110
 
541
10c4bb778030 bit of parquet refactoring, properly generate slot types
Richard Westhaver <ellis@rwest.io>
parents: 540
diff changeset
111
 (defun parquet-json-structs () ;; name doc isException isUnion fields
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
112
   (mapcar
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
113
    (lambda (s)
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
114
      (let ((name (json-getf s "name"))
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
115
            (doc (json-getf s "doc"))
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
116
            (exceptionp (json-getf s "isException"))
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
117
            (unionp (json-getf s "isUnion"))
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
118
            (fields (loop for f in (json-getf s "fields")
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
119
                          collect
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
120
                             (let ((key (json-getf f "key"))
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
121
                                   (name (json-getf f "name"))
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
122
                                   (type-id (json-getf f "typeId"))
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
123
                                   ;; json object - needs additional parsing
541
10c4bb778030 bit of parquet refactoring, properly generate slot types
Richard Westhaver <ellis@rwest.io>
parents: 540
diff changeset
124
                                   (type (json-getf f "type"))
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
125
                                   (doc (json-getf f "doc"))
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
126
                                   (required (json-getf f "required")))
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
127
                               (make-parquet-struct-field key name type-id type doc required)))))
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
128
        (make-parquet-struct name doc exceptionp unionp fields)))
541
10c4bb778030 bit of parquet refactoring, properly generate slot types
Richard Westhaver <ellis@rwest.io>
parents: 540
diff changeset
129
    (json-getf *parquet-json* "structs")))
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
130
 
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
131
 (defun parquet-json-namespaces ()
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
132
   (json-getf *parquet-json* "namespaces"))
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
133
 
635
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
134
 (eval-always
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
135
   (defun init-parquet-json (&optional (file *parquet-json-file*))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
136
     (with-open-file (file file)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
137
       (setq *parquet-json* (json-read file)))
637
b88bf15f60d0 parquet tweaks, import ox-man
Richard Westhaver <ellis@rwest.io>
parents: 635
diff changeset
138
     (setq *parquet-enums* (%parquet-json-enums))
635
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
139
     (setq *parquet-structs* (parquet-json-structs))))
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
140
 
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
141
 ;;; CLOS
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
142
 
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
143
 ;; (defmethod print-object ((obj parquet-object) stream)
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
144
 ;;   "Output a Parquet object to a stream."
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
145
 ;;   (print-unreadable-object (obj stream :type t)))
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
146
 
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
147
 (defmacro define-parquet-class (name superclasses slots &rest options)
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
148
   "Define a new subclass of PARQUET-OBJECT with NAME."
550
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
149
   `(defclass ,name ,@(if-let ((s superclasses)) (list s) `((dat/parquet::parquet-object))) ,slots ,@options))
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
150
 
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
151
 ;;; Codegen
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
152
 
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
153
 ;; 8)
541
10c4bb778030 bit of parquet refactoring, properly generate slot types
Richard Westhaver <ellis@rwest.io>
parents: 540
diff changeset
154
 (eval-always
10c4bb778030 bit of parquet refactoring, properly generate slot types
Richard Westhaver <ellis@rwest.io>
parents: 540
diff changeset
155
   (defun %define-parquet-structs ()
10c4bb778030 bit of parquet refactoring, properly generate slot types
Richard Westhaver <ellis@rwest.io>
parents: 540
diff changeset
156
     "Define all known values in *PARQUET-STRUCTS* using DEFINE-PARQUET-CLASS (DEFCLASS)."
10c4bb778030 bit of parquet refactoring, properly generate slot types
Richard Westhaver <ellis@rwest.io>
parents: 540
diff changeset
157
     (loop for struct in *parquet-structs*
10c4bb778030 bit of parquet refactoring, properly generate slot types
Richard Westhaver <ellis@rwest.io>
parents: 540
diff changeset
158
           unless (null struct)
550
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
159
           collect (let* ((name (parquet-struct-name struct))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
160
                          (doc (parquet-struct-doc struct))
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
161
                          (fields (parquet-struct-fields struct))
637
b88bf15f60d0 parquet tweaks, import ox-man
Richard Westhaver <ellis@rwest.io>
parents: 635
diff changeset
162
                          (class-name (symbolicate (cond
550
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
163
                                                ((equal name "UUIDType") "PARQUET-UUID-TYPE")
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
164
                                                (t (concatenate 'string
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
165
                                                                "PARQUET-"
640
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
166
                                                                (dat/parquet::camelcase-name-to-lisp-name name)))))))
550
4d34907c69eb more work on tcompact/thrift, fixed type info in parquet-struct-objects
Richard Westhaver <ellis@rwest.io>
parents: 549
diff changeset
167
                     `(progn
635
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
168
                        (defclass ,class-name (dat/parquet::parquet-object)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
169
                                  (,@(mapcar (lambda (f)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
170
                                               (let ((fdoc (parquet-struct-field-doc f))
640
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
171
                                                     (fname (dat/parquet::snakecase-name-to-lisp-name
635
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
172
                                                             (parquet-struct-field-name f))))
637
b88bf15f60d0 parquet tweaks, import ox-man
Richard Westhaver <ellis@rwest.io>
parents: 635
diff changeset
173
                                                 `(,(symbolicate fname)
635
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
174
                                                   ,@(when fdoc `(:documentation ,fdoc))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
175
                                                   :initarg ,(keywordicate fname)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
176
                                                   ;; TODO 2024-07-12: 
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
177
                                                   ,@(when (equal "optional" (parquet-struct-field-required f))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
178
                                                       `(:initform nil))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
179
                                                   ,@(when-let ((ty (convert-parquet-struct-field-type f)))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
180
                                                       `(:type ,ty)))))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
181
                                             fields))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
182
                                  ,@(when doc `((:documentation ,doc)))))))))
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
183
 
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
184
 (defmacro define-parquet-type (name opts &body body)
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
185
   "Define a parquet type with DEFTYPE which maps to LISP-TYPE."
640
642b3b82b20d thrift fixes, org-get-with-inheritance init
Richard Westhaver <ellis@rwest.io>
parents: 637
diff changeset
186
   `(progn (deftype ,(symbolicate "PARQUET-" (substitute #\- #\_ name)) ,opts ,@body)))
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
187
 
635
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
188
 (defun parse-parquet-thrift-definitions (&key (input *parquet-json-file*)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
189
                                            (output #.(asdf:system-relative-pathname :dat "parquet/thrift.lisp")))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
190
   (init-parquet-json input)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
191
   (with-open-file (defs output :direction :output :if-exists :supersede :if-does-not-exist :create)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
192
     (format defs ";;; ~a --- Parquet Thrift Definitions -*- buffer-read-only:t -*-
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
193
 
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
194
 ;; input = ~a
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
195
 
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
196
 ;; This file was generated automatically by
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
197
 ;; DAT/PARQUET/GEN:PARSE-PARQUET-THRIFT-DEFINITIONS
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
198
 
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
199
 ;; Do not modify.
540
bd49b7e2c623 more parquettiquette
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
200
 
635
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
201
 ;;; Code:
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
202
 (in-package :dat/parquet)" output input)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
203
     (format defs "~2%")
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
204
     (let ((enums   '((define-parquet-enum types "Type")
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
205
                      (define-parquet-enum converted-types "ConvertedType")
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
206
                      (define-parquet-enum field-repetition-types "FieldRepetitionType")
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
207
                      (define-parquet-enum encodings "Encoding")
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
208
                      (define-parquet-enum compression-codecs "CompressionCodec")
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
209
                      (define-parquet-enum page-types "PageType")
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
210
                      (define-parquet-enum boundary-orders "BoundaryOrder")))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
211
           (types '((define-parquet-type "BOOLEAN" () 'boolean)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
212
                    (define-parquet-type "INT32" () '(signed-byte 32))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
213
                    (define-parquet-type "INT64" () '(signed-byte 64))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
214
                    (define-parquet-type "INT96" () '(signed-byte 96))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
215
                    (define-parquet-type "FLOAT" () 'float)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
216
                    (define-parquet-type "DOUBLE" () 'double-float)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
217
                    (define-parquet-type "BYTE_ARRAY" (&optional size) `(octet-vector ,size))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
218
                    (define-parquet-type "FIXED_LEN_BYTE_ARRAY" (size) `(octet-vector ,size))))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
219
           (structs (mapcar #'macroexpand-1 (%define-parquet-structs))))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
220
       ;; expands to a progn, so we just take the cdr
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
221
       (dolist (en enums)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
222
         (dolist (f (cdr (macroexpand en)))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
223
           (write f :stream defs :case :downcase :readably t)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
224
           (terpri defs)))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
225
       (dolist (ty types)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
226
         (dolist (f (cdr (macroexpand ty)))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
227
           (write f :stream defs :case :downcase :readably t)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
228
           (terpri defs)))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
229
       (dolist (st structs)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
230
         (dolist (f (cdr (macroexpand st)))
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
231
           (write f :stream defs :case :downcase :readably t)
849f72b72b41 add back fuzz.lisp and proper codegen for parquet.json thrift definitions
Richard Westhaver <ellis@rwest.io>
parents: 550
diff changeset
232
           (terpri defs))))))