changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/bench/tpc-h.lisp

changeset 698: 96958d3eb5b0
parent: 38e9c3be2392
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
1 ;;; tpc-h.lisp --- TPC-H Benchmark Suite
2 
3 ;; This package contains an implementation of the TPC-H benchmark.
4 
5 ;;; Commentary:
6 
7 ;; ref: https://www.tpc.org/tpc_documents_current_versions/pdf/tpc-h_v2.17.1.pdf
8 
9 ;; The TPC-H dbgen source is out there somewhere. It generates ASCII
10 ;; pipe-delimited output and it seems pretty common to roll your own
11 ;; implementation. For full compliance we are supposed to generate output that
12 ;; is EXACTLY the same as the output as the original tool, but in practice we
13 ;; may skip this and ingest data directly into the database. We'll see. For
14 ;; now we aspire to generate ASCII.
15 
16 ;;; Code:
17 (defpackage :core/bench/tpc-h
18  (:nicknames :bench/tpc-h :tpc-h)
19  (:import-from :obj/query :make-field)
20  (:import-from :obj/time :date)
21  (:use :cl :std :rt :rt/bench :rt/cover :log :sql :parse/pratt :dat/csv :dat/proto :obj/query))
22 
23 (in-package :core/bench/tpc-h)
24 (defsuite :tpc-h)
25 (in-suite :tpc-h)
26 (eval-always
27  (declaim (pathname *tpc-h-data-directory*))
28  (defvar *tpc-h-data-directory* (ensure-directories-exist #p"/tmp/tpc-h/")))
29 
30 (defclass tpc-h-schema (schema) ())
31 
32 (defgeneric apply-schema (self object)
33  (:method ((self tpc-h-schema) (object t))
34  (let ((flen (length (fields self)))
35  (olen (length object)))
36  (unless (= flen olen)
37  (error 'invalid-argument :reason "Field count doesn't match length of object" :item object)))))
38 
39 (defgeneric gen-table (self count))
40 
41 (defun random-id32 () (octets-to-integer (random-bytes 4)))
42 (defun random-id64 () (octets-to-integer (random-bytes 8)))
43 (defun random-string (&optional (n 25)) (random-chars n))
44 (defun random-date () (obj/time:today))
45 
46 (defun random-double () ;; [0,10000)
47  (coerce (* (random 100.0) 100) 'double-float))
48 
49 (defun make-random-value (type)
50  (cond
51  ((equal '(unsigned-byte 32) type) (random-id32))
52  ((equal '(unsigned-byte 64) type) (random-id64))
53  ((eql 'double-float type) (random-double))
54  ((eql 'date type) (random-date))
55  ((eql 'character type) (random-char))
56  ((and (consp type) (eql (car type) 'string)) (random-string (cdr type)))
57  (t (error 'invalid-argument :reason "Invalid TPC-H type designator" :item type))))
58 
59 (eval-always
60  (defun parse-tpc-h-fields (fields)
61  (let ((ret) (keys) (val-forms))
62  (sb-int:doplist (k v) fields
63  (push (make-field :name (string-downcase (symbol-name k)) :type v)
64  ret)
65  (push (symbolicate k) keys)
66  (push k val-forms)
67  (push `(make-random-value ,v) val-forms))
68  (values (coerce (nreverse ret) '(vector field)) (nreverse keys) (nreverse val-forms)))))
69 
70 (defmacro def-table (name &rest fields)
71  "Define a new TPC-H table."
72  (with-gensyms (data)
73  (let ((path (merge-pathnames
74  (make-pathname :name (string-downcase (symbol-name name))
75  :type "tbl"
76  :directory nil)
77  *tpc-h-data-directory*))
78  (schema-class (symbolicate name '-table-schema))
79  (write-tbl-fn (symbolicate 'write- name '-table))
80  (write-row-fn (symbolicate 'write- name '-row))
81  (make-batch-fn (symbolicate 'make- name '-table-batch))
82  (path-var (symbolicate '* name '-table-path*))
83  (read-tbl-fn (symbolicate 'read- name '-table)))
84  (multiple-value-bind (field-vec keys val-forms) (parse-tpc-h-fields fields)
85  `(progn
86  (defclass ,schema-class (tpc-h-schema) ()
87  (:default-initargs
88  :fields ,field-vec))
89  (defmethod apply-tpc-h-schema ((self ,schema-class) (object t))
90  (let ((flen (length (fields self)))
91  (olen (length object)))
92  (unless (= flen olen)
93  (error 'invalid-argument :reason "Field count doesn't match length of object" :item object))))
94  (defmethod gen-table ((self (eql ,(keywordicate name))) (count fixnum))
95  (declare (ignore self))
96  (loop for i below count
97  do (,write-row-fn
98  ,@val-forms)))
99  (defun ,write-tbl-fn (,data)
100  (apply-tpc-h-schema (make-instance ',schema-class) ,data)
101  (write-csv-file ,path ,data
102  :delimiter #\|))
103  (defun ,write-row-fn (&key ,@keys)
104  (let ((,data (vector ,@keys)))
105  (apply-tpc-h-schema (make-instance ',schema-class) ,data)
106  (with-open-file (file ,path :direction :output :if-exists :append :if-does-not-exist :create)
107  (write-csv-stream file (vector ,data) :delimiter #\|))))
108  (defun ,make-batch-fn (,data)
109  (let ((schema (make-instance ',schema-class)))
110  (apply-tpc-h-schema schema ,data)
111  (make-record-batch :schema schema :fields ,data)))
112  (defparameter ,path-var ,path)
113  (defun ,read-tbl-fn ()
114  (read-csv-file ,path :delimiter #\| :header nil)))))))
115 
116 ;; nation
117 (def-table nation
118  :nationkey '(unsigned-byte 32)
119  :name '(string 25)
120  :regionkey '(unsigned-byte 32)
121  :comment '(string 152))
122 
123 ;; region
124 (def-table region
125  :regionkey '(unsigned-byte 32)
126  :name '(string 25)
127  :comment '(string 152))
128 
129 ;; part
130 (def-table part
131  :partkey '(unsigned-byte 64)
132  :name '(string 55)
133  :mfgr '(string 25)
134  :brand '(string 10)
135  :type '(string 25)
136  :size '(unsigned-byte 32)
137  :container '(string 10)
138  :retailprice 'double-float
139  :comment '(string 23))
140 
141 ;; supplier
142 (def-table supplier
143  :suppkey '(unsigned-byte 64)
144  :name '(string 25)
145  :address '(string 40)
146  :nationkey '(unsigned-byte 32)
147  :phone '(string 15)
148  :acctbal 'double-float
149  :comment '(string 101))
150 
151 ;; partsupp
152 (def-table partsupp
153  :partkey '(unsigned-byte 64)
154  :suppkey '(unsigned-byte 64)
155  :availqty '(unsigned-byte 64)
156  :supplycost 'double-float
157  :comment '(string 199))
158 
159 ;; customer
160 (def-table customer
161  :custkey '(unsigned-byte 64)
162  :name '(string 25)
163  :address '(string 40)
164  :nationkey '(unsigned-byte 32)
165  :phone '(string 15)
166  :acctbal 'double-float
167  :mktsegment '(string 10)
168  :comment '(string 117))
169 
170 ;; orders
171 (def-table orders
172  :orderkey '(unsigned-byte 64)
173  :custkey '(unsigned-byte 64)
174  :orderstatus 'character
175  :totalprice 'double-float
176  :orderdate 'date
177  :orderpriority '(string 15)
178  :clerk '(string 15)
179  :shippriority '(unsigned-byte 32)
180  :comment '(string 79))
181 
182 ;; lineitem
183 (def-table lineitem
184  :orderkey '(unsigned-byte 64)
185  :partkey '(unsigned-byte 64)
186  :suppkey '(unsigned-byte 64)
187  :linenumber '(unsigned-byte 64)
188  :quantity 'double-float
189  :extendedprice 'double-float
190  :discount 'double-float
191  :tax 'double-float
192  :returnflag 'character
193  :linestatus 'character
194  :shipdate 'date
195  :receiptdate 'date
196  :shipinstruct '(string 25)
197  :shipmode '(string 10)
198  :comment '(string 44))
199 
200 (defconstant +tpc-h-region-count+ 5)
201 (defconstant +tpc-h-nation-count+ 25)
202 
203 (defun dbgen-thread ()
204  (lambda (x y)
205  (gen-table x y)
206  (std/thread:print-top-level (format nil "finished: ~A~%" x))))
207 
208 (defun dbgen (&optional (scale-factor 1)) ;; ~= 2.4G, 200s
209  "Generate the TPC-H database in standardized format (|-delim ASCII). Files are
210 written with a .tbl extension to *TPC-H-DATA-DIRECTORY*."
211  (let ((region-count +tpc-h-region-count+)
212  (nation-count +tpc-h-nation-count+)
213  (part-count (* scale-factor 200000))
214  (supplier-count (* scale-factor 10000))
215  (partsupp-count (* scale-factor 800000))
216  (customer-count (* scale-factor 150000))
217  (lineitem-count (* scale-factor 6000000))
218  (order-count (* scale-factor 1500000)))
219  (info! "Generating new TPC-H database:" *tpc-h-data-directory*)
220  (debug! (format nil "scale-factor=~A~%" scale-factor))
221  (assert
222  (wait-for-threads
223  (loop for args in `((:region ,region-count)
224  (:nation ,nation-count)
225  (:part ,part-count)
226  (:supplier ,supplier-count)
227  (:partsupp ,partsupp-count)
228  (:customer ,customer-count)
229  (:lineitem ,lineitem-count)
230  (:orders ,order-count))
231  collect (make-thread (dbgen-thread) :name (string-downcase (symbol-name (car args)))
232  :arguments args))))))
233 
234 
235 ;; (length (read-orders-table))
236 ;; (make-region-table-batch #(1 2 3))
237 ;; (write-region-row :regionkey 0 :name "USA" :comment "OORAH")
238 ;; (gen-table :orders 100000)
239 
240 ;; (deftest dbgen (:profile t :bench t #+nil :args #+nil (&optional (scale 1))) (dbgen))