Mercurial > core / lisp/bench/tpc-h.lisp
changeset 698: |
96958d3eb5b0 |
parent: |
38e9c3be2392
|
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Fri, 04 Oct 2024 22:04:59 -0400 |
permissions: |
-rw-r--r-- |
description: |
fixes |
1 ;;; tpc-h.lisp --- TPC-H Benchmark Suite 3 ;; This package contains an implementation of the TPC-H benchmark. 7 ;; ref: https://www.tpc.org/tpc_documents_current_versions/pdf/tpc-h_v2.17.1.pdf 9 ;; The TPC-H dbgen source is out there somewhere. It generates ASCII 10 ;; pipe-delimited output and it seems pretty common to roll your own 11 ;; implementation. For full compliance we are supposed to generate output that 12 ;; is EXACTLY the same as the output as the original tool, but in practice we 13 ;; may skip this and ingest data directly into the database. We'll see. For 14 ;; now we aspire to generate ASCII. 17 (defpackage :core/bench/tpc-h 18 (:nicknames :bench/tpc-h :tpc-h) 19 (:import-from :obj/query :make-field) 20 (:import-from :obj/time :date) 21 (:use :cl :std :rt :rt/bench :rt/cover :log :sql :parse/pratt :dat/csv :dat/proto :obj/query)) 23 (in-package :core/bench/tpc-h) 27 (declaim (pathname *tpc-h-data-directory*)) 28 (defvar *tpc-h-data-directory* (ensure-directories-exist #p"/tmp/tpc-h/"))) 30 (defclass tpc-h-schema (schema) ()) 32 (defgeneric apply-schema (self object) 33 (:method ((self tpc-h-schema) (object t)) 34 (let ((flen (length (fields self))) 35 (olen (length object))) 37 (error 'invalid-argument :reason "Field count doesn't match length of object" :item object))))) 39 (defgeneric gen-table (self count)) 41 (defun random-id32 () (octets-to-integer (random-bytes 4))) 42 (defun random-id64 () (octets-to-integer (random-bytes 8))) 43 (defun random-string (&optional (n 25)) (random-chars n)) 44 (defun random-date () (obj/time:today)) 46 (defun random-double () ;; [0,10000) 47 (coerce (* (random 100.0) 100) 'double-float)) 49 (defun make-random-value (type) 51 ((equal '(unsigned-byte 32) type) (random-id32)) 52 ((equal '(unsigned-byte 64) type) (random-id64)) 53 ((eql 'double-float type) (random-double)) 54 ((eql 'date type) (random-date)) 55 ((eql 'character type) (random-char)) 56 ((and (consp type) (eql (car type) 'string)) (random-string (cdr type))) 57 (t (error 'invalid-argument :reason "Invalid TPC-H type designator" :item type)))) 60 (defun parse-tpc-h-fields (fields) 61 (let ((ret) (keys) (val-forms)) 62 (sb-int:doplist (k v) fields 63 (push (make-field :name (string-downcase (symbol-name k)) :type v) 65 (push (symbolicate k) keys) 67 (push `(make-random-value ,v) val-forms)) 68 (values (coerce (nreverse ret) '(vector field)) (nreverse keys) (nreverse val-forms))))) 70 (defmacro def-table (name &rest fields) 71 "Define a new TPC-H table." 73 (let ((path (merge-pathnames 74 (make-pathname :name (string-downcase (symbol-name name)) 77 *tpc-h-data-directory*)) 78 (schema-class (symbolicate name '-table-schema)) 79 (write-tbl-fn (symbolicate 'write- name '-table)) 80 (write-row-fn (symbolicate 'write- name '-row)) 81 (make-batch-fn (symbolicate 'make- name '-table-batch)) 82 (path-var (symbolicate '* name '-table-path*)) 83 (read-tbl-fn (symbolicate 'read- name '-table))) 84 (multiple-value-bind (field-vec keys val-forms) (parse-tpc-h-fields fields) 86 (defclass ,schema-class (tpc-h-schema) () 89 (defmethod apply-tpc-h-schema ((self ,schema-class) (object t)) 90 (let ((flen (length (fields self))) 91 (olen (length object))) 93 (error 'invalid-argument :reason "Field count doesn't match length of object" :item object)))) 94 (defmethod gen-table ((self (eql ,(keywordicate name))) (count fixnum)) 95 (declare (ignore self)) 96 (loop for i below count 99 (defun ,write-tbl-fn (,data) 100 (apply-tpc-h-schema (make-instance ',schema-class) ,data) 101 (write-csv-file ,path ,data 103 (defun ,write-row-fn (&key ,@keys) 104 (let ((,data (vector ,@keys))) 105 (apply-tpc-h-schema (make-instance ',schema-class) ,data) 106 (with-open-file (file ,path :direction :output :if-exists :append :if-does-not-exist :create) 107 (write-csv-stream file (vector ,data) :delimiter #\|)))) 108 (defun ,make-batch-fn (,data) 109 (let ((schema (make-instance ',schema-class))) 110 (apply-tpc-h-schema schema ,data) 111 (make-record-batch :schema schema :fields ,data))) 112 (defparameter ,path-var ,path) 113 (defun ,read-tbl-fn () 114 (read-csv-file ,path :delimiter #\| :header nil))))))) 118 :nationkey '(unsigned-byte 32) 120 :regionkey '(unsigned-byte 32) 121 :comment '(string 152)) 125 :regionkey '(unsigned-byte 32) 127 :comment '(string 152)) 131 :partkey '(unsigned-byte 64) 136 :size '(unsigned-byte 32) 137 :container '(string 10) 138 :retailprice 'double-float 139 :comment '(string 23)) 143 :suppkey '(unsigned-byte 64) 145 :address '(string 40) 146 :nationkey '(unsigned-byte 32) 148 :acctbal 'double-float 149 :comment '(string 101)) 153 :partkey '(unsigned-byte 64) 154 :suppkey '(unsigned-byte 64) 155 :availqty '(unsigned-byte 64) 156 :supplycost 'double-float 157 :comment '(string 199)) 161 :custkey '(unsigned-byte 64) 163 :address '(string 40) 164 :nationkey '(unsigned-byte 32) 166 :acctbal 'double-float 167 :mktsegment '(string 10) 168 :comment '(string 117)) 172 :orderkey '(unsigned-byte 64) 173 :custkey '(unsigned-byte 64) 174 :orderstatus 'character 175 :totalprice 'double-float 177 :orderpriority '(string 15) 179 :shippriority '(unsigned-byte 32) 180 :comment '(string 79)) 184 :orderkey '(unsigned-byte 64) 185 :partkey '(unsigned-byte 64) 186 :suppkey '(unsigned-byte 64) 187 :linenumber '(unsigned-byte 64) 188 :quantity 'double-float 189 :extendedprice 'double-float 190 :discount 'double-float 192 :returnflag 'character 193 :linestatus 'character 196 :shipinstruct '(string 25) 197 :shipmode '(string 10) 198 :comment '(string 44)) 200 (defconstant +tpc-h-region-count+ 5) 201 (defconstant +tpc-h-nation-count+ 25) 203 (defun dbgen-thread () 206 (std/thread:print-top-level (format nil "finished: ~A~%" x)))) 208 (defun dbgen (&optional (scale-factor 1)) ;; ~= 2.4G, 200s 209 "Generate the TPC-H database in standardized format (|-delim ASCII). Files are 210 written with a .tbl extension to *TPC-H-DATA-DIRECTORY*." 211 (let ((region-count +tpc-h-region-count+) 212 (nation-count +tpc-h-nation-count+) 213 (part-count (* scale-factor 200000)) 214 (supplier-count (* scale-factor 10000)) 215 (partsupp-count (* scale-factor 800000)) 216 (customer-count (* scale-factor 150000)) 217 (lineitem-count (* scale-factor 6000000)) 218 (order-count (* scale-factor 1500000))) 219 (info! "Generating new TPC-H database:" *tpc-h-data-directory*) 220 (debug! (format nil "scale-factor=~A~%" scale-factor)) 223 (loop for args in `((:region ,region-count) 224 (:nation ,nation-count) 226 (:supplier ,supplier-count) 227 (:partsupp ,partsupp-count) 228 (:customer ,customer-count) 229 (:lineitem ,lineitem-count) 230 (:orders ,order-count)) 231 collect (make-thread (dbgen-thread) :name (string-downcase (symbol-name (car args))) 232 :arguments args)))))) 235 ;; (length (read-orders-table)) 236 ;; (make-region-table-batch #(1 2 3)) 237 ;; (write-region-row :regionkey 0 :name "USA" :comment "OORAH") 238 ;; (gen-table :orders 100000) 240 ;; (deftest dbgen (:profile t :bench t #+nil :args #+nil (&optional (scale 1))) (dbgen))