# HG changeset patch # User Richard Westhaver # Date 1723079383 14400 # Node ID 568c39371122b58d00f5b3b0f89af702acdb8219 # Parent d3e2829521a31c99371da66b7fd6ffd50370d01f sql updates, fuzzz diff -r d3e2829521a3 -r 568c39371122 lisp/lib/obj/pkg.lisp --- a/lisp/lib/obj/pkg.lisp Tue Aug 06 21:17:24 2024 -0400 +++ b/lisp/lib/obj/pkg.lisp Wed Aug 07 21:09:43 2024 -0400 @@ -461,7 +461,8 @@ :projection-pushdown-optimizer :extract-columns* :extract-columns - :query-vop)) + :query-vop + :expr)) (defpackage :obj/secret (:nicknames :secret) diff -r d3e2829521a3 -r 568c39371122 lisp/lib/obj/query.lisp --- a/lisp/lib/obj/query.lisp Tue Aug 06 21:17:24 2024 -0400 +++ b/lisp/lib/obj/query.lisp Wed Aug 07 21:09:43 2024 -0400 @@ -101,7 +101,10 @@ ;;; Record Batch (defstruct record-batch (schema (make-schema) :type schema) - (fields #() :type field-vector)) + (fields #() :type column-vector)) + +(defmethod schema ((self record-batch)) + (record-batch-schema self)) (defmethod make-load-form ((self record-batch) &optional env) (declare (ignore env)) @@ -328,7 +331,8 @@ (defgeneric aggregate-expression-p (self) (:method ((self aggregate-expression)) t) - (:method ((self alias-expression)) (aggregate-expression-p (expr self)))) + (:method ((self alias-expression)) (aggregate-expression-p (expr self))) + (:method ((self t)) nil)) (defmethod to-field ((self aggregate-expression) (input logical-plan)) (declare (ignorable input)) @@ -479,6 +483,7 @@ (plan (make-instance 'logical-plan) :type logical-plan)) (defgeneric df-col (self)) + (defgeneric df-project (df exprs) (:method ((df data-frame) (expr list)) (df-project df (coerce expr 'vector))) @@ -505,7 +510,9 @@ (:method ((df data-frame) (group-by list) (agg-expr list)) (df-aggregate df (coerce group-by 'vector) (coerce agg-expr 'vector)))) -(defgeneric make-df (&rest initargs &key &allow-other-keys)) +(defgeneric make-df (self &key &allow-other-keys) + (:method ((self null) &key) + (make-data-frame))) (defmethod schema ((df data-frame)) (schema (data-frame-plan df))) diff -r d3e2829521a3 -r 568c39371122 lisp/lib/q/sql.lisp --- a/lisp/lib/q/sql.lisp Tue Aug 06 21:17:24 2024 -0400 +++ b/lisp/lib/q/sql.lisp Wed Aug 07 21:09:43 2024 -0400 @@ -43,6 +43,8 @@ (defclass sql-data-source (data-source) () (:documentation "Data source which can be used within SQL expressions.")) +;; SQL-EXPRESSIONs are the output of a SQL-PARSER. These objects are further +;; lowered to LOGICAL-EXPRESSIONs. (defclass sql-expression () ()) (deftype sql-expression-vector () '(vector sql-expression)) @@ -667,8 +669,6 @@ ,@body)) ;;; Planner -(defclass sql-planner (query-planner) ()) - (defun make-sql-logical-expression (expr input) (etypecase expr (sql-identifier (make-instance 'column-expression :name (id expr))) @@ -800,9 +800,13 @@ aggregate-expr))))) (defun make-sql-data-frame (select tables) - (let* ((table (gethash (slot-value select 'table-name) - tables - (simple-sql-error "No table named ~A" (slot-value select 'table-name)))) + "Process the given SELECT statement with the provided hash-table of +string:data-frame. Returns a data-frame." + (let* ((table (or + (gethash (slot-value select 'table-name) + tables + ) + (simple-sql-error "No table named ~A" (slot-value select 'table-name)))) (proj (map 'vector (lambda (x) (make-sql-logical-expression x table)) (slot-value select 'projection))) @@ -818,7 +822,35 @@ (agg) (n-group-cols 0) (group-count 0)) - plan))))) + (declare (fixnum n-group-cols group-count)) + (loop for expr across proj + do (typecase expr + (aggregate-expression + (progn + (push (+ n-group-cols (length agg)) pro) + (push expr agg))) + (alias-expression + (progn + (push (make-instance 'alias-expression + :name (+ n-group-cols (length agg)) + :expr (slot-value expr 'alias)) + pro) + ;; TODO 2024-08-07: does this need to be cast to aggregate-expression? + (push (expr expr) agg))) + (t (progn + (push group-count pro) + (incf group-count))))) + (let ((plan + (df-project + (plan-aggregate-query proj select cols-in-sel plan agg) + pro))) + (if-let ((having (slot-value select 'having))) + (df-filter plan (make-sql-logical-expression having plan)) + plan))))))) + +(defmethod make-df ((self sql-select) &key tables &allow-other-keys) + (when tables + (make-sql-data-frame self tables))) ;;; Optimizer (defclass sql-optimizer (query-optimizer) ()) diff -r d3e2829521a3 -r 568c39371122 lisp/lib/q/tests/fuzz.lisp --- a/lisp/lib/q/tests/fuzz.lisp Tue Aug 06 21:17:24 2024 -0400 +++ b/lisp/lib/q/tests/fuzz.lisp Wed Aug 07 21:09:43 2024 -0400 @@ -5,8 +5,27 @@ ;;; Code: (in-package :q/tests/fuzz) -(defclass query-fuzzer (fuzzer) (data-source)) +(defvar *fuzz-value-max-size* 32) + +;; > schema, state, generator +(defclass query-fuzzer (fuzzer data-source) ()) + +(defun generate-sql-type (state &optional (type :string)) + (case type + (:integer (make-instance 'sql-number :value (random most-positive-fixnum))) + (:float (make-instance 'sql-number :value (random most-positive-single-float))) + (:double (make-instance 'sql-number :value (random most-positive-double-float))) + (:string (make-instance 'sql-string :value (rt:random-chars (random *fuzz-value-max-size* state)))))) -(defclass sql-fuzzer (query-fuzzer) ()) +(defun generate-dql-type (state &optional (type :string))) + +(defclass sql-fuzzer (query-fuzzer) () + (:default-initargs + :generator #'generate-sql-type)) -(defclass dql-fuzzer (query-fuzzer) ()) +(defmethod fuzz ((self sql-fuzzer) &key type) + (funcall (fuzz-generator self) (fuzz-state self) type)) + +(defclass dql-fuzzer (query-fuzzer) () + (:default-initargs + :generator #'generate-dql-type)) diff -r d3e2829521a3 -r 568c39371122 lisp/lib/q/tests/pkg.lisp --- a/lisp/lib/q/tests/pkg.lisp Tue Aug 06 21:17:24 2024 -0400 +++ b/lisp/lib/q/tests/pkg.lisp Wed Aug 07 21:09:43 2024 -0400 @@ -4,7 +4,7 @@ ;;; Code: (defpackage :q/tests/fuzz - (:use :cl :std :rt/fuzz :q :log)) + (:use :cl :std :rt/fuzz :q :log :obj/query)) (defpackage :q/tests (:use :cl :std :rt :q :log :parse/pratt :obj/query)) diff -r d3e2829521a3 -r 568c39371122 lisp/lib/q/tests/suite.lisp --- a/lisp/lib/q/tests/suite.lisp Tue Aug 06 21:17:24 2024 -0400 +++ b/lisp/lib/q/tests/suite.lisp Wed Aug 07 21:09:43 2024 -0400 @@ -16,18 +16,12 @@ :sources nil))) (deftest sql-select () - (with-sql (expr "SELECT * FROM FOO") + (setf (gethash "FOO" tbl) (make-df nil)) + (with-sql (expr "SELECT I FROM FOO") (is (typep expr 'sql-select)) - (signals simple-sql-error (make-sql-data-frame expr tbl)))) - -(deftest sql-df () - (let ((tbl (make-hash-table :test 'equal)) - (df (make-instance 'data-frame))) - (setf (schema df) (make-schema)) - (is (setf (gethash "FOO" tbl) df)) - (is (gethash "FOO" tbl)) - ;; (is (make-sql-data-frame df tbl)) - )) + (let ((tbl (make-hash-table :test 'equalp))) + (is (gethash "FOO" tbl)) + (make-sql-data-frame expr tbl)))) (deftest sql-math () (with-sql (expr "1 + 2 * 3") diff -r d3e2829521a3 -r 568c39371122 lisp/lib/rt/fuzz.lisp --- a/lisp/lib/rt/fuzz.lisp Tue Aug 06 21:17:24 2024 -0400 +++ b/lisp/lib/rt/fuzz.lisp Wed Aug 07 21:09:43 2024 -0400 @@ -17,21 +17,23 @@ (defclass fuzzer () ((state :initform (make-random-state t) :initarg :state - :accessor fuzzer-state) + :accessor fuzz-state) (generator :initform *default-fuzz-generator* :initarg :generator - :type (function (state)) + :type function :accessor fuzz-generator)) (:documentation "An object which provides invalid, unexpected or random data as inputs to some program.")) (defgeneric fuzz (self &key &allow-other-keys) + (:method ((self fuzzer) &key &allow-other-keys) + (funcall (the function (fuzz-generator self)) (fuzz-state self))) (:method ((self fuzzer) &key count) (if count (let ((ret)) (dotimes (i count ret) - (push (funcall (the function (fuzz-generator self)) (fuzzer-state self)) ret))) - (funcall (the function (fuzz-generator self)) (fuzzer-state self))))) + (push (funcall (the function (fuzz-generator self)) (fuzz-state self)) ret))) + (fuzz self)))) (defgeneric fuzz* (state generator &key &allow-other-keys) (:method ((state list) (generator function) &key (count 1)) diff -r d3e2829521a3 -r 568c39371122 lisp/lib/rt/pkg.lisp --- a/lisp/lib/rt/pkg.lisp Tue Aug 06 21:17:24 2024 -0400 +++ b/lisp/lib/rt/pkg.lisp Wed Aug 07 21:09:43 2024 -0400 @@ -135,7 +135,9 @@ (:use :cl :std :log :rt) (:export :fuzzer :fuzz - :fuzz*)) + :fuzz* + :fuzz-generator + :fuzz-state)) (in-package :rt) (in-readtable :std)