changeset 576: |
60c7b1c83c47 |
parent 575: |
efb4a19ff530 |
child 577: |
806c2b214df8 |
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Sun, 04 Aug 2024 16:58:23 -0400 |
files: |
lisp/lib/obj/pkg.lisp lisp/lib/obj/query.lisp lisp/lib/q/sql.lisp lisp/lib/q/tests.lisp |
description: |
more sql query updates |
1.1--- a/lisp/lib/obj/pkg.lisp Sun Aug 04 00:18:52 2024 -0400
1.2+++ b/lisp/lib/obj/pkg.lisp Sun Aug 04 16:58:23 2024 -0400
1.3@@ -424,7 +424,20 @@
1.4 :lt-expression
1.5 :gt-expression
1.6 :neq-expression
1.7- :eq-expression))
1.8+ :eq-expression
1.9+ :lhs
1.10+ :rhs
1.11+ :logical-plan
1.12+ :aggregate-expression-p
1.13+ :df-col
1.14+ :df-project
1.15+ :df-filter
1.16+ :df-aggregate
1.17+ :df-select
1.18+ :df-fields
1.19+ :df-data
1.20+ :limit
1.21+ :make-df))
1.22
1.23 (defpackage :obj/secret
1.24 (:nicknames :secret)
2.1--- a/lisp/lib/obj/query.lisp Sun Aug 04 00:18:52 2024 -0400
2.2+++ b/lisp/lib/obj/query.lisp Sun Aug 04 16:58:23 2024 -0400
2.3@@ -171,18 +171,6 @@
2.4 (defgeneric scan-data-source (self projection)
2.5 (:documentation "Scan the data source, selecting the specified columns."))
2.6
2.7-;;; Dataframes
2.8-;; minimal data-frame abstraction. methods are prefixed with 'DF-'.
2.9-(defclass data-frame ()
2.10- ((fields :initform #() :initarg :fields :accessor df-fields)
2.11- (data :initform #() :initarg :data :accessor df-data)))
2.12-
2.13-(defgeneric df-col (self))
2.14-
2.15-(defgeneric df-project (&rest expr &key &allow-other-keys))
2.16-(defgeneric df-filter (expr))
2.17-(defgeneric df-aggregate (group-by agg-expr))
2.18-
2.19 ;;; Expressions
2.20 (defclass query-expression () ())
2.21
2.22@@ -221,12 +209,12 @@
2.23
2.24 ;;;;; Alias
2.25 (defclass alias-expression (logical-expression)
2.26- ((expr :type logical-expression :initarg :expr)
2.27+ ((expr :type logical-expression :initarg :expr :accessor expr)
2.28 (alias :type string :initarg :alias)))
2.29
2.30 ;;;;; Unary
2.31 (defclass unary-expression (logical-expression)
2.32- ((expr :type logical-expression)))
2.33+ ((expr :type logical-expression :accessor expr)))
2.34
2.35 ;;;;; Binary
2.36 (defclass binary-expression (logical-expression)
2.37@@ -294,7 +282,7 @@
2.38 ;; TODO 2024-08-03: ???
2.39 (defmethod to-field ((self math-expression) (input logical-plan))
2.40 (declare (ignorable input))
2.41- (make-field :name "mult" :type (field-type (to-field (lhs self) input))))
2.42+ (make-field :name "*" :type (field-type (to-field (lhs self) input))))
2.43
2.44 (defclass add-expression (math-expression) ()
2.45 (:default-initargs
2.46@@ -328,7 +316,11 @@
2.47
2.48 (defclass aggregate-expression (logical-expression)
2.49 ((name :type string)
2.50- (expr :type logical-expression)))
2.51+ (expr :type logical-expression :accessor expr)))
2.52+
2.53+(defgeneric aggregate-expression-p (self)
2.54+ (:method ((self aggregate-expression)) t)
2.55+ (:method ((self alias-expression)) (aggregate-expression-p (expr self))))
2.56
2.57 (defmethod to-field ((self aggregate-expression) (input logical-plan))
2.58 (declare (ignorable input))
2.59@@ -406,6 +398,114 @@
2.60 do (push (to-field a input) ret))
2.61 (make-schema :fields (coerce ret 'field-vector))))
2.62
2.63+;;;;; Limit
2.64+(defclass limit (logical-plan)
2.65+ ((input :type logical-plan :initarg :input)
2.66+ (limit :type integer)))
2.67+
2.68+(defmethod schema ((self limit))
2.69+ (setf (slot-value self 'schema)
2.70+ (schema (slot-value self 'input))))
2.71+
2.72+(defmethod children ((self limit))
2.73+ (setf (slot-value self 'children)
2.74+ (children (slot-value self 'input))))
2.75+
2.76+;;;;; Joins
2.77+(defclass join (logical-plan)
2.78+ ((left :accessor lhs)
2.79+ (right :accessor rhs)
2.80+ (on :accessor join-on)))
2.81+
2.82+(defclass inner-join (join) ())
2.83+;; (defclass outer-join (join))
2.84+(defclass left-join (join) ())
2.85+(defclass right-join (join) ())
2.86+;; left-outer-join
2.87+;; right-outer-join
2.88+;; semi-join
2.89+;; anti-join
2.90+;; cross-join
2.91+
2.92+(defmethod schema ((self join))
2.93+ ;; TODO 2024-08-04: test better dupe impl
2.94+ (let ((dupes (mapcon #'(lambda (l) (when (eq (car l) (second l)) (list (car l))))
2.95+ (coerce (join-on self) 'list)))
2.96+ (schema (make-instance 'schema)))
2.97+ (setf (fields schema)
2.98+ (typecase self
2.99+ (right-join
2.100+ (let ((l (remove-if (lambda (x) (member x dupes :test 'string-equal)) (fields (schema (lhs self)))))
2.101+ (r (fields (schema (rhs self)))))
2.102+ (merge 'vector l r (lambda (x y) (declare (ignore y)) x))))
2.103+ (inner-join
2.104+ (let ((l (fields (schema (lhs self))))
2.105+ (r (remove-if (lambda (x) (member x dupes :test 'string-equal)) (fields (schema (rhs self))))))
2.106+ (merge 'vector l r (lambda (x y) (declare (ignore y)) x))))))
2.107+ schema))
2.108+
2.109+(defmethod children ((self join))
2.110+ (vector (lhs self) (rhs self)))
2.111+
2.112+;;; Subqueries
2.113+
2.114+;; TODO 2024-08-02:
2.115+
2.116+;; subquery
2.117+
2.118+;; correlated-subquery
2.119+
2.120+;; SELECT id, name, (SELECT count(*) FROM orders WHERE customer_id = customer.id) AS num_orders FROM customers
2.121+
2.122+;; uncorrelated-subquery
2.123+
2.124+;; scalar-subquery
2.125+
2.126+;; SELECT * FROM orders WHERE total > (SELECT avg(total) FROM sales WHERE customer_state = 'CA')
2.127+
2.128+;; NOTE 2024-08-02: EXISTS, IN, NOT EXISTS, and NOT IN are also subqueries
2.129+
2.130+;;; Dataframes
2.131+;; minimal data-frame abstraction. methods are prefixed with 'DF-'.
2.132+(defstruct (data-frame (:constructor make-data-frame (&optional plan)))
2.133+ (plan (make-instance 'logical-plan) :type logical-plan))
2.134+
2.135+(defgeneric df-col (self))
2.136+(defgeneric df-project (df exprs)
2.137+ (:method ((df data-frame) (expr list))
2.138+ (df-project df (coerce expr 'vector)))
2.139+ (:method ((df data-frame) (expr vector))
2.140+ (setf (data-frame-plan df)
2.141+ (make-instance 'projection
2.142+ :input (data-frame-plan df)
2.143+ :expr expr))
2.144+ df))
2.145+
2.146+(defgeneric df-filter (df expr)
2.147+ (:method ((df data-frame) (expr logical-expression))
2.148+ (setf (data-frame-plan df)
2.149+ (make-instance 'selection :input (data-frame-plan df) :expr expr))
2.150+ df))
2.151+
2.152+(defgeneric df-aggregate (df group-by agg-expr)
2.153+ (:method ((df data-frame) (group-by vector) (agg-expr vector))
2.154+ (setf (data-frame-plan df)
2.155+ (make-instance 'aggregate :input (data-frame-plan df)
2.156+ :group-expr group-by
2.157+ :agg-expr agg-expr))
2.158+ df)
2.159+ (:method ((df data-frame) (group-by list) (agg-expr list))
2.160+ (df-aggregate df (coerce group-by 'vector) (coerce agg-expr 'vector))))
2.161+
2.162+(defgeneric make-df (&rest initargs &key &allow-other-keys))
2.163+
2.164+(defmethod schema ((df data-frame))
2.165+ (schema (data-frame-plan df)))
2.166+
2.167+(defgeneric df-plan (df)
2.168+ (:documentation "Return the logical plan associated with this data-frame.")
2.169+ (:method ((df data-frame)) (data-frame-plan df)))
2.170+
2.171 ;;; Physical Expression
2.172 (defclass physical-expression (query-expression) ())
2.173
2.174@@ -685,38 +785,6 @@
2.175 :group-expr (make-physical-expression (slot-value plan 'group-expr) (slot-value plan 'input))
2.176 :agg-expr (make-physical-expression (slot-value plan 'agg-expr) (slot-value plan 'input)))))))
2.177
2.178-;;; Joins
2.179-
2.180-;; TODO 2024-08-02:
2.181-
2.182-;; inner-join
2.183-
2.184-;; outer-join left-outer-join right-outer-join
2.185-
2.186-;; semi-join
2.187-
2.188-;; anti-join
2.189-
2.190-;; cross-join
2.191-
2.192-;;; Subqueries
2.193-
2.194-;; TODO 2024-08-02:
2.195-
2.196-;; subquery
2.197-
2.198-;; correlated-subquery
2.199-
2.200-;; SELECT id, name, (SELECT count(*) FROM orders WHERE customer_id = customer.id) AS num_orders FROM customers
2.201-
2.202-;; uncorrelated-subquery
2.203-
2.204-;; scalar-subquery
2.205-
2.206-;; SELECT * FROM orders WHERE total > (SELECT avg(total) FROM sales WHERE customer_state = 'CA')
2.207-
2.208-;; NOTE 2024-08-02: EXISTS, IN, NOT EXISTS, and NOT IN are also subqueries
2.209-
2.210 ;;; Optimizer
2.211
2.212 ;; The Query Optimizer is responsible for walking a QUERY-PLAN and returning a
3.1--- a/lisp/lib/q/sql.lisp Sun Aug 04 00:18:52 2024 -0400
3.2+++ b/lisp/lib/q/sql.lisp Sun Aug 04 16:58:23 2024 -0400
3.3@@ -37,7 +37,7 @@
3.4 (defun illegal-sql-state (state)
3.5 (error 'illegal-sql-state :state state))
3.6
3.7-;;; Objects
3.8+;;; Logical Classes
3.9 (defclass sql-query (query) ())
3.10
3.11 (defclass sql-data-source (data-source) ()
3.12@@ -83,10 +83,6 @@
3.13 (having :type (or null sql-expression) :initarg :having)
3.14 (table-name :type string :initarg :table-name)))
3.15
3.16-(defclass sql-planner (query-planner) ())
3.17-
3.18-(defclass sql-optimizer (query-optimizer) ())
3.19-
3.20 ;;; Lexer
3.21 (eval-always
3.22 (defvar *sql-token-types* (list :ident :str :num :kw :op :sym t))
3.23@@ -550,7 +546,7 @@
3.24 #.(get-sql-symbol :EQ) #.(get-sql-symbol :GT)
3.25 #.(get-sql-symbol :LT))
3.26 :test 'string=)
3.27- ;; (pop tokens) ;; consume
3.28+ (pop (sql-tokens self)) ;; consume
3.29 (make-instance 'sql-math-expression
3.30 :lhs left
3.31 :op (sql-token-text token)
3.32@@ -670,6 +666,46 @@
3.33 `(with-sql-parser (,sym (read-sql-stream ,stream))
3.34 ,@body))
3.35
3.36+;;; Planner
3.37+(defclass sql-logical-plan (logical-plan) ())
3.38+(defclass sql-physical-plan (physical-plan) ())
3.39+
3.40+(defmethod make-physical-expression ((expr sql-expression) (input sql-logical-plan)))
3.41+(defmethod make-physical-plan ((plan sql-logical-plan)))
3.42+
3.43+(defclass sql-planner (query-planner) ())
3.44+
3.45+(defun make-sql-logical-expression ())
3.46+(defun get-ref-columns ())
3.47+(defun get-selection-ref-columns ())
3.48+(defun plan-non-aggregate-query ())
3.49+(defun plan-aggregate-query ())
3.50+
3.51+(defun make-sql-data-frame (select tables)
3.52+ (let* ((table (gethash (slot-value select 'table-name)
3.53+ tables
3.54+ (simple-sql-error "No table named ~A" (slot-value select 'table-name))))
3.55+ (proj (map 'vector
3.56+ (lambda (x) (make-sql-logical-expression x table))
3.57+ (slot-value select 'projection)))
3.58+ (cols-in-proj (get-ref-columns proj))
3.59+ (agg-count (count-if 'aggregate-expression-p proj)))
3.60+ (when (and (zerop agg-count) (not (sequence:emptyp (slot-value select 'group-by))))
3.61+ (simple-sql-error "GROUP BY without aggregate expression is not supported"))
3.62+ (let ((cols-in-sel (get-selection-ref-columns select table))
3.63+ (plan table))
3.64+ (if (zerop agg-count)
3.65+ (plan-non-aggregate-query select plan proj cols-in-sel cols-in-proj)
3.66+ (let ((pro)
3.67+ (agg)
3.68+ (n-group-cols 0)
3.69+ (group-count 0))
3.70+ plan)))))
3.71+
3.72+;;; Optimizer
3.73+(defclass sql-optimizer (query-optimizer) ())
3.74+
3.75+;;; Top-level Macros
3.76 (defmacro with-sql ((sym input &key (parse t) optimize execute) &body body)
3.77 (declare (ignore optimize execute))
3.78 `(with-sql-parser (,sym ,@(etypecase input
4.1--- a/lisp/lib/q/tests.lisp Sun Aug 04 00:18:52 2024 -0400
4.2+++ b/lisp/lib/q/tests.lisp Sun Aug 04 16:58:23 2024 -0400
4.3@@ -4,7 +4,7 @@
4.4
4.5 ;;; Code:
4.6 (defpackage :q/tests
4.7- (:use :cl :std :rt :q :log :parse/pratt))
4.8+ (:use :cl :std :rt :q :log :parse/pratt :obj/query))
4.9
4.10 (in-package :q/tests)
4.11
4.12@@ -15,16 +15,14 @@
4.13 (deftest sanity ())
4.14
4.15 (deftest sql-select ()
4.16- (is (typep
4.17- (with-sql-parser (expr (read-sql-string "SELECT a,b,c FROM FOO"))
4.18- (parse expr))
4.19- 'sql-select)))
4.20+ (with-sql-parser (expr (read-sql-string "SELECT * FROM FOO"))
4.21+ (is (typep (parse expr) 'sql-select))))
4.22
4.23 (deftest sql-math ()
4.24- (is (typep
4.25- (with-sql (expr "1 + 2 * 3")
4.26- expr)
4.27- 'sql-math-expression)))
4.28+ (with-sql (expr "1 + 2 * 3")
4.29+ (is (typep expr 'sql-math-expression))
4.30+ (is (typep (rhs expr) 'sql-math-expression))
4.31+ (is (typep (lhs expr) 'sql-number))))
4.32
4.33 ;; https://www.cpp.edu/~jrfisher/www/prolog_tutorial/2_1.html
4.34 (deftest dql ()