changelog shortlog graph tags branches files raw help

Mercurial > core / changeset: more sql query updates

changeset 576: 60c7b1c83c47
parent 575: efb4a19ff530
child 577: 806c2b214df8
author: Richard Westhaver <ellis@rwest.io>
date: Sun, 04 Aug 2024 16:58:23 -0400
files: lisp/lib/obj/pkg.lisp lisp/lib/obj/query.lisp lisp/lib/q/sql.lisp lisp/lib/q/tests.lisp
description: more sql query updates
     1.1--- a/lisp/lib/obj/pkg.lisp	Sun Aug 04 00:18:52 2024 -0400
     1.2+++ b/lisp/lib/obj/pkg.lisp	Sun Aug 04 16:58:23 2024 -0400
     1.3@@ -424,7 +424,20 @@
     1.4            :lt-expression
     1.5            :gt-expression
     1.6            :neq-expression
     1.7-           :eq-expression))
     1.8+           :eq-expression
     1.9+           :lhs
    1.10+           :rhs
    1.11+           :logical-plan
    1.12+           :aggregate-expression-p
    1.13+           :df-col
    1.14+           :df-project
    1.15+           :df-filter
    1.16+           :df-aggregate
    1.17+           :df-select
    1.18+           :df-fields
    1.19+           :df-data
    1.20+           :limit
    1.21+           :make-df))
    1.22 
    1.23 (defpackage :obj/secret
    1.24   (:nicknames :secret)
     2.1--- a/lisp/lib/obj/query.lisp	Sun Aug 04 00:18:52 2024 -0400
     2.2+++ b/lisp/lib/obj/query.lisp	Sun Aug 04 16:58:23 2024 -0400
     2.3@@ -171,18 +171,6 @@
     2.4 (defgeneric scan-data-source (self projection)
     2.5   (:documentation "Scan the data source, selecting the specified columns."))
     2.6 
     2.7-;;; Dataframes
     2.8-;; minimal data-frame abstraction. methods are prefixed with 'DF-'.
     2.9-(defclass data-frame ()
    2.10-  ((fields :initform #() :initarg :fields :accessor df-fields)
    2.11-   (data :initform #() :initarg :data :accessor df-data)))
    2.12-
    2.13-(defgeneric df-col (self))
    2.14-
    2.15-(defgeneric df-project (&rest expr &key &allow-other-keys))
    2.16-(defgeneric df-filter (expr))
    2.17-(defgeneric df-aggregate (group-by agg-expr))
    2.18-
    2.19 ;;; Expressions
    2.20 (defclass query-expression () ())
    2.21 
    2.22@@ -221,12 +209,12 @@
    2.23 
    2.24 ;;;;; Alias
    2.25 (defclass alias-expression (logical-expression)
    2.26-  ((expr :type logical-expression :initarg :expr)
    2.27+  ((expr :type logical-expression :initarg :expr :accessor expr)
    2.28    (alias :type string :initarg :alias)))
    2.29 
    2.30 ;;;;; Unary
    2.31 (defclass unary-expression (logical-expression)
    2.32-  ((expr :type logical-expression)))
    2.33+  ((expr :type logical-expression :accessor expr)))
    2.34 
    2.35 ;;;;; Binary
    2.36 (defclass binary-expression (logical-expression)
    2.37@@ -294,7 +282,7 @@
    2.38 ;; TODO 2024-08-03: ???
    2.39 (defmethod to-field ((self math-expression) (input logical-plan))
    2.40   (declare (ignorable input))
    2.41-  (make-field :name "mult" :type (field-type (to-field (lhs self) input))))
    2.42+  (make-field :name "*" :type (field-type (to-field (lhs self) input))))
    2.43 
    2.44 (defclass add-expression (math-expression) ()
    2.45   (:default-initargs
    2.46@@ -328,7 +316,11 @@
    2.47 
    2.48 (defclass aggregate-expression (logical-expression)
    2.49   ((name :type string)
    2.50-   (expr :type logical-expression)))
    2.51+   (expr :type logical-expression :accessor expr)))
    2.52+
    2.53+(defgeneric aggregate-expression-p (self)
    2.54+  (:method ((self aggregate-expression)) t)
    2.55+  (:method ((self alias-expression)) (aggregate-expression-p (expr self))))
    2.56 
    2.57 (defmethod to-field ((self aggregate-expression) (input logical-plan))
    2.58   (declare (ignorable input))
    2.59@@ -406,6 +398,114 @@
    2.60           do (push (to-field a input) ret))
    2.61     (make-schema :fields (coerce ret 'field-vector))))
    2.62 
    2.63+;;;;; Limit
    2.64+(defclass limit (logical-plan)
    2.65+  ((input :type logical-plan :initarg :input)
    2.66+   (limit :type integer)))
    2.67+
    2.68+(defmethod schema ((self limit))
    2.69+  (setf (slot-value self 'schema)
    2.70+        (schema (slot-value self 'input))))
    2.71+
    2.72+(defmethod children ((self limit))
    2.73+  (setf (slot-value self 'children)
    2.74+        (children (slot-value self 'input))))
    2.75+
    2.76+;;;;; Joins
    2.77+(defclass join (logical-plan)
    2.78+  ((left :accessor lhs)
    2.79+   (right :accessor rhs)
    2.80+   (on :accessor join-on)))
    2.81+
    2.82+(defclass inner-join (join) ())
    2.83+;; (defclass outer-join (join))
    2.84+(defclass left-join (join) ())
    2.85+(defclass right-join (join) ())
    2.86+;; left-outer-join
    2.87+;; right-outer-join
    2.88+;; semi-join
    2.89+;; anti-join
    2.90+;; cross-join
    2.91+
    2.92+(defmethod schema ((self join))
    2.93+  ;; TODO 2024-08-04: test better dupe impl
    2.94+  (let ((dupes (mapcon #'(lambda (l) (when (eq (car l) (second l)) (list (car l))))
    2.95+                       (coerce (join-on self) 'list)))
    2.96+        (schema (make-instance 'schema)))
    2.97+    (setf (fields schema)
    2.98+          (typecase self
    2.99+            (right-join
   2.100+             (let ((l (remove-if (lambda (x) (member x dupes :test 'string-equal)) (fields (schema (lhs self)))))
   2.101+                   (r (fields (schema (rhs self)))))
   2.102+               (merge 'vector l r (lambda (x y) (declare (ignore y)) x))))
   2.103+            (inner-join
   2.104+             (let ((l (fields (schema (lhs self))))
   2.105+                   (r (remove-if (lambda (x) (member x dupes :test 'string-equal)) (fields (schema (rhs self))))))
   2.106+               (merge 'vector l r (lambda (x y) (declare (ignore y)) x))))))
   2.107+    schema))
   2.108+
   2.109+(defmethod children ((self join))
   2.110+  (vector (lhs self) (rhs self))) 
   2.111+
   2.112+;;; Subqueries
   2.113+
   2.114+;;  TODO 2024-08-02: 
   2.115+
   2.116+;; subquery
   2.117+
   2.118+;; correlated-subquery
   2.119+
   2.120+;; SELECT id, name, (SELECT count(*) FROM orders WHERE customer_id = customer.id) AS num_orders FROM customers
   2.121+
   2.122+;; uncorrelated-subquery
   2.123+
   2.124+;; scalar-subquery
   2.125+
   2.126+;; SELECT * FROM orders WHERE total > (SELECT avg(total) FROM sales WHERE customer_state = 'CA')
   2.127+
   2.128+;; NOTE 2024-08-02: EXISTS, IN, NOT EXISTS, and NOT IN are also subqueries
   2.129+
   2.130+;;; Dataframes
   2.131+;; minimal data-frame abstraction. methods are prefixed with 'DF-'.
   2.132+(defstruct (data-frame (:constructor make-data-frame (&optional plan)))
   2.133+  (plan (make-instance 'logical-plan) :type logical-plan))
   2.134+
   2.135+(defgeneric df-col (self))
   2.136+(defgeneric df-project (df exprs)
   2.137+  (:method ((df data-frame) (expr list))
   2.138+    (df-project df (coerce expr 'vector)))
   2.139+  (:method ((df data-frame) (expr vector))
   2.140+    (setf (data-frame-plan df)
   2.141+          (make-instance 'projection
   2.142+            :input (data-frame-plan df)
   2.143+            :expr expr))
   2.144+    df))
   2.145+
   2.146+(defgeneric df-filter (df expr)
   2.147+  (:method ((df data-frame) (expr logical-expression))
   2.148+    (setf (data-frame-plan df)
   2.149+          (make-instance 'selection :input (data-frame-plan df) :expr expr))
   2.150+    df))
   2.151+
   2.152+(defgeneric df-aggregate (df group-by agg-expr)
   2.153+  (:method ((df data-frame) (group-by vector) (agg-expr vector))
   2.154+    (setf (data-frame-plan df)
   2.155+          (make-instance 'aggregate :input (data-frame-plan df)
   2.156+                         :group-expr group-by
   2.157+                         :agg-expr agg-expr))
   2.158+    df)
   2.159+  (:method ((df data-frame) (group-by list) (agg-expr list))
   2.160+    (df-aggregate df (coerce group-by 'vector) (coerce agg-expr 'vector))))
   2.161+
   2.162+(defgeneric make-df (&rest initargs &key &allow-other-keys))
   2.163+
   2.164+(defmethod schema ((df data-frame))
   2.165+  (schema (data-frame-plan df)))
   2.166+
   2.167+(defgeneric df-plan (df)
   2.168+  (:documentation "Return the logical plan associated with this data-frame.")
   2.169+  (:method ((df data-frame)) (data-frame-plan df)))
   2.170+
   2.171 ;;; Physical Expression
   2.172 (defclass physical-expression (query-expression) ())
   2.173 
   2.174@@ -685,38 +785,6 @@
   2.175                    :group-expr (make-physical-expression (slot-value plan 'group-expr) (slot-value plan 'input))
   2.176                    :agg-expr (make-physical-expression (slot-value plan 'agg-expr) (slot-value plan 'input)))))))
   2.177 
   2.178-;;; Joins
   2.179-
   2.180-;;  TODO 2024-08-02: 
   2.181-
   2.182-;; inner-join
   2.183-
   2.184-;; outer-join left-outer-join right-outer-join
   2.185-
   2.186-;; semi-join
   2.187-
   2.188-;; anti-join
   2.189-
   2.190-;; cross-join
   2.191-
   2.192-;;; Subqueries
   2.193-
   2.194-;;  TODO 2024-08-02: 
   2.195-
   2.196-;; subquery
   2.197-
   2.198-;; correlated-subquery
   2.199-
   2.200-;; SELECT id, name, (SELECT count(*) FROM orders WHERE customer_id = customer.id) AS num_orders FROM customers
   2.201-
   2.202-;; uncorrelated-subquery
   2.203-
   2.204-;; scalar-subquery
   2.205-
   2.206-;; SELECT * FROM orders WHERE total > (SELECT avg(total) FROM sales WHERE customer_state = 'CA')
   2.207-
   2.208-;; NOTE 2024-08-02: EXISTS, IN, NOT EXISTS, and NOT IN are also subqueries
   2.209-
   2.210 ;;; Optimizer
   2.211 
   2.212 ;; The Query Optimizer is responsible for walking a QUERY-PLAN and returning a
     3.1--- a/lisp/lib/q/sql.lisp	Sun Aug 04 00:18:52 2024 -0400
     3.2+++ b/lisp/lib/q/sql.lisp	Sun Aug 04 16:58:23 2024 -0400
     3.3@@ -37,7 +37,7 @@
     3.4 (defun illegal-sql-state (state)
     3.5   (error 'illegal-sql-state :state state))
     3.6 
     3.7-;;; Objects
     3.8+;;; Logical Classes
     3.9 (defclass sql-query (query) ())
    3.10 
    3.11 (defclass sql-data-source (data-source) ()
    3.12@@ -83,10 +83,6 @@
    3.13    (having :type (or null sql-expression) :initarg :having)
    3.14    (table-name :type string :initarg :table-name)))
    3.15 
    3.16-(defclass sql-planner (query-planner) ())
    3.17-
    3.18-(defclass sql-optimizer (query-optimizer) ())
    3.19-
    3.20 ;;; Lexer
    3.21 (eval-always
    3.22   (defvar *sql-token-types* (list :ident :str :num :kw :op :sym t))
    3.23@@ -550,7 +546,7 @@
    3.24                                                       #.(get-sql-symbol :EQ) #.(get-sql-symbol :GT)
    3.25                                                       #.(get-sql-symbol :LT))
    3.26                          :test 'string=)
    3.27-                 ;; (pop tokens) ;; consume
    3.28+                 (pop (sql-tokens self)) ;; consume
    3.29                  (make-instance 'sql-math-expression
    3.30                    :lhs left
    3.31                    :op (sql-token-text token)
    3.32@@ -670,6 +666,46 @@
    3.33   `(with-sql-parser (,sym (read-sql-stream ,stream))
    3.34      ,@body))
    3.35 
    3.36+;;; Planner
    3.37+(defclass sql-logical-plan (logical-plan) ())
    3.38+(defclass sql-physical-plan (physical-plan) ())
    3.39+
    3.40+(defmethod make-physical-expression ((expr sql-expression) (input sql-logical-plan)))
    3.41+(defmethod make-physical-plan ((plan sql-logical-plan)))
    3.42+
    3.43+(defclass sql-planner (query-planner) ())
    3.44+
    3.45+(defun make-sql-logical-expression ())
    3.46+(defun get-ref-columns ())
    3.47+(defun get-selection-ref-columns ())
    3.48+(defun plan-non-aggregate-query ())
    3.49+(defun plan-aggregate-query ())
    3.50+
    3.51+(defun make-sql-data-frame (select tables)
    3.52+  (let* ((table (gethash (slot-value select 'table-name)
    3.53+                         tables
    3.54+                         (simple-sql-error "No table named ~A" (slot-value select 'table-name))))
    3.55+         (proj (map 'vector
    3.56+                    (lambda (x) (make-sql-logical-expression x table))
    3.57+                    (slot-value select 'projection)))
    3.58+         (cols-in-proj (get-ref-columns proj))
    3.59+         (agg-count (count-if 'aggregate-expression-p proj)))
    3.60+    (when (and (zerop agg-count) (not (sequence:emptyp (slot-value select 'group-by))))
    3.61+      (simple-sql-error "GROUP BY without aggregate expression is not supported"))
    3.62+    (let ((cols-in-sel (get-selection-ref-columns select table))
    3.63+          (plan table))
    3.64+      (if (zerop agg-count)
    3.65+          (plan-non-aggregate-query select plan proj cols-in-sel cols-in-proj)
    3.66+          (let ((pro)
    3.67+                (agg)
    3.68+                (n-group-cols 0)
    3.69+                (group-count 0))
    3.70+            plan)))))
    3.71+
    3.72+;;; Optimizer
    3.73+(defclass sql-optimizer (query-optimizer) ())
    3.74+
    3.75+;;; Top-level Macros
    3.76 (defmacro with-sql ((sym input &key (parse t) optimize execute) &body body)
    3.77   (declare (ignore optimize execute))
    3.78   `(with-sql-parser (,sym ,@(etypecase input
     4.1--- a/lisp/lib/q/tests.lisp	Sun Aug 04 00:18:52 2024 -0400
     4.2+++ b/lisp/lib/q/tests.lisp	Sun Aug 04 16:58:23 2024 -0400
     4.3@@ -4,7 +4,7 @@
     4.4 
     4.5 ;;; Code:
     4.6 (defpackage :q/tests
     4.7-  (:use :cl :std :rt :q :log :parse/pratt))
     4.8+  (:use :cl :std :rt :q :log :parse/pratt :obj/query))
     4.9 
    4.10 (in-package :q/tests)
    4.11 
    4.12@@ -15,16 +15,14 @@
    4.13 (deftest sanity ())
    4.14 
    4.15 (deftest sql-select ()
    4.16-  (is (typep
    4.17-       (with-sql-parser (expr (read-sql-string "SELECT a,b,c FROM FOO"))
    4.18-         (parse expr))
    4.19-       'sql-select)))
    4.20+  (with-sql-parser (expr (read-sql-string "SELECT * FROM FOO"))
    4.21+    (is (typep (parse expr) 'sql-select))))
    4.22 
    4.23 (deftest sql-math ()
    4.24-  (is (typep
    4.25-       (with-sql (expr "1 + 2 * 3")
    4.26-         expr)
    4.27-       'sql-math-expression)))
    4.28+  (with-sql (expr "1 + 2 * 3")
    4.29+    (is (typep expr 'sql-math-expression))
    4.30+    (is (typep (rhs expr) 'sql-math-expression))
    4.31+    (is (typep (lhs expr) 'sql-number))))
    4.32 
    4.33 ;; https://www.cpp.edu/~jrfisher/www/prolog_tutorial/2_1.html
    4.34 (deftest dql ()