# HG changeset patch # User Richard Westhaver # Date 1722661176 14400 # Node ID 9e7d4393eac6bcb2c5a19d98b117608daa50c36e # Parent f8b76ced5e2dc29805fbf02b272d6dcf19c7cec2 add skel/core/print.lisp, wrap up obj/query init diff -r f8b76ced5e2d -r 9e7d4393eac6 lisp/lib/dat/dat.asd --- a/lisp/lib/dat/dat.asd Thu Aug 01 21:21:07 2024 -0400 +++ b/lisp/lib/dat/dat.asd Sat Aug 03 00:59:36 2024 -0400 @@ -1,6 +1,6 @@ (defsystem :dat :description "Data formats" - :depends-on (:cl-ppcre :std :obj #+png :png :flexi-streams :io) + :depends-on (:cl-ppcre :std :obj #+png :png :flexi-streams :io :log) :version "0.1.0" :serial t :components ((:file "pkg") diff -r f8b76ced5e2d -r 9e7d4393eac6 lisp/lib/obj/hash/chash.lisp --- a/lisp/lib/obj/hash/chash.lisp Thu Aug 01 21:21:07 2024 -0400 +++ b/lisp/lib/obj/hash/chash.lisp Sat Aug 03 00:59:36 2024 -0400 @@ -51,3 +51,70 @@ ;;; Code: (in-package :obj/hash) + +(deftype solist-element-designator () `(member ,@(list :addr :fixnum :string))) + +(defun show-list (solist) + (let ((node (so-head solist))) + (loop (format t "~s~%" node) + (when (endp node) (return)) + (setq node (%node-next node))))) + +(defun show-bin (solist i) + (let ((node (aref (car (so-bins solist)) i)) + (bin-nbits (- +hash-nbits+ (cdr (so-bins solist)))) + (count 0)) + (flet ((bit-string (hash) + (let ((s (format nil " ~v,'0b" +hash-nbits+ hash))) + (replace s s :end1 bin-nbits :start2 1) + (setf (char s bin-nbits) #\.) + s))) + (cond + ((unbound-marker-p node) + (values 0 0)) + (t + (let ((node node)) + (loop (let ((next (get-next node))) + (when (or (endp next) (evenp (node-hash next))) + (return)) + (incf count) + (setq node next)))) + (format t " ~5d [~2d] = ~a" i count (bit-string (node-hash node))) + (loop (let ((next (get-next node))) + (when (or (endp next) (evenp (node-hash next))) + (return)) + (setq node next) + (if (= count 1) + (format t " ~a=~s" + (bit-string (node-hash node)) (so-key node)) + (format t "~% ~a=~s" + (bit-string (node-hash node)) (so-key node))))) + (terpri) + (values 1 count)))))) + +(defun show-bins (solist) + (let ((bins (car (so-bins solist))) + (bin-nbits (- +hash-nbits+ (cdr (so-bins solist)))) + (n-occupied-bins 0) + (sum-chainlengths 0) + (max-chainlength 0)) + (assert (= (length bins) (ash 1 bin-nbits))) + (format t "Bins (~d total, ~d leading bits):~%" + (length bins) bin-nbits) + (dotimes (i (length bins)) + (multiple-value-bind (occupied count) (show-bin solist i) + (incf n-occupied-bins occupied) + (incf sum-chainlengths count) + (setq max-chainlength (max count max-chainlength)))) + (let ((avg-chainlength (/ sum-chainlengths n-occupied-bins))) + (format t "~&Total ~D items, avg ~F items/bin~%" + (so-count solist) avg-chainlength) + (values max-chainlength (float avg-chainlength))))) + +(defun print-hashes (solist) + (do ((node (%node-next (so-head solist)) (%node-next node))) + ((endp node)) + (format t "~16x~@[ ~s~]~%" + (node-hash node) + (if (so-key-node-p node) (type-of (so-key node)))))) +(sb-lockless:lfl-insert (sb-lockless:make-ordered-list :key-type 'fixnum) 5 'five) diff -r f8b76ced5e2d -r 9e7d4393eac6 lisp/lib/obj/hash/map.lisp --- a/lisp/lib/obj/hash/map.lisp Thu Aug 01 21:21:07 2024 -0400 +++ b/lisp/lib/obj/hash/map.lisp Sat Aug 03 00:59:36 2024 -0400 @@ -4,3 +4,15 @@ ;;; Code: (in-package :obj/hash) + +;;; SOLIST + +;; Lockfree Maps + +(defmacro make-so-map (&optional (type :addr)) + "Return a SOLIST map. Type may be either FIXNUM or STRING." + (declare (solist-element-designator type)) + `(case ,type + (:fixnum ,(make-so-map/fixnum)) + (:string ,(make-so-map/string)) + (:addr ,(make-so-map/addr)))) diff -r f8b76ced5e2d -r 9e7d4393eac6 lisp/lib/obj/hash/set.lisp --- a/lisp/lib/obj/hash/set.lisp Thu Aug 01 21:21:07 2024 -0400 +++ b/lisp/lib/obj/hash/set.lisp Sat Aug 03 00:59:36 2024 -0400 @@ -7,6 +7,14 @@ ;;; SOLIST -;; Split-Ordered Lists +;; Lockfree hashsets ;; see file src/code/solist.lisp + +(defmacro make-so-set (&optional (type :addr)) + "Return a SOLIST set. Type is of type SOLIST-ELEMENT-DESIGNATOR." + (declare (solist-element-designator type)) + `(case ,type + (:fixnum ,(make-so-set/fixnum)) + (:string ,(make-so-map/string)) + (:addr ,(make-so-set/addr)))) diff -r f8b76ced5e2d -r 9e7d4393eac6 lisp/lib/obj/list/clist.lisp --- a/lisp/lib/obj/list/clist.lisp Thu Aug 01 21:21:07 2024 -0400 +++ b/lisp/lib/obj/list/clist.lisp Sat Aug 03 00:59:36 2024 -0400 @@ -1,7 +1,7 @@ ;;; obj/list/clist.lisp --- Concurrent Lists ;; https://timharris.uk/papers/2001-disc.pdf -;; see sb-lockless - lfl=lockfree-list solist=splitorder-lists +;; see sb-lockless - lfl=lockfree-list ;;; Code: (in-package :obj/list) diff -r f8b76ced5e2d -r 9e7d4393eac6 lisp/lib/obj/pkg.lisp --- a/lisp/lib/obj/pkg.lisp Thu Aug 01 21:21:07 2024 -0400 +++ b/lisp/lib/obj/pkg.lisp Sat Aug 03 00:59:36 2024 -0400 @@ -23,8 +23,11 @@ (defpackage :obj/hash (:nicknames :hash) (:use :cl :std) + (:shadowing-import-from :sb-lockless :endp) (:import-from :sb-lockless :make-so-map/fixnum :+hash-nbits+ + :node-hash :%node-next + :unbound-marker-p :get-next :node-hash :so-head :so-bins :so-key :so-data @@ -32,7 +35,8 @@ :so-insert :so-delete :so-find :so-find/string :so-maplist :make-so-map/string - :make-so-set/string :make-so-map/addr :make-marked-ref) + :make-so-set/string :make-so-set/fixnum :make-so-map/addr :make-marked-ref + :make-so-set/addr) (:export :*global-hasher* :*global-hash* diff -r f8b76ced5e2d -r 9e7d4393eac6 lisp/lib/obj/query.lisp --- a/lisp/lib/obj/query.lisp Thu Aug 01 21:21:07 2024 -0400 +++ b/lisp/lib/obj/query.lisp Sat Aug 03 00:59:36 2024 -0400 @@ -21,6 +21,14 @@ ;; - Backends :: The interface exposed to the underlying data sources - ;; RocksDB, SQLite, etc. +;;;; Refs + +;; https://gist.github.com/twitu/221c8349887cec0a83b395e4cbb492a7 + +;; https://www1.columbia.edu/sec/acis/db2/db2d0/db2d0103.htm + +;; https://howqueryengineswork.com/ + ;;; Code: (in-package :obj/query) @@ -353,9 +361,9 @@ ;;;;; Scan (defclass scan-data (logical-plan) - ((path :type string) - (data-source :type data-source) - (projection :type (vector string)))) + ((path :type string :initarg :path) + (data-source :type data-source :initarg :data-source) + (projection :type (vector string) :initarg :projection))) (defmethod derive-schema ((self scan-data)) (let ((proj (slot-value self 'projection))) @@ -368,25 +376,25 @@ ;;;;; Projection (defclass projection (logical-plan) - ((input :type logical-plan) - (expr :type (vector logical-expression)))) + ((input :type logical-plan :initarg :input) + (expr :type (vector logical-expression) :initarg :expr))) (defmethod schema ((self projection)) (schema (slot-value self 'input))) ;;;;; Selection (defclass selection (logical-plan) - ((input :type logical-plan) - (expr :type logical-expression))) + ((input :type logical-plan :initarg :input) + (expr :type logical-expression :initarg :expr))) (defmethod schema ((self selection)) (schema (slot-value self 'input))) ;;;;; Aggregate (defclass aggregate (logical-plan) - ((input :type logical-plan) - (group-expr :type (vector logical-expression)) - (agg-expr :type (vector aggregate-expression)))) + ((input :type logical-plan :initarg :input) + (group-expr :type (vector logical-expression) :initarg :group-expr) + (agg-expr :type (vector aggregate-expression) :initarg :agg-expr))) (defmethod schema ((self aggregate)) (let ((input (slot-value self 'input)) @@ -398,7 +406,6 @@ (make-schema :fields (coerce ret 'field-vector)))) ;;; Physical Expression - (defclass physical-expression (query-expression) ()) (defclass literal-physical-expression (physical-expression) ()) @@ -439,6 +446,24 @@ (declare (ignore self)) (equal lhs rhs)) +(defclass neq-physical-expression (binary-physical-expression) ()) + +(defmethod evaluate2 ((self neq-physical-expression) lhs rhs) + (declare (ignore self)) + (equal lhs rhs)) + +(defclass lt-physical-expression (binary-physical-expression) ()) + +(defclass gt-physical-expression (binary-physical-expression) ()) + +(defclass lteq-physical-expression (binary-physical-expression) ()) + +(defclass gteq-physical-expression (binary-physical-expression) ()) + +(defclass and-physical-expression (binary-physical-expression) ()) + +(defclass or-physical-expression (binary-physical-expression) ()) + (defclass math-physical-expression (binary-physical-expression) ()) (defmethod evaluate2 ((self math-physical-expression) (lhs column-vector) (rhs column-vector)) @@ -452,6 +477,24 @@ (declare (ignore self)) (+ lhs rhs)) +(defclass sub-physical-expression (math-expression) ()) + +(defmethod evaluate2 ((self sub-physical-expression) lhs rhs) + (declare (ignore self)) + (- lhs rhs)) + +(defclass mult-physical-expression (math-expression) ()) + +(defmethod evaluate2 ((self mult-physical-expression) lhs rhs) + (declare (ignore self)) + (* lhs rhs)) + +(defclass div-physical-expression (math-expression) ()) + +(defmethod evaluate2 ((self div-physical-expression) lhs rhs) + (declare (ignore self)) + (/ lhs rhs)) + (defclass accumulator () ((value :initarg :value :accessor accumulator-value))) @@ -523,7 +566,6 @@ collect (make-record-batch :schema schema :fields (coerce filtered 'field-vector))) '(vector record-batch))) -;; NOTE 2024-07-10: (defgeneric filter (self columns selection) (:method ((self selection-exec) (columns column-vector) (selection simple-bit-vector)) (coerce @@ -533,9 +575,9 @@ 'field-vector))) (defclass hash-aggregate-exec (physical-plan) - ((input :type physical-plan) - (group-expr :type (vector physical-plan)) - (agg-expr :type (vector aggregate-physical-expression)))) + ((input :type physical-plan :initarg :input) + (group-expr :type (vector physical-plan) :initarg :group-expr) + (agg-expr :type (vector aggregate-physical-expression) :initarg :agg-expr))) (defmethod execute ((self hash-aggregate-exec)) (coerce @@ -589,7 +631,6 @@ (defclass query-planner () ()) -;; these generics (defgeneric make-physical-expression (expr input) (:documentation "Translate logical expression EXPR and logical plan INPUT into a physical expression.") @@ -607,18 +648,17 @@ (r (make-physical-expression (rhs expr) input))) (etypecase expr (eq-expression (make-instance 'eq-physical-expression :lhs l :rhs r)) - ;; (neq-expression (make-instance 'neq-physical-expression :lhs l :rhs r)) - ;; (gt-expression) - ;; (gteq-expression) - ;; (lt-expression) - ;; (lteq-expression) - ;; (and-expression) - ;; (or-expression) + (neq-expression (make-instance 'neq-physical-expression :lhs l :rhs r)) + (gt-expression (make-instance 'gt-physical-expression :lhs l :rhs r)) + (gteq-expression (make-instance 'gteq-physical-expression :lhs l :rhs r)) + (lt-expression (make-instance 'lt-physical-expression :lhs l :rhs r)) + (lteq-expression (make-instance 'lteq-physical-expression :lhs l :rhs r)) + (and-expression (make-instance 'and-physical-expression :lhs l :rhs r)) + (or-expression (make-instance 'or-physical-expression :lhs l :rhs r)) (add-expression (make-instance 'add-physical-expresion :lhs l :rhs r)) - ;; (sub-expression) - ;; (mult-expression) - ;; (div-expression) - )))) + (sub-expression (make-instance 'sub-physical-expression :lhs l :rhs r)) + (mult-expression (make-instance 'mult-physical-expression :lhs l :rhs r)) + (div-expression (make-instance 'div-physical-expression :lhs l :rhs r)))))) (defgeneric make-physical-plan (plan) (:documentation "Create a physical plan from logical PLAN.") @@ -639,8 +679,42 @@ (selection (make-instance 'selection-exec :input (make-physical-plan (slot-value plan 'input)) :expr (make-physical-expression (slot-value plan 'expr) (slot-value plan 'input)))) - ;; TODO 2024-07-10: - (aggregate (make-instance 'hash-aggregate-exec))))) + (aggregate (make-instance 'hash-aggregate-exec + :input (make-physical-plan (slot-value plan 'input)) + :group-expr (make-physical-expression (slot-value plan 'group-expr) (slot-value plan 'input)) + :agg-expr (make-physical-expression (slot-value plan 'agg-expr) (slot-value plan 'input))))))) + +;;; Joins + +;; TODO 2024-08-02: + +;; inner-join + +;; outer-join left-outer-join right-outer-join + +;; semi-join + +;; anti-join + +;; cross-join + +;;; Subqueries + +;; TODO 2024-08-02: + +;; subquery + +;; correlated-subquery + +;; SELECT id, name, (SELECT count(*) FROM orders WHERE customer_id = customer.id) AS num_orders FROM customers + +;; uncorrelated-subquery + +;; scalar-subquery + +;; SELECT * FROM orders WHERE total > (SELECT avg(total) FROM sales WHERE customer_state = 'CA') + +;; NOTE 2024-08-02: EXISTS, IN, NOT EXISTS, and NOT IN are also subqueries ;;; Optimizer @@ -654,7 +728,56 @@ ;; TODO 2024-07-10: (defclass query-optimizer () ()) -(defgeneric optimize-query (self &key &allow-other-keys)) +(defstruct (query-vop (:constructor make-query-vop (info))) + (info nil)) + +(defgeneric optimize-query (self plan)) + +;; Projection Pushdown +(defun extract-columns (expr input &optional accum) + (etypecase expr + (array-index (accumulate accum (field (fields (schema input)) expr))) + (column-expression (accumulate accum (column-name expr))) + (binary-expression + (extract-columns (lhs expr) input accum) + (extract-columns (rhs expr) input accum)) + (alias-expression (extract-columns (slot-value expr 'expr) input accum)) + ;; cast-expression + (literal-expression nil))) + +(defun extract-columns* (exprs input &optional accum) + (mapcar (lambda (x) (extract-columns x input accum)) exprs)) + +(defclass projection-pushdown-optimizer (query-optimizer) ()) + +(defun %pushdown (plan &optional column-names) + (declare (logical-plan plan)) + (etypecase plan + (projection + (extract-columns (slot-value plan 'expr) column-names) + (let ((input (%pushdown (slot-value plan 'input) column-names))) + (make-instance 'projection :input input :expr (slot-value plan 'expr)))) + (selection + (extract-columns (slot-value plan 'expr) column-names) + (let ((input (%pushdown (slot-value plan 'input) column-names))) + (make-instance 'selection :input input :expr (slot-value plan 'expr)))) + (aggregate + (extract-columns (slot-value plan 'group-expr) column-names) + (extract-columns* + (loop for x across (slot-value plan 'agg-expr) collect (slot-value x 'input)) + column-names) + (let ((input (%pushdown (slot-value plan 'input) column-names))) + (make-instance 'aggregate + :input input + :group-expr (slot-value plan 'group-expr) + :agg-expr (slot-value plan 'agg-expr)))) + (scan-data (make-instance 'scan-data + :path (slot-value plan 'name) + :data-source (slot-value plan 'data-source) + :projection column-names)))) ;; maybe sort here? + +(defmethod optimize-query ((self projection-pushdown-optimizer) (plan logical-plan)) + (%pushdown plan)) ;;; Query (defclass query () ()) diff -r f8b76ced5e2d -r 9e7d4393eac6 lisp/lib/obj/tests.lisp --- a/lisp/lib/obj/tests.lisp Thu Aug 01 21:21:07 2024 -0400 +++ b/lisp/lib/obj/tests.lisp Sat Aug 03 00:59:36 2024 -0400 @@ -250,6 +250,9 @@ (defclass bogus-data-source (data-source) ((db :initform nil :initarg :db))) -(deftest query-simple-hard () +(defvar *basic-query* "SELECT * FROM employee WHERE state = 'CT'") + +(deftest query-basic () "Test the simple query `SELECT * FROM employee WHERE state = 'CT'` by manually -building a query-plan.") +building a query-plan." + (make-query *basic-query*)) diff -r f8b76ced5e2d -r 9e7d4393eac6 lisp/lib/q/dql.lisp --- a/lisp/lib/q/dql.lisp Thu Aug 01 21:21:07 2024 -0400 +++ b/lisp/lib/q/dql.lisp Sat Aug 03 00:59:36 2024 -0400 @@ -23,13 +23,42 @@ ;; The WAM compiler is a bit too much to understand let alone implement at ;; this stage. The design of this package will be much simpler and optimized -;; for compatibility with Lisp. +;; for compatibility with Lisp Objects. + +;; I think we can get quite far, with a bit of clever hacking and some good +;; macros. + +;;;; Refs -;; I think we can get quite far +;; https://franz.com/support/documentation/11.0/prolog.html + +;; https://github.com/wmannis/cl-gambol + +;; https://norvig.com/paip/README.html + +;; https://en.wikipedia.org/wiki/Negation_as_failure ;;; Code: (in-package :q/dql) +;;; Vars + +(declaim (fixnum *lips*)) +(defvar *lips* 0 + "Count of logical inferences performed.") + +;; from GAMBOL +(defvar *interactive* t "true iff interacting with user") +(defvar *auto-backtrack* nil "return all solutions if true") +(defvar *last-continuation* nil "saved state of the system") +(defvar *trail* nil "the trail, for backtracking") +(defvar *x-env* nil "env for goals") +(defvar *y-env* nil "env for rules") +(defvar *top-level-envs* nil "saves top-level environments") +(defvar *top-level-vars* nil "saves top-level variable names") +(defvar *num-slots* -1 "number of logical variables in a query") +(defvar *rules* (make-hash-table) "hash table for prolog rule heads") + ;;; Conditions (define-condition dql-error (error) ()) @@ -38,8 +67,21 @@ (defun simple-dql-error (ctrl &rest args) (error 'simpl-dql-error :format-control ctrl :format-arguments args)) +;;; CLOS (defclass dql-query (query) ()) (defclass dql-data-source (data-source) () (:documentation "Data source which can be used withing DQL expressions.")) +;;; Prolog Semantics + +;; NOTE 2024-08-03: we're loosely following along with CL-GAMBOL, but sticking +;; with defstructs instead of vectors for the most part. I'm willing to pay +;; the immediate cost of not vectorizing in hopes that the fact that structs +;; are vector-backed and multi-threaded contexts exist will minimize the +;; effect. + +;;; Macros +(defmacro ?- (&body clauses)) + +(defmacro *- (head &body body)) diff -r f8b76ced5e2d -r 9e7d4393eac6 lisp/lib/skel/core/print.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lisp/lib/skel/core/print.lisp Sat Aug 03 00:59:36 2024 -0400 @@ -0,0 +1,28 @@ +;;; print.lisp --- Skel Printer + +;; SK-PRINT + +;;; Commentary: + +;; SK-PRINT is the top-level interface, and dispatches on all sorts of SKEL +;; objects. The output is different than the PRINT-OBJECT methods, which are +;; implemented in the SKEL/CORE/OBJ package. + +;; SK-PRINT is the 'external print' representation, which is structured, akin +;; to PPRINT - while PRINT-OBJECT is the 'internal print' and unstructured +;; representation. + +;; All printer parameters are dynamic and dispatch occurs in the same manner +;; as the standard Lisp Printer. Additional parameters may be provided in the +;; future. + +;;; Code: +(in-package :skel/core/print) + +;; sb-pretty::*standard-pprint-dispatch-table* +;; *readtable* + +(sb-ext:defglobal *sk-print-dispatch-table* (sb-pretty::make-pprint-dispatch-table #() nil nil)) + +(defmethod sk-print ((self skel)) + (pprint (cons (keywordicate (class-name (class-of self))) (format-sxhash (obj/id:id self))))) diff -r f8b76ced5e2d -r 9e7d4393eac6 lisp/lib/skel/skel.lisp