changeset 580: |
571685ae64f1 |
parent: |
806c2b214df8
|
child: |
d3e2829521a3 |
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Mon, 05 Aug 2024 21:57:13 -0400 |
permissions: |
-rw-r--r-- |
description: |
queries, cli fixes, dat/csv, emacs org-columns |
1 ;;; sql.lisp --- Structured Query Langs 3 ;; SQL parser and query specification. 7 ;; Parser derived from PARSE/PRATT:PRATT-PARSER 9 ;; ref: https://tdop.github.io/ 14 (declaim (optimize (speed 3))) 17 (define-condition sql-error (error) ()) 19 (deferror simple-sql-error (sql-error simple-error) ()) 21 (defun simple-sql-error (ctrl &rest args) 22 (error 'simple-sql-error :format-control ctrl :format-arguments args)) 24 (define-condition sql-token-error (sql-error) 25 ((token :initarg :token :reader bad-token)) 26 (:report (lambda (c s) 27 (format s "Bad Token: ~A" (bad-token c))))) 29 (defun sql-token-error (token) 30 (error 'sql-token-error :token token)) 32 (define-condition illegal-sql-state (sql-error) 33 ((state :initform nil :initarg :state :reader illegal-state)) 34 (:report (lambda (c s) 35 (format s "Illegal SQL State: ~A" (illegal-state c))))) 37 (defun illegal-sql-state (state) 38 (error 'illegal-sql-state :state state)) 41 (defclass sql-query (query) ()) 43 (defclass sql-data-source (data-source) () 44 (:documentation "Data source which can be used within SQL expressions.")) 46 (defclass sql-expression () ()) 48 (deftype sql-expression-vector () '(vector sql-expression)) 50 (defclass sql-identifier (id sql-expression) ()) 52 (defclass sql-binary-expression (binary-expression sql-expression) ()) 54 (defclass sql-math-expression (sql-binary-expression) 55 ((op :initarg :op :type symbol :accessor binary-expression-op))) 57 (defclass sql-string (sql-expression literal-expression) 58 ((value :type string :initarg :value :accessor literal-value))) 60 (defclass sql-number (sql-expression literal-expression) 61 ((value :type number :initarg :value :accessor literal-value))) 63 (defclass sql-function (id sql-expression) 64 ((args :type sql-expression-vector :initarg :args))) 66 (defclass sql-alias (sql-expression alias-expression) ()) 68 (defclass sql-cast (sql-expression) 69 ((expr :type sql-expression :initarg :expr) 70 (type :type sql-identifier :initarg :type))) 72 (defclass sql-sort (sql-expression) 73 ((expr :type sql-expression :initarg :expr) 74 (asc :type boolean :initarg :asc))) 76 (defclass sql-relation (sql-expression) ()) 78 (defclass sql-select (sql-relation) 79 ((projection :type sql-expression-vector :initarg :projection) 80 (selection :type (or sql-expression null) :initarg :selection) 81 (group-by :type sql-expression-vector :initarg :group-by) 82 (order-by :type sql-expression-vector :initarg :order-by) 83 (having :type (or null sql-expression) :initarg :having) 84 (table-name :type string :initarg :table-name))) 88 (defvar *sql-token-types* (list :ident :str :num :kw :op :sym t)) 89 (deftype sql-token-type-designator () `(member ,@*sql-token-types*)) 90 (defvar *sql-keywords* 292 (defvar *sql-keyword-start-chars* 293 (remove-duplicates (mapcar 295 (declare (simple-string k)) 299 (defvar *sql-keyword-table* 300 (let* ((pairs (mapcar (lambda (x) (cons (keywordicate x) x)) *sql-keywords*)) 301 (table (make-hash-table :size (length pairs)))) 302 (dolist (p pairs table) 303 (setf (gethash (car p) table) (cdr p))))) 305 (defvar *sql-symbol-table* 306 (let* ((pairs '((:LEFT-PAREN . "(") 310 (:LEFT-BRACKET . "[") 311 (:RIGHT-BRACKET . "]") 329 (:DOUBLE-COLON . "::") 346 (table (make-hash-table :size (length pairs)))) 347 (dolist (p pairs table) 348 (setf (gethash (car p) table) (cdr p))))) 350 (declaim (ftype (function (keyword) (values string boolean)) 353 (defun get-sql-keyword (kw) (gethash kw *sql-keyword-table*)) 354 (defun get-sql-symbol (kw) (gethash kw *sql-symbol-table*))) 356 (defvar *sql-symbols* (hash-table-values *sql-symbol-table*)) 358 (defvar *sql-symbol-start-chars* (remove-duplicates 360 (declare (simple-string x)) 365 (text "" :type string) 366 (type t :type sql-token-type-designator) 367 (end 0 :type fixnum)) 369 (defun num-start-p (c) (or (digit-char-p c) (char= #\. c) (char= #\- c))) 370 (defun ident-start-p (c) (alpha-char-p c)) 371 (defun ident-part-p (c) (or (alpha-char-p c) (digit-char-p c) (char= #\_ c))) 372 (defun str-start-p (c) (or (char= #\' c) (char= #\" c))) 373 (defun kw-start-p (c) (member c *sql-keyword-start-chars* :test 'char=)) 374 (defun sym-start-p (c) (member c *sql-symbol-start-chars* :test 'char=)) 376 ;; low-level token readers 377 (defmacro def-sql-reader (name (&rest args) &body body) 378 `(defun ,(symbolicate 'read-sql- name) (,@args) 379 (declare (optimize (safety 0))) 382 (defun peek-sql-char (expected stream &optional skip-ws) 383 (char= (peek-char skip-ws stream) expected)) 385 (def-sql-reader char (stream expected &optional skip-ws) 386 (when (peek-sql-char expected stream skip-ws) 387 (read-char stream nil nil))) 389 (def-sql-reader num-token (stream) 392 (with-output-to-string (s) 393 (when (read-sql-char stream #\- nil) 395 (loop for x = (peek-char nil stream nil nil) 397 while (or (digit-char-p x) (char= #\. x)) 398 do (write-char (read-char stream nil nil) s) 401 :end (file-position stream))) 403 (def-sql-reader str-token (stream) 404 (let ((tok (make-sql-token :type :str)) 406 (unless (read-sql-char stream terminator) 407 (setf terminator #\') 408 (unless (read-sql-char stream terminator) 409 (sql-token-error tok))) 410 (setf (sql-token-text tok) 411 (with-output-to-string (s) 412 (loop for x = (peek-char nil stream) ;; must not be EOF before terminator 413 if (not (char= terminator x)) 414 do (write-char (read-char stream) s) 415 else if (char= terminator x) 416 do (return (read-char stream))))) 417 (setf (sql-token-end tok) (file-position stream)) 420 (def-sql-reader sym-token (stream) 421 (let ((tok (make-sql-token :type :sym))) 422 (setf (sql-token-text tok) 423 (with-output-to-string (s) 424 (write-char (read-char stream nil nil) s)) 425 (sql-token-end tok) (file-position stream)) 428 (defun ambiguous-ident-p (tok) 429 (let ((text (sql-token-text tok))) 430 (or (string-equal #.(get-sql-keyword :ORDER) text) 431 (string-equal #.(get-sql-keyword :GROUP) text)))) 433 (defun proc-ambiguous-ident (stream start) 434 (declare (stream stream) (fixnum start)) 436 (read-sequence (make-string 2) stream :start start :end (the fixnum (+ start 2))) 437 #.(get-sql-keyword :BY)) 441 (def-sql-reader ident-token (stream) 442 (let ((tok (make-sql-token))) 443 (if (read-sql-char stream #\`) 444 (setf (sql-token-text tok) 445 (with-output-to-string (s) 446 (loop for x = (peek-char nil stream) ;; must not be EOF before terminator 447 if (not (char= #\` x)) 448 do (write-char (read-char stream) s) 449 else do (return (read-char stream)))) 450 (sql-token-type tok) :ident) 451 ;; may not actually be ident - we check for kw after we have a known end position 452 (setf (sql-token-text tok) 453 (with-output-to-string (s) 454 (loop for x = (peek-char nil stream nil nil) 455 while (and x (ident-part-p x)) 456 do (write-char (read-char stream) s))))) 457 (setf (sql-token-end tok) (file-position stream)) 458 ;; resolve sql-token-type 460 ((ambiguous-ident-p tok) 461 (setf (sql-token-type tok) (proc-ambiguous-ident stream (sql-token-end tok)))) 462 ((and (not (eql (sql-token-type tok) :ident)) (member (sql-token-text tok) *sql-keywords* :test 'string-equal)) 463 (setf (sql-token-type tok) :kw))) 466 (defun next-sql-token (stream) 467 "Parse the next sql token from input STREAM else return nil." 470 (next (peek-char t stream nil nil))) 472 (return-from :next tok)) 474 ((num-start-p next) (read-sql-num-token stream)) 475 ((ident-start-p next) (read-sql-ident-token stream)) 476 ((str-start-p next) (read-sql-str-token stream)) 477 ((sym-start-p next) (read-sql-sym-token stream)) 478 (t (make-sql-token :end (file-position stream))))))) 480 (defun read-sql-stream (stream) 481 (loop for tok = (next-sql-token stream) 485 (defun read-sql-string (sql) 486 "Convert SQL string into a list of tokens. Tokens are of the form 488 (with-input-from-string (sql sql) 489 (read-sql-stream sql))) 493 ;; At this point we have a sequence (list) of tokens 494 (defclass sql-parser (pratt-parser query-parser) 495 ((tokens :type list :initarg :tokens :accessor sql-tokens))) 497 (defmethod next-precedence ((self sql-parser)) 498 (let ((token (car (sql-tokens self)))) 501 (case (sql-token-type token) 502 (:kw (string-case ((sql-token-text token) :default 0) 508 (:sym (string-case ((sql-token-text token) :default 0) 509 (#.(get-sql-symbol :LT) 40) 510 (#.(get-sql-symbol :LT-EQ) 40) 511 (#.(get-sql-symbol :EQ) 40) 512 (#.(get-sql-symbol :BANG-EQ) 40) 513 (#.(get-sql-symbol :GT-EQ) 40) 514 (#.(get-sql-symbol :GT) 40) 515 (#.(get-sql-symbol :PLUS) 50) 516 (#.(get-sql-symbol :SUB) 50) 517 (#.(get-sql-symbol :STAR) 60) 518 (#.(get-sql-symbol :SLASH) 60) 519 (#.(get-sql-symbol :LEFT-PAREN) 70))) 522 (defmethod parse-prefix ((self sql-parser)) 523 (let ((token (pop (sql-tokens self)))) 525 (case (sql-token-type token) 526 (:kw (string-case ((sql-token-text token)) 527 ("SELECT" (parse-select self)) 528 ("CAST" (parse-cast self)) 529 ("MAX" (make-instance 'sql-identifier :id "MAX")) 530 ("INT" (make-instance 'sql-identifier :id "INT")) 531 ("DOUBLE" (make-instance 'sql-identifier :id "DOUBLE")))) 532 (:ident (make-instance 'sql-identifier :id (sql-token-text token))) 533 (:str (make-instance 'sql-string :value (sql-token-text token))) 534 (:num (make-instance 'sql-number :value (parse-number (sql-token-text token)))) 535 ;; unknown identifier 536 (t (make-instance 'sql-identifier :id (sql-token-text token))))))) 538 (defmethod parse-infix ((self sql-parser) (left sql-expression) precedence) 539 (let* ((tokens (sql-tokens self)) 540 (token (pop tokens))) 542 (case (sql-token-type token) 544 ((member (sql-token-text token) (list #.(get-sql-symbol :PLUS) #.(get-sql-symbol :SUB) 545 #.(get-sql-symbol :STAR) #.(get-sql-symbol :SLASH) 546 #.(get-sql-symbol :EQ) #.(get-sql-symbol :GT) 547 #.(get-sql-symbol :LT)) 549 (pop (sql-tokens self)) ;; consume 550 (make-instance 'sql-math-expression 552 :op (sql-token-text token) 553 :rhs (parse self precedence))) 554 ((string-equal "(" (sql-token-text token)) 556 (let ((args (parse-expression-list self))) 557 (assert (string-equal (sql-token-text (pop tokens)) ")")) 558 (make-instance 'sql-function :id (id left) :args args))) 560 (:kw (string-case ((sql-token-text token)) 562 (make-instance 'sql-alias 564 :alias (parse-identifier self))) 566 (make-instance 'sql-binary-expression 569 :rhs (parse self precedence))) 571 (make-instance 'sql-binary-expression 574 :rhs (parse self precedence))) 576 ("DESC" (pop tokens)))))))) 578 (defmethod parse-order ((self sql-parser)) 580 (sort (parse-expression self))) 583 (case (sql-token-type sort) 584 (:ident (setf sort (make-instance 'sql-sort :expr sort :asc t))) 586 (push sort sort-list) 587 (let ((next (car (sql-tokens self)))) 588 (when (and (eql (sql-token-type next) :sym) (string-equal (sql-token-text next) ",")) 589 (pop (sql-tokens self))) 590 (setf sort (parse-expression self)))) 591 finally (return sort-list)))) 593 (defmethod parse-cast ((self sql-parser)) 594 (let ((tokens (sql-tokens self))) 595 (assert (string-equal (sql-token-text (pop tokens)) "(")) 596 (let* ((expr (parse-expression self)) 597 (alias (make-instance 'sql-alias :expr expr))) 598 (assert (string-equal (sql-token-text (pop tokens)) ")")) 599 (make-instance 'sql-cast :expr expr :type (slot-value alias 'alias))))) 601 (defmethod parse-select ((self sql-parser)) 602 (let ((projection (parse-expression-list self)) 603 table filter-expr group-by having-expr order-by 604 (tok (pop (sql-tokens self)))) 605 (case (sql-token-type tok) 606 (:kw (string-case ((sql-token-text tok)) 608 (setf table (parse-expression self)) 610 ;; parse optional WHERE 611 (let ((next (car (sql-tokens self)))) 613 (when (string-equal "WHERE" (sql-token-text next)) 614 (setf filter-expr (parse-expression self))) 616 (string-equal "GROUP" (sql-token-text next)) 617 (string-equal "BY" (sql-token-text (cadr (sql-tokens self))))) 618 (setf group-by (parse-expression-list self))) 619 (when (string-equal "HAVING" (sql-token-text next)) 620 (setf having-expr (parse-expression self))) 621 (when (and (string-equal "ORDER" (sql-token-text next)) 622 (string-equal "BY" (sql-token-text next))) 623 (setf order-by (parse-order self)))))))) 624 (t (illegal-sql-state tok))) 625 (make-instance 'sql-select 626 :projection projection 627 :selection filter-expr 631 :table-name (id table)))) 633 (defmethod parse-expression-list ((self sql-parser)) 634 (log:trace! "> parse-expression-list") 636 (loop for expr = (parse-expression self) 639 if ;; check for comma and repeat, else return 640 (let ((peek (car (sql-tokens self)))) 642 (eql :sym (sql-token-type peek)) 643 (string-equal (sql-token-text peek) #.(get-sql-symbol :comma)))) 644 do (pop (sql-tokens self)) 646 finally (return ret)))) 648 (defmethod parse-expression ((self sql-parser)) 651 (defmethod parse-identifier ((self sql-parser)) 652 (let ((expr (parse-expression self))) 653 (if (typep expr 'sql-identifier) 655 (simple-sql-error "Expected identifier, got ~A" expr)))) 657 (defmacro with-sql-parser ((sym &optional tokens) &body body) 658 `(let ((,sym (make-instance 'sql-parser :tokens ,tokens))) 661 (defmacro with-sql-string ((sym str) &body body) 662 `(with-sql-parser (,sym (read-sql-string ,str)) 665 (defmacro with-sql-stream ((sym stream) &body body) 666 `(with-sql-parser (,sym (read-sql-stream ,stream)) 670 (defclass sql-planner (query-planner) ()) 672 (defun make-sql-logical-expression (expr input) 674 (sql-identifier (make-instance 'column-expression :name (id expr))) 675 (sql-string (literal-value expr)) 676 (sql-number (literal-value expr)) 677 ;; TODO 2024-08-04: sql-unary-expression 678 (sql-binary-expression 679 (let ((l (make-sql-logical-expression (lhs expr) input)) 680 (r (make-sql-logical-expression (rhs expr) input))) 683 (string-case ((binary-expression-op expr)) 685 ("=" (make-instance 'eq-expression :lhs l :rhs r)) 686 ("!=" (make-instance 'neq-expression :lhs l :rhs r)) 687 (">" (make-instance 'gt-expression :lhs l :rhs r)) 688 (">=" (make-instance 'gteq-expression :lhs l :rhs r)) 689 ("<" (make-instance 'lt-expression :lhs l :rhs r)) 690 ("<=" (make-instance 'lteq-expression :lhs l :rhs r)) 692 ("AND" (make-instance 'and-expression :lhs l :rhs r)) 693 ("OR" (make-instance 'or-expression :lhs l :rhs r)) 695 ("+" (make-instance 'add-expression :lhs l :rhs r)) 696 ("-" (make-instance 'sub-expression :lhs l :rhs r)) 697 ("*" (make-instance 'mult-expression :lhs l :rhs r)) 698 ("/" (make-instance 'div-expression :lhs l :rhs r)) 699 ("%" (make-instance 'mod-expression :lhs l :rhs r))))))) 700 (sql-alias (make-instance 'alias-expression 701 :expr (make-sql-logical-expression (slot-value expr 'expr) input) 702 :alias (id (slot-value expr 'alias)))) 703 ;; TODO 2024-08-04: requires cast-expression impl in obj/query 704 ;; (sql-cast (make-instance 'cast)) 707 (string-case ((id expr)) 708 ("MIN" (make-instance 'min-expression 709 :expr (make-sql-logical-expression (car (slot-value expr 'args)) input))) 710 ("MAX" (make-instance 'max-expression 711 :expr (make-sql-logical-expression (car (slot-value expr 'args)) input))) 712 ("SUM" (make-instance 'sum-expression 713 :expr (make-sql-logical-expression (car (slot-value expr 'args)) input))) 714 ("AVG" (make-instance 'avg-expression 715 :expr (make-sql-logical-expression (car (slot-value expr 'args)) input)))))))) 717 (labels ((visit (expr accum) 720 (column-expression (accumulate accum (column-name expr))) 721 (alias-expression (visit (slot-value expr 'expr) accum)) 723 (visit (lhs expr) accum) 724 (visit (rhs expr) accum)) 725 (aggregate-expression (visit (slot-value expr 'expr) accum)))))) 726 (defun get-ref-columns (exprs) 728 (loop for expr across exprs 729 collect (visit expr accum)))) 730 (defun get-selection-ref-columns (select table) 732 (when (slot-value select 'selection) 733 (let ((filter-expr (make-sql-logical-expression (slot-value select 'selection) table))) 734 (visit filter-expr accum) 735 (let ((valid-cols (map 'list (lambda (x) (field-name x)) (fields (schema table))))) 736 (remove-if (lambda (x) (not (member x valid-cols :test 'string-equal))) accum))))))) 738 (defun plan-non-aggregate-query (select df projection-expr column-names-in-selection column-names-in-projection) 740 (unless (slot-value select 'selection) 741 (return-from plan-non-aggregate-query (df-project plan projection-expr))) 742 (let ((missing (member-if-not 743 (lambda (x) (member x column-names-in-projection :test 'string-equal)) 744 column-names-in-selection))) 746 (setq plan (df-filter 748 (make-sql-logical-expression 749 (slot-value select 'selection) 750 (setf plan (df-project plan projection-expr))))) 751 (let ((n (length projection-expr))) 752 (setq plan (df-filter plan 753 (make-sql-logical-expression 754 (slot-value select 'selection) 760 (lambda (x) (make-instance 'column-expression :name x)) 762 (lambda (x y) (declare (ignore y)) x))))))) 767 collect (make-instance 'column-expression 768 :name (field-name (field (schema plan) i)))) 772 (defun plan-aggregate-query (projection-expr select column-names-in-selection df aggregate-expr) 774 (proj-no-agg (remove-if 'aggregate-expression-p projection-expr))) 775 (when (slot-value select 'selection) 776 (let* ((cols-in-proj-no-agg (get-ref-columns proj-no-agg)) 777 (missing (member-if-not 778 (lambda (x) (member x cols-in-proj-no-agg :test 'string-equal)) 779 column-names-in-selection))) 781 (setq plan (df-filter 783 (make-sql-logical-expression 784 (slot-value select 'selection) 785 (setf plan (df-project plan proj-no-agg))))) 786 (setq plan (df-filter 788 (make-sql-logical-expression 789 (slot-value select 'selection) 794 (mapcar (lambda (x) (make-instance 'column-expression :name x)) 796 (lambda (x y) (declare (ignore y)) x)))))))) 798 (map 'vector (lambda (x) (make-sql-logical-expression x plan)) 799 (slot-value select 'group-by)) 802 ;; TODO 2024-08-04: fix deadlock 803 (defun make-sql-data-frame (select tables) 804 (let* ((table (gethash (slot-value select 'table-name) 806 (simple-sql-error "No table named ~A" (slot-value select 'table-name)))) 808 (lambda (x) (make-sql-logical-expression x table)) 809 (slot-value select 'projection))) 810 (cols-in-proj (get-ref-columns proj)) 811 (agg-count (count-if 'aggregate-expression-p proj))) 812 (when (and (zerop agg-count) (not (sequence:emptyp (slot-value select 'group-by)))) 813 (simple-sql-error "GROUP BY without aggregate expression is not supported")) 814 (let ((cols-in-sel (get-selection-ref-columns select table)) 816 (if (zerop agg-count) 817 (plan-non-aggregate-query select plan proj cols-in-sel cols-in-proj) 825 (defclass sql-optimizer (query-optimizer) ()) 828 (defclass sql-engine (query-engine) () 830 :parser (make-instance 'sql-parser))) 833 (defmacro with-sql ((sym input &key (parse t) optimize execute) &body body) 834 `(with-sql-parser (,sym ,@(etypecase input 835 (stream `((read-sql-stream ,input))) 836 (string `((read-sql-string ,input))))) 838 (optimize `((setq ,sym (optimize (parse ,sym))))) 839 (parse `((setq ,sym (parse ,sym))))) 841 `((execute (make-physical-plan ,sym))))