changeset 698: |
96958d3eb5b0 |
parent: |
568c39371122
|
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Fri, 04 Oct 2024 22:04:59 -0400 |
permissions: |
-rw-r--r-- |
description: |
fixes |
1 ;;; sql.lisp --- Structured Query Langs 3 ;; SQL parser and query specification. 7 ;; Parser derived from PARSE/PRATT:PRATT-PARSER 9 ;; ref: https://tdop.github.io/ 14 (declaim (optimize (speed 3))) 17 (define-condition sql-error (error) ()) 19 (deferror simple-sql-error (sql-error simple-error) ()) 21 (defun simple-sql-error (ctrl &rest args) 22 (error 'simple-sql-error :format-control ctrl :format-arguments args)) 24 (define-condition sql-token-error (sql-error) 25 ((token :initarg :token :reader bad-token)) 26 (:report (lambda (c s) 27 (format s "Bad Token: ~A" (bad-token c))))) 29 (defun sql-token-error (token) 30 (error 'sql-token-error :token token)) 32 (define-condition illegal-sql-state (sql-error) 33 ((state :initform nil :initarg :state :reader illegal-state)) 34 (:report (lambda (c s) 35 (format s "Illegal SQL State: ~A" (illegal-state c))))) 37 (defun illegal-sql-state (state) 38 (error 'illegal-sql-state :state state)) 41 (defclass sql-query (query) ()) 43 (defclass sql-data-source (data-source) () 44 (:documentation "Data source which can be used within SQL expressions.")) 46 ;; SQL-EXPRESSIONs are the output of a SQL-PARSER. These objects are further 47 ;; lowered to LOGICAL-EXPRESSIONs. 48 (defclass sql-expression () ()) 50 (deftype sql-expression-vector () '(vector sql-expression)) 52 (defclass sql-identifier (id sql-expression) ()) 54 (defclass sql-binary-expression (binary-expression sql-expression) ()) 56 (defclass sql-math-expression (sql-binary-expression) 57 ((op :initarg :op :type symbol :accessor binary-expression-op))) 59 (defclass sql-string (sql-expression literal-expression) 60 ((value :type string :initarg :value :accessor literal-value))) 62 (defclass sql-number (sql-expression literal-expression) 63 ((value :type number :initarg :value :accessor literal-value))) 65 (defclass sql-function (id sql-expression) 66 ((args :type sql-expression-vector :initarg :args))) 68 (defclass sql-alias (sql-expression alias-expression) ()) 70 (defclass sql-cast (sql-expression) 71 ((expr :type sql-expression :initarg :expr) 72 (type :type sql-identifier :initarg :type))) 74 (defclass sql-sort (sql-expression) 75 ((expr :type sql-expression :initarg :expr) 76 (asc :type boolean :initarg :asc))) 78 (defclass sql-relation (sql-expression) ()) 80 (defclass sql-select (sql-relation) 81 ((projection :type sql-expression-vector :initarg :projection) 82 (selection :type (or sql-expression null) :initarg :selection) 83 (group-by :type sql-expression-vector :initarg :group-by) 84 (order-by :type sql-expression-vector :initarg :order-by) 85 (having :type (or null sql-expression) :initarg :having) 86 (table-name :type string :initarg :table-name))) 90 (defvar *sql-token-types* (list :ident :str :num :kw :op :sym t)) 91 (deftype sql-token-type-designator () `(member ,@*sql-token-types*)) 92 (defvar *sql-keywords* 294 (defvar *sql-keyword-start-chars* 295 (remove-duplicates (mapcar 297 (declare (simple-string k)) 301 (defvar *sql-keyword-table* 302 (let* ((pairs (mapcar (lambda (x) (cons (keywordicate x) x)) *sql-keywords*)) 303 (table (make-hash-table :size (length pairs)))) 304 (dolist (p pairs table) 305 (setf (gethash (car p) table) (cdr p))))) 307 (defvar *sql-symbol-table* 308 (let* ((pairs '((:LEFT-PAREN . "(") 312 (:LEFT-BRACKET . "[") 313 (:RIGHT-BRACKET . "]") 331 (:DOUBLE-COLON . "::") 348 (table (make-hash-table :size (length pairs)))) 349 (dolist (p pairs table) 350 (setf (gethash (car p) table) (cdr p))))) 352 (declaim (ftype (function (keyword) (values string boolean)) 355 (defun get-sql-keyword (kw) (gethash kw *sql-keyword-table*)) 356 (defun get-sql-symbol (kw) (gethash kw *sql-symbol-table*))) 358 (defvar *sql-symbols* (hash-table-values *sql-symbol-table*)) 360 (defvar *sql-symbol-start-chars* (remove-duplicates 362 (declare (simple-string x)) 367 (text "" :type string) 368 (type t :type sql-token-type-designator) 369 (end 0 :type fixnum)) 371 (defun num-start-p (c) (or (digit-char-p c) (char= #\. c) (char= #\- c))) 372 (defun ident-start-p (c) (alpha-char-p c)) 373 (defun ident-part-p (c) (or (alpha-char-p c) (digit-char-p c) (char= #\_ c))) 374 (defun str-start-p (c) (or (char= #\' c) (char= #\" c))) 375 (defun kw-start-p (c) (member c *sql-keyword-start-chars* :test 'char=)) 376 (defun sym-start-p (c) (member c *sql-symbol-start-chars* :test 'char=)) 378 ;; low-level token readers 379 (defmacro def-sql-reader (name (&rest args) &body body) 380 `(defun ,(symbolicate 'read-sql- name) (,@args) 381 (declare (optimize (safety 0))) 384 (defun peek-sql-char (expected stream &optional skip-ws) 385 (char= (peek-char skip-ws stream) expected)) 387 (def-sql-reader char (stream expected &optional skip-ws) 388 (when (peek-sql-char expected stream skip-ws) 389 (read-char stream nil nil))) 391 (def-sql-reader num-token (stream) 394 (with-output-to-string (s) 395 (when (read-sql-char stream #\- nil) 397 (loop for x = (peek-char nil stream nil nil) 399 while (or (digit-char-p x) (char= #\. x)) 400 do (write-char (read-char stream nil nil) s) 403 :end (file-position stream))) 405 (def-sql-reader str-token (stream) 406 (let ((tok (make-sql-token :type :str)) 408 (unless (read-sql-char stream terminator) 409 (setf terminator #\') 410 (unless (read-sql-char stream terminator) 411 (sql-token-error tok))) 412 (setf (sql-token-text tok) 413 (with-output-to-string (s) 414 (loop for x = (peek-char nil stream) ;; must not be EOF before terminator 415 if (not (char= terminator x)) 416 do (write-char (read-char stream) s) 417 else if (char= terminator x) 418 do (return (read-char stream))))) 419 (setf (sql-token-end tok) (file-position stream)) 422 (def-sql-reader sym-token (stream) 423 (let ((tok (make-sql-token :type :sym))) 424 (setf (sql-token-text tok) 425 (with-output-to-string (s) 426 (write-char (read-char stream nil nil) s)) 427 (sql-token-end tok) (file-position stream)) 430 (defun ambiguous-ident-p (tok) 431 (let ((text (sql-token-text tok))) 432 (or (string-equal #.(get-sql-keyword :ORDER) text) 433 (string-equal #.(get-sql-keyword :GROUP) text)))) 435 (defun proc-ambiguous-ident (stream start) 436 (declare (stream stream) (fixnum start)) 438 (read-sequence (make-string 2) stream :start start :end (the fixnum (+ start 2))) 439 #.(get-sql-keyword :BY)) 443 (def-sql-reader ident-token (stream) 444 (let ((tok (make-sql-token))) 445 (if (read-sql-char stream #\`) 446 (setf (sql-token-text tok) 447 (with-output-to-string (s) 448 (loop for x = (peek-char nil stream) ;; must not be EOF before terminator 449 if (not (char= #\` x)) 450 do (write-char (read-char stream) s) 451 else do (return (read-char stream)))) 452 (sql-token-type tok) :ident) 453 ;; may not actually be ident - we check for kw after we have a known end position 454 (setf (sql-token-text tok) 455 (with-output-to-string (s) 456 (loop for x = (peek-char nil stream nil nil) 457 while (and x (ident-part-p x)) 458 do (write-char (read-char stream) s))))) 459 (setf (sql-token-end tok) (file-position stream)) 460 ;; resolve sql-token-type 462 ((ambiguous-ident-p tok) 463 (setf (sql-token-type tok) (proc-ambiguous-ident stream (sql-token-end tok)))) 464 ((and (not (eql (sql-token-type tok) :ident)) (member (sql-token-text tok) *sql-keywords* :test 'string-equal)) 465 (setf (sql-token-type tok) :kw))) 468 (defun next-sql-token (stream) 469 "Parse the next sql token from input STREAM else return nil." 472 (next (peek-char t stream nil nil))) 474 (return-from :next tok)) 476 ((num-start-p next) (read-sql-num-token stream)) 477 ((ident-start-p next) (read-sql-ident-token stream)) 478 ((str-start-p next) (read-sql-str-token stream)) 479 ((sym-start-p next) (read-sql-sym-token stream)) 480 (t (make-sql-token :end (file-position stream))))))) 482 (defun read-sql-stream (stream) 483 (loop for tok = (next-sql-token stream) 487 (defun read-sql-string (sql) 488 "Convert SQL string into a list of tokens. Tokens are of the form 490 (with-input-from-string (sql sql) 491 (read-sql-stream sql))) 495 ;; At this point we have a sequence (list) of tokens 496 (defclass sql-parser (pratt-parser query-parser) 497 ((tokens :type list :initarg :tokens :accessor sql-tokens))) 499 (defmethod next-precedence ((self sql-parser)) 500 (let ((token (car (sql-tokens self)))) 503 (case (sql-token-type token) 504 (:kw (string-case ((sql-token-text token) :default 0) 510 (:sym (string-case ((sql-token-text token) :default 0) 511 (#.(get-sql-symbol :LT) 40) 512 (#.(get-sql-symbol :LT-EQ) 40) 513 (#.(get-sql-symbol :EQ) 40) 514 (#.(get-sql-symbol :BANG-EQ) 40) 515 (#.(get-sql-symbol :GT-EQ) 40) 516 (#.(get-sql-symbol :GT) 40) 517 (#.(get-sql-symbol :PLUS) 50) 518 (#.(get-sql-symbol :SUB) 50) 519 (#.(get-sql-symbol :STAR) 60) 520 (#.(get-sql-symbol :SLASH) 60) 521 (#.(get-sql-symbol :LEFT-PAREN) 70))) 524 (defmethod parse-prefix ((self sql-parser)) 525 (let ((token (pop (sql-tokens self)))) 527 (case (sql-token-type token) 528 (:kw (string-case ((sql-token-text token)) 529 ("SELECT" (parse-select self)) 530 ("CAST" (parse-cast self)) 531 ("MAX" (make-instance 'sql-identifier :id "MAX")) 532 ("INT" (make-instance 'sql-identifier :id "INT")) 533 ("DOUBLE" (make-instance 'sql-identifier :id "DOUBLE")))) 534 (:ident (make-instance 'sql-identifier :id (sql-token-text token))) 535 (:str (make-instance 'sql-string :value (sql-token-text token))) 536 (:num (make-instance 'sql-number :value (parse-number (sql-token-text token)))) 537 ;; unknown identifier 538 (t (make-instance 'sql-identifier :id (sql-token-text token))))))) 540 (defmethod parse-infix ((self sql-parser) (left sql-expression) precedence) 541 (let* ((tokens (sql-tokens self)) 542 (token (pop tokens))) 544 (case (sql-token-type token) 546 ((member (sql-token-text token) (list #.(get-sql-symbol :PLUS) #.(get-sql-symbol :SUB) 547 #.(get-sql-symbol :STAR) #.(get-sql-symbol :SLASH) 548 #.(get-sql-symbol :EQ) #.(get-sql-symbol :GT) 549 #.(get-sql-symbol :LT)) 551 (pop (sql-tokens self)) ;; consume 552 (make-instance 'sql-math-expression 554 :op (sql-token-text token) 555 :rhs (parse self precedence))) 556 ((string-equal "(" (sql-token-text token)) 558 (let ((args (parse-expression-list self))) 559 (assert (string-equal (sql-token-text (pop tokens)) ")")) 560 (make-instance 'sql-function :id (id left) :args args))) 562 (:kw (string-case ((sql-token-text token)) 564 (make-instance 'sql-alias 566 :alias (parse-identifier self))) 568 (make-instance 'sql-binary-expression 571 :rhs (parse self precedence))) 573 (make-instance 'sql-binary-expression 576 :rhs (parse self precedence))) 578 ("DESC" (pop tokens)))))))) 580 (defmethod parse-order ((self sql-parser)) 582 (sort (parse-expression self))) 585 (case (sql-token-type sort) 586 (:ident (setf sort (make-instance 'sql-sort :expr sort :asc t))) 588 (push sort sort-list) 589 (let ((next (car (sql-tokens self)))) 590 (when (and (eql (sql-token-type next) :sym) (string-equal (sql-token-text next) ",")) 591 (pop (sql-tokens self))) 592 (setf sort (parse-expression self)))) 593 finally (return sort-list)))) 595 (defmethod parse-cast ((self sql-parser)) 596 (let ((tokens (sql-tokens self))) 597 (assert (string-equal (sql-token-text (pop tokens)) "(")) 598 (let* ((expr (parse-expression self)) 599 (alias (make-instance 'sql-alias :expr expr))) 600 (assert (string-equal (sql-token-text (pop tokens)) ")")) 601 (make-instance 'sql-cast :expr expr :type (slot-value alias 'alias))))) 603 (defmethod parse-select ((self sql-parser)) 604 (let ((projection (parse-expression-list self)) 605 table filter-expr group-by having-expr order-by 606 (tok (pop (sql-tokens self)))) 607 (case (sql-token-type tok) 608 (:kw (string-case ((sql-token-text tok)) 610 (setf table (parse-expression self)) 612 ;; parse optional WHERE 613 (let ((next (car (sql-tokens self)))) 615 (when (string-equal "WHERE" (sql-token-text next)) 616 (setf filter-expr (parse-expression self))) 618 (string-equal "GROUP" (sql-token-text next)) 619 (string-equal "BY" (sql-token-text (cadr (sql-tokens self))))) 620 (setf group-by (parse-expression-list self))) 621 (when (string-equal "HAVING" (sql-token-text next)) 622 (setf having-expr (parse-expression self))) 623 (when (and (string-equal "ORDER" (sql-token-text next)) 624 (string-equal "BY" (sql-token-text next))) 625 (setf order-by (parse-order self)))))))) 626 (t (illegal-sql-state tok))) 627 (make-instance 'sql-select 628 :projection projection 629 :selection filter-expr 633 :table-name (id table)))) 635 (defmethod parse-expression-list ((self sql-parser)) 636 (log:trace! "> parse-expression-list") 638 (loop for expr = (parse-expression self) 641 if ;; check for comma and repeat, else return 642 (let ((peek (car (sql-tokens self)))) 644 (eql :sym (sql-token-type peek)) 645 (string-equal (sql-token-text peek) #.(get-sql-symbol :comma)))) 646 do (pop (sql-tokens self)) 648 finally (return ret)))) 650 (defmethod parse-expression ((self sql-parser)) 653 (defmethod parse-identifier ((self sql-parser)) 654 (let ((expr (parse-expression self))) 655 (if (typep expr 'sql-identifier) 657 (simple-sql-error "Expected identifier, got ~A" expr)))) 659 (defmacro with-sql-parser ((sym &optional tokens) &body body) 660 `(let ((,sym (make-instance 'sql-parser :tokens ,tokens))) 663 (defmacro with-sql-string ((sym str) &body body) 664 `(with-sql-parser (,sym (read-sql-string ,str)) 667 (defmacro with-sql-stream ((sym stream) &body body) 668 `(with-sql-parser (,sym (read-sql-stream ,stream)) 672 (defun make-sql-logical-expression (expr input) 674 (sql-identifier (make-instance 'column-expression :name (id expr))) 675 (sql-string (literal-value expr)) 676 (sql-number (literal-value expr)) 677 ;; TODO 2024-08-04: sql-unary-expression 678 (sql-binary-expression 679 (let ((l (make-sql-logical-expression (lhs expr) input)) 680 (r (make-sql-logical-expression (rhs expr) input))) 683 (string-case ((binary-expression-op expr)) 685 ("=" (make-instance 'eq-expression :lhs l :rhs r)) 686 ("!=" (make-instance 'neq-expression :lhs l :rhs r)) 687 (">" (make-instance 'gt-expression :lhs l :rhs r)) 688 (">=" (make-instance 'gteq-expression :lhs l :rhs r)) 689 ("<" (make-instance 'lt-expression :lhs l :rhs r)) 690 ("<=" (make-instance 'lteq-expression :lhs l :rhs r)) 692 ("AND" (make-instance 'and-expression :lhs l :rhs r)) 693 ("OR" (make-instance 'or-expression :lhs l :rhs r)) 695 ("+" (make-instance 'add-expression :lhs l :rhs r)) 696 ("-" (make-instance 'sub-expression :lhs l :rhs r)) 697 ("*" (make-instance 'mult-expression :lhs l :rhs r)) 698 ("/" (make-instance 'div-expression :lhs l :rhs r)) 699 ("%" (make-instance 'mod-expression :lhs l :rhs r))))))) 700 (sql-alias (make-instance 'alias-expression 701 :expr (make-sql-logical-expression (slot-value expr 'expr) input) 702 :alias (id (slot-value expr 'alias)))) 703 ;; TODO 2024-08-04: requires cast-expression impl in obj/query 704 ;; (sql-cast (make-instance 'cast)) 707 (string-case ((id expr)) 708 ("MIN" (make-instance 'min-expression 709 :expr (make-sql-logical-expression (car (slot-value expr 'args)) input))) 710 ("MAX" (make-instance 'max-expression 711 :expr (make-sql-logical-expression (car (slot-value expr 'args)) input))) 712 ("SUM" (make-instance 'sum-expression 713 :expr (make-sql-logical-expression (car (slot-value expr 'args)) input))) 714 ("AVG" (make-instance 'avg-expression 715 :expr (make-sql-logical-expression (car (slot-value expr 'args)) input)))))))) 717 (labels ((visit (expr accum) 720 (column-expression (accumulate accum (column-name expr))) 721 (alias-expression (visit (slot-value expr 'expr) accum)) 723 (visit (lhs expr) accum) 724 (visit (rhs expr) accum)) 725 (aggregate-expression (visit (slot-value expr 'expr) accum)))))) 726 (defun get-ref-columns (exprs) 728 (loop for expr across exprs 729 collect (visit expr accum)))) 730 (defun get-selection-ref-columns (select table) 732 (when (slot-value select 'selection) 733 (let ((filter-expr (make-sql-logical-expression (slot-value select 'selection) table))) 734 (visit filter-expr accum) 735 (let ((valid-cols (map 'list (lambda (x) (field-name x)) (fields (schema table))))) 736 (remove-if (lambda (x) (not (member x valid-cols :test 'string-equal))) accum))))))) 738 (defun plan-non-aggregate-query (select df projection-expr column-names-in-selection column-names-in-projection) 740 (unless (slot-value select 'selection) 741 (return-from plan-non-aggregate-query (df-project plan projection-expr))) 742 (let ((missing (member-if-not 743 (lambda (x) (member x column-names-in-projection :test 'string-equal)) 744 column-names-in-selection))) 746 (setq plan (df-filter 748 (make-sql-logical-expression 749 (slot-value select 'selection) 750 (setf plan (df-project plan projection-expr))))) 751 (let ((n (length projection-expr))) 752 (setq plan (df-filter plan 753 (make-sql-logical-expression 754 (slot-value select 'selection) 760 (lambda (x) (make-instance 'column-expression :name x)) 762 (lambda (x y) (declare (ignore y)) x))))))) 767 collect (make-instance 'column-expression 768 :name (field-name (field (schema plan) i)))) 772 (defun plan-aggregate-query (projection-expr select column-names-in-selection df aggregate-expr) 774 (proj-no-agg (remove-if 'aggregate-expression-p projection-expr))) 775 (when (slot-value select 'selection) 776 (let* ((cols-in-proj-no-agg (get-ref-columns proj-no-agg)) 777 (missing (member-if-not 778 (lambda (x) (member x cols-in-proj-no-agg :test 'string-equal)) 779 column-names-in-selection))) 781 (setq plan (df-filter 783 (make-sql-logical-expression 784 (slot-value select 'selection) 785 (setf plan (df-project plan proj-no-agg))))) 786 (setq plan (df-filter 788 (make-sql-logical-expression 789 (slot-value select 'selection) 794 (mapcar (lambda (x) (make-instance 'column-expression :name x)) 796 (lambda (x y) (declare (ignore y)) x)))))))) 798 (map 'vector (lambda (x) (make-sql-logical-expression x plan)) 799 (slot-value select 'group-by)) 802 (defun make-sql-data-frame (select tables) 803 "Process the given SELECT statement with the provided hash-table of 804 string:data-frame. Returns a data-frame." 806 (gethash (slot-value select 'table-name) 809 (simple-sql-error "No table named ~A" (slot-value select 'table-name)))) 811 (lambda (x) (make-sql-logical-expression x table)) 812 (slot-value select 'projection))) 813 (cols-in-proj (get-ref-columns proj)) 814 (agg-count (count-if 'aggregate-expression-p proj))) 815 (when (and (zerop agg-count) (not (sequence:emptyp (slot-value select 'group-by)))) 816 (simple-sql-error "GROUP BY without aggregate expression is not supported")) 817 (let ((cols-in-sel (get-selection-ref-columns select table)) 819 (if (zerop agg-count) 820 (plan-non-aggregate-query select plan proj cols-in-sel cols-in-proj) 825 (declare (fixnum n-group-cols group-count)) 826 (loop for expr across proj 828 (aggregate-expression 830 (push (+ n-group-cols (length agg)) pro) 834 (push (make-instance 'alias-expression 835 :name (+ n-group-cols (length agg)) 836 :expr (slot-value expr 'alias)) 838 ;; TODO 2024-08-07: does this need to be cast to aggregate-expression? 839 (push (expr expr) agg))) 841 (push group-count pro) 842 (incf group-count))))) 845 (plan-aggregate-query proj select cols-in-sel plan agg) 847 (if-let ((having (slot-value select 'having))) 848 (df-filter plan (make-sql-logical-expression having plan)) 851 (defmethod make-df ((self sql-select) &key tables &allow-other-keys) 853 (make-sql-data-frame self tables))) 856 (defclass sql-optimizer (query-optimizer) ()) 859 (defclass sql-engine (query-engine) () 861 :parser (make-instance 'sql-parser))) 864 (defmacro with-sql ((sym input &key (parse t) optimize execute) &body body) 865 `(with-sql-parser (,sym ,@(etypecase input 866 (stream `((read-sql-stream ,input))) 867 (string `((read-sql-string ,input))))) 869 (optimize `((setq ,sym (optimize (parse ,sym))))) 870 (parse `((setq ,sym (parse ,sym))))) 872 `((execute (make-physical-plan ,sym))))