changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/q/sql.lisp

changeset 580: 571685ae64f1
parent: 806c2b214df8
child: d3e2829521a3
author: Richard Westhaver <ellis@rwest.io>
date: Mon, 05 Aug 2024 21:57:13 -0400
permissions: -rw-r--r--
description: queries, cli fixes, dat/csv, emacs org-columns
1 ;;; sql.lisp --- Structured Query Langs
2 
3 ;; SQL parser and query specification.
4 
5 ;;; Commentary:
6 
7 ;; Parser derived from PARSE/PRATT:PRATT-PARSER
8 
9 ;; ref: https://tdop.github.io/
10 
11 ;;; Code:
12 (in-package :q/sql)
13 
14 (declaim (optimize (speed 3)))
15 
16 ;;; Conditions
17 (define-condition sql-error (error) ())
18 
19 (deferror simple-sql-error (sql-error simple-error) ())
20 
21 (defun simple-sql-error (ctrl &rest args)
22  (error 'simple-sql-error :format-control ctrl :format-arguments args))
23 
24 (define-condition sql-token-error (sql-error)
25  ((token :initarg :token :reader bad-token))
26  (:report (lambda (c s)
27  (format s "Bad Token: ~A" (bad-token c)))))
28 
29 (defun sql-token-error (token)
30  (error 'sql-token-error :token token))
31 
32 (define-condition illegal-sql-state (sql-error)
33  ((state :initform nil :initarg :state :reader illegal-state))
34  (:report (lambda (c s)
35  (format s "Illegal SQL State: ~A" (illegal-state c)))))
36 
37 (defun illegal-sql-state (state)
38  (error 'illegal-sql-state :state state))
39 
40 ;;; Logical Classes
41 (defclass sql-query (query) ())
42 
43 (defclass sql-data-source (data-source) ()
44  (:documentation "Data source which can be used within SQL expressions."))
45 
46 (defclass sql-expression () ())
47 
48 (deftype sql-expression-vector () '(vector sql-expression))
49 
50 (defclass sql-identifier (id sql-expression) ())
51 
52 (defclass sql-binary-expression (binary-expression sql-expression) ())
53 
54 (defclass sql-math-expression (sql-binary-expression)
55  ((op :initarg :op :type symbol :accessor binary-expression-op)))
56 
57 (defclass sql-string (sql-expression literal-expression)
58  ((value :type string :initarg :value :accessor literal-value)))
59 
60 (defclass sql-number (sql-expression literal-expression)
61  ((value :type number :initarg :value :accessor literal-value)))
62 
63 (defclass sql-function (id sql-expression)
64  ((args :type sql-expression-vector :initarg :args)))
65 
66 (defclass sql-alias (sql-expression alias-expression) ())
67 
68 (defclass sql-cast (sql-expression)
69  ((expr :type sql-expression :initarg :expr)
70  (type :type sql-identifier :initarg :type)))
71 
72 (defclass sql-sort (sql-expression)
73  ((expr :type sql-expression :initarg :expr)
74  (asc :type boolean :initarg :asc)))
75 
76 (defclass sql-relation (sql-expression) ())
77 
78 (defclass sql-select (sql-relation)
79  ((projection :type sql-expression-vector :initarg :projection)
80  (selection :type (or sql-expression null) :initarg :selection)
81  (group-by :type sql-expression-vector :initarg :group-by)
82  (order-by :type sql-expression-vector :initarg :order-by)
83  (having :type (or null sql-expression) :initarg :having)
84  (table-name :type string :initarg :table-name)))
85 
86 ;;; Lexer
87 (eval-always
88  (defvar *sql-token-types* (list :ident :str :num :kw :op :sym t))
89  (deftype sql-token-type-designator () `(member ,@*sql-token-types*))
90  (defvar *sql-keywords*
91  (list "SCHEMA"
92  "DATABASE"
93  "TABLE"
94  "COLUMN"
95  "VIEW"
96  "INDEX"
97  "TRIGGER"
98  "PROCEDURE"
99  "TABLESPACE"
100  "FUNCTION"
101  "SEQUENCE"
102  "CURSOR"
103  "FROM"
104  "TO"
105  "OF"
106  "IF"
107  "ON"
108  "FOR"
109  "WHILE"
110  "DO"
111  "NO"
112  "BY"
113  "WITH"
114  "WITHOUT"
115  "TRUE"
116  "FALSE"
117  "TEMPORARY"
118  "TEMP"
119  "COMMENT"
120  ;; create
121  "CREATE"
122  "REPLACE"
123  "BEFORE"
124  "AFTER"
125  "INSTEAD"
126  "EACH"
127  "ROW"
128  "STATEMENT"
129  "EXECUTE"
130  "BITMAP"
131  "NOSORT"
132  "REVERSE"
133  "COMPILE"
134  ;; alter
135  "ALTER"
136  "ADD"
137  "MODIFY"
138  "RENAME"
139  "ENABLE"
140  "DISABLE"
141  "VALIDATE"
142  "USER"
143  "IDENTIFIED"
144  ;; truncate
145  "TRUNCATE"
146  ;; drop
147  "DROP"
148  "CASCADE"
149  ;; insert
150  "INSERT"
151  "INTO"
152  "VALUES"
153  ;; update
154  "UPDATE"
155  "SET"
156  ;; delete
157  "DELETE"
158  ;; select
159  "SELECT"
160  "DISTINCT"
161  "AS"
162  "CASE"
163  "WHEN"
164  "ELSE"
165  "THEN"
166  "END"
167  "LEFT"
168  "RIGHT"
169  "FULL"
170  "INNER"
171  "OUTER"
172  "CROSS"
173  "JOIN"
174  "USE"
175  "USING"
176  "NATURAL"
177  "WHERE"
178  "ORDER"
179  "ASC"
180  "DESC"
181  "GROUP"
182  "HAVING"
183  "UNION"
184  ;; others
185  "DECLARE"
186  "GRANT"
187  "FETCH"
188  "REVOKE"
189  "CLOSE"
190  "CAST"
191  "NEW"
192  "ESCAPE"
193  "LOCK"
194  "SOME"
195  "LEAVE"
196  "ITERATE"
197  "REPEAT"
198  "UNTIL"
199  "OPEN"
200  "OUT"
201  "INOUT"
202  "OVER"
203  "ADVISE"
204  "SIBLINGS"
205  "LOOP"
206  "EXPLAIN"
207  "DEFAULT"
208  "EXCEPT"
209  "INTERSECT"
210  "MINUS"
211  "PASSWORD"
212  "LOCAL"
213  "GLOBAL"
214  "STORAGE"
215  "DATA"
216  "COALESCE"
217  ;; Types
218  "CHAR"
219  "CHARACTER"
220  "VARYING"
221  "VARCHAR"
222  "VARCHAR2"
223  "INTEGER"
224  "INT"
225  "SMALLINT"
226  "DECIMAL"
227  "DEC"
228  "NUMERIC"
229  "FLOAT"
230  "REAL"
231  "DOUBLE"
232  "PRECISION"
233  "DATE"
234  "TIME"
235  "INTERVAL"
236  "BOOLEAN"
237  "BLOB"
238  ;; Conditionals
239  "AND"
240  "OR"
241  "XOR"
242  "IS"
243  "NOT"
244  "NULL"
245  "IN"
246  "BETWEEN"
247  "LIKE"
248  "ANY"
249  "ALL"
250  "EXISTS"
251  ;; Functions
252  "AVG"
253  "MAX"
254  "MIN"
255  "SUM"
256  "COUNT"
257  "GREATEST"
258  "LEAST"
259  "ROUND"
260  "TRUNC"
261  "POSITION"
262  "EXTRACT"
263  "LENGTH"
264  "CHAR_LENGTH"
265  "SUBSTRING"
266  "SUBSTR"
267  "INSTR"
268  "INITCAP"
269  "UPPER"
270  "LOWER"
271  "TRIM"
272  "LTRIM"
273  "RTRIM"
274  "BOTH"
275  "LEADING"
276  "TRAILING"
277  "TRANSLATE"
278  "CONVERT"
279  "LPAD"
280  "RPAD"
281  "DECODE"
282  "NVL"
283  ;; Constraints
284  "CONSTRAINT"
285  "UNIQUE"
286  "PRIMARY"
287  "FOREIGN"
288  "KEY"
289  "CHECK"
290  "REFERENCES"))
291 
292  (defvar *sql-keyword-start-chars*
293  (remove-duplicates (mapcar
294  (lambda (k)
295  (declare (simple-string k))
296  (char k 0))
297  *sql-keywords*)))
298 
299  (defvar *sql-keyword-table*
300  (let* ((pairs (mapcar (lambda (x) (cons (keywordicate x) x)) *sql-keywords*))
301  (table (make-hash-table :size (length pairs))))
302  (dolist (p pairs table)
303  (setf (gethash (car p) table) (cdr p)))))
304 
305  (defvar *sql-symbol-table*
306  (let* ((pairs '((:LEFT-PAREN . "(")
307  (:RIGHT-PAREN . ")")
308  (:LEFT-BRACE . "{")
309  (:RIGHT-BRACE . "}")
310  (:LEFT-BRACKET . "[")
311  (:RIGHT-BRACKET . "]")
312  (:SEMI . ";")
313  (:COMMA . ",")
314  (:DOT . ".")
315  (:DOUBLE-DOT . "..")
316  (:PLUS . "+")
317  (:SUB . "-")
318  (:STAR . "*")
319  (:SLASH . "/")
320  (:QUESTION . "?")
321  (:EQ . "=")
322  (:GT . ">")
323  (:LT . "<")
324  (:BANG . "!")
325  (:TILDE . "~")
326  (:CARET . "^")
327  (:PERCENT . "%")
328  (:COLON . ":")
329  (:DOUBLE-COLON . "::")
330  (:COLON-EQ . ":=")
331  (:LT-EQ . "<=")
332  (:GT-EQ . ">=")
333  (:LT-EQ-GT . "<=>")
334  (:LT-GT . "<>")
335  (:BANG-EQ . "!=")
336  (:BANG-GT . "!>")
337  (:BANG-LT . "!<")
338  (:AMP . "&")
339  (:BAR . "|")
340  (:DOUBLE-AMP . "&&")
341  (:DOUBLE-BAR . "||")
342  (:DOUBLE-LT . "<<")
343  (:DOUBLE-GT . ">>")
344  (:AT . "@")
345  (:POUND . "#")))
346  (table (make-hash-table :size (length pairs))))
347  (dolist (p pairs table)
348  (setf (gethash (car p) table) (cdr p)))))
349 
350  (declaim (ftype (function (keyword) (values string boolean))
351  get-sql-keyword
352  get-sql-symbol))
353  (defun get-sql-keyword (kw) (gethash kw *sql-keyword-table*))
354  (defun get-sql-symbol (kw) (gethash kw *sql-symbol-table*)))
355 
356 (defvar *sql-symbols* (hash-table-values *sql-symbol-table*))
357 
358 (defvar *sql-symbol-start-chars* (remove-duplicates
359  (mapcar (lambda (x)
360  (declare (simple-string x))
361  (char x 0))
362  *sql-symbols*)))
363 
364 (defstruct sql-token
365  (text "" :type string)
366  (type t :type sql-token-type-designator)
367  (end 0 :type fixnum))
368 
369 (defun num-start-p (c) (or (digit-char-p c) (char= #\. c) (char= #\- c)))
370 (defun ident-start-p (c) (alpha-char-p c))
371 (defun ident-part-p (c) (or (alpha-char-p c) (digit-char-p c) (char= #\_ c)))
372 (defun str-start-p (c) (or (char= #\' c) (char= #\" c)))
373 (defun kw-start-p (c) (member c *sql-keyword-start-chars* :test 'char=))
374 (defun sym-start-p (c) (member c *sql-symbol-start-chars* :test 'char=))
375 
376 ;; low-level token readers
377 (defmacro def-sql-reader (name (&rest args) &body body)
378  `(defun ,(symbolicate 'read-sql- name) (,@args)
379  (declare (optimize (safety 0)))
380  ,@body))
381 
382 (defun peek-sql-char (expected stream &optional skip-ws)
383  (char= (peek-char skip-ws stream) expected))
384 
385 (def-sql-reader char (stream expected &optional skip-ws)
386  (when (peek-sql-char expected stream skip-ws)
387  (read-char stream nil nil)))
388 
389 (def-sql-reader num-token (stream)
390  (make-sql-token
391  :text
392  (with-output-to-string (s)
393  (when (read-sql-char stream #\- nil)
394  (write-char #\- s))
395  (loop for x = (peek-char nil stream nil nil)
396  while x
397  while (or (digit-char-p x) (char= #\. x))
398  do (write-char (read-char stream nil nil) s)
399  finally (return s)))
400  :type :num
401  :end (file-position stream)))
402 
403 (def-sql-reader str-token (stream)
404  (let ((tok (make-sql-token :type :str))
405  (terminator #\"))
406  (unless (read-sql-char stream terminator)
407  (setf terminator #\')
408  (unless (read-sql-char stream terminator)
409  (sql-token-error tok)))
410  (setf (sql-token-text tok)
411  (with-output-to-string (s)
412  (loop for x = (peek-char nil stream) ;; must not be EOF before terminator
413  if (not (char= terminator x))
414  do (write-char (read-char stream) s)
415  else if (char= terminator x)
416  do (return (read-char stream)))))
417  (setf (sql-token-end tok) (file-position stream))
418  tok))
419 
420 (def-sql-reader sym-token (stream)
421  (let ((tok (make-sql-token :type :sym)))
422  (setf (sql-token-text tok)
423  (with-output-to-string (s)
424  (write-char (read-char stream nil nil) s))
425  (sql-token-end tok) (file-position stream))
426  tok))
427 
428 (defun ambiguous-ident-p (tok)
429  (let ((text (sql-token-text tok)))
430  (or (string-equal #.(get-sql-keyword :ORDER) text)
431  (string-equal #.(get-sql-keyword :GROUP) text))))
432 
433 (defun proc-ambiguous-ident (stream start)
434  (declare (stream stream) (fixnum start))
435  (if (equalp
436  (read-sequence (make-string 2) stream :start start :end (the fixnum (+ start 2)))
437  #.(get-sql-keyword :BY))
438  :kw
439  :ident))
440 
441 (def-sql-reader ident-token (stream)
442  (let ((tok (make-sql-token)))
443  (if (read-sql-char stream #\`)
444  (setf (sql-token-text tok)
445  (with-output-to-string (s)
446  (loop for x = (peek-char nil stream) ;; must not be EOF before terminator
447  if (not (char= #\` x))
448  do (write-char (read-char stream) s)
449  else do (return (read-char stream))))
450  (sql-token-type tok) :ident)
451  ;; may not actually be ident - we check for kw after we have a known end position
452  (setf (sql-token-text tok)
453  (with-output-to-string (s)
454  (loop for x = (peek-char nil stream nil nil)
455  while (and x (ident-part-p x))
456  do (write-char (read-char stream) s)))))
457  (setf (sql-token-end tok) (file-position stream))
458  ;; resolve sql-token-type
459  (cond
460  ((ambiguous-ident-p tok)
461  (setf (sql-token-type tok) (proc-ambiguous-ident stream (sql-token-end tok))))
462  ((and (not (eql (sql-token-type tok) :ident)) (member (sql-token-text tok) *sql-keywords* :test 'string-equal))
463  (setf (sql-token-type tok) :kw)))
464  tok))
465 
466 (defun next-sql-token (stream)
467  "Parse the next sql token from input STREAM else return nil."
468  (block :next
469  (let ((tok)
470  (next (peek-char t stream nil nil)))
471  (unless next
472  (return-from :next tok))
473  (cond
474  ((num-start-p next) (read-sql-num-token stream))
475  ((ident-start-p next) (read-sql-ident-token stream))
476  ((str-start-p next) (read-sql-str-token stream))
477  ((sym-start-p next) (read-sql-sym-token stream))
478  (t (make-sql-token :end (file-position stream)))))))
479 
480 (defun read-sql-stream (stream)
481  (loop for tok = (next-sql-token stream)
482  while tok
483  collect tok))
484 
485 (defun read-sql-string (sql)
486  "Convert SQL string into a list of tokens. Tokens are of the form
487 (SQL-TYPE . VALUE)."
488  (with-input-from-string (sql sql)
489  (read-sql-stream sql)))
490 
491 ;;; Parser
492 
493 ;; At this point we have a sequence (list) of tokens
494 (defclass sql-parser (pratt-parser query-parser)
495  ((tokens :type list :initarg :tokens :accessor sql-tokens)))
496 
497 (defmethod next-precedence ((self sql-parser))
498  (let ((token (car (sql-tokens self))))
499  (if (null token)
500  0
501  (case (sql-token-type token)
502  (:kw (string-case ((sql-token-text token) :default 0)
503  ("AS" 10)
504  ("ASC" 10)
505  ("DESC" 10)
506  ("OR" 20)
507  ("AND" 30)))
508  (:sym (string-case ((sql-token-text token) :default 0)
509  (#.(get-sql-symbol :LT) 40)
510  (#.(get-sql-symbol :LT-EQ) 40)
511  (#.(get-sql-symbol :EQ) 40)
512  (#.(get-sql-symbol :BANG-EQ) 40)
513  (#.(get-sql-symbol :GT-EQ) 40)
514  (#.(get-sql-symbol :GT) 40)
515  (#.(get-sql-symbol :PLUS) 50)
516  (#.(get-sql-symbol :SUB) 50)
517  (#.(get-sql-symbol :STAR) 60)
518  (#.(get-sql-symbol :SLASH) 60)
519  (#.(get-sql-symbol :LEFT-PAREN) 70)))
520  (t 0)))))
521 
522 (defmethod parse-prefix ((self sql-parser))
523  (let ((token (pop (sql-tokens self))))
524  (unless (null token)
525  (case (sql-token-type token)
526  (:kw (string-case ((sql-token-text token))
527  ("SELECT" (parse-select self))
528  ("CAST" (parse-cast self))
529  ("MAX" (make-instance 'sql-identifier :id "MAX"))
530  ("INT" (make-instance 'sql-identifier :id "INT"))
531  ("DOUBLE" (make-instance 'sql-identifier :id "DOUBLE"))))
532  (:ident (make-instance 'sql-identifier :id (sql-token-text token)))
533  (:str (make-instance 'sql-string :value (sql-token-text token)))
534  (:num (make-instance 'sql-number :value (parse-number (sql-token-text token))))
535  ;; unknown identifier
536  (t (make-instance 'sql-identifier :id (sql-token-text token)))))))
537 
538 (defmethod parse-infix ((self sql-parser) (left sql-expression) precedence)
539  (let* ((tokens (sql-tokens self))
540  (token (pop tokens)))
541  (unless (null token)
542  (case (sql-token-type token)
543  (:sym (cond
544  ((member (sql-token-text token) (list #.(get-sql-symbol :PLUS) #.(get-sql-symbol :SUB)
545  #.(get-sql-symbol :STAR) #.(get-sql-symbol :SLASH)
546  #.(get-sql-symbol :EQ) #.(get-sql-symbol :GT)
547  #.(get-sql-symbol :LT))
548  :test 'string=)
549  (pop (sql-tokens self)) ;; consume
550  (make-instance 'sql-math-expression
551  :lhs left
552  :op (sql-token-text token)
553  :rhs (parse self precedence)))
554  ((string-equal "(" (sql-token-text token))
555  (pop tokens)
556  (let ((args (parse-expression-list self)))
557  (assert (string-equal (sql-token-text (pop tokens)) ")"))
558  (make-instance 'sql-function :id (id left) :args args)))
559  (t nil)))
560  (:kw (string-case ((sql-token-text token))
561  ("AS" (pop tokens)
562  (make-instance 'sql-alias
563  :expr left
564  :alias (parse-identifier self)))
565  ("AND" (pop tokens)
566  (make-instance 'sql-binary-expression
567  :lhs left
568  :op "AND"
569  :rhs (parse self precedence)))
570  ("OR" (pop tokens)
571  (make-instance 'sql-binary-expression
572  :lhs left
573  :op "OR"
574  :rhs (parse self precedence)))
575  ("ASC" (pop tokens))
576  ("DESC" (pop tokens))))))))
577 
578 (defmethod parse-order ((self sql-parser))
579  (let ((sort-list)
580  (sort (parse-expression self)))
581  (loop while sort
582  do (progn
583  (case (sql-token-type sort)
584  (:ident (setf sort (make-instance 'sql-sort :expr sort :asc t)))
585  (t nil))
586  (push sort sort-list)
587  (let ((next (car (sql-tokens self))))
588  (when (and (eql (sql-token-type next) :sym) (string-equal (sql-token-text next) ","))
589  (pop (sql-tokens self)))
590  (setf sort (parse-expression self))))
591  finally (return sort-list))))
592 
593 (defmethod parse-cast ((self sql-parser))
594  (let ((tokens (sql-tokens self)))
595  (assert (string-equal (sql-token-text (pop tokens)) "("))
596  (let* ((expr (parse-expression self))
597  (alias (make-instance 'sql-alias :expr expr)))
598  (assert (string-equal (sql-token-text (pop tokens)) ")"))
599  (make-instance 'sql-cast :expr expr :type (slot-value alias 'alias)))))
600 
601 (defmethod parse-select ((self sql-parser))
602  (let ((projection (parse-expression-list self))
603  table filter-expr group-by having-expr order-by
604  (tok (pop (sql-tokens self))))
605  (case (sql-token-type tok)
606  (:kw (string-case ((sql-token-text tok))
607  ("FROM"
608  (setf table (parse-expression self))
609  ;; TODO 2024-06-29:
610  ;; parse optional WHERE
611  (let ((next (car (sql-tokens self))))
612  (when next
613  (when (string-equal "WHERE" (sql-token-text next))
614  (setf filter-expr (parse-expression self)))
615  (when (and
616  (string-equal "GROUP" (sql-token-text next))
617  (string-equal "BY" (sql-token-text (cadr (sql-tokens self)))))
618  (setf group-by (parse-expression-list self)))
619  (when (string-equal "HAVING" (sql-token-text next))
620  (setf having-expr (parse-expression self)))
621  (when (and (string-equal "ORDER" (sql-token-text next))
622  (string-equal "BY" (sql-token-text next)))
623  (setf order-by (parse-order self))))))))
624  (t (illegal-sql-state tok)))
625  (make-instance 'sql-select
626  :projection projection
627  :selection filter-expr
628  :group-by group-by
629  :order-by order-by
630  :having having-expr
631  :table-name (id table))))
632 
633 (defmethod parse-expression-list ((self sql-parser))
634  (log:trace! "> parse-expression-list")
635  (let ((ret))
636  (loop for expr = (parse-expression self)
637  while expr
638  do (push expr ret)
639  if ;; check for comma and repeat, else return
640  (let ((peek (car (sql-tokens self))))
641  (and
642  (eql :sym (sql-token-type peek))
643  (string-equal (sql-token-text peek) #.(get-sql-symbol :comma))))
644  do (pop (sql-tokens self))
645  else return ret
646  finally (return ret))))
647 
648 (defmethod parse-expression ((self sql-parser))
649  (parse self 0))
650 
651 (defmethod parse-identifier ((self sql-parser))
652  (let ((expr (parse-expression self)))
653  (if (typep expr 'sql-identifier)
654  expr
655  (simple-sql-error "Expected identifier, got ~A" expr))))
656 
657 (defmacro with-sql-parser ((sym &optional tokens) &body body)
658  `(let ((,sym (make-instance 'sql-parser :tokens ,tokens)))
659  ,@body))
660 
661 (defmacro with-sql-string ((sym str) &body body)
662  `(with-sql-parser (,sym (read-sql-string ,str))
663  ,@body))
664 
665 (defmacro with-sql-stream ((sym stream) &body body)
666  `(with-sql-parser (,sym (read-sql-stream ,stream))
667  ,@body))
668 
669 ;;; Planner
670 (defclass sql-planner (query-planner) ())
671 
672 (defun make-sql-logical-expression (expr input)
673  (etypecase expr
674  (sql-identifier (make-instance 'column-expression :name (id expr)))
675  (sql-string (literal-value expr))
676  (sql-number (literal-value expr))
677  ;; TODO 2024-08-04: sql-unary-expression
678  (sql-binary-expression
679  (let ((l (make-sql-logical-expression (lhs expr) input))
680  (r (make-sql-logical-expression (rhs expr) input)))
681  (etypecase expr
682  (sql-math-expression
683  (string-case ((binary-expression-op expr))
684  ;; equiv ops
685  ("=" (make-instance 'eq-expression :lhs l :rhs r))
686  ("!=" (make-instance 'neq-expression :lhs l :rhs r))
687  (">" (make-instance 'gt-expression :lhs l :rhs r))
688  (">=" (make-instance 'gteq-expression :lhs l :rhs r))
689  ("<" (make-instance 'lt-expression :lhs l :rhs r))
690  ("<=" (make-instance 'lteq-expression :lhs l :rhs r))
691  ;; boolean ops
692  ("AND" (make-instance 'and-expression :lhs l :rhs r))
693  ("OR" (make-instance 'or-expression :lhs l :rhs r))
694  ;; math ops
695  ("+" (make-instance 'add-expression :lhs l :rhs r))
696  ("-" (make-instance 'sub-expression :lhs l :rhs r))
697  ("*" (make-instance 'mult-expression :lhs l :rhs r))
698  ("/" (make-instance 'div-expression :lhs l :rhs r))
699  ("%" (make-instance 'mod-expression :lhs l :rhs r)))))))
700  (sql-alias (make-instance 'alias-expression
701  :expr (make-sql-logical-expression (slot-value expr 'expr) input)
702  :alias (id (slot-value expr 'alias))))
703  ;; TODO 2024-08-04: requires cast-expression impl in obj/query
704  ;; (sql-cast (make-instance 'cast))
705  (sql-function
706  (when (id expr)
707  (string-case ((id expr))
708  ("MIN" (make-instance 'min-expression
709  :expr (make-sql-logical-expression (car (slot-value expr 'args)) input)))
710  ("MAX" (make-instance 'max-expression
711  :expr (make-sql-logical-expression (car (slot-value expr 'args)) input)))
712  ("SUM" (make-instance 'sum-expression
713  :expr (make-sql-logical-expression (car (slot-value expr 'args)) input)))
714  ("AVG" (make-instance 'avg-expression
715  :expr (make-sql-logical-expression (car (slot-value expr 'args)) input))))))))
716 
717 (labels ((visit (expr accum)
718  (when expr
719  (typecase expr
720  (column-expression (accumulate accum (column-name expr)))
721  (alias-expression (visit (slot-value expr 'expr) accum))
722  (binary-expression
723  (visit (lhs expr) accum)
724  (visit (rhs expr) accum))
725  (aggregate-expression (visit (slot-value expr 'expr) accum))))))
726  (defun get-ref-columns (exprs)
727  (let ((accum))
728  (loop for expr across exprs
729  collect (visit expr accum))))
730  (defun get-selection-ref-columns (select table)
731  (let ((accum))
732  (when (slot-value select 'selection)
733  (let ((filter-expr (make-sql-logical-expression (slot-value select 'selection) table)))
734  (visit filter-expr accum)
735  (let ((valid-cols (map 'list (lambda (x) (field-name x)) (fields (schema table)))))
736  (remove-if (lambda (x) (not (member x valid-cols :test 'string-equal))) accum)))))))
737 
738 (defun plan-non-aggregate-query (select df projection-expr column-names-in-selection column-names-in-projection)
739  (let ((plan df))
740  (unless (slot-value select 'selection)
741  (return-from plan-non-aggregate-query (df-project plan projection-expr)))
742  (let ((missing (member-if-not
743  (lambda (x) (member x column-names-in-projection :test 'string-equal))
744  column-names-in-selection)))
745  (if (null missing)
746  (setq plan (df-filter
747  plan
748  (make-sql-logical-expression
749  (slot-value select 'selection)
750  (setf plan (df-project plan projection-expr)))))
751  (let ((n (length projection-expr)))
752  (setq plan (df-filter plan
753  (make-sql-logical-expression
754  (slot-value select 'selection)
755  (setf plan
756  (df-project plan
757  (merge 'vector
758  projection-expr
759  (mapcar
760  (lambda (x) (make-instance 'column-expression :name x))
761  missing)
762  (lambda (x y) (declare (ignore y)) x)))))))
763 
764  (df-project plan
765  (coerce
766  (loop for i below n
767  collect (make-instance 'column-expression
768  :name (field-name (field (schema plan) i))))
769  'vector))))
770  plan)))
771 
772 (defun plan-aggregate-query (projection-expr select column-names-in-selection df aggregate-expr)
773  (let ((plan df)
774  (proj-no-agg (remove-if 'aggregate-expression-p projection-expr)))
775  (when (slot-value select 'selection)
776  (let* ((cols-in-proj-no-agg (get-ref-columns proj-no-agg))
777  (missing (member-if-not
778  (lambda (x) (member x cols-in-proj-no-agg :test 'string-equal))
779  column-names-in-selection)))
780  (if (null missing)
781  (setq plan (df-filter
782  plan
783  (make-sql-logical-expression
784  (slot-value select 'selection)
785  (setf plan (df-project plan proj-no-agg)))))
786  (setq plan (df-filter
787  plan
788  (make-sql-logical-expression
789  (slot-value select 'selection)
790  (setf plan
791  (df-project plan
792  (merge 'vector
793  proj-no-agg
794  (mapcar (lambda (x) (make-instance 'column-expression :name x))
795  missing)
796  (lambda (x y) (declare (ignore y)) x))))))))
797  (df-aggregate plan
798  (map 'vector (lambda (x) (make-sql-logical-expression x plan))
799  (slot-value select 'group-by))
800  aggregate-expr)))))
801 
802 ;; TODO 2024-08-04: fix deadlock
803 (defun make-sql-data-frame (select tables)
804  (let* ((table (gethash (slot-value select 'table-name)
805  tables
806  (simple-sql-error "No table named ~A" (slot-value select 'table-name))))
807  (proj (map 'vector
808  (lambda (x) (make-sql-logical-expression x table))
809  (slot-value select 'projection)))
810  (cols-in-proj (get-ref-columns proj))
811  (agg-count (count-if 'aggregate-expression-p proj)))
812  (when (and (zerop agg-count) (not (sequence:emptyp (slot-value select 'group-by))))
813  (simple-sql-error "GROUP BY without aggregate expression is not supported"))
814  (let ((cols-in-sel (get-selection-ref-columns select table))
815  (plan table))
816  (if (zerop agg-count)
817  (plan-non-aggregate-query select plan proj cols-in-sel cols-in-proj)
818  (let ((pro)
819  (agg)
820  (n-group-cols 0)
821  (group-count 0))
822  plan)))))
823 
824 ;;; Optimizer
825 (defclass sql-optimizer (query-optimizer) ())
826 
827 ;;; Engine
828 (defclass sql-engine (query-engine) ()
829  (:default-initargs
830  :parser (make-instance 'sql-parser)))
831 
832 ;;; Top-level Macros
833 (defmacro with-sql ((sym input &key (parse t) optimize execute) &body body)
834  `(with-sql-parser (,sym ,@(etypecase input
835  (stream `((read-sql-stream ,input)))
836  (string `((read-sql-string ,input)))))
837  ,@(cond
838  (optimize `((setq ,sym (optimize (parse ,sym)))))
839  (parse `((setq ,sym (parse ,sym)))))
840  ,@(when execute
841  `((execute (make-physical-plan ,sym))))
842  ,@body))