changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/q/sql.lisp

changeset 698: 96958d3eb5b0
parent: 568c39371122
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
1 ;;; sql.lisp --- Structured Query Langs
2 
3 ;; SQL parser and query specification.
4 
5 ;;; Commentary:
6 
7 ;; Parser derived from PARSE/PRATT:PRATT-PARSER
8 
9 ;; ref: https://tdop.github.io/
10 
11 ;;; Code:
12 (in-package :q/sql)
13 
14 (declaim (optimize (speed 3)))
15 
16 ;;; Conditions
17 (define-condition sql-error (error) ())
18 
19 (deferror simple-sql-error (sql-error simple-error) ())
20 
21 (defun simple-sql-error (ctrl &rest args)
22  (error 'simple-sql-error :format-control ctrl :format-arguments args))
23 
24 (define-condition sql-token-error (sql-error)
25  ((token :initarg :token :reader bad-token))
26  (:report (lambda (c s)
27  (format s "Bad Token: ~A" (bad-token c)))))
28 
29 (defun sql-token-error (token)
30  (error 'sql-token-error :token token))
31 
32 (define-condition illegal-sql-state (sql-error)
33  ((state :initform nil :initarg :state :reader illegal-state))
34  (:report (lambda (c s)
35  (format s "Illegal SQL State: ~A" (illegal-state c)))))
36 
37 (defun illegal-sql-state (state)
38  (error 'illegal-sql-state :state state))
39 
40 ;;; Logical Classes
41 (defclass sql-query (query) ())
42 
43 (defclass sql-data-source (data-source) ()
44  (:documentation "Data source which can be used within SQL expressions."))
45 
46 ;; SQL-EXPRESSIONs are the output of a SQL-PARSER. These objects are further
47 ;; lowered to LOGICAL-EXPRESSIONs.
48 (defclass sql-expression () ())
49 
50 (deftype sql-expression-vector () '(vector sql-expression))
51 
52 (defclass sql-identifier (id sql-expression) ())
53 
54 (defclass sql-binary-expression (binary-expression sql-expression) ())
55 
56 (defclass sql-math-expression (sql-binary-expression)
57  ((op :initarg :op :type symbol :accessor binary-expression-op)))
58 
59 (defclass sql-string (sql-expression literal-expression)
60  ((value :type string :initarg :value :accessor literal-value)))
61 
62 (defclass sql-number (sql-expression literal-expression)
63  ((value :type number :initarg :value :accessor literal-value)))
64 
65 (defclass sql-function (id sql-expression)
66  ((args :type sql-expression-vector :initarg :args)))
67 
68 (defclass sql-alias (sql-expression alias-expression) ())
69 
70 (defclass sql-cast (sql-expression)
71  ((expr :type sql-expression :initarg :expr)
72  (type :type sql-identifier :initarg :type)))
73 
74 (defclass sql-sort (sql-expression)
75  ((expr :type sql-expression :initarg :expr)
76  (asc :type boolean :initarg :asc)))
77 
78 (defclass sql-relation (sql-expression) ())
79 
80 (defclass sql-select (sql-relation)
81  ((projection :type sql-expression-vector :initarg :projection)
82  (selection :type (or sql-expression null) :initarg :selection)
83  (group-by :type sql-expression-vector :initarg :group-by)
84  (order-by :type sql-expression-vector :initarg :order-by)
85  (having :type (or null sql-expression) :initarg :having)
86  (table-name :type string :initarg :table-name)))
87 
88 ;;; Lexer
89 (eval-always
90  (defvar *sql-token-types* (list :ident :str :num :kw :op :sym t))
91  (deftype sql-token-type-designator () `(member ,@*sql-token-types*))
92  (defvar *sql-keywords*
93  (list "SCHEMA"
94  "DATABASE"
95  "TABLE"
96  "COLUMN"
97  "VIEW"
98  "INDEX"
99  "TRIGGER"
100  "PROCEDURE"
101  "TABLESPACE"
102  "FUNCTION"
103  "SEQUENCE"
104  "CURSOR"
105  "FROM"
106  "TO"
107  "OF"
108  "IF"
109  "ON"
110  "FOR"
111  "WHILE"
112  "DO"
113  "NO"
114  "BY"
115  "WITH"
116  "WITHOUT"
117  "TRUE"
118  "FALSE"
119  "TEMPORARY"
120  "TEMP"
121  "COMMENT"
122  ;; create
123  "CREATE"
124  "REPLACE"
125  "BEFORE"
126  "AFTER"
127  "INSTEAD"
128  "EACH"
129  "ROW"
130  "STATEMENT"
131  "EXECUTE"
132  "BITMAP"
133  "NOSORT"
134  "REVERSE"
135  "COMPILE"
136  ;; alter
137  "ALTER"
138  "ADD"
139  "MODIFY"
140  "RENAME"
141  "ENABLE"
142  "DISABLE"
143  "VALIDATE"
144  "USER"
145  "IDENTIFIED"
146  ;; truncate
147  "TRUNCATE"
148  ;; drop
149  "DROP"
150  "CASCADE"
151  ;; insert
152  "INSERT"
153  "INTO"
154  "VALUES"
155  ;; update
156  "UPDATE"
157  "SET"
158  ;; delete
159  "DELETE"
160  ;; select
161  "SELECT"
162  "DISTINCT"
163  "AS"
164  "CASE"
165  "WHEN"
166  "ELSE"
167  "THEN"
168  "END"
169  "LEFT"
170  "RIGHT"
171  "FULL"
172  "INNER"
173  "OUTER"
174  "CROSS"
175  "JOIN"
176  "USE"
177  "USING"
178  "NATURAL"
179  "WHERE"
180  "ORDER"
181  "ASC"
182  "DESC"
183  "GROUP"
184  "HAVING"
185  "UNION"
186  ;; others
187  "DECLARE"
188  "GRANT"
189  "FETCH"
190  "REVOKE"
191  "CLOSE"
192  "CAST"
193  "NEW"
194  "ESCAPE"
195  "LOCK"
196  "SOME"
197  "LEAVE"
198  "ITERATE"
199  "REPEAT"
200  "UNTIL"
201  "OPEN"
202  "OUT"
203  "INOUT"
204  "OVER"
205  "ADVISE"
206  "SIBLINGS"
207  "LOOP"
208  "EXPLAIN"
209  "DEFAULT"
210  "EXCEPT"
211  "INTERSECT"
212  "MINUS"
213  "PASSWORD"
214  "LOCAL"
215  "GLOBAL"
216  "STORAGE"
217  "DATA"
218  "COALESCE"
219  ;; Types
220  "CHAR"
221  "CHARACTER"
222  "VARYING"
223  "VARCHAR"
224  "VARCHAR2"
225  "INTEGER"
226  "INT"
227  "SMALLINT"
228  "DECIMAL"
229  "DEC"
230  "NUMERIC"
231  "FLOAT"
232  "REAL"
233  "DOUBLE"
234  "PRECISION"
235  "DATE"
236  "TIME"
237  "INTERVAL"
238  "BOOLEAN"
239  "BLOB"
240  ;; Conditionals
241  "AND"
242  "OR"
243  "XOR"
244  "IS"
245  "NOT"
246  "NULL"
247  "IN"
248  "BETWEEN"
249  "LIKE"
250  "ANY"
251  "ALL"
252  "EXISTS"
253  ;; Functions
254  "AVG"
255  "MAX"
256  "MIN"
257  "SUM"
258  "COUNT"
259  "GREATEST"
260  "LEAST"
261  "ROUND"
262  "TRUNC"
263  "POSITION"
264  "EXTRACT"
265  "LENGTH"
266  "CHAR_LENGTH"
267  "SUBSTRING"
268  "SUBSTR"
269  "INSTR"
270  "INITCAP"
271  "UPPER"
272  "LOWER"
273  "TRIM"
274  "LTRIM"
275  "RTRIM"
276  "BOTH"
277  "LEADING"
278  "TRAILING"
279  "TRANSLATE"
280  "CONVERT"
281  "LPAD"
282  "RPAD"
283  "DECODE"
284  "NVL"
285  ;; Constraints
286  "CONSTRAINT"
287  "UNIQUE"
288  "PRIMARY"
289  "FOREIGN"
290  "KEY"
291  "CHECK"
292  "REFERENCES"))
293 
294  (defvar *sql-keyword-start-chars*
295  (remove-duplicates (mapcar
296  (lambda (k)
297  (declare (simple-string k))
298  (char k 0))
299  *sql-keywords*)))
300 
301  (defvar *sql-keyword-table*
302  (let* ((pairs (mapcar (lambda (x) (cons (keywordicate x) x)) *sql-keywords*))
303  (table (make-hash-table :size (length pairs))))
304  (dolist (p pairs table)
305  (setf (gethash (car p) table) (cdr p)))))
306 
307  (defvar *sql-symbol-table*
308  (let* ((pairs '((:LEFT-PAREN . "(")
309  (:RIGHT-PAREN . ")")
310  (:LEFT-BRACE . "{")
311  (:RIGHT-BRACE . "}")
312  (:LEFT-BRACKET . "[")
313  (:RIGHT-BRACKET . "]")
314  (:SEMI . ";")
315  (:COMMA . ",")
316  (:DOT . ".")
317  (:DOUBLE-DOT . "..")
318  (:PLUS . "+")
319  (:SUB . "-")
320  (:STAR . "*")
321  (:SLASH . "/")
322  (:QUESTION . "?")
323  (:EQ . "=")
324  (:GT . ">")
325  (:LT . "<")
326  (:BANG . "!")
327  (:TILDE . "~")
328  (:CARET . "^")
329  (:PERCENT . "%")
330  (:COLON . ":")
331  (:DOUBLE-COLON . "::")
332  (:COLON-EQ . ":=")
333  (:LT-EQ . "<=")
334  (:GT-EQ . ">=")
335  (:LT-EQ-GT . "<=>")
336  (:LT-GT . "<>")
337  (:BANG-EQ . "!=")
338  (:BANG-GT . "!>")
339  (:BANG-LT . "!<")
340  (:AMP . "&")
341  (:BAR . "|")
342  (:DOUBLE-AMP . "&&")
343  (:DOUBLE-BAR . "||")
344  (:DOUBLE-LT . "<<")
345  (:DOUBLE-GT . ">>")
346  (:AT . "@")
347  (:POUND . "#")))
348  (table (make-hash-table :size (length pairs))))
349  (dolist (p pairs table)
350  (setf (gethash (car p) table) (cdr p)))))
351 
352  (declaim (ftype (function (keyword) (values string boolean))
353  get-sql-keyword
354  get-sql-symbol))
355  (defun get-sql-keyword (kw) (gethash kw *sql-keyword-table*))
356  (defun get-sql-symbol (kw) (gethash kw *sql-symbol-table*)))
357 
358 (defvar *sql-symbols* (hash-table-values *sql-symbol-table*))
359 
360 (defvar *sql-symbol-start-chars* (remove-duplicates
361  (mapcar (lambda (x)
362  (declare (simple-string x))
363  (char x 0))
364  *sql-symbols*)))
365 
366 (defstruct sql-token
367  (text "" :type string)
368  (type t :type sql-token-type-designator)
369  (end 0 :type fixnum))
370 
371 (defun num-start-p (c) (or (digit-char-p c) (char= #\. c) (char= #\- c)))
372 (defun ident-start-p (c) (alpha-char-p c))
373 (defun ident-part-p (c) (or (alpha-char-p c) (digit-char-p c) (char= #\_ c)))
374 (defun str-start-p (c) (or (char= #\' c) (char= #\" c)))
375 (defun kw-start-p (c) (member c *sql-keyword-start-chars* :test 'char=))
376 (defun sym-start-p (c) (member c *sql-symbol-start-chars* :test 'char=))
377 
378 ;; low-level token readers
379 (defmacro def-sql-reader (name (&rest args) &body body)
380  `(defun ,(symbolicate 'read-sql- name) (,@args)
381  (declare (optimize (safety 0)))
382  ,@body))
383 
384 (defun peek-sql-char (expected stream &optional skip-ws)
385  (char= (peek-char skip-ws stream) expected))
386 
387 (def-sql-reader char (stream expected &optional skip-ws)
388  (when (peek-sql-char expected stream skip-ws)
389  (read-char stream nil nil)))
390 
391 (def-sql-reader num-token (stream)
392  (make-sql-token
393  :text
394  (with-output-to-string (s)
395  (when (read-sql-char stream #\- nil)
396  (write-char #\- s))
397  (loop for x = (peek-char nil stream nil nil)
398  while x
399  while (or (digit-char-p x) (char= #\. x))
400  do (write-char (read-char stream nil nil) s)
401  finally (return s)))
402  :type :num
403  :end (file-position stream)))
404 
405 (def-sql-reader str-token (stream)
406  (let ((tok (make-sql-token :type :str))
407  (terminator #\"))
408  (unless (read-sql-char stream terminator)
409  (setf terminator #\')
410  (unless (read-sql-char stream terminator)
411  (sql-token-error tok)))
412  (setf (sql-token-text tok)
413  (with-output-to-string (s)
414  (loop for x = (peek-char nil stream) ;; must not be EOF before terminator
415  if (not (char= terminator x))
416  do (write-char (read-char stream) s)
417  else if (char= terminator x)
418  do (return (read-char stream)))))
419  (setf (sql-token-end tok) (file-position stream))
420  tok))
421 
422 (def-sql-reader sym-token (stream)
423  (let ((tok (make-sql-token :type :sym)))
424  (setf (sql-token-text tok)
425  (with-output-to-string (s)
426  (write-char (read-char stream nil nil) s))
427  (sql-token-end tok) (file-position stream))
428  tok))
429 
430 (defun ambiguous-ident-p (tok)
431  (let ((text (sql-token-text tok)))
432  (or (string-equal #.(get-sql-keyword :ORDER) text)
433  (string-equal #.(get-sql-keyword :GROUP) text))))
434 
435 (defun proc-ambiguous-ident (stream start)
436  (declare (stream stream) (fixnum start))
437  (if (equalp
438  (read-sequence (make-string 2) stream :start start :end (the fixnum (+ start 2)))
439  #.(get-sql-keyword :BY))
440  :kw
441  :ident))
442 
443 (def-sql-reader ident-token (stream)
444  (let ((tok (make-sql-token)))
445  (if (read-sql-char stream #\`)
446  (setf (sql-token-text tok)
447  (with-output-to-string (s)
448  (loop for x = (peek-char nil stream) ;; must not be EOF before terminator
449  if (not (char= #\` x))
450  do (write-char (read-char stream) s)
451  else do (return (read-char stream))))
452  (sql-token-type tok) :ident)
453  ;; may not actually be ident - we check for kw after we have a known end position
454  (setf (sql-token-text tok)
455  (with-output-to-string (s)
456  (loop for x = (peek-char nil stream nil nil)
457  while (and x (ident-part-p x))
458  do (write-char (read-char stream) s)))))
459  (setf (sql-token-end tok) (file-position stream))
460  ;; resolve sql-token-type
461  (cond
462  ((ambiguous-ident-p tok)
463  (setf (sql-token-type tok) (proc-ambiguous-ident stream (sql-token-end tok))))
464  ((and (not (eql (sql-token-type tok) :ident)) (member (sql-token-text tok) *sql-keywords* :test 'string-equal))
465  (setf (sql-token-type tok) :kw)))
466  tok))
467 
468 (defun next-sql-token (stream)
469  "Parse the next sql token from input STREAM else return nil."
470  (block :next
471  (let ((tok)
472  (next (peek-char t stream nil nil)))
473  (unless next
474  (return-from :next tok))
475  (cond
476  ((num-start-p next) (read-sql-num-token stream))
477  ((ident-start-p next) (read-sql-ident-token stream))
478  ((str-start-p next) (read-sql-str-token stream))
479  ((sym-start-p next) (read-sql-sym-token stream))
480  (t (make-sql-token :end (file-position stream)))))))
481 
482 (defun read-sql-stream (stream)
483  (loop for tok = (next-sql-token stream)
484  while tok
485  collect tok))
486 
487 (defun read-sql-string (sql)
488  "Convert SQL string into a list of tokens. Tokens are of the form
489 (SQL-TYPE . VALUE)."
490  (with-input-from-string (sql sql)
491  (read-sql-stream sql)))
492 
493 ;;; Parser
494 
495 ;; At this point we have a sequence (list) of tokens
496 (defclass sql-parser (pratt-parser query-parser)
497  ((tokens :type list :initarg :tokens :accessor sql-tokens)))
498 
499 (defmethod next-precedence ((self sql-parser))
500  (let ((token (car (sql-tokens self))))
501  (if (null token)
502  0
503  (case (sql-token-type token)
504  (:kw (string-case ((sql-token-text token) :default 0)
505  ("AS" 10)
506  ("ASC" 10)
507  ("DESC" 10)
508  ("OR" 20)
509  ("AND" 30)))
510  (:sym (string-case ((sql-token-text token) :default 0)
511  (#.(get-sql-symbol :LT) 40)
512  (#.(get-sql-symbol :LT-EQ) 40)
513  (#.(get-sql-symbol :EQ) 40)
514  (#.(get-sql-symbol :BANG-EQ) 40)
515  (#.(get-sql-symbol :GT-EQ) 40)
516  (#.(get-sql-symbol :GT) 40)
517  (#.(get-sql-symbol :PLUS) 50)
518  (#.(get-sql-symbol :SUB) 50)
519  (#.(get-sql-symbol :STAR) 60)
520  (#.(get-sql-symbol :SLASH) 60)
521  (#.(get-sql-symbol :LEFT-PAREN) 70)))
522  (t 0)))))
523 
524 (defmethod parse-prefix ((self sql-parser))
525  (let ((token (pop (sql-tokens self))))
526  (unless (null token)
527  (case (sql-token-type token)
528  (:kw (string-case ((sql-token-text token))
529  ("SELECT" (parse-select self))
530  ("CAST" (parse-cast self))
531  ("MAX" (make-instance 'sql-identifier :id "MAX"))
532  ("INT" (make-instance 'sql-identifier :id "INT"))
533  ("DOUBLE" (make-instance 'sql-identifier :id "DOUBLE"))))
534  (:ident (make-instance 'sql-identifier :id (sql-token-text token)))
535  (:str (make-instance 'sql-string :value (sql-token-text token)))
536  (:num (make-instance 'sql-number :value (parse-number (sql-token-text token))))
537  ;; unknown identifier
538  (t (make-instance 'sql-identifier :id (sql-token-text token)))))))
539 
540 (defmethod parse-infix ((self sql-parser) (left sql-expression) precedence)
541  (let* ((tokens (sql-tokens self))
542  (token (pop tokens)))
543  (unless (null token)
544  (case (sql-token-type token)
545  (:sym (cond
546  ((member (sql-token-text token) (list #.(get-sql-symbol :PLUS) #.(get-sql-symbol :SUB)
547  #.(get-sql-symbol :STAR) #.(get-sql-symbol :SLASH)
548  #.(get-sql-symbol :EQ) #.(get-sql-symbol :GT)
549  #.(get-sql-symbol :LT))
550  :test 'string=)
551  (pop (sql-tokens self)) ;; consume
552  (make-instance 'sql-math-expression
553  :lhs left
554  :op (sql-token-text token)
555  :rhs (parse self precedence)))
556  ((string-equal "(" (sql-token-text token))
557  (pop tokens)
558  (let ((args (parse-expression-list self)))
559  (assert (string-equal (sql-token-text (pop tokens)) ")"))
560  (make-instance 'sql-function :id (id left) :args args)))
561  (t nil)))
562  (:kw (string-case ((sql-token-text token))
563  ("AS" (pop tokens)
564  (make-instance 'sql-alias
565  :expr left
566  :alias (parse-identifier self)))
567  ("AND" (pop tokens)
568  (make-instance 'sql-binary-expression
569  :lhs left
570  :op "AND"
571  :rhs (parse self precedence)))
572  ("OR" (pop tokens)
573  (make-instance 'sql-binary-expression
574  :lhs left
575  :op "OR"
576  :rhs (parse self precedence)))
577  ("ASC" (pop tokens))
578  ("DESC" (pop tokens))))))))
579 
580 (defmethod parse-order ((self sql-parser))
581  (let ((sort-list)
582  (sort (parse-expression self)))
583  (loop while sort
584  do (progn
585  (case (sql-token-type sort)
586  (:ident (setf sort (make-instance 'sql-sort :expr sort :asc t)))
587  (t nil))
588  (push sort sort-list)
589  (let ((next (car (sql-tokens self))))
590  (when (and (eql (sql-token-type next) :sym) (string-equal (sql-token-text next) ","))
591  (pop (sql-tokens self)))
592  (setf sort (parse-expression self))))
593  finally (return sort-list))))
594 
595 (defmethod parse-cast ((self sql-parser))
596  (let ((tokens (sql-tokens self)))
597  (assert (string-equal (sql-token-text (pop tokens)) "("))
598  (let* ((expr (parse-expression self))
599  (alias (make-instance 'sql-alias :expr expr)))
600  (assert (string-equal (sql-token-text (pop tokens)) ")"))
601  (make-instance 'sql-cast :expr expr :type (slot-value alias 'alias)))))
602 
603 (defmethod parse-select ((self sql-parser))
604  (let ((projection (parse-expression-list self))
605  table filter-expr group-by having-expr order-by
606  (tok (pop (sql-tokens self))))
607  (case (sql-token-type tok)
608  (:kw (string-case ((sql-token-text tok))
609  ("FROM"
610  (setf table (parse-expression self))
611  ;; TODO 2024-06-29:
612  ;; parse optional WHERE
613  (let ((next (car (sql-tokens self))))
614  (when next
615  (when (string-equal "WHERE" (sql-token-text next))
616  (setf filter-expr (parse-expression self)))
617  (when (and
618  (string-equal "GROUP" (sql-token-text next))
619  (string-equal "BY" (sql-token-text (cadr (sql-tokens self)))))
620  (setf group-by (parse-expression-list self)))
621  (when (string-equal "HAVING" (sql-token-text next))
622  (setf having-expr (parse-expression self)))
623  (when (and (string-equal "ORDER" (sql-token-text next))
624  (string-equal "BY" (sql-token-text next)))
625  (setf order-by (parse-order self))))))))
626  (t (illegal-sql-state tok)))
627  (make-instance 'sql-select
628  :projection projection
629  :selection filter-expr
630  :group-by group-by
631  :order-by order-by
632  :having having-expr
633  :table-name (id table))))
634 
635 (defmethod parse-expression-list ((self sql-parser))
636  (log:trace! "> parse-expression-list")
637  (let ((ret))
638  (loop for expr = (parse-expression self)
639  while expr
640  do (push expr ret)
641  if ;; check for comma and repeat, else return
642  (let ((peek (car (sql-tokens self))))
643  (and
644  (eql :sym (sql-token-type peek))
645  (string-equal (sql-token-text peek) #.(get-sql-symbol :comma))))
646  do (pop (sql-tokens self))
647  else return ret
648  finally (return ret))))
649 
650 (defmethod parse-expression ((self sql-parser))
651  (parse self 0))
652 
653 (defmethod parse-identifier ((self sql-parser))
654  (let ((expr (parse-expression self)))
655  (if (typep expr 'sql-identifier)
656  expr
657  (simple-sql-error "Expected identifier, got ~A" expr))))
658 
659 (defmacro with-sql-parser ((sym &optional tokens) &body body)
660  `(let ((,sym (make-instance 'sql-parser :tokens ,tokens)))
661  ,@body))
662 
663 (defmacro with-sql-string ((sym str) &body body)
664  `(with-sql-parser (,sym (read-sql-string ,str))
665  ,@body))
666 
667 (defmacro with-sql-stream ((sym stream) &body body)
668  `(with-sql-parser (,sym (read-sql-stream ,stream))
669  ,@body))
670 
671 ;;; Planner
672 (defun make-sql-logical-expression (expr input)
673  (etypecase expr
674  (sql-identifier (make-instance 'column-expression :name (id expr)))
675  (sql-string (literal-value expr))
676  (sql-number (literal-value expr))
677  ;; TODO 2024-08-04: sql-unary-expression
678  (sql-binary-expression
679  (let ((l (make-sql-logical-expression (lhs expr) input))
680  (r (make-sql-logical-expression (rhs expr) input)))
681  (etypecase expr
682  (sql-math-expression
683  (string-case ((binary-expression-op expr))
684  ;; equiv ops
685  ("=" (make-instance 'eq-expression :lhs l :rhs r))
686  ("!=" (make-instance 'neq-expression :lhs l :rhs r))
687  (">" (make-instance 'gt-expression :lhs l :rhs r))
688  (">=" (make-instance 'gteq-expression :lhs l :rhs r))
689  ("<" (make-instance 'lt-expression :lhs l :rhs r))
690  ("<=" (make-instance 'lteq-expression :lhs l :rhs r))
691  ;; boolean ops
692  ("AND" (make-instance 'and-expression :lhs l :rhs r))
693  ("OR" (make-instance 'or-expression :lhs l :rhs r))
694  ;; math ops
695  ("+" (make-instance 'add-expression :lhs l :rhs r))
696  ("-" (make-instance 'sub-expression :lhs l :rhs r))
697  ("*" (make-instance 'mult-expression :lhs l :rhs r))
698  ("/" (make-instance 'div-expression :lhs l :rhs r))
699  ("%" (make-instance 'mod-expression :lhs l :rhs r)))))))
700  (sql-alias (make-instance 'alias-expression
701  :expr (make-sql-logical-expression (slot-value expr 'expr) input)
702  :alias (id (slot-value expr 'alias))))
703  ;; TODO 2024-08-04: requires cast-expression impl in obj/query
704  ;; (sql-cast (make-instance 'cast))
705  (sql-function
706  (when (id expr)
707  (string-case ((id expr))
708  ("MIN" (make-instance 'min-expression
709  :expr (make-sql-logical-expression (car (slot-value expr 'args)) input)))
710  ("MAX" (make-instance 'max-expression
711  :expr (make-sql-logical-expression (car (slot-value expr 'args)) input)))
712  ("SUM" (make-instance 'sum-expression
713  :expr (make-sql-logical-expression (car (slot-value expr 'args)) input)))
714  ("AVG" (make-instance 'avg-expression
715  :expr (make-sql-logical-expression (car (slot-value expr 'args)) input))))))))
716 
717 (labels ((visit (expr accum)
718  (when expr
719  (typecase expr
720  (column-expression (accumulate accum (column-name expr)))
721  (alias-expression (visit (slot-value expr 'expr) accum))
722  (binary-expression
723  (visit (lhs expr) accum)
724  (visit (rhs expr) accum))
725  (aggregate-expression (visit (slot-value expr 'expr) accum))))))
726  (defun get-ref-columns (exprs)
727  (let ((accum))
728  (loop for expr across exprs
729  collect (visit expr accum))))
730  (defun get-selection-ref-columns (select table)
731  (let ((accum))
732  (when (slot-value select 'selection)
733  (let ((filter-expr (make-sql-logical-expression (slot-value select 'selection) table)))
734  (visit filter-expr accum)
735  (let ((valid-cols (map 'list (lambda (x) (field-name x)) (fields (schema table)))))
736  (remove-if (lambda (x) (not (member x valid-cols :test 'string-equal))) accum)))))))
737 
738 (defun plan-non-aggregate-query (select df projection-expr column-names-in-selection column-names-in-projection)
739  (let ((plan df))
740  (unless (slot-value select 'selection)
741  (return-from plan-non-aggregate-query (df-project plan projection-expr)))
742  (let ((missing (member-if-not
743  (lambda (x) (member x column-names-in-projection :test 'string-equal))
744  column-names-in-selection)))
745  (if (null missing)
746  (setq plan (df-filter
747  plan
748  (make-sql-logical-expression
749  (slot-value select 'selection)
750  (setf plan (df-project plan projection-expr)))))
751  (let ((n (length projection-expr)))
752  (setq plan (df-filter plan
753  (make-sql-logical-expression
754  (slot-value select 'selection)
755  (setf plan
756  (df-project plan
757  (merge 'vector
758  projection-expr
759  (mapcar
760  (lambda (x) (make-instance 'column-expression :name x))
761  missing)
762  (lambda (x y) (declare (ignore y)) x)))))))
763 
764  (df-project plan
765  (coerce
766  (loop for i below n
767  collect (make-instance 'column-expression
768  :name (field-name (field (schema plan) i))))
769  'vector))))
770  plan)))
771 
772 (defun plan-aggregate-query (projection-expr select column-names-in-selection df aggregate-expr)
773  (let ((plan df)
774  (proj-no-agg (remove-if 'aggregate-expression-p projection-expr)))
775  (when (slot-value select 'selection)
776  (let* ((cols-in-proj-no-agg (get-ref-columns proj-no-agg))
777  (missing (member-if-not
778  (lambda (x) (member x cols-in-proj-no-agg :test 'string-equal))
779  column-names-in-selection)))
780  (if (null missing)
781  (setq plan (df-filter
782  plan
783  (make-sql-logical-expression
784  (slot-value select 'selection)
785  (setf plan (df-project plan proj-no-agg)))))
786  (setq plan (df-filter
787  plan
788  (make-sql-logical-expression
789  (slot-value select 'selection)
790  (setf plan
791  (df-project plan
792  (merge 'vector
793  proj-no-agg
794  (mapcar (lambda (x) (make-instance 'column-expression :name x))
795  missing)
796  (lambda (x y) (declare (ignore y)) x))))))))
797  (df-aggregate plan
798  (map 'vector (lambda (x) (make-sql-logical-expression x plan))
799  (slot-value select 'group-by))
800  aggregate-expr)))))
801 
802 (defun make-sql-data-frame (select tables)
803  "Process the given SELECT statement with the provided hash-table of
804 string:data-frame. Returns a data-frame."
805  (let* ((table (or
806  (gethash (slot-value select 'table-name)
807  tables
808  )
809  (simple-sql-error "No table named ~A" (slot-value select 'table-name))))
810  (proj (map 'vector
811  (lambda (x) (make-sql-logical-expression x table))
812  (slot-value select 'projection)))
813  (cols-in-proj (get-ref-columns proj))
814  (agg-count (count-if 'aggregate-expression-p proj)))
815  (when (and (zerop agg-count) (not (sequence:emptyp (slot-value select 'group-by))))
816  (simple-sql-error "GROUP BY without aggregate expression is not supported"))
817  (let ((cols-in-sel (get-selection-ref-columns select table))
818  (plan table))
819  (if (zerop agg-count)
820  (plan-non-aggregate-query select plan proj cols-in-sel cols-in-proj)
821  (let ((pro)
822  (agg)
823  (n-group-cols 0)
824  (group-count 0))
825  (declare (fixnum n-group-cols group-count))
826  (loop for expr across proj
827  do (typecase expr
828  (aggregate-expression
829  (progn
830  (push (+ n-group-cols (length agg)) pro)
831  (push expr agg)))
832  (alias-expression
833  (progn
834  (push (make-instance 'alias-expression
835  :name (+ n-group-cols (length agg))
836  :expr (slot-value expr 'alias))
837  pro)
838  ;; TODO 2024-08-07: does this need to be cast to aggregate-expression?
839  (push (expr expr) agg)))
840  (t (progn
841  (push group-count pro)
842  (incf group-count)))))
843  (let ((plan
844  (df-project
845  (plan-aggregate-query proj select cols-in-sel plan agg)
846  pro)))
847  (if-let ((having (slot-value select 'having)))
848  (df-filter plan (make-sql-logical-expression having plan))
849  plan)))))))
850 
851 (defmethod make-df ((self sql-select) &key tables &allow-other-keys)
852  (when tables
853  (make-sql-data-frame self tables)))
854 
855 ;;; Optimizer
856 (defclass sql-optimizer (query-optimizer) ())
857 
858 ;;; Engine
859 (defclass sql-engine (query-engine) ()
860  (:default-initargs
861  :parser (make-instance 'sql-parser)))
862 
863 ;;; Top-level Macros
864 (defmacro with-sql ((sym input &key (parse t) optimize execute) &body body)
865  `(with-sql-parser (,sym ,@(etypecase input
866  (stream `((read-sql-stream ,input)))
867  (string `((read-sql-string ,input)))))
868  ,@(cond
869  (optimize `((setq ,sym (optimize (parse ,sym)))))
870  (parse `((setq ,sym (parse ,sym)))))
871  ,@(when execute
872  `((execute (make-physical-plan ,sym))))
873  ,@body))