# HG changeset patch # User Richard Westhaver # Date 1716344429 14400 # Node ID 7c1383c08493fd785899c6720df13f97dd30ac1f # Parent aac665e2f5bf67e9699636949d609673a10cf237 port xsubseq, proc-parse. work on http and clap diff -r aac665e2f5bf -r 7c1383c08493 lisp/bin/packy.lisp --- a/lisp/bin/packy.lisp Tue May 21 17:13:34 2024 -0400 +++ b/lisp/bin/packy.lisp Tue May 21 22:20:29 2024 -0400 @@ -14,7 +14,7 @@ (define-cli $cli :name "packy" :version "0.1.0" - :description "user home manager" + :description "Universal Package Manager" :thunk pk-show :opts (make-opts (:name "level" :global t :description "set the log level" :thunk pk-log-level) diff -r aac665e2f5bf -r 7c1383c08493 lisp/lib/cli/clap.lisp --- a/lisp/lib/cli/clap.lisp Tue May 21 17:13:34 2024 -0400 +++ b/lisp/lib/cli/clap.lisp Tue May 21 22:20:29 2024 -0400 @@ -53,15 +53,17 @@ (defmacro with-cli (slots cli &body body) "Like with-slots with some extra bindings." - ;; (with-gensyms (cli-body) - ;; (let ((cli-body (mapcar (lambda (x) ()) cli-body) `(progn - (setf (cli-cwd ,cli) (sb-posix:getcwd)) + (setf (cli-cd ,cli) (sb-posix:getcwd)) (with-slots ,slots (parse-args ,cli (cli-args) :compile t) ,@body))) (defvar *default-cli-def* 'defparameter) +(defvar *default-cli-class* 'cli + "The name of the class of the top-level CLI object which will be +generated by the DEFINE-CLI macro.") + (defmacro defcmd (name &body body) `(defun ,name ($args $opts) (declare (ignorable $args $opts)) @@ -87,8 +89,13 @@ (defmacro define-cli (name &body body) "Define a symbol NAME bound to a top-level CLI object." - (declare (type symbol name)) - `(,*default-cli-def* ,name (apply #'make-cli t (walk-cli-slots ',body)))) + (with-gensyms (%name %class) + (if (atom name) + (setq %name name + %class nil) + (setq %name (car name) + %class (cdr name))) + `(,*default-cli-def* ,%name (apply #'make-cli ,%class (walk-cli-slots ',body))))) (defmacro defmain ((&key return (exit t)) &body body) "Define a CLI main function in the current package." @@ -111,12 +118,13 @@ ((eql kind :cli) (apply #'make-instance 'cli slots)) ((eql kind :opt) (apply #'make-cli-opt slots)) ((eql kind :cmd) (apply #'make-instance 'cli-cmd slots)) - (t (apply #'make-instance 'cli slots)))) + (t (apply #'make-instance kind slots)))) ;; RESEARCH 2023-09-12: closed over hash-table with short/long flags ;; to avoid conflicts. if not, need something like a flag-function ;; slot at class allocation. (defmacro make-opts (&body opts) + "Make a vector of CLI-OPTs based on OPTS." `(map 'vector (lambda (x) (etypecase x @@ -125,14 +133,15 @@ (t (make-cli :opt :name (format nil "~(~A~)" x) :global t)))) (walk-cli-slots ',opts))) -(defmacro make-cmds (&body opts) +(defmacro make-cmds (&body cmds) + "Make a vector of CLI-CMDs based on CMDS." `(map 'vector (lambda (x) (etypecase x (string (make-cli :cmd :name x)) (list (apply #'make-cli :cmd x)) (t (make-cli :cmd :name (format nil "~(~A~)" x))))) - (walk-cli-slots ',opts))) + (walk-cli-slots ',cmds))) (defun long-opt-p (str) (declare (simple-string str)) @@ -338,7 +347,8 @@ (lock :initform nil :initarg :lock :accessor cli-lock-p :type boolean) (description :initarg :description :accessor cli-description :type string) (args :initform nil :initarg :args :accessor cli-cmd-args)) - (:documentation "CLI command")) + (:documentation "CLI command class inherited by both the 'main' command which is executed when +a CLI is called without arguments, and all subcommands.")) (defmethod initialize-instance :after ((self cli-cmd) &key) (with-slots (name cmds opts thunk) self @@ -562,7 +572,7 @@ (install-ast self args)))) ;; warning: make sure to fill in the opt and cmd slots with values -;; from the top-level args before doing a command. +;; from the top-level args before calling a command. (defmethod call-cmd ((self cli-cmd) args opts) (trace! args opts) (funcall (cli-thunk self) args opts)) @@ -580,8 +590,8 @@ ;; name slot defaults to *package*, must be string ((name :initarg :name :initform (string-downcase (package-name *package*)) :accessor cli-name :type string) (version :initarg :version :initform "0.1.0" :accessor cli-version :type string) - ;; TODO 2023-10-11: look into pushd popd - wd-stack? - (cwd :initarg :cwd :initform (sb-posix:getcwd) :type string :accessor cli-cwd + ;; TODO 2023-10-11: look into pushd popd - cd-stack? + (cd :initarg :cd :initform (sb-posix:getcwd) :type string :accessor cli-cd :documentation "working directory of the top-level CLI.")) (:documentation "CLI")) @@ -620,7 +630,7 @@ (let ((o (active-opts cli)) (a (cli-cmd-args cli)) (c (active-cmds cli))) - (log:debug! (cli-cwd cli) o a c))) + (log:debug! (cli-cd cli) o a c))) ;;; SIMPLE-CLI @@ -663,6 +673,14 @@ (setf (cdr *posix-argv*) opts)) ,@body)))) +;;; TOPLEVEL + +;; These macros help with defining a toplevel initialization +;; function. Initialization functions are responsible for parsing runtime +;; options and starting a REPL if needed. +;; (defmacro define-toplevel-init (name (props opts) &body body)) +;; (defmacro define-toplevel-repl (name (props opts) &body body)) + (defun default-toplevel-init () (let ((opts (cdr *posix-argv*)) (sysinit)) @@ -687,11 +705,3 @@ (return)))))) (when *posix-argv* (setf (cdr *posix-argv*) opts))))) - -;;; TOPLEVEL - -;; These macros help with defining a toplevel initialization -;; function. Initialization functions are responsible for parsing runtime -;; options and starting a REPL if needed. -;; (defmacro define-toplevel-init (name (props opts) &body body)) -;; (defmacro define-toplevel-repl (name (props opts) &body body)) diff -r aac665e2f5bf -r 7c1383c08493 lisp/lib/cli/pkg.lisp --- a/lisp/lib/cli/pkg.lisp Tue May 21 17:13:34 2024 -0400 +++ b/lisp/lib/cli/pkg.lisp Tue May 21 22:20:29 2024 -0400 @@ -163,7 +163,7 @@ :cli-val :cli-cmd-args :cli-cmd - :cli-cwd + :cli-cd :find-cmd :find-opt :find-short-opt diff -r aac665e2f5bf -r 7c1383c08493 lisp/lib/io/io.asd --- a/lisp/lib/io/io.asd Tue May 21 17:13:34 2024 -0400 +++ b/lisp/lib/io/io.asd Tue May 21 22:20:29 2024 -0400 @@ -3,7 +3,8 @@ :depends-on (:cl-ppcre :std :obj :uring :sb-bsd-sockets) :version "0.1.0" :serial t - :components ((:file "pkg")) + :components ((:file "pkg") + (:file "xsubseq")) :in-order-to ((test-op (test-op "io/tests")))) (defsystem :io/tests diff -r aac665e2f5bf -r 7c1383c08493 lisp/lib/io/xsubseq.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lisp/lib/io/xsubseq.lisp Tue May 21 22:20:29 2024 -0400 @@ -0,0 +1,286 @@ +;;; io/xsubseq.lisp --- Subseq Optimizations + +;; This is ported from Fukamachi's XSUBSEQ + +;;; Code: +(defpackage io/xsubseq + (:use :cl) + (:import-from :sb-cltl2 :variable-information) + (:import-from :std/type :octet-vector) + (:export :xsubseq + :octet-xsubseq + :string-xsubseq + :concatenated-xsubseqs + :null-concatenated-xsubseqs + :octet-concatenated-xsubseqs + :string-concatenated-xsubseqs + :make-concatenated-xsubseqs + :xlength + :xnconc + :xnconcf + :coerce-to-sequence + :coerce-to-string + :with-xsubseqs)) +(in-package :io/xsubseq) + +(defstruct (xsubseq (:constructor make-xsubseq (data start &optional (end (length data)) + &aux (len (- end start))))) + (data nil) + (start 0 :type integer) + (end 0 :type integer) + (len 0 :type integer)) + +(defstruct (octet-xsubseq (:include xsubseq) + (:constructor make-octet-xsubseq (data start &optional (end (length data)) + &aux (len (- end start)))))) + +(defstruct (string-xsubseq (:include xsubseq) + (:constructor make-string-xsubseq (data start &optional (end (length data)) + &aux (len (- end start)))))) + +(defstruct (concatenated-xsubseqs (:constructor %make-concatenated-xsubseqs)) + (len 0 :type integer) + (last nil :type list) + (children nil :type list)) + +(defun make-concatenated-xsubseqs (&rest children) + (if (null children) + (make-null-concatenated-xsubseqs) + (%make-concatenated-xsubseqs :children children + :last (last children) + :len (reduce #'+ + children + :key #'xsubseq-len + :initial-value 0)))) + +(defstruct (null-concatenated-xsubseqs (:include concatenated-xsubseqs))) + +(defstruct (octet-concatenated-xsubseqs (:include concatenated-xsubseqs))) + +(defstruct (string-concatenated-xsubseqs (:include concatenated-xsubseqs))) + +(defun xsubseq (data start &optional (end (length data))) + (typecase data + (octet-vector (make-octet-xsubseq data start end)) + (string (make-string-xsubseq data start end)) + (t (make-xsubseq data start end)))) + +#+(or sbcl openmcl cmu allegro) +(define-compiler-macro xsubseq (&whole form &environment env data start &optional end) + (let ((type (cond + ((constantp data) (type-of data)) + ((and (symbolp data) + (assoc 'type (nth-value 2 (variable-information data env))))) + ((and (listp data) + (eq (car data) 'make-string)) + 'string) + ((and (listp data) + (eq (car data) 'the) + (cadr data))) + ((and (listp data) + (eq (car data) 'make-array) + (null (cadr (member :adjustable data))) + (null (cadr (member :fill-pointer data))) + (cadr (member :element-type data)))))) + (g-data (gensym "DATA"))) + (if (null type) + form + (cond + ((subtypep type 'octet-vector) `(let ((,g-data ,data)) + (make-octet-xsubseq ,g-data ,start ,(or end `(length ,g-data))))) + ((subtypep type 'string) `(let ((,g-data ,data)) + (make-string-xsubseq ,g-data ,start ,(or end `(length ,g-data))))) + (t form))))) + +(defun %xnconc2 (seq1 seq2) + (flet ((seq-values (seq) + (if (concatenated-xsubseqs-p seq) + (values (concatenated-xsubseqs-children seq) + (concatenated-xsubseqs-last seq) + (concatenated-xsubseqs-len seq)) + (let ((children (list seq))) + (values children children + (xsubseq-len seq)))))) + (macrolet ((make-concatenated (type seq1 seq2) + `(multiple-value-bind (children last len) + (seq-values ,seq2) + (,(cond + ((eq type 'octet-vector) 'make-octet-concatenated-xsubseqs) + ((eq type 'string) 'make-string-concatenated-xsubseqs) + (t '%make-concatenated-xsubseqs)) + :len (+ (xsubseq-len ,seq1) len) + :children (cons ,seq1 children) + :last last)))) + (etypecase seq1 + (null-concatenated-xsubseqs seq2) + (concatenated-xsubseqs + (multiple-value-bind (children last len) + (seq-values seq2) + (if (concatenated-xsubseqs-last seq1) + (progn + (rplacd (concatenated-xsubseqs-last seq1) + children) + (setf (concatenated-xsubseqs-last seq1) last) + (incf (concatenated-xsubseqs-len seq1) len)) + ;; empty concatenated-xsubseqs + (progn + (setf (concatenated-xsubseqs-children seq1) children + (concatenated-xsubseqs-len seq1) len + (concatenated-xsubseqs-last seq1) last))) + seq1)) + (octet-xsubseq + (make-concatenated octet-vector seq1 seq2)) + (string-xsubseq + (make-concatenated string seq1 seq2)) + (xsubseq (make-concatenated t seq1 seq2)))))) + +(defun xnconc (subseq &rest more-subseqs) + (reduce #'%xnconc2 more-subseqs :initial-value subseq)) + +(define-modify-macro xnconcf (subseq &rest more-subseqs) xnconc) + +(defun xlength (seq) + (etypecase seq + (xsubseq (xsubseq-len seq)) + (concatenated-xsubseqs (concatenated-xsubseqs-len seq)))) + +(defun coerce-to-sequence (seq) + (etypecase seq + (octet-concatenated-xsubseqs (octet-concatenated-xsubseqs-to-sequence seq)) + (string-concatenated-xsubseqs (string-concatenated-xsubseqs-to-sequence seq)) + (concatenated-xsubseqs (concatenated-xsubseqs-to-sequence seq)) + (xsubseq (xsubseq-to-sequence seq)))) + +#+(or sbcl openmcl cmu allegro) +(define-compiler-macro coerce-to-sequence (&whole form &environment env seq) + (let ((type (cond + ((constantp seq) (type-of seq)) + ((and (symbolp seq) + (assoc 'type (nth-value 2 (variable-information seq env))))) + ((and (listp seq) + (eq (car seq) 'the) + (cadr seq)))))) + (if (null type) + form + (cond + ((subtypep type 'octet-concatenated-xsubseqs) `(octet-concatenated-xsubseqs-to-sequence ,seq)) + ((subtypep type 'string-concatenated-xsubseqs) `(string-concatenated-xsubseqs-to-sequence ,seq)) + ((subtypep type 'concatenated-xsubseqs) `(concatenated-xsubseqs-to-sequence ,seq)) + ((subtypep type 'xsubseq) `(xsubseq-to-sequence ,seq)) + (t form))))) + +(defun coerce-to-string (seq) + (etypecase seq + (null-concatenated-xsubseqs "") + (octet-concatenated-xsubseqs (octet-concatenated-xsubseqs-to-string seq)) + (string-concatenated-xsubseqs (string-concatenated-xsubseqs-to-sequence seq)) + (octet-xsubseq (octet-xsubseq-to-string seq)) + (string-xsubseq (xsubseq-to-sequence seq)))) + +#+(or sbcl openmcl cmu allegro) +(define-compiler-macro coerce-to-string (&whole form &environment env seq) + (let ((type (cond + ((constantp seq) (type-of seq)) + ((and (symbolp seq) + (assoc 'type (nth-value 2 (variable-information seq env))))) + ((and (listp seq) + (eq (car seq) 'the) + (cadr seq)))))) + (if (null type) + form + (cond + ((subtypep type 'octet-concatenated-xsubseqs) `(octet-concatenated-xsubseqs-to-string ,seq)) + ((subtypep type 'string-concatenated-xsubseqs) `(string-concatenated-xsubseqs-to-sequence ,seq)) + ((subtypep type 'octet-xsubseq) `(octet-xsubseq-to-string ,seq)) + ((subtypep type 'string-xsubseq) `(xsubseq-to-sequence ,seq)) + (t form))))) + +(defun xsubseq-to-sequence (seq) + (let ((result (make-array (xsubseq-len seq) + :element-type + (array-element-type (xsubseq-data seq))))) + (replace result (xsubseq-data seq) + :start2 (xsubseq-start seq) + :end2 (xsubseq-end seq)) + result)) + +(defun octet-xsubseq-to-string (seq) + (let ((result (make-array (xsubseq-len seq) + :element-type 'character))) + (declare (type simple-string result)) + (let ((data (xsubseq-data seq)) + (end (xsubseq-end seq))) + (do ((i (xsubseq-start seq) (1+ i)) + (j 0 (1+ j))) + ((= i end) result) + (setf (aref result j) + (code-char + (the (unsigned-byte 8) + (aref (the octet-vector data) i)))))))) + +(defun concatenated-xsubseqs-to-sequence (seq) + (let ((result (make-array (concatenated-xsubseqs-len seq) + :element-type + (array-element-type (xsubseq-data (car (concatenated-xsubseqs-children seq))))))) + (loop with current-pos = 0 + for seq in (concatenated-xsubseqs-children seq) + do (replace result (xsubseq-data seq) + :start1 current-pos + :start2 (xsubseq-start seq) + :end2 (xsubseq-end seq)) + (incf current-pos + (xsubseq-len seq))) + result)) + +(defun octet-concatenated-xsubseqs-to-sequence (seq) + (let ((result (make-array (concatenated-xsubseqs-len seq) + :element-type '(unsigned-byte 8)))) + (declare (type octet-vector result)) + (loop with current-pos of-type integer = 0 + for seq in (concatenated-xsubseqs-children seq) + do (replace result (the octet-vector (xsubseq-data seq)) + :start1 current-pos + :start2 (xsubseq-start seq) + :end2 (xsubseq-end seq)) + (incf current-pos + (xsubseq-len seq))) + result)) + +(defun octet-concatenated-xsubseqs-to-string (seq) + (let ((result (make-array (concatenated-xsubseqs-len seq) + :element-type 'character))) + (declare (type simple-string result)) + (loop with current-pos = 0 + for seq in (concatenated-xsubseqs-children seq) + do (do ((i (xsubseq-start seq) (1+ i)) + (j current-pos (1+ j))) + ((= i (xsubseq-end seq)) + (setf current-pos j)) + (setf (aref result j) + (code-char + (the (unsigned-byte 8) + (aref (the octet-vector (xsubseq-data seq)) i)))))) + result)) + +(defun string-concatenated-xsubseqs-to-sequence (seq) + (let ((result (make-string (concatenated-xsubseqs-len seq)))) + (declare (type simple-string result)) + (loop with current-pos of-type integer = 0 + for seq in (concatenated-xsubseqs-children seq) + do (replace result (the simple-string (xsubseq-data seq)) + :start1 current-pos + :start2 (xsubseq-start seq) + :end2 (xsubseq-end seq)) + (incf current-pos + (xsubseq-len seq))) + result)) + +(defmacro with-xsubseqs ((xsubseqs &key initial-value) &body body) + `(let ((,xsubseqs ,(or initial-value + `(make-null-concatenated-xsubseqs)))) + ,@body + + (typecase ,xsubseqs + (null-concatenated-xsubseqs nil) + (xsubseq (xsubseq-to-sequence ,xsubseqs)) + (t (concatenated-xsubseqs-to-sequence ,xsubseqs))))) diff -r aac665e2f5bf -r 7c1383c08493 lisp/lib/net/pkg.lisp --- a/lisp/lib/net/pkg.lisp Tue May 21 17:13:34 2024 -0400 +++ b/lisp/lib/net/pkg.lisp Tue May 21 22:20:29 2024 -0400 @@ -156,7 +156,7 @@ (:export)) (defpackage :net/proto/http - (:use :cl :std :net/core :sb-bsd-sockets) + (:use :cl :std :net/core :sb-bsd-sockets :parse/bytes) (:export)) (uiop:define-package :net/fetch diff -r aac665e2f5bf -r 7c1383c08493 lisp/lib/net/proto/http.lisp --- a/lisp/lib/net/proto/http.lisp Tue May 21 17:13:34 2024 -0400 +++ b/lisp/lib/net/proto/http.lisp Tue May 21 22:20:29 2024 -0400 @@ -196,10 +196,9 @@ (when header-value-buffer (let ((header-value (locally (declare (optimize (speed 3) (safety 0))) - (coerce-to-string - (the (or octets-concatenated-xsubseqs - octets-xsubseq) - header-value-buffer))))) + (coerce + header-value-buffer + 'string)))) (if (string= parsing-header-field "set-cookie") (push header-value (gethash "set-cookie" headers)) (multiple-value-bind (previous-value existp) @@ -318,6 +317,257 @@ (when parsing-boundary (return-from find-boundary (subseq data start end))))))) +;;; multipart-parser +(defstruct (ll-multipart-parser (:constructor make-ll-multipart-parser + (&key boundary + &aux (header-parser + (let ((parser (make-http))) + (setf (http-state parser) +state-headers+) + parser))))) + (state 0 :type fixnum) + (header-parser) + boundary + body-mark + body-buffer + boundary-mark + boundary-buffer) + +#.`(eval-when (:compile-toplevel :load-toplevel :execute) + ,@(loop for i from 0 + for state in '(parsing-delimiter-dash-start + parsing-delimiter-dash + parsing-delimiter + parsing-delimiter-end + parsing-delimiter-almost-done + parsing-delimiter-done + header-field-start + body-start + looking-for-delimiter + maybe-delimiter-start + maybe-delimiter-first-dash + maybe-delimiter-second-dash + body-almost-done + body-done) + collect `(defconstant ,(format-symbol t "+~A+" state) ,i))) + +(defun http-multipart-parse (parser callbacks data &key (start 0) end) + (declare (type simple-byte-vector data)) + (let* ((end (or end (length data))) + (boundary (map '(simple-array (unsigned-byte 8) (*)) #'char-code (ll-multipart-parser-boundary parser))) + (boundary-length (length boundary)) + (header-parser (ll-multipart-parser-header-parser parser))) + (declare (type simple-byte-vector boundary)) + (when (= start end) + (return-from http-multipart-parse start)) + + (macrolet ((with-body-cb (callback &body body) + `(handler-case (when-let ((,callback (callbacks-body callbacks))) + ,@body) + (error (e) + (error 'cb-body :error e)))) + (call-body-cb (&optional (end '(ll-multipart-parser-boundary-mark parser))) + (let ((g-end (gensym "END"))) + `(with-body-cb callback + (when (ll-multipart-parser-body-buffer parser) + (funcall callback parser + (ll-multipart-parser-body-buffer parser) + 0 (length (ll-multipart-parser-body-buffer parser))) + (setf (ll-multipart-parser-body-buffer parser) nil)) + (when-let ((,g-end ,end)) + (funcall callback parser data + (ll-multipart-parser-body-mark parser) + ,g-end))))) + (flush-boundary-buffer () + `(with-body-cb callback + (when (ll-multipart-parser-boundary-buffer parser) + (funcall callback parser + (ll-multipart-parser-boundary-buffer parser) + 0 (length (ll-multipart-parser-boundary-buffer parser))) + (setf (ll-multipart-parser-boundary-buffer parser) nil))))) + (let* ((p start) + (byte (aref data p))) + #+fast-http-debug + (log:debug (code-char byte)) + (tagbody + (macrolet ((go-state (tag &optional (advance 1)) + `(progn + ,(case advance + (0 ()) + (1 '(incf p)) + (otherwise `(incf p ,advance))) + (setf (ll-multipart-parser-state parser) ,tag) + #+fast-http-debug + (log:debug ,(princ-to-string tag)) + ,@(and (not (eql advance 0)) + `((when (= p end) + (go exit-loop)) + (setq byte (aref data p)) + #+fast-http-debug + (log:debug (code-char byte)))) + (go ,tag)))) + (tagcasev (ll-multipart-parser-state parser) + (+parsing-delimiter-dash-start+ + (unless (= byte +dash+) + (go-state +header-field-start+ 0)) + (go-state +parsing-delimiter-dash+)) + + (+parsing-delimiter-dash+ + (unless (= byte +dash+) + (error 'invalid-multipart-body)) + (go-state +parsing-delimiter+)) + + (+parsing-delimiter+ + (let ((end2 (+ p boundary-length))) + (cond + ((ll-multipart-parser-boundary-buffer parser) + (when (< (+ end (length (ll-multipart-parser-boundary-buffer parser)) -3) end2) + (setf (ll-multipart-parser-boundary-buffer parser) + (concatenate 'simple-byte-vector + (ll-multipart-parser-boundary-buffer parser) + data)) + (go exit-loop)) + (let ((data2 (make-array boundary-length :element-type '(unsigned-byte 8))) + (boundary-buffer-length (length (ll-multipart-parser-boundary-buffer parser)))) + (replace data2 (ll-multipart-parser-boundary-buffer parser) + :start2 2) + (replace data2 data + :start1 (- boundary-buffer-length 2)) + (unless (search boundary data2) + ;; Still in the body + (when (ll-multipart-parser-body-mark parser) + (call-body-cb nil) + (flush-boundary-buffer) + (go-state +looking-for-delimiter+)) + (error 'invalid-boundary)) + (go-state +parsing-delimiter-end+ (- boundary-length boundary-buffer-length -2)))) + ((< (1- end) end2) + ;; EOF + (setf (ll-multipart-parser-boundary-buffer parser) + (if (ll-multipart-parser-boundary-buffer parser) + (concatenate 'simple-byte-vector + (ll-multipart-parser-boundary-buffer parser) + (subseq data (max 0 (- p 2)))) + (subseq data (max 0 (- p 2))))) + (go exit-loop)) + (T + (unless (search boundary data :start2 p :end2 end2) + ;; Still in the body + (when (ll-multipart-parser-body-mark parser) + (go-state +looking-for-delimiter+)) + (error 'invalid-boundary)) + (go-state +parsing-delimiter-end+ boundary-length))))) + + (+parsing-delimiter-end+ + (casev byte + (+cr+ (go-state +parsing-delimiter-almost-done+)) + (+lf+ (go-state +parsing-delimiter-almost-done+ 0)) + (+dash+ (go-state +body-almost-done+)) + (otherwise + ;; Still in the body + (when (ll-multipart-parser-body-mark parser) + (call-body-cb nil) + (flush-boundary-buffer) + (go-state +looking-for-delimiter+)) + (error 'invalid-boundary)))) + + (+parsing-delimiter-almost-done+ + (unless (= byte +lf+) + (error 'invalid-boundary)) + (when (ll-multipart-parser-body-mark parser) + ;; got a part + (when (ll-multipart-parser-boundary-mark parser) + (call-body-cb)) + (when-let ((callback (callbacks-message-complete callbacks))) + (handler-case (funcall callback parser) + (error (e) + (error 'cb-message-complete :error e))))) + (go-state +parsing-delimiter-done+)) + + (+parsing-delimiter-done+ + (when-let ((callback (callbacks-message-begin callbacks))) + (handler-case (funcall callback parser) + (error (e) + (error 'cb-message-begin :error e)))) + (setf (ll-multipart-parser-body-mark parser) p) + (go-state +header-field-start+ 0)) + + (+header-field-start+ + (let ((next (parse-headers header-parser callbacks data p end))) + (setq p (1- next)) ;; XXX + ;; parsing headers done + (when (= (http-state header-parser) +state-body+) + (when-let ((callback (callbacks-headers-complete callbacks))) + (handler-case (funcall callback parser) + (error (e) + (error 'cb-headers-complete :error e)))) + (setf (http-state header-parser) +state-headers+)) + (go-state +body-start+ 0))) + + (+body-start+ + (setf (ll-multipart-parser-body-mark parser) (1+ p)) + (go-state +looking-for-delimiter+)) + + (+looking-for-delimiter+ + (setf (ll-multipart-parser-boundary-mark parser) nil) + (casev byte + (+cr+ (setf (ll-multipart-parser-boundary-mark parser) p) + (go-state +maybe-delimiter-start+)) + (otherwise (go-state +looking-for-delimiter+)))) + + (+maybe-delimiter-start+ + (unless (= byte +lf+) + (go-state +looking-for-delimiter+ 0)) + (go-state +maybe-delimiter-first-dash+)) + + (+maybe-delimiter-first-dash+ + (if (= byte +dash+) + (go-state +maybe-delimiter-second-dash+) + (if (= byte +cr+) + (progn + (setf (ll-multipart-parser-boundary-mark parser) p) + (go-state +maybe-delimiter-start+)) + (go-state +looking-for-delimiter+)))) + + (+maybe-delimiter-second-dash+ + (if (= byte +dash+) + (go-state +parsing-delimiter+) + (go-state +looking-for-delimiter+))) + + (+body-almost-done+ + (casev byte + (+dash+ (go-state +body-done+ 0)) + (otherwise (error 'invalid-multipart-body)))) + + (+body-done+ + (when (ll-multipart-parser-body-mark parser) + ;; got a part + (setf (ll-multipart-parser-body-buffer parser) nil) + (call-body-cb) + (when-let ((callback (callbacks-message-complete callbacks))) + (handler-case (funcall callback parser) + (error (e) + (error 'cb-message-complete :error e)))) + (setf (ll-multipart-parser-body-mark parser) nil)) + (go exit-loop)))) + exit-loop) + (when (ll-multipart-parser-body-mark parser) + (when (<= +looking-for-delimiter+ + (ll-multipart-parser-state parser) + +maybe-delimiter-second-dash+) + (call-body-cb (or (ll-multipart-parser-boundary-mark parser) p))) + ;; buffer the last part + (when (ll-multipart-parser-boundary-mark parser) + (setf (ll-multipart-parser-body-buffer parser) + (if (ll-multipart-parser-body-buffer parser) + (concatenate 'simple-byte-vector + (ll-multipart-parser-body-buffer parser) + (subseq data (ll-multipart-parser-boundary-mark parser))) + (subseq data (ll-multipart-parser-boundary-mark parser))))) + + (setf (ll-multipart-parser-body-mark parser) 0 + (ll-multipart-parser-boundary-mark parser) nil)) + p)))) + (defun make-multipart-parser (content-type callback) (check-type content-type string) (let ((boundary (find-boundary content-type))) @@ -726,7 +976,7 @@ `(when-let ((,callback (,(format-symbol t "~A-~A" :callbacks name) ,callbacks))) (handler-bind ((error (lambda (,e) - (unless (typep ,e 'fast-http-error) + (unless (typep ,e 'http-error) (error ',(format-symbol t "~A-~A" :cb name) :error ,e) (abort ,e))))) @@ -737,7 +987,7 @@ `(when-let ((,callback (,(format-symbol t "~A-~A" :callbacks name) ,callbacks))) (handler-bind ((error (lambda (,e) - (unless (typep ,e 'fast-http-error) + (unless (typep ,e 'http-error) (error ',(format-symbol t "~A-~A" :cb name) :error ,e) (abort ,e))))) @@ -750,7 +1000,7 @@ (define-condition eof () ()) (define-condition expect-failed (parsing-error) - ((fast-http.error::description :initform "expect failed"))) + ((description :initform "expect failed"))) ;; @@ -1583,254 +1833,3 @@ (funcall (the function header-parameter-value-callback) data parameter-value-mark p)))))) p)) - -;;; multipart-parser -(defstruct (ll-multipart-parser (:constructor make-ll-multipart-parser - (&key boundary - &aux (header-parser - (let ((parser (make-http))) - (setf (http-state parser) +state-headers+) - parser))))) - (state 0 :type fixnum) - (header-parser) - boundary - body-mark - body-buffer - boundary-mark - boundary-buffer) - -#.`(eval-when (:compile-toplevel :load-toplevel :execute) - ,@(loop for i from 0 - for state in '(parsing-delimiter-dash-start - parsing-delimiter-dash - parsing-delimiter - parsing-delimiter-end - parsing-delimiter-almost-done - parsing-delimiter-done - header-field-start - body-start - looking-for-delimiter - maybe-delimiter-start - maybe-delimiter-first-dash - maybe-delimiter-second-dash - body-almost-done - body-done) - collect `(defconstant ,(format-symbol t "+~A+" state) ,i))) - -(defun http-multipart-parse (parser callbacks data &key (start 0) end) - (declare (type simple-byte-vector data)) - (let* ((end (or end (length data))) - (boundary (map '(simple-array (unsigned-byte 8) (*)) #'char-code (ll-multipart-parser-boundary parser))) - (boundary-length (length boundary)) - (header-parser (ll-multipart-parser-header-parser parser))) - (declare (type simple-byte-vector boundary)) - (when (= start end) - (return-from http-multipart-parse start)) - - (macrolet ((with-body-cb (callback &body body) - `(handler-case (when-let ((,callback (callbacks-body callbacks))) - ,@body) - (error (e) - (error 'cb-body :error e)))) - (call-body-cb (&optional (end '(ll-multipart-parser-boundary-mark parser))) - (let ((g-end (gensym "END"))) - `(with-body-cb callback - (when (ll-multipart-parser-body-buffer parser) - (funcall callback parser - (ll-multipart-parser-body-buffer parser) - 0 (length (ll-multipart-parser-body-buffer parser))) - (setf (ll-multipart-parser-body-buffer parser) nil)) - (when-let ((,g-end ,end)) - (funcall callback parser data - (ll-multipart-parser-body-mark parser) - ,g-end))))) - (flush-boundary-buffer () - `(with-body-cb callback - (when (ll-multipart-parser-boundary-buffer parser) - (funcall callback parser - (ll-multipart-parser-boundary-buffer parser) - 0 (length (ll-multipart-parser-boundary-buffer parser))) - (setf (ll-multipart-parser-boundary-buffer parser) nil))))) - (let* ((p start) - (byte (aref data p))) - #+fast-http-debug - (log:debug (code-char byte)) - (tagbody - (macrolet ((go-state (tag &optional (advance 1)) - `(progn - ,(case advance - (0 ()) - (1 '(incf p)) - (otherwise `(incf p ,advance))) - (setf (ll-multipart-parser-state parser) ,tag) - #+fast-http-debug - (log:debug ,(princ-to-string tag)) - ,@(and (not (eql advance 0)) - `((when (= p end) - (go exit-loop)) - (setq byte (aref data p)) - #+fast-http-debug - (log:debug (code-char byte)))) - (go ,tag)))) - (tagcasev (ll-multipart-parser-state parser) - (+parsing-delimiter-dash-start+ - (unless (= byte +dash+) - (go-state +header-field-start+ 0)) - (go-state +parsing-delimiter-dash+)) - - (+parsing-delimiter-dash+ - (unless (= byte +dash+) - (error 'invalid-multipart-body)) - (go-state +parsing-delimiter+)) - - (+parsing-delimiter+ - (let ((end2 (+ p boundary-length))) - (cond - ((ll-multipart-parser-boundary-buffer parser) - (when (< (+ end (length (ll-multipart-parser-boundary-buffer parser)) -3) end2) - (setf (ll-multipart-parser-boundary-buffer parser) - (concatenate 'simple-byte-vector - (ll-multipart-parser-boundary-buffer parser) - data)) - (go exit-loop)) - (let ((data2 (make-array boundary-length :element-type '(unsigned-byte 8))) - (boundary-buffer-length (length (ll-multipart-parser-boundary-buffer parser)))) - (replace data2 (ll-multipart-parser-boundary-buffer parser) - :start2 2) - (replace data2 data - :start1 (- boundary-buffer-length 2)) - (unless (search boundary data2) - ;; Still in the body - (when (ll-multipart-parser-body-mark parser) - (call-body-cb nil) - (flush-boundary-buffer) - (go-state +looking-for-delimiter+)) - (error 'invalid-boundary)) - (go-state +parsing-delimiter-end+ (- boundary-length boundary-buffer-length -2)))) - ((< (1- end) end2) - ;; EOF - (setf (ll-multipart-parser-boundary-buffer parser) - (if (ll-multipart-parser-boundary-buffer parser) - (concatenate 'simple-byte-vector - (ll-multipart-parser-boundary-buffer parser) - (subseq data (max 0 (- p 2)))) - (subseq data (max 0 (- p 2))))) - (go exit-loop)) - (T - (unless (search boundary data :start2 p :end2 end2) - ;; Still in the body - (when (ll-multipart-parser-body-mark parser) - (go-state +looking-for-delimiter+)) - (error 'invalid-boundary)) - (go-state +parsing-delimiter-end+ boundary-length))))) - - (+parsing-delimiter-end+ - (casev byte - (+cr+ (go-state +parsing-delimiter-almost-done+)) - (+lf+ (go-state +parsing-delimiter-almost-done+ 0)) - (+dash+ (go-state +body-almost-done+)) - (otherwise - ;; Still in the body - (when (ll-multipart-parser-body-mark parser) - (call-body-cb nil) - (flush-boundary-buffer) - (go-state +looking-for-delimiter+)) - (error 'invalid-boundary)))) - - (+parsing-delimiter-almost-done+ - (unless (= byte +lf+) - (error 'invalid-boundary)) - (when (ll-multipart-parser-body-mark parser) - ;; got a part - (when (ll-multipart-parser-boundary-mark parser) - (call-body-cb)) - (when-let ((callback (callbacks-message-complete callbacks))) - (handler-case (funcall callback parser) - (error (e) - (error 'cb-message-complete :error e))))) - (go-state +parsing-delimiter-done+)) - - (+parsing-delimiter-done+ - (when-let ((callback (callbacks-message-begin callbacks))) - (handler-case (funcall callback parser) - (error (e) - (error 'cb-message-begin :error e)))) - (setf (ll-multipart-parser-body-mark parser) p) - (go-state +header-field-start+ 0)) - - (+header-field-start+ - (let ((next (parse-headers header-parser callbacks data p end))) - (setq p (1- next)) ;; XXX - ;; parsing headers done - (when (= (http-state header-parser) +state-body+) - (when-let ((callback (callbacks-headers-complete callbacks))) - (handler-case (funcall callback parser) - (error (e) - (error 'cb-headers-complete :error e)))) - (setf (http-state header-parser) +state-headers+)) - (go-state +body-start+ 0))) - - (+body-start+ - (setf (ll-multipart-parser-body-mark parser) (1+ p)) - (go-state +looking-for-delimiter+)) - - (+looking-for-delimiter+ - (setf (ll-multipart-parser-boundary-mark parser) nil) - (casev byte - (+cr+ (setf (ll-multipart-parser-boundary-mark parser) p) - (go-state +maybe-delimiter-start+)) - (otherwise (go-state +looking-for-delimiter+)))) - - (+maybe-delimiter-start+ - (unless (= byte +lf+) - (go-state +looking-for-delimiter+ 0)) - (go-state +maybe-delimiter-first-dash+)) - - (+maybe-delimiter-first-dash+ - (if (= byte +dash+) - (go-state +maybe-delimiter-second-dash+) - (if (= byte +cr+) - (progn - (setf (ll-multipart-parser-boundary-mark parser) p) - (go-state +maybe-delimiter-start+)) - (go-state +looking-for-delimiter+)))) - - (+maybe-delimiter-second-dash+ - (if (= byte +dash+) - (go-state +parsing-delimiter+) - (go-state +looking-for-delimiter+))) - - (+body-almost-done+ - (casev byte - (+dash+ (go-state +body-done+ 0)) - (otherwise (error 'invalid-multipart-body)))) - - (+body-done+ - (when (ll-multipart-parser-body-mark parser) - ;; got a part - (setf (ll-multipart-parser-body-buffer parser) nil) - (call-body-cb) - (when-let ((callback (callbacks-message-complete callbacks))) - (handler-case (funcall callback parser) - (error (e) - (error 'cb-message-complete :error e)))) - (setf (ll-multipart-parser-body-mark parser) nil)) - (go exit-loop)))) - exit-loop) - (when (ll-multipart-parser-body-mark parser) - (when (<= +looking-for-delimiter+ - (ll-multipart-parser-state parser) - +maybe-delimiter-second-dash+) - (call-body-cb (or (ll-multipart-parser-boundary-mark parser) p))) - ;; buffer the last part - (when (ll-multipart-parser-boundary-mark parser) - (setf (ll-multipart-parser-body-buffer parser) - (if (ll-multipart-parser-body-buffer parser) - (concatenate 'simple-byte-vector - (ll-multipart-parser-body-buffer parser) - (subseq data (ll-multipart-parser-boundary-mark parser))) - (subseq data (ll-multipart-parser-boundary-mark parser))))) - - (setf (ll-multipart-parser-body-mark parser) 0 - (ll-multipart-parser-boundary-mark parser) nil)) - p)))) diff -r aac665e2f5bf -r 7c1383c08493 lisp/lib/net/util.lisp --- a/lisp/lib/net/util.lisp Tue May 21 17:13:34 2024 -0400 +++ b/lisp/lib/net/util.lisp Tue May 21 22:20:29 2024 -0400 @@ -4,6 +4,7 @@ ;; from usocket (defun get-address-by-name (name) + "Return the address of a host by NAME." (multiple-value-bind (host4 host6) (get-host-by-name name) (let ((addr4 (when host4 @@ -14,7 +15,7 @@ ;; from https://github.com/eudoxia0/find-port (defun port-open-p (port &key (host *localhost*)) - "Determine if the port is open." + "Determine if a PORT is open on the given HOST." (handler-case (let ((socket (make-instance 'inet-socket :type :stream))) (setf (sockopt-reuse-address socket) t) diff -r aac665e2f5bf -r 7c1383c08493 lisp/lib/parse/bytes.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lisp/lib/parse/bytes.lisp Tue May 21 22:20:29 2024 -0400 @@ -0,0 +1,482 @@ +;;; parse/bytes.lisp --- Procedural Parser + +;; swiped from Fukamachi's proc-parser.lisp. Will re-implement at a later +;; date. + +;;; License: +;; Copyright 2015 Eitaro Fukamachi + +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are met: + +;; 1. Redistributions of source code must retain the above copyright notice, +;; this list of conditions and the following disclaimer. + +;; 2. Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. + +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +;; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +;; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;; POSSIBILITY OF SUCH DAMAGE. + +;;; Code: +(in-package :parse/bytes) + +(define-condition match-failed (error) + ((elem :initarg :elem + :initform nil) + (expected :initarg :expected + :initform nil)) + (:report (lambda (condition stream) + (with-slots (elem expected) condition + (format stream + "Match failed~:[~;~:*: ~S~]~:[~;~:* (expected: ~{~S~^, ~})~]" + (ensure-char-elem elem) expected))))) + +(defun convert-case-conditions (var chars) + (cond + ((consp chars) + `(or ,@(loop for ch in chars + if (characterp ch) + collect `(char= ,var ,ch) + else + collect `(= ,var ,ch)))) + ((eq chars 'otherwise) + t) + (t (if (characterp chars) + `(char= ,var ,chars) + `(= ,var ,chars))))) + +(defun typed-case-tagbodies (var &rest cases) + (cond + ((null cases) nil) + ((= 1 (length cases)) + `((when ,(convert-case-conditions var (car (first cases))) + ,@(cdr (first cases))))) + ((and (= 2 (length cases)) + (eq (car (second cases)) 'otherwise)) + `((unless ,(convert-case-conditions var (car (first cases))) + ,@(cdr (second cases))) + ,@(cdr (first cases)))) + (t + (let ((tags (make-array (length cases) :initial-contents (loop repeat (length cases) + collect (gensym)))) + (end (gensym "END"))) + `(,@(loop for (chars . body) in cases + for i from 0 + collect `(when ,(convert-case-conditions var chars) + (go ,(aref tags i)))) + ,@(loop for case in cases + for i from 0 + append `(,(aref tags i) + ,@(cdr case) + (go ,end))) + ,end))))) + +(defmacro vector-case (elem-var vec-and-options &body cases) + (destructuring-bind (vec &key case-insensitive) + (ensure-cons vec-and-options) + (with-gensyms (otherwise end-tag vector-case-block) + (labels ((case-candidates (el) + (cond + ((not case-insensitive) el) + ((characterp el) + (cond + ((char<= #\a el #\z) + `(,el + ,(code-char + (- (char-code el) + #.(- (char-code #\a) (char-code #\A)))))) + ((char<= #\A el #\Z) + `(,el + ,(code-char + (+ (char-code el) + #.(- (char-code #\a) (char-code #\A)))))) + (t el))) + ((typep el '(unsigned-byte 8)) + (cond + ((<= #.(char-code #\a) el #.(char-code #\z)) + `(,el + ,(- el #.(- (char-code #\a) (char-code #\A))))) + ((<= #.(char-code #\A) el #.(char-code #\Z)) + `(,el + ,(+ el #.(- (char-code #\a) (char-code #\A))))) + (t el))) + (t el))) + (build-case (i cases vec) + (when cases + (let ((map (make-hash-table))) + (map nil + (lambda (case) + (unless (vectorp (car case)) + (error "The first element of cases must be a constant vector")) + (unless (<= (length (car case)) i) + (push case (gethash (aref (car case) i) map)))) + cases) + (let (res-cases) + (maphash (lambda (el cases) + (let ((next-case (build-case (1+ i) cases vec))) + (cond + (next-case + (push + `(,(case-candidates el) + (unless (advance*) + ,(if (= (length (caar cases)) (1+ i)) + `(progn ,@(cdr (car cases)) + (go ,end-tag)) + `(go :eof))) + ,@(apply #'typed-case-tagbodies elem-var + (append + next-case + `((otherwise (go ,otherwise)))))) + res-cases)) + (t + (push `(,(case-candidates el) + (advance*) + (return-from ,vector-case-block + (progn ,@(cdr (car cases))))) + res-cases))))) + map) + res-cases))))) + (let ((otherwise-case nil)) + (when (eq (caar (last cases)) 'otherwise) + (setq otherwise-case (car (last cases)) + cases (butlast cases))) + `(block ,vector-case-block + (tagbody + ,@(apply #'typed-case-tagbodies elem-var + (append + (build-case 0 cases vec) + `((otherwise (go ,otherwise))))) + (go ,end-tag) + ,otherwise + ,@(when otherwise-case + `(unless (eofp) + (return-from ,vector-case-block + (progn ,@(cdr otherwise-case))))) + ,end-tag))))))) + +(defun variable-type (var &optional env) + (declare (ignorable env)) + (cond + ((constantp var) (type-of var)) + #+(or sbcl openmcl cmu allegro) + ((and (symbolp var) + #+allegro (cadr (assoc 'type (nth-value 2 (variable-information var env)))) + #-allegro (cdr (assoc 'type (nth-value 2 (variable-information var env)))))) + ((and (listp var) + (eq (car var) 'the) + (cadr var))))) + +(deftype octets (&optional (len '*)) + `(simple-array (unsigned-byte 8) (,len))) + +(defun variable-type* (var &optional env) + (let ((type (variable-type var env))) + (cond + ((null type) nil) + ((subtypep type 'string) 'string) + ((subtypep type 'octets) 'octets)))) + +(defun check-skip-elems (elems) + (or (every (lambda (elem) + (or (characterp elem) + (and (consp elem) + (null (cddr elem)) + (eq (first elem) 'not) + (characterp (second elem))))) + elems) + (error "'skip' takes only constant characters, or a cons starts with 'not'."))) + +(defun check-match-cases (cases) + (or (every (lambda (case) + (and (consp case) + (or (eq (car case) 'otherwise) + (stringp (car case))))) + cases) + (error "'match-case' takes only constant strings at the car position.~% ~S" cases))) + + +(defmacro bind ((symb &body bind-forms) &body body) + (declare (ignore symb bind-forms body))) + +(defmacro subseq* (data start &optional end) + `(subseq ,data ,start ,end)) +(defmacro get-elem (form) form) +(defun ensure-char-elem (elem) + (if (characterp elem) + elem + (code-char elem))) + +(defmacro tagbody-with-match-failed (elem &body body) + (with-gensyms (block) + `(block ,block + (tagbody + (return-from ,block ,@body) + :match-failed + (error 'match-failed :elem ,elem))))) + +(defmacro parsing-macrolet ((elem data p end) + (&rest macros) &body body) + `(macrolet ((advance (&optional (step 1)) + `(or (advance* ,step) + (go :eof))) + (advance* (&optional (step 1)) + `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0))) + (incf ,',p ,step) + ,@(if (eql step 0) + () + `((if (<= ,',end ,',p) + nil + (progn + (setq ,',elem + (aref ,',data ,',p)) + t)))))) + (advance-to (to) + `(or (advance-to* ,to) + (go :eof))) + (advance-to* (to) + (once-only (to) + `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0))) + (check-type ,to fixnum) + (setq ,',p ,to) + (if (<= ,',end ,',p) + nil + (progn + (setq ,',elem + (aref ,',data ,',p)) + t))))) + (skip (&rest elems) + (check-skip-elems elems) + `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0))) + (if (skip-conditions ,',elem ,elems) + (advance) + (error 'match-failed + :elem ,',elem + :expected ',elems)))) + (skip* (&rest elems) + (check-skip-elems elems) + `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0))) + (unless (eofp) + (loop + (unless (skip-conditions ,',elem ,elems) + (return)) + (or (advance*) (go :eof)))))) + (skip+ (&rest elems) + `(progn + (skip ,@elems) + (skip* ,@elems))) + (skip? (&rest elems) + (check-skip-elems elems) + `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0))) + (when (skip-conditions ,',elem ,elems) + (or (advance*) (go :eof))))) + (skip-until (fn) + `(loop until ,(if (symbolp fn) + `(,fn (get-elem ,',elem)) + `(funcall ,fn (get-elem ,',elem))) + do (or (advance*) (go :eof)))) + (skip-while (fn) + `(loop while ,(if (symbolp fn) + `(,fn (get-elem ,',elem)) + `(funcall ,fn (get-elem ,',elem))) + do (or (advance*) (go :eof)))) + (bind ((symb &body bind-forms) &body body) + (with-gensyms (start) + `(let ((,start ,',p)) + (tagbody + ,@bind-forms + :eof) + (prog1 + (let ((,symb (subseq* ,',data ,start ,',p))) + ,@body) + (when (eofp) + (go :eof)))))) + (%match (&rest vectors) + `(%match-case + ,@(loop for vec in vectors + collect `(,vec)))) + (match (&rest vectors) + `(block match-block + (tagbody + (return-from match-block (%match ,@vectors)) + :match-failed + (error 'match-failed :elem ,',elem)))) + (match? (&rest vectors) + (with-gensyms (start start-elem) + `(let ((,start ,',p) + (,start-elem ,',elem)) + (block match?-block + (tagbody + (%match ,@vectors) + (return-from match?-block t) + :match-failed + (setq ,',p ,start + ,',elem ,start-elem)))))) + (match-i (&rest vectors) + `(match-i-case + ,@(loop for vec in vectors + collect `(,vec)))) + ,@macros) + #+sbcl (declare (sb-ext:muffle-conditions sb-ext:code-deletion-note)) + (labels ((eofp () + (declare (optimize (speed 3) (safety 0) (debug 0))) + (<= ,end ,p)) + (current () (get-elem ,elem)) + (peek (&key eof-value) + (declare (optimize (speed 3) (safety 0) (debug 0))) + (let ((len (length ,data))) + (declare (type fixnum len)) + (if (or (eofp) (>= ,p (- ,end 1)) (= ,p (- len 1))) + eof-value + (aref ,data (+ 1 ,p))))) + (pos () (the fixnum ,p))) + (declare (inline eofp current pos)) + ,@body))) + +(defmacro with-string-parsing ((data &key start end) &body body) + (with-gensyms (g-end elem p body-block) + (once-only (data) + `(let ((,elem #\Nul) + (,p ,(if start + `(or ,start 0) + 0)) + (,g-end ,(if end + `(or ,end (length ,data)) + `(length ,data)))) + (declare (type simple-string ,data) + (type fixnum ,p ,g-end) + (type character ,elem)) + (parsing-macrolet (,elem ,data ,p ,g-end) + ((skip-conditions (elem-var elems) + `(or ,@(loop for el in elems + if (and (consp el) + (eq (car el) 'not)) + collect `(not (char= ,(cadr el) ,elem-var)) + else + collect `(char= ,el ,elem-var)))) + (%match-case (&rest cases) + (check-match-cases cases) + `(prog1 + (vector-case ,',elem (,',data) + ,@(if (find 'otherwise cases :key #'car :test #'eq) + cases + (append cases + '((otherwise (go :match-failed)))))) + (when (eofp) (go :eof)))) + (%match-i-case (&rest cases) + (check-match-cases cases) + `(prog1 + (vector-case ,',elem (,',data :case-insensitive t) + ,@(if (find 'otherwise cases :key #'car :test #'eq) + cases + (append cases + '((otherwise (go :match-failed)))))) + (when (eofp) (go :eof)))) + (match-case + (&rest cases) + `(tagbody-with-match-failed ,',elem (%match-case ,@cases))) + (match-i-case + (&rest cases) + `(tagbody-with-match-failed ,',elem (%match-i-case ,@cases)))) + (block ,body-block + (tagbody + (when (eofp) + (go :eof)) + (setq ,elem (aref ,data ,p)) + (return-from ,body-block (progn ,@body)) + :eof))))))) + +(defmacro with-octets-parsing ((data &key start end) &body body) + (with-gensyms (g-end elem p body-block) + (once-only (data) + `(let ((,elem 0) + (,p ,(if start + `(or ,start 0) + 0)) + (,g-end ,(if end + `(or ,end (length ,data)) + `(length ,data)))) + (declare (type octets ,data) + (type fixnum ,p ,g-end) + (type (unsigned-byte 8) ,elem)) + (parsing-macrolet (,elem ,data ,p ,g-end) + ((skip-conditions (elem-var elems) + `(or ,@(loop for el in elems + if (and (consp el) + (eq (car el) 'not)) + collect `(not (= ,(char-code (cadr el)) ,elem-var)) + else + collect `(= ,(char-code el) ,elem-var)))) + (%match-case (&rest cases) + (check-match-cases cases) + (setf cases + (loop for case in cases + if (stringp (car case)) + collect (cons (babel:string-to-octets (car case)) + (cdr case)) + else + collect case)) + `(prog1 + (vector-case ,',elem (,',data) + ,@(if (find 'otherwise cases :key #'car :test #'eq) + cases + (append cases + '((otherwise (go :match-failed)))))) + (when (eofp) (go :eof)))) + (%match-i-case (&rest cases) + (check-match-cases cases) + (setf cases + (loop for case in cases + if (stringp (car case)) + collect (cons (babel:string-to-octets (car case)) + (cdr case)) + else + collect case)) + `(prog1 + (vector-case ,',elem (,',data :case-insensitive t) + ,@(if (find 'otherwise cases :key #'car :test #'eq) + cases + (append cases + '((otherwise (go :match-failed)))))) + (when (eofp) (go :eof)))) + (match-case + (&rest cases) + `(tagbody-with-match-failed ,',elem (%match-case ,@cases))) + (match-i-case + (&rest cases) + `(tagbody-with-match-failed ,',elem (%match-i-case ,@cases)))) + (block ,body-block + (tagbody + (when (eofp) + (go :eof)) + (setq ,elem (aref ,data ,p)) + (return-from ,body-block (progn ,@body)) + :match-failed + (error 'match-failed :elem ,elem) + :eof))))))) + +(defmacro with-vector-parsing ((data &key (start 0) end) &body body &environment env) + (let ((data-type (variable-type* data env))) + (case data-type + (string `(with-string-parsing (,data :start ,start :end ,end) ,@body)) + (octets `(macrolet ((get-elem (form) `(code-char ,form)) + (subseq* (data start &optional end) + `(babel:octets-to-string ,data :start ,start :end ,end))) + (with-octets-parsing (,data :start ,start :end ,end) ,@body))) + (otherwise (once-only (data) + `(etypecase ,data + (string (with-string-parsing (,data :start ,start :end ,end) ,@body)) + (octets (macrolet ((get-elem (form) `(code-char ,form)) + (subseq* (data start &optional end) + `(babel:octets-to-string ,data :start ,start :end ,end))) + (with-octets-parsing (,data :start ,start :end ,end) ,@body))))))))) diff -r aac665e2f5bf -r 7c1383c08493 lisp/lib/parse/parse.asd --- a/lisp/lib/parse/parse.asd Tue May 21 17:13:34 2024 -0400 +++ b/lisp/lib/parse/parse.asd Tue May 21 22:20:29 2024 -0400 @@ -4,6 +4,7 @@ :bug-tracker "https://lab.rwest.io/ellis/packy/issues" :depends-on (:cl-ppcre :std) :components ((:file "pkg") + (:file "bytes") (:file "lex") (:file "yacc")) :in-order-to ((test-op (test-op :parse/tests)))) diff -r aac665e2f5bf -r 7c1383c08493 lisp/lib/parse/pkg.lisp --- a/lisp/lib/parse/pkg.lisp Tue May 21 17:13:34 2024 -0400 +++ b/lisp/lib/parse/pkg.lisp Tue May 21 22:20:29 2024 -0400 @@ -34,6 +34,39 @@ :yacc-runtime-error :yacc-parse-error :yacc-parse-error-terminal :yacc-parse-error-value :yacc-parse-error-expected-terminals)) +(defpackage parse/bytes + (:use :cl) + (:import-from :sb-cltl2 + :variable-information) + (:import-from :std :with-gensyms :once-only + :ensure-cons :ignore-some-conditions) + (:export :with-vector-parsing + :with-string-parsing + :with-octets-parsing + :eofp + :current + :peek + :eof-value + :pos + :advance + :advance* + :advance-to + :advance-to* + :skip + :skip* + :skip+ + :skip? + :skip-until + :skip-while + :bind + :match + :match-i + :match? + :match-case + :match-i-case + :match-failed)) + + (uiop:define-package :parse (:use :cl :std) - (:use-reexport :parse/lex :parse/yacc)) + (:use-reexport :parse/lex :parse/yacc :parse/bytes)) diff -r aac665e2f5bf -r 7c1383c08493 lisp/std/macs/collecting.lisp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lisp/std/macs/collecting.lisp Tue May 21 22:20:29 2024 -0400 @@ -0,0 +1,67 @@ +;;; std/macs/collecting.lisp --- Collecting Macros + +;; ported from CL-UTILITIES + +;;; Code: +(in-package :std/macs) + +;; This should only be called inside of COLLECTING macros, but we +;; define it here to provide an informative error message and to make +;; it easier for SLIME (et al.) to get documentation for the COLLECT +;; function when it's used in the COLLECTING macro. +(defun collect (thing) + "Collect THING in the context established by the COLLECTING macro" + (error "Can't collect ~S outside the context of the COLLECTING macro" + thing)) + +(defmacro collecting (&body body) + "Collect things into a list forwards. Within the body of this macro, +the COLLECT function will collect its argument into the list returned +by COLLECTING." + (with-gensyms (collector tail) + `(let (,collector ,tail) + (labels ((collect (thing) + (if ,collector + (setf (cdr ,tail) + (setf ,tail (list thing))) + (setf ,collector + (setf ,tail (list thing)))))) + ,@body) + ,collector))) + +(defmacro with-collectors ((&rest collectors) &body body) + "Collect some things into lists forwards. The names in COLLECTORS +are defined as local functions which each collect into a separate +list. Returns as many values as there are collectors, in the order +they were given." + (%with-collectors-check-collectors collectors) + (let ((gensyms-alist (%with-collectors-gensyms-alist collectors))) + `(let ,(loop for collector in collectors + for tail = (cdr (assoc collector gensyms-alist)) + nconc (list collector tail)) + (labels ,(loop for collector in collectors + for tail = (cdr (assoc collector gensyms-alist)) + collect `(,collector (thing) + (if ,collector + (setf (cdr ,tail) + (setf ,tail (list thing))) + (setf ,collector + (setf ,tail (list thing)))))) + ,@body) + (values ,@collectors)))) + +(defun %with-collectors-check-collectors (collectors) + "Check that all of the COLLECTORS are symbols. If not, raise an error." + (let ((bad-collector (find-if-not #'symbolp collectors))) + (when bad-collector + (error 'type-error + :datum bad-collector + :expected-type 'symbol)))) + +(defun %with-collectors-gensyms-alist (collectors) + "Return an alist mapping the symbols in COLLECTORS to gensyms" + (mapcar #'cons collectors + (mapcar (compose #'gensym + #'(lambda (x) + (format nil "~A-TAIL-" x))) + collectors))) diff -r aac665e2f5bf -r 7c1383c08493 lisp/std/pkg.lisp --- a/lisp/std/pkg.lisp Tue May 21 17:13:34 2024 -0400 +++ b/lisp/std/pkg.lisp Tue May 21 22:20:29 2024 -0400 @@ -183,9 +183,23 @@ :task-pool-oracle :task-pool-jobs :task-pool-stages :task-pool-workers :task-pool-results)) +(defpkg :std/fu + (:use :cl) + (:import-from :std/sym :make-gensym-list) + (:export + :ensure-function + :ensure-functionf + :disjoin + :conjoin + :compose + :multiple-value-compose + :curry + :rcurry)) + (defpkg :std/macs (:use :cl) (:import-from :std/sym :symb :mkstr :make-gensym-list :once-only :with-gensyms) + (:import-from :std/fu :compose) (:import-from :std/named-readtables :in-readtable :parse-body) (:import-from :std/list :flatten :defmacro!) (:export @@ -246,20 +260,9 @@ :pandoric-hotpatch :pandoric-recode :plambda - :pandoric-eval)) - -(defpkg :std/fu - (:use :cl) - (:import-from :std/macs :make-gensym-list) - (:export - :ensure-function - :ensure-functionf - :disjoin - :conjoin - :compose - :multiple-value-compose - :curry - :rcurry)) + :pandoric-eval + :with-collectors + :collecting)) (defpkg :std/readtable (:use :cl) diff -r aac665e2f5bf -r 7c1383c08493 lisp/std/std.asd --- a/lisp/std/std.asd Tue May 21 17:13:34 2024 -0400 +++ b/lisp/std/std.asd Tue May 21 22:20:29 2024 -0400 @@ -36,7 +36,8 @@ :components ((:file "ana") (:file "pan") - (:file "const"))) + (:file "const") + (:file "collecting"))) (:file "bit") (:file "fmt") (:file "path")