Mercurial > core / lisp/lib/parse/bytes.lisp
changeset 357: |
7c1383c08493 |
child: |
5b6a2a8ba83e |
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Tue, 21 May 2024 22:20:29 -0400 |
permissions: |
-rw-r--r-- |
description: |
port xsubseq, proc-parse. work on http and clap |
1 ;;; parse/bytes.lisp --- Procedural Parser 3 ;; swiped from Fukamachi's proc-parser.lisp. Will re-implement at a later 7 ;; Copyright 2015 Eitaro Fukamachi 9 ;; Redistribution and use in source and binary forms, with or without 10 ;; modification, are permitted provided that the following conditions are met: 12 ;; 1. Redistributions of source code must retain the above copyright notice, 13 ;; this list of conditions and the following disclaimer. 15 ;; 2. Redistributions in binary form must reproduce the above copyright 16 ;; notice, this list of conditions and the following disclaimer in the 17 ;; documentation and/or other materials provided with the distribution. 19 ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 ;; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 23 ;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 ;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 ;; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 ;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 ;; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 ;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 ;; POSSIBILITY OF SUCH DAMAGE. 32 (in-package :parse/bytes) 34 (define-condition match-failed (error) 37 (expected :initarg :expected 39 (:report (lambda (condition stream) 40 (with-slots (elem expected) condition 42 "Match failed~:[~;~:*: ~S~]~:[~;~:* (expected: ~{~S~^, ~})~]" 43 (ensure-char-elem elem) expected))))) 45 (defun convert-case-conditions (var chars) 48 `(or ,@(loop for ch in chars 50 collect `(char= ,var ,ch) 52 collect `(= ,var ,ch)))) 53 ((eq chars 'otherwise) 55 (t (if (characterp chars) 59 (defun typed-case-tagbodies (var &rest cases) 63 `((when ,(convert-case-conditions var (car (first cases))) 64 ,@(cdr (first cases))))) 65 ((and (= 2 (length cases)) 66 (eq (car (second cases)) 'otherwise)) 67 `((unless ,(convert-case-conditions var (car (first cases))) 68 ,@(cdr (second cases))) 69 ,@(cdr (first cases)))) 71 (let ((tags (make-array (length cases) :initial-contents (loop repeat (length cases) 74 `(,@(loop for (chars . body) in cases 76 collect `(when ,(convert-case-conditions var chars) 78 ,@(loop for case in cases 80 append `(,(aref tags i) 85 (defmacro vector-case (elem-var vec-and-options &body cases) 86 (destructuring-bind (vec &key case-insensitive) 87 (ensure-cons vec-and-options) 88 (with-gensyms (otherwise end-tag vector-case-block) 89 (labels ((case-candidates (el) 91 ((not case-insensitive) el) 98 #.(- (char-code #\a) (char-code #\A)))))) 103 #.(- (char-code #\a) (char-code #\A)))))) 105 ((typep el '(unsigned-byte 8)) 107 ((<= #.(char-code #\a) el #.(char-code #\z)) 109 ,(- el #.(- (char-code #\a) (char-code #\A))))) 110 ((<= #.(char-code #\A) el #.(char-code #\Z)) 112 ,(+ el #.(- (char-code #\a) (char-code #\A))))) 115 (build-case (i cases vec) 117 (let ((map (make-hash-table))) 120 (unless (vectorp (car case)) 121 (error "The first element of cases must be a constant vector")) 122 (unless (<= (length (car case)) i) 123 (push case (gethash (aref (car case) i) map)))) 126 (maphash (lambda (el cases) 127 (let ((next-case (build-case (1+ i) cases vec))) 131 `(,(case-candidates el) 133 ,(if (= (length (caar cases)) (1+ i)) 134 `(progn ,@(cdr (car cases)) 137 ,@(apply #'typed-case-tagbodies elem-var 140 `((otherwise (go ,otherwise)))))) 143 (push `(,(case-candidates el) 145 (return-from ,vector-case-block 146 (progn ,@(cdr (car cases))))) 150 (let ((otherwise-case nil)) 151 (when (eq (caar (last cases)) 'otherwise) 152 (setq otherwise-case (car (last cases)) 153 cases (butlast cases))) 154 `(block ,vector-case-block 156 ,@(apply #'typed-case-tagbodies elem-var 158 (build-case 0 cases vec) 159 `((otherwise (go ,otherwise))))) 162 ,@(when otherwise-case 164 (return-from ,vector-case-block 165 (progn ,@(cdr otherwise-case))))) 168 (defun variable-type (var &optional env) 169 (declare (ignorable env)) 171 ((constantp var) (type-of var)) 172 #+(or sbcl openmcl cmu allegro) 174 #+allegro (cadr (assoc 'type (nth-value 2 (variable-information var env)))) 175 #-allegro (cdr (assoc 'type (nth-value 2 (variable-information var env)))))) 180 (deftype octets (&optional (len '*)) 181 `(simple-array (unsigned-byte 8) (,len))) 183 (defun variable-type* (var &optional env) 184 (let ((type (variable-type var env))) 187 ((subtypep type 'string) 'string) 188 ((subtypep type 'octets) 'octets)))) 190 (defun check-skip-elems (elems) 191 (or (every (lambda (elem) 192 (or (characterp elem) 195 (eq (first elem) 'not) 196 (characterp (second elem))))) 198 (error "'skip' takes only constant characters, or a cons starts with 'not'."))) 200 (defun check-match-cases (cases) 201 (or (every (lambda (case) 203 (or (eq (car case) 'otherwise) 204 (stringp (car case))))) 206 (error "'match-case' takes only constant strings at the car position.~% ~S" cases))) 209 (defmacro bind ((symb &body bind-forms) &body body) 210 (declare (ignore symb bind-forms body))) 212 (defmacro subseq* (data start &optional end) 213 `(subseq ,data ,start ,end)) 214 (defmacro get-elem (form) form) 215 (defun ensure-char-elem (elem) 216 (if (characterp elem) 220 (defmacro tagbody-with-match-failed (elem &body body) 221 (with-gensyms (block) 224 (return-from ,block ,@body) 226 (error 'match-failed :elem ,elem))))) 228 (defmacro parsing-macrolet ((elem data p end) 229 (&rest macros) &body body) 230 `(macrolet ((advance (&optional (step 1)) 231 `(or (advance* ,step) 233 (advance* (&optional (step 1)) 234 `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0))) 238 `((if (<= ,',end ,',p) 245 `(or (advance-to* ,to) 249 `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0))) 250 (check-type ,to fixnum) 259 (check-skip-elems elems) 260 `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0))) 261 (if (skip-conditions ,',elem ,elems) 265 :expected ',elems)))) 267 (check-skip-elems elems) 268 `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0))) 271 (unless (skip-conditions ,',elem ,elems) 273 (or (advance*) (go :eof)))))) 279 (check-skip-elems elems) 280 `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0))) 281 (when (skip-conditions ,',elem ,elems) 282 (or (advance*) (go :eof))))) 284 `(loop until ,(if (symbolp fn) 285 `(,fn (get-elem ,',elem)) 286 `(funcall ,fn (get-elem ,',elem))) 287 do (or (advance*) (go :eof)))) 289 `(loop while ,(if (symbolp fn) 290 `(,fn (get-elem ,',elem)) 291 `(funcall ,fn (get-elem ,',elem))) 292 do (or (advance*) (go :eof)))) 293 (bind ((symb &body bind-forms) &body body) 294 (with-gensyms (start) 295 `(let ((,start ,',p)) 300 (let ((,symb (subseq* ,',data ,start ,',p))) 304 (%match (&rest vectors) 306 ,@(loop for vec in vectors 308 (match (&rest vectors) 311 (return-from match-block (%match ,@vectors)) 313 (error 'match-failed :elem ,',elem)))) 314 (match? (&rest vectors) 315 (with-gensyms (start start-elem) 317 (,start-elem ,',elem)) 321 (return-from match?-block t) 324 ,',elem ,start-elem)))))) 325 (match-i (&rest vectors) 327 ,@(loop for vec in vectors 330 #+sbcl (declare (sb-ext:muffle-conditions sb-ext:code-deletion-note)) 332 (declare (optimize (speed 3) (safety 0) (debug 0))) 334 (current () (get-elem ,elem)) 335 (peek (&key eof-value) 336 (declare (optimize (speed 3) (safety 0) (debug 0))) 337 (let ((len (length ,data))) 338 (declare (type fixnum len)) 339 (if (or (eofp) (>= ,p (- ,end 1)) (= ,p (- len 1))) 341 (aref ,data (+ 1 ,p))))) 342 (pos () (the fixnum ,p))) 343 (declare (inline eofp current pos)) 346 (defmacro with-string-parsing ((data &key start end) &body body) 347 (with-gensyms (g-end elem p body-block) 354 `(or ,end (length ,data)) 356 (declare (type simple-string ,data) 357 (type fixnum ,p ,g-end) 358 (type character ,elem)) 359 (parsing-macrolet (,elem ,data ,p ,g-end) 360 ((skip-conditions (elem-var elems) 361 `(or ,@(loop for el in elems 364 collect `(not (char= ,(cadr el) ,elem-var)) 366 collect `(char= ,el ,elem-var)))) 367 (%match-case (&rest cases) 368 (check-match-cases cases) 370 (vector-case ,',elem (,',data) 371 ,@(if (find 'otherwise cases :key #'car :test #'eq) 374 '((otherwise (go :match-failed)))))) 375 (when (eofp) (go :eof)))) 376 (%match-i-case (&rest cases) 377 (check-match-cases cases) 379 (vector-case ,',elem (,',data :case-insensitive t) 380 ,@(if (find 'otherwise cases :key #'car :test #'eq) 383 '((otherwise (go :match-failed)))))) 384 (when (eofp) (go :eof)))) 387 `(tagbody-with-match-failed ,',elem (%match-case ,@cases))) 390 `(tagbody-with-match-failed ,',elem (%match-i-case ,@cases)))) 395 (setq ,elem (aref ,data ,p)) 396 (return-from ,body-block (progn ,@body)) 399 (defmacro with-octets-parsing ((data &key start end) &body body) 400 (with-gensyms (g-end elem p body-block) 407 `(or ,end (length ,data)) 409 (declare (type octets ,data) 410 (type fixnum ,p ,g-end) 411 (type (unsigned-byte 8) ,elem)) 412 (parsing-macrolet (,elem ,data ,p ,g-end) 413 ((skip-conditions (elem-var elems) 414 `(or ,@(loop for el in elems 417 collect `(not (= ,(char-code (cadr el)) ,elem-var)) 419 collect `(= ,(char-code el) ,elem-var)))) 420 (%match-case (&rest cases) 421 (check-match-cases cases) 423 (loop for case in cases 424 if (stringp (car case)) 425 collect (cons (babel:string-to-octets (car case)) 430 (vector-case ,',elem (,',data) 431 ,@(if (find 'otherwise cases :key #'car :test #'eq) 434 '((otherwise (go :match-failed)))))) 435 (when (eofp) (go :eof)))) 436 (%match-i-case (&rest cases) 437 (check-match-cases cases) 439 (loop for case in cases 440 if (stringp (car case)) 441 collect (cons (babel:string-to-octets (car case)) 446 (vector-case ,',elem (,',data :case-insensitive t) 447 ,@(if (find 'otherwise cases :key #'car :test #'eq) 450 '((otherwise (go :match-failed)))))) 451 (when (eofp) (go :eof)))) 454 `(tagbody-with-match-failed ,',elem (%match-case ,@cases))) 457 `(tagbody-with-match-failed ,',elem (%match-i-case ,@cases)))) 462 (setq ,elem (aref ,data ,p)) 463 (return-from ,body-block (progn ,@body)) 465 (error 'match-failed :elem ,elem) 468 (defmacro with-vector-parsing ((data &key (start 0) end) &body body &environment env) 469 (let ((data-type (variable-type* data env))) 471 (string `(with-string-parsing (,data :start ,start :end ,end) ,@body)) 472 (octets `(macrolet ((get-elem (form) `(code-char ,form)) 473 (subseq* (data start &optional end) 474 `(babel:octets-to-string ,data :start ,start :end ,end))) 475 (with-octets-parsing (,data :start ,start :end ,end) ,@body))) 476 (otherwise (once-only (data) 478 (string (with-string-parsing (,data :start ,start :end ,end) ,@body)) 479 (octets (macrolet ((get-elem (form) `(code-char ,form)) 480 (subseq* (data start &optional end) 481 `(babel:octets-to-string ,data :start ,start :end ,end))) 482 (with-octets-parsing (,data :start ,start :end ,end) ,@body)))))))))