Mercurial > core / lisp/lib/parse/bytes.lisp
changeset 698: |
96958d3eb5b0 |
parent: |
5b6a2a8ba83e
|
author: |
Richard Westhaver <ellis@rwest.io> |
date: |
Fri, 04 Oct 2024 22:04:59 -0400 |
permissions: |
-rw-r--r-- |
description: |
fixes |
1 ;;; parse/bytes.lisp --- Procedural Parser 3 ;; swiped from Fukamachi's proc-parser.lisp. Will re-implement at a later 7 ;; Copyright 2015 Eitaro Fukamachi 9 ;; Redistribution and use in source and binary forms, with or without 10 ;; modification, are permitted provided that the following conditions are met: 12 ;; 1. Redistributions of source code must retain the above copyright notice, 13 ;; this list of conditions and the following disclaimer. 15 ;; 2. Redistributions in binary form must reproduce the above copyright 16 ;; notice, this list of conditions and the following disclaimer in the 17 ;; documentation and/or other materials provided with the distribution. 19 ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 ;; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 23 ;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 ;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 ;; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 ;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 ;; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 ;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 ;; POSSIBILITY OF SUCH DAMAGE. 32 (in-package :parse/bytes) 34 (define-condition match-failed (error) 37 (expected :initarg :expected 39 (:report (lambda (condition stream) 40 (with-slots (elem expected) condition 42 "Match failed~:[~;~:*: ~S~]~:[~;~:* (expected: ~{~S~^, ~})~]" 43 (ensure-char-elem elem) expected))))) 45 (defun convert-case-conditions (var chars) 48 `(or ,@(loop for ch in chars 50 collect `(char= ,var ,ch) 52 collect `(= ,var ,ch)))) 53 ((eq chars 'otherwise) 55 (t (if (characterp chars) 59 (defun typed-case-tagbodies (var &rest cases) 63 `((when ,(convert-case-conditions var (car (first cases))) 64 ,@(cdr (first cases))))) 65 ((and (= 2 (length cases)) 66 (eq (car (second cases)) 'otherwise)) 67 `((unless ,(convert-case-conditions var (car (first cases))) 68 ,@(cdr (second cases))) 69 ,@(cdr (first cases)))) 71 (let ((tags (make-array (length cases) :initial-contents (loop repeat (length cases) 74 `(,@(loop for (chars . body) in cases 76 collect `(when ,(convert-case-conditions var chars) 78 ,@(loop for case in cases 80 append `(,(aref tags i) 85 (defmacro vector-case (elem-var vec-and-options &body cases) 86 (destructuring-bind (vec &key case-insensitive) 87 (ensure-cons vec-and-options) 88 (with-gensyms (otherwise end-tag vector-case-block) 89 (labels ((case-candidates (el) 91 ((not case-insensitive) el) 98 #.(- (char-code #\a) (char-code #\A)))))) 103 #.(- (char-code #\a) (char-code #\A)))))) 105 ((typep el '(unsigned-byte 8)) 107 ((<= #.(char-code #\a) el #.(char-code #\z)) 109 ,(- el #.(- (char-code #\a) (char-code #\A))))) 110 ((<= #.(char-code #\A) el #.(char-code #\Z)) 112 ,(+ el #.(- (char-code #\a) (char-code #\A))))) 115 (build-case (i cases vec) 117 (let ((map (make-hash-table))) 120 (unless (vectorp (car case)) 121 (error "The first element of cases must be a constant vector")) 122 (unless (<= (length (car case)) i) 123 (push case (gethash (aref (car case) i) map)))) 126 (maphash (lambda (el cases) 127 (let ((next-case (build-case (1+ i) cases vec))) 131 `(,(case-candidates el) 133 ,(if (= (length (caar cases)) (1+ i)) 134 `(progn ,@(cdr (car cases)) 137 ,@(apply #'typed-case-tagbodies elem-var 140 `((otherwise (go ,otherwise)))))) 143 (push `(,(case-candidates el) 145 (return-from ,vector-case-block 146 (progn ,@(cdr (car cases))))) 150 (let ((otherwise-case nil)) 151 (when (eq (caar (last cases)) 'otherwise) 152 (setq otherwise-case (car (last cases)) 153 cases (butlast cases))) 154 `(block ,vector-case-block 156 ,@(apply #'typed-case-tagbodies elem-var 158 (build-case 0 cases vec) 159 `((otherwise (go ,otherwise))))) 162 ,@(when otherwise-case 164 (return-from ,vector-case-block 165 (progn ,@(cdr otherwise-case))))) 168 (defun variable-type (var &optional env) 169 (declare (ignorable env)) 171 ((constantp var) (type-of var)) 172 #+(or sbcl openmcl cmu allegro) 174 #+allegro (cadr (assoc 'type (nth-value 2 (variable-information var env)))) 175 #-allegro (cdr (assoc 'type (nth-value 2 (variable-information var env)))))) 180 (defun variable-type* (var &optional env) 181 (let ((type (variable-type var env))) 184 ((subtypep type 'string) 'string) 185 ((subtypep type 'octet-vector) 'octet-vector)))) 187 (defun check-skip-elems (elems) 188 (or (every (lambda (elem) 189 (or (characterp elem) 192 (eq (first elem) 'not) 193 (characterp (second elem))))) 195 (error "'skip' takes only constant characters, or a cons starts with 'not'."))) 197 (defun check-match-cases (cases) 198 (or (every (lambda (case) 200 (or (eq (car case) 'otherwise) 201 (stringp (car case))))) 203 (error "'match-case' takes only constant strings at the car position.~% ~S" cases))) 206 (defmacro bind ((symb &body bind-forms) &body body) 207 (declare (ignore symb bind-forms body))) 209 (defmacro subseq* (data start &optional end) 210 `(subseq ,data ,start ,end)) 211 (defmacro get-elem (form) form) 212 (defun ensure-char-elem (elem) 213 (if (characterp elem) 217 (defmacro tagbody-with-match-failed (elem &body body) 218 (with-gensyms (block) 221 (return-from ,block ,@body) 223 (error 'match-failed :elem ,elem))))) 225 (defmacro parsing-macrolet ((elem data p end) 226 (&rest macros) &body body) 227 `(macrolet ((advance (&optional (step 1)) 228 `(or (advance* ,step) 230 (advance* (&optional (step 1)) 231 `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0))) 235 `((if (<= ,',end ,',p) 242 `(or (advance-to* ,to) 246 `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0))) 247 (check-type ,to fixnum) 256 (check-skip-elems elems) 257 `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0))) 258 (if (skip-conditions ,',elem ,elems) 262 :expected ',elems)))) 264 (check-skip-elems elems) 265 `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0))) 268 (unless (skip-conditions ,',elem ,elems) 270 (or (advance*) (go :eof)))))) 276 (check-skip-elems elems) 277 `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0))) 278 (when (skip-conditions ,',elem ,elems) 279 (or (advance*) (go :eof))))) 281 `(loop until ,(if (symbolp fn) 282 `(,fn (get-elem ,',elem)) 283 `(funcall ,fn (get-elem ,',elem))) 284 do (or (advance*) (go :eof)))) 286 `(loop while ,(if (symbolp fn) 287 `(,fn (get-elem ,',elem)) 288 `(funcall ,fn (get-elem ,',elem))) 289 do (or (advance*) (go :eof)))) 290 (bind ((symb &body bind-forms) &body body) 291 (with-gensyms (start) 292 `(let ((,start ,',p)) 297 (let ((,symb (subseq* ,',data ,start ,',p))) 301 (%match (&rest vectors) 303 ,@(loop for vec in vectors 305 (match (&rest vectors) 308 (return-from match-block (%match ,@vectors)) 310 (error 'match-failed :elem ,',elem)))) 311 (match? (&rest vectors) 312 (with-gensyms (start start-elem) 314 (,start-elem ,',elem)) 318 (return-from match?-block t) 321 ,',elem ,start-elem)))))) 322 (match-i (&rest vectors) 324 ,@(loop for vec in vectors 327 #+sbcl (declare (sb-ext:muffle-conditions sb-ext:code-deletion-note)) 329 (declare (optimize (speed 3) (safety 0) (debug 0))) 331 (current () (get-elem ,elem)) 332 (peek (&key eof-value) 333 (declare (optimize (speed 3) (safety 0) (debug 0))) 334 (let ((len (length ,data))) 335 (declare (type fixnum len)) 336 (if (or (eofp) (>= ,p (- ,end 1)) (= ,p (- len 1))) 338 (aref ,data (+ 1 ,p))))) 339 (pos () (the fixnum ,p))) 340 (declare (inline eofp current pos)) 343 (defmacro with-string-parsing ((data &key start end) &body body) 344 (with-gensyms (g-end elem p body-block) 351 `(or ,end (length ,data)) 353 (declare (type simple-string ,data) 354 (type fixnum ,p ,g-end) 355 (type character ,elem)) 356 (parsing-macrolet (,elem ,data ,p ,g-end) 357 ((skip-conditions (elem-var elems) 358 `(or ,@(loop for el in elems 361 collect `(not (char= ,(cadr el) ,elem-var)) 363 collect `(char= ,el ,elem-var)))) 364 (%match-case (&rest cases) 365 (check-match-cases cases) 367 (vector-case ,',elem (,',data) 368 ,@(if (find 'otherwise cases :key #'car :test #'eq) 371 '((otherwise (go :match-failed)))))) 372 (when (eofp) (go :eof)))) 373 (%match-i-case (&rest cases) 374 (check-match-cases cases) 376 (vector-case ,',elem (,',data :case-insensitive t) 377 ,@(if (find 'otherwise cases :key #'car :test #'eq) 380 '((otherwise (go :match-failed)))))) 381 (when (eofp) (go :eof)))) 384 `(tagbody-with-match-failed ,',elem (%match-case ,@cases))) 387 `(tagbody-with-match-failed ,',elem (%match-i-case ,@cases)))) 392 (setq ,elem (aref ,data ,p)) 393 (return-from ,body-block (progn ,@body)) 396 (defmacro with-octets-parsing ((data &key start end) &body body) 397 (with-gensyms (g-end elem p body-block) 404 `(or ,end (length ,data)) 406 (declare (type octet-vector ,data) 407 (type fixnum ,p ,g-end) 408 (type (unsigned-byte 8) ,elem)) 409 (parsing-macrolet (,elem ,data ,p ,g-end) 410 ((skip-conditions (elem-var elems) 411 `(or ,@(loop for el in elems 414 collect `(not (= ,(char-code (cadr el)) ,elem-var)) 416 collect `(= ,(char-code el) ,elem-var)))) 417 (%match-case (&rest cases) 418 (check-match-cases cases) 420 (loop for case in cases 421 if (stringp (car case)) 422 collect (cons (babel:string-to-octets (car case)) 427 (vector-case ,',elem (,',data) 428 ,@(if (find 'otherwise cases :key #'car :test #'eq) 431 '((otherwise (go :match-failed)))))) 432 (when (eofp) (go :eof)))) 433 (%match-i-case (&rest cases) 434 (check-match-cases cases) 436 (loop for case in cases 437 if (stringp (car case)) 438 collect (cons (babel:string-to-octets (car case)) 443 (vector-case ,',elem (,',data :case-insensitive t) 444 ,@(if (find 'otherwise cases :key #'car :test #'eq) 447 '((otherwise (go :match-failed)))))) 448 (when (eofp) (go :eof)))) 451 `(tagbody-with-match-failed ,',elem (%match-case ,@cases))) 454 `(tagbody-with-match-failed ,',elem (%match-i-case ,@cases)))) 459 (setq ,elem (aref ,data ,p)) 460 (return-from ,body-block (progn ,@body)) 462 (error 'match-failed :elem ,elem) 465 (defmacro with-vector-parsing ((data &key (start 0) end) &body body &environment env) 466 (let ((data-type (variable-type* data env))) 468 (string `(with-string-parsing (,data :start ,start :end ,end) ,@body)) 469 (octet-vector `(macrolet ((get-elem (form) `(code-char ,form)) 470 (subseq* (data start &optional end) 471 `(babel:octets-to-string ,data :start ,start :end ,end))) 472 (with-octets-parsing (,data :start ,start :end ,end) ,@body))) 473 (otherwise (once-only (data) 475 (string (with-string-parsing (,data :start ,start :end ,end) ,@body)) 476 (octet-vector (macrolet ((get-elem (form) `(code-char ,form)) 477 (subseq* (data start &optional end) 478 `(babel:octets-to-string ,data :start ,start :end ,end))) 479 (with-octets-parsing (,data :start ,start :end ,end) ,@body)))))))))