changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/parse/bytes.lisp

changeset 360: 5b6a2a8ba83e
parent: 7c1383c08493
author: Richard Westhaver <ellis@rwest.io>
date: Wed, 22 May 2024 22:46:17 -0400
permissions: -rw-r--r--
description: fix net/req bugs, fetch works now
1 ;;; parse/bytes.lisp --- Procedural Parser
2 
3 ;; swiped from Fukamachi's proc-parser.lisp. Will re-implement at a later
4 ;; date.
5 
6 ;;; License:
7 ;; Copyright 2015 Eitaro Fukamachi
8 
9 ;; Redistribution and use in source and binary forms, with or without
10 ;; modification, are permitted provided that the following conditions are met:
11 
12 ;; 1. Redistributions of source code must retain the above copyright notice,
13 ;; this list of conditions and the following disclaimer.
14 
15 ;; 2. Redistributions in binary form must reproduce the above copyright
16 ;; notice, this list of conditions and the following disclaimer in the
17 ;; documentation and/or other materials provided with the distribution.
18 
19 ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 ;; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 ;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 ;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 ;; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 ;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 ;; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 ;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 ;; POSSIBILITY OF SUCH DAMAGE.
30 
31 ;;; Code:
32 (in-package :parse/bytes)
33 
34 (define-condition match-failed (error)
35  ((elem :initarg :elem
36  :initform nil)
37  (expected :initarg :expected
38  :initform nil))
39  (:report (lambda (condition stream)
40  (with-slots (elem expected) condition
41  (format stream
42  "Match failed~:[~;~:*: ~S~]~:[~;~:* (expected: ~{~S~^, ~})~]"
43  (ensure-char-elem elem) expected)))))
44 
45 (defun convert-case-conditions (var chars)
46  (cond
47  ((consp chars)
48  `(or ,@(loop for ch in chars
49  if (characterp ch)
50  collect `(char= ,var ,ch)
51  else
52  collect `(= ,var ,ch))))
53  ((eq chars 'otherwise)
54  t)
55  (t (if (characterp chars)
56  `(char= ,var ,chars)
57  `(= ,var ,chars)))))
58 
59 (defun typed-case-tagbodies (var &rest cases)
60  (cond
61  ((null cases) nil)
62  ((= 1 (length cases))
63  `((when ,(convert-case-conditions var (car (first cases)))
64  ,@(cdr (first cases)))))
65  ((and (= 2 (length cases))
66  (eq (car (second cases)) 'otherwise))
67  `((unless ,(convert-case-conditions var (car (first cases)))
68  ,@(cdr (second cases)))
69  ,@(cdr (first cases))))
70  (t
71  (let ((tags (make-array (length cases) :initial-contents (loop repeat (length cases)
72  collect (gensym))))
73  (end (gensym "END")))
74  `(,@(loop for (chars . body) in cases
75  for i from 0
76  collect `(when ,(convert-case-conditions var chars)
77  (go ,(aref tags i))))
78  ,@(loop for case in cases
79  for i from 0
80  append `(,(aref tags i)
81  ,@(cdr case)
82  (go ,end)))
83  ,end)))))
84 
85 (defmacro vector-case (elem-var vec-and-options &body cases)
86  (destructuring-bind (vec &key case-insensitive)
87  (ensure-cons vec-and-options)
88  (with-gensyms (otherwise end-tag vector-case-block)
89  (labels ((case-candidates (el)
90  (cond
91  ((not case-insensitive) el)
92  ((characterp el)
93  (cond
94  ((char<= #\a el #\z)
95  `(,el
96  ,(code-char
97  (- (char-code el)
98  #.(- (char-code #\a) (char-code #\A))))))
99  ((char<= #\A el #\Z)
100  `(,el
101  ,(code-char
102  (+ (char-code el)
103  #.(- (char-code #\a) (char-code #\A))))))
104  (t el)))
105  ((typep el '(unsigned-byte 8))
106  (cond
107  ((<= #.(char-code #\a) el #.(char-code #\z))
108  `(,el
109  ,(- el #.(- (char-code #\a) (char-code #\A)))))
110  ((<= #.(char-code #\A) el #.(char-code #\Z))
111  `(,el
112  ,(+ el #.(- (char-code #\a) (char-code #\A)))))
113  (t el)))
114  (t el)))
115  (build-case (i cases vec)
116  (when cases
117  (let ((map (make-hash-table)))
118  (map nil
119  (lambda (case)
120  (unless (vectorp (car case))
121  (error "The first element of cases must be a constant vector"))
122  (unless (<= (length (car case)) i)
123  (push case (gethash (aref (car case) i) map))))
124  cases)
125  (let (res-cases)
126  (maphash (lambda (el cases)
127  (let ((next-case (build-case (1+ i) cases vec)))
128  (cond
129  (next-case
130  (push
131  `(,(case-candidates el)
132  (unless (advance*)
133  ,(if (= (length (caar cases)) (1+ i))
134  `(progn ,@(cdr (car cases))
135  (go ,end-tag))
136  `(go :eof)))
137  ,@(apply #'typed-case-tagbodies elem-var
138  (append
139  next-case
140  `((otherwise (go ,otherwise))))))
141  res-cases))
142  (t
143  (push `(,(case-candidates el)
144  (advance*)
145  (return-from ,vector-case-block
146  (progn ,@(cdr (car cases)))))
147  res-cases)))))
148  map)
149  res-cases)))))
150  (let ((otherwise-case nil))
151  (when (eq (caar (last cases)) 'otherwise)
152  (setq otherwise-case (car (last cases))
153  cases (butlast cases)))
154  `(block ,vector-case-block
155  (tagbody
156  ,@(apply #'typed-case-tagbodies elem-var
157  (append
158  (build-case 0 cases vec)
159  `((otherwise (go ,otherwise)))))
160  (go ,end-tag)
161  ,otherwise
162  ,@(when otherwise-case
163  `(unless (eofp)
164  (return-from ,vector-case-block
165  (progn ,@(cdr otherwise-case)))))
166  ,end-tag)))))))
167 
168 (defun variable-type (var &optional env)
169  (declare (ignorable env))
170  (cond
171  ((constantp var) (type-of var))
172  #+(or sbcl openmcl cmu allegro)
173  ((and (symbolp var)
174  #+allegro (cadr (assoc 'type (nth-value 2 (variable-information var env))))
175  #-allegro (cdr (assoc 'type (nth-value 2 (variable-information var env))))))
176  ((and (listp var)
177  (eq (car var) 'the)
178  (cadr var)))))
179 
180 (defun variable-type* (var &optional env)
181  (let ((type (variable-type var env)))
182  (cond
183  ((null type) nil)
184  ((subtypep type 'string) 'string)
185  ((subtypep type 'octet-vector) 'octet-vector))))
186 
187 (defun check-skip-elems (elems)
188  (or (every (lambda (elem)
189  (or (characterp elem)
190  (and (consp elem)
191  (null (cddr elem))
192  (eq (first elem) 'not)
193  (characterp (second elem)))))
194  elems)
195  (error "'skip' takes only constant characters, or a cons starts with 'not'.")))
196 
197 (defun check-match-cases (cases)
198  (or (every (lambda (case)
199  (and (consp case)
200  (or (eq (car case) 'otherwise)
201  (stringp (car case)))))
202  cases)
203  (error "'match-case' takes only constant strings at the car position.~% ~S" cases)))
204 
205 
206 (defmacro bind ((symb &body bind-forms) &body body)
207  (declare (ignore symb bind-forms body)))
208 
209 (defmacro subseq* (data start &optional end)
210  `(subseq ,data ,start ,end))
211 (defmacro get-elem (form) form)
212 (defun ensure-char-elem (elem)
213  (if (characterp elem)
214  elem
215  (code-char elem)))
216 
217 (defmacro tagbody-with-match-failed (elem &body body)
218  (with-gensyms (block)
219  `(block ,block
220  (tagbody
221  (return-from ,block ,@body)
222  :match-failed
223  (error 'match-failed :elem ,elem)))))
224 
225 (defmacro parsing-macrolet ((elem data p end)
226  (&rest macros) &body body)
227  `(macrolet ((advance (&optional (step 1))
228  `(or (advance* ,step)
229  (go :eof)))
230  (advance* (&optional (step 1))
231  `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0)))
232  (incf ,',p ,step)
233  ,@(if (eql step 0)
234  ()
235  `((if (<= ,',end ,',p)
236  nil
237  (progn
238  (setq ,',elem
239  (aref ,',data ,',p))
240  t))))))
241  (advance-to (to)
242  `(or (advance-to* ,to)
243  (go :eof)))
244  (advance-to* (to)
245  (once-only (to)
246  `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0)))
247  (check-type ,to fixnum)
248  (setq ,',p ,to)
249  (if (<= ,',end ,',p)
250  nil
251  (progn
252  (setq ,',elem
253  (aref ,',data ,',p))
254  t)))))
255  (skip (&rest elems)
256  (check-skip-elems elems)
257  `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0)))
258  (if (skip-conditions ,',elem ,elems)
259  (advance)
260  (error 'match-failed
261  :elem ,',elem
262  :expected ',elems))))
263  (skip* (&rest elems)
264  (check-skip-elems elems)
265  `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0)))
266  (unless (eofp)
267  (loop
268  (unless (skip-conditions ,',elem ,elems)
269  (return))
270  (or (advance*) (go :eof))))))
271  (skip+ (&rest elems)
272  `(progn
273  (skip ,@elems)
274  (skip* ,@elems)))
275  (skip? (&rest elems)
276  (check-skip-elems elems)
277  `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0)))
278  (when (skip-conditions ,',elem ,elems)
279  (or (advance*) (go :eof)))))
280  (skip-until (fn)
281  `(loop until ,(if (symbolp fn)
282  `(,fn (get-elem ,',elem))
283  `(funcall ,fn (get-elem ,',elem)))
284  do (or (advance*) (go :eof))))
285  (skip-while (fn)
286  `(loop while ,(if (symbolp fn)
287  `(,fn (get-elem ,',elem))
288  `(funcall ,fn (get-elem ,',elem)))
289  do (or (advance*) (go :eof))))
290  (bind ((symb &body bind-forms) &body body)
291  (with-gensyms (start)
292  `(let ((,start ,',p))
293  (tagbody
294  ,@bind-forms
295  :eof)
296  (prog1
297  (let ((,symb (subseq* ,',data ,start ,',p)))
298  ,@body)
299  (when (eofp)
300  (go :eof))))))
301  (%match (&rest vectors)
302  `(%match-case
303  ,@(loop for vec in vectors
304  collect `(,vec))))
305  (match (&rest vectors)
306  `(block match-block
307  (tagbody
308  (return-from match-block (%match ,@vectors))
309  :match-failed
310  (error 'match-failed :elem ,',elem))))
311  (match? (&rest vectors)
312  (with-gensyms (start start-elem)
313  `(let ((,start ,',p)
314  (,start-elem ,',elem))
315  (block match?-block
316  (tagbody
317  (%match ,@vectors)
318  (return-from match?-block t)
319  :match-failed
320  (setq ,',p ,start
321  ,',elem ,start-elem))))))
322  (match-i (&rest vectors)
323  `(match-i-case
324  ,@(loop for vec in vectors
325  collect `(,vec))))
326  ,@macros)
327  #+sbcl (declare (sb-ext:muffle-conditions sb-ext:code-deletion-note))
328  (labels ((eofp ()
329  (declare (optimize (speed 3) (safety 0) (debug 0)))
330  (<= ,end ,p))
331  (current () (get-elem ,elem))
332  (peek (&key eof-value)
333  (declare (optimize (speed 3) (safety 0) (debug 0)))
334  (let ((len (length ,data)))
335  (declare (type fixnum len))
336  (if (or (eofp) (>= ,p (- ,end 1)) (= ,p (- len 1)))
337  eof-value
338  (aref ,data (+ 1 ,p)))))
339  (pos () (the fixnum ,p)))
340  (declare (inline eofp current pos))
341  ,@body)))
342 
343 (defmacro with-string-parsing ((data &key start end) &body body)
344  (with-gensyms (g-end elem p body-block)
345  (once-only (data)
346  `(let ((,elem #\Nul)
347  (,p ,(if start
348  `(or ,start 0)
349  0))
350  (,g-end ,(if end
351  `(or ,end (length ,data))
352  `(length ,data))))
353  (declare (type simple-string ,data)
354  (type fixnum ,p ,g-end)
355  (type character ,elem))
356  (parsing-macrolet (,elem ,data ,p ,g-end)
357  ((skip-conditions (elem-var elems)
358  `(or ,@(loop for el in elems
359  if (and (consp el)
360  (eq (car el) 'not))
361  collect `(not (char= ,(cadr el) ,elem-var))
362  else
363  collect `(char= ,el ,elem-var))))
364  (%match-case (&rest cases)
365  (check-match-cases cases)
366  `(prog1
367  (vector-case ,',elem (,',data)
368  ,@(if (find 'otherwise cases :key #'car :test #'eq)
369  cases
370  (append cases
371  '((otherwise (go :match-failed))))))
372  (when (eofp) (go :eof))))
373  (%match-i-case (&rest cases)
374  (check-match-cases cases)
375  `(prog1
376  (vector-case ,',elem (,',data :case-insensitive t)
377  ,@(if (find 'otherwise cases :key #'car :test #'eq)
378  cases
379  (append cases
380  '((otherwise (go :match-failed))))))
381  (when (eofp) (go :eof))))
382  (match-case
383  (&rest cases)
384  `(tagbody-with-match-failed ,',elem (%match-case ,@cases)))
385  (match-i-case
386  (&rest cases)
387  `(tagbody-with-match-failed ,',elem (%match-i-case ,@cases))))
388  (block ,body-block
389  (tagbody
390  (when (eofp)
391  (go :eof))
392  (setq ,elem (aref ,data ,p))
393  (return-from ,body-block (progn ,@body))
394  :eof)))))))
395 
396 (defmacro with-octets-parsing ((data &key start end) &body body)
397  (with-gensyms (g-end elem p body-block)
398  (once-only (data)
399  `(let ((,elem 0)
400  (,p ,(if start
401  `(or ,start 0)
402  0))
403  (,g-end ,(if end
404  `(or ,end (length ,data))
405  `(length ,data))))
406  (declare (type octet-vector ,data)
407  (type fixnum ,p ,g-end)
408  (type (unsigned-byte 8) ,elem))
409  (parsing-macrolet (,elem ,data ,p ,g-end)
410  ((skip-conditions (elem-var elems)
411  `(or ,@(loop for el in elems
412  if (and (consp el)
413  (eq (car el) 'not))
414  collect `(not (= ,(char-code (cadr el)) ,elem-var))
415  else
416  collect `(= ,(char-code el) ,elem-var))))
417  (%match-case (&rest cases)
418  (check-match-cases cases)
419  (setf cases
420  (loop for case in cases
421  if (stringp (car case))
422  collect (cons (babel:string-to-octets (car case))
423  (cdr case))
424  else
425  collect case))
426  `(prog1
427  (vector-case ,',elem (,',data)
428  ,@(if (find 'otherwise cases :key #'car :test #'eq)
429  cases
430  (append cases
431  '((otherwise (go :match-failed))))))
432  (when (eofp) (go :eof))))
433  (%match-i-case (&rest cases)
434  (check-match-cases cases)
435  (setf cases
436  (loop for case in cases
437  if (stringp (car case))
438  collect (cons (babel:string-to-octets (car case))
439  (cdr case))
440  else
441  collect case))
442  `(prog1
443  (vector-case ,',elem (,',data :case-insensitive t)
444  ,@(if (find 'otherwise cases :key #'car :test #'eq)
445  cases
446  (append cases
447  '((otherwise (go :match-failed))))))
448  (when (eofp) (go :eof))))
449  (match-case
450  (&rest cases)
451  `(tagbody-with-match-failed ,',elem (%match-case ,@cases)))
452  (match-i-case
453  (&rest cases)
454  `(tagbody-with-match-failed ,',elem (%match-i-case ,@cases))))
455  (block ,body-block
456  (tagbody
457  (when (eofp)
458  (go :eof))
459  (setq ,elem (aref ,data ,p))
460  (return-from ,body-block (progn ,@body))
461  :match-failed
462  (error 'match-failed :elem ,elem)
463  :eof)))))))
464 
465 (defmacro with-vector-parsing ((data &key (start 0) end) &body body &environment env)
466  (let ((data-type (variable-type* data env)))
467  (case data-type
468  (string `(with-string-parsing (,data :start ,start :end ,end) ,@body))
469  (octet-vector `(macrolet ((get-elem (form) `(code-char ,form))
470  (subseq* (data start &optional end)
471  `(babel:octets-to-string ,data :start ,start :end ,end)))
472  (with-octets-parsing (,data :start ,start :end ,end) ,@body)))
473  (otherwise (once-only (data)
474  `(etypecase ,data
475  (string (with-string-parsing (,data :start ,start :end ,end) ,@body))
476  (octet-vector (macrolet ((get-elem (form) `(code-char ,form))
477  (subseq* (data start &optional end)
478  `(babel:octets-to-string ,data :start ,start :end ,end)))
479  (with-octets-parsing (,data :start ,start :end ,end) ,@body)))))))))