changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/parse/bytes.lisp

changeset 357: 7c1383c08493
child: 5b6a2a8ba83e
author: Richard Westhaver <ellis@rwest.io>
date: Tue, 21 May 2024 22:20:29 -0400
permissions: -rw-r--r--
description: port xsubseq, proc-parse. work on http and clap
1 ;;; parse/bytes.lisp --- Procedural Parser
2 
3 ;; swiped from Fukamachi's proc-parser.lisp. Will re-implement at a later
4 ;; date.
5 
6 ;;; License:
7 ;; Copyright 2015 Eitaro Fukamachi
8 
9 ;; Redistribution and use in source and binary forms, with or without
10 ;; modification, are permitted provided that the following conditions are met:
11 
12 ;; 1. Redistributions of source code must retain the above copyright notice,
13 ;; this list of conditions and the following disclaimer.
14 
15 ;; 2. Redistributions in binary form must reproduce the above copyright
16 ;; notice, this list of conditions and the following disclaimer in the
17 ;; documentation and/or other materials provided with the distribution.
18 
19 ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 ;; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 ;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 ;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 ;; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 ;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 ;; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 ;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 ;; POSSIBILITY OF SUCH DAMAGE.
30 
31 ;;; Code:
32 (in-package :parse/bytes)
33 
34 (define-condition match-failed (error)
35  ((elem :initarg :elem
36  :initform nil)
37  (expected :initarg :expected
38  :initform nil))
39  (:report (lambda (condition stream)
40  (with-slots (elem expected) condition
41  (format stream
42  "Match failed~:[~;~:*: ~S~]~:[~;~:* (expected: ~{~S~^, ~})~]"
43  (ensure-char-elem elem) expected)))))
44 
45 (defun convert-case-conditions (var chars)
46  (cond
47  ((consp chars)
48  `(or ,@(loop for ch in chars
49  if (characterp ch)
50  collect `(char= ,var ,ch)
51  else
52  collect `(= ,var ,ch))))
53  ((eq chars 'otherwise)
54  t)
55  (t (if (characterp chars)
56  `(char= ,var ,chars)
57  `(= ,var ,chars)))))
58 
59 (defun typed-case-tagbodies (var &rest cases)
60  (cond
61  ((null cases) nil)
62  ((= 1 (length cases))
63  `((when ,(convert-case-conditions var (car (first cases)))
64  ,@(cdr (first cases)))))
65  ((and (= 2 (length cases))
66  (eq (car (second cases)) 'otherwise))
67  `((unless ,(convert-case-conditions var (car (first cases)))
68  ,@(cdr (second cases)))
69  ,@(cdr (first cases))))
70  (t
71  (let ((tags (make-array (length cases) :initial-contents (loop repeat (length cases)
72  collect (gensym))))
73  (end (gensym "END")))
74  `(,@(loop for (chars . body) in cases
75  for i from 0
76  collect `(when ,(convert-case-conditions var chars)
77  (go ,(aref tags i))))
78  ,@(loop for case in cases
79  for i from 0
80  append `(,(aref tags i)
81  ,@(cdr case)
82  (go ,end)))
83  ,end)))))
84 
85 (defmacro vector-case (elem-var vec-and-options &body cases)
86  (destructuring-bind (vec &key case-insensitive)
87  (ensure-cons vec-and-options)
88  (with-gensyms (otherwise end-tag vector-case-block)
89  (labels ((case-candidates (el)
90  (cond
91  ((not case-insensitive) el)
92  ((characterp el)
93  (cond
94  ((char<= #\a el #\z)
95  `(,el
96  ,(code-char
97  (- (char-code el)
98  #.(- (char-code #\a) (char-code #\A))))))
99  ((char<= #\A el #\Z)
100  `(,el
101  ,(code-char
102  (+ (char-code el)
103  #.(- (char-code #\a) (char-code #\A))))))
104  (t el)))
105  ((typep el '(unsigned-byte 8))
106  (cond
107  ((<= #.(char-code #\a) el #.(char-code #\z))
108  `(,el
109  ,(- el #.(- (char-code #\a) (char-code #\A)))))
110  ((<= #.(char-code #\A) el #.(char-code #\Z))
111  `(,el
112  ,(+ el #.(- (char-code #\a) (char-code #\A)))))
113  (t el)))
114  (t el)))
115  (build-case (i cases vec)
116  (when cases
117  (let ((map (make-hash-table)))
118  (map nil
119  (lambda (case)
120  (unless (vectorp (car case))
121  (error "The first element of cases must be a constant vector"))
122  (unless (<= (length (car case)) i)
123  (push case (gethash (aref (car case) i) map))))
124  cases)
125  (let (res-cases)
126  (maphash (lambda (el cases)
127  (let ((next-case (build-case (1+ i) cases vec)))
128  (cond
129  (next-case
130  (push
131  `(,(case-candidates el)
132  (unless (advance*)
133  ,(if (= (length (caar cases)) (1+ i))
134  `(progn ,@(cdr (car cases))
135  (go ,end-tag))
136  `(go :eof)))
137  ,@(apply #'typed-case-tagbodies elem-var
138  (append
139  next-case
140  `((otherwise (go ,otherwise))))))
141  res-cases))
142  (t
143  (push `(,(case-candidates el)
144  (advance*)
145  (return-from ,vector-case-block
146  (progn ,@(cdr (car cases)))))
147  res-cases)))))
148  map)
149  res-cases)))))
150  (let ((otherwise-case nil))
151  (when (eq (caar (last cases)) 'otherwise)
152  (setq otherwise-case (car (last cases))
153  cases (butlast cases)))
154  `(block ,vector-case-block
155  (tagbody
156  ,@(apply #'typed-case-tagbodies elem-var
157  (append
158  (build-case 0 cases vec)
159  `((otherwise (go ,otherwise)))))
160  (go ,end-tag)
161  ,otherwise
162  ,@(when otherwise-case
163  `(unless (eofp)
164  (return-from ,vector-case-block
165  (progn ,@(cdr otherwise-case)))))
166  ,end-tag)))))))
167 
168 (defun variable-type (var &optional env)
169  (declare (ignorable env))
170  (cond
171  ((constantp var) (type-of var))
172  #+(or sbcl openmcl cmu allegro)
173  ((and (symbolp var)
174  #+allegro (cadr (assoc 'type (nth-value 2 (variable-information var env))))
175  #-allegro (cdr (assoc 'type (nth-value 2 (variable-information var env))))))
176  ((and (listp var)
177  (eq (car var) 'the)
178  (cadr var)))))
179 
180 (deftype octets (&optional (len '*))
181  `(simple-array (unsigned-byte 8) (,len)))
182 
183 (defun variable-type* (var &optional env)
184  (let ((type (variable-type var env)))
185  (cond
186  ((null type) nil)
187  ((subtypep type 'string) 'string)
188  ((subtypep type 'octets) 'octets))))
189 
190 (defun check-skip-elems (elems)
191  (or (every (lambda (elem)
192  (or (characterp elem)
193  (and (consp elem)
194  (null (cddr elem))
195  (eq (first elem) 'not)
196  (characterp (second elem)))))
197  elems)
198  (error "'skip' takes only constant characters, or a cons starts with 'not'.")))
199 
200 (defun check-match-cases (cases)
201  (or (every (lambda (case)
202  (and (consp case)
203  (or (eq (car case) 'otherwise)
204  (stringp (car case)))))
205  cases)
206  (error "'match-case' takes only constant strings at the car position.~% ~S" cases)))
207 
208 
209 (defmacro bind ((symb &body bind-forms) &body body)
210  (declare (ignore symb bind-forms body)))
211 
212 (defmacro subseq* (data start &optional end)
213  `(subseq ,data ,start ,end))
214 (defmacro get-elem (form) form)
215 (defun ensure-char-elem (elem)
216  (if (characterp elem)
217  elem
218  (code-char elem)))
219 
220 (defmacro tagbody-with-match-failed (elem &body body)
221  (with-gensyms (block)
222  `(block ,block
223  (tagbody
224  (return-from ,block ,@body)
225  :match-failed
226  (error 'match-failed :elem ,elem)))))
227 
228 (defmacro parsing-macrolet ((elem data p end)
229  (&rest macros) &body body)
230  `(macrolet ((advance (&optional (step 1))
231  `(or (advance* ,step)
232  (go :eof)))
233  (advance* (&optional (step 1))
234  `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0)))
235  (incf ,',p ,step)
236  ,@(if (eql step 0)
237  ()
238  `((if (<= ,',end ,',p)
239  nil
240  (progn
241  (setq ,',elem
242  (aref ,',data ,',p))
243  t))))))
244  (advance-to (to)
245  `(or (advance-to* ,to)
246  (go :eof)))
247  (advance-to* (to)
248  (once-only (to)
249  `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0)))
250  (check-type ,to fixnum)
251  (setq ,',p ,to)
252  (if (<= ,',end ,',p)
253  nil
254  (progn
255  (setq ,',elem
256  (aref ,',data ,',p))
257  t)))))
258  (skip (&rest elems)
259  (check-skip-elems elems)
260  `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0)))
261  (if (skip-conditions ,',elem ,elems)
262  (advance)
263  (error 'match-failed
264  :elem ,',elem
265  :expected ',elems))))
266  (skip* (&rest elems)
267  (check-skip-elems elems)
268  `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0)))
269  (unless (eofp)
270  (loop
271  (unless (skip-conditions ,',elem ,elems)
272  (return))
273  (or (advance*) (go :eof))))))
274  (skip+ (&rest elems)
275  `(progn
276  (skip ,@elems)
277  (skip* ,@elems)))
278  (skip? (&rest elems)
279  (check-skip-elems elems)
280  `(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0)))
281  (when (skip-conditions ,',elem ,elems)
282  (or (advance*) (go :eof)))))
283  (skip-until (fn)
284  `(loop until ,(if (symbolp fn)
285  `(,fn (get-elem ,',elem))
286  `(funcall ,fn (get-elem ,',elem)))
287  do (or (advance*) (go :eof))))
288  (skip-while (fn)
289  `(loop while ,(if (symbolp fn)
290  `(,fn (get-elem ,',elem))
291  `(funcall ,fn (get-elem ,',elem)))
292  do (or (advance*) (go :eof))))
293  (bind ((symb &body bind-forms) &body body)
294  (with-gensyms (start)
295  `(let ((,start ,',p))
296  (tagbody
297  ,@bind-forms
298  :eof)
299  (prog1
300  (let ((,symb (subseq* ,',data ,start ,',p)))
301  ,@body)
302  (when (eofp)
303  (go :eof))))))
304  (%match (&rest vectors)
305  `(%match-case
306  ,@(loop for vec in vectors
307  collect `(,vec))))
308  (match (&rest vectors)
309  `(block match-block
310  (tagbody
311  (return-from match-block (%match ,@vectors))
312  :match-failed
313  (error 'match-failed :elem ,',elem))))
314  (match? (&rest vectors)
315  (with-gensyms (start start-elem)
316  `(let ((,start ,',p)
317  (,start-elem ,',elem))
318  (block match?-block
319  (tagbody
320  (%match ,@vectors)
321  (return-from match?-block t)
322  :match-failed
323  (setq ,',p ,start
324  ,',elem ,start-elem))))))
325  (match-i (&rest vectors)
326  `(match-i-case
327  ,@(loop for vec in vectors
328  collect `(,vec))))
329  ,@macros)
330  #+sbcl (declare (sb-ext:muffle-conditions sb-ext:code-deletion-note))
331  (labels ((eofp ()
332  (declare (optimize (speed 3) (safety 0) (debug 0)))
333  (<= ,end ,p))
334  (current () (get-elem ,elem))
335  (peek (&key eof-value)
336  (declare (optimize (speed 3) (safety 0) (debug 0)))
337  (let ((len (length ,data)))
338  (declare (type fixnum len))
339  (if (or (eofp) (>= ,p (- ,end 1)) (= ,p (- len 1)))
340  eof-value
341  (aref ,data (+ 1 ,p)))))
342  (pos () (the fixnum ,p)))
343  (declare (inline eofp current pos))
344  ,@body)))
345 
346 (defmacro with-string-parsing ((data &key start end) &body body)
347  (with-gensyms (g-end elem p body-block)
348  (once-only (data)
349  `(let ((,elem #\Nul)
350  (,p ,(if start
351  `(or ,start 0)
352  0))
353  (,g-end ,(if end
354  `(or ,end (length ,data))
355  `(length ,data))))
356  (declare (type simple-string ,data)
357  (type fixnum ,p ,g-end)
358  (type character ,elem))
359  (parsing-macrolet (,elem ,data ,p ,g-end)
360  ((skip-conditions (elem-var elems)
361  `(or ,@(loop for el in elems
362  if (and (consp el)
363  (eq (car el) 'not))
364  collect `(not (char= ,(cadr el) ,elem-var))
365  else
366  collect `(char= ,el ,elem-var))))
367  (%match-case (&rest cases)
368  (check-match-cases cases)
369  `(prog1
370  (vector-case ,',elem (,',data)
371  ,@(if (find 'otherwise cases :key #'car :test #'eq)
372  cases
373  (append cases
374  '((otherwise (go :match-failed))))))
375  (when (eofp) (go :eof))))
376  (%match-i-case (&rest cases)
377  (check-match-cases cases)
378  `(prog1
379  (vector-case ,',elem (,',data :case-insensitive t)
380  ,@(if (find 'otherwise cases :key #'car :test #'eq)
381  cases
382  (append cases
383  '((otherwise (go :match-failed))))))
384  (when (eofp) (go :eof))))
385  (match-case
386  (&rest cases)
387  `(tagbody-with-match-failed ,',elem (%match-case ,@cases)))
388  (match-i-case
389  (&rest cases)
390  `(tagbody-with-match-failed ,',elem (%match-i-case ,@cases))))
391  (block ,body-block
392  (tagbody
393  (when (eofp)
394  (go :eof))
395  (setq ,elem (aref ,data ,p))
396  (return-from ,body-block (progn ,@body))
397  :eof)))))))
398 
399 (defmacro with-octets-parsing ((data &key start end) &body body)
400  (with-gensyms (g-end elem p body-block)
401  (once-only (data)
402  `(let ((,elem 0)
403  (,p ,(if start
404  `(or ,start 0)
405  0))
406  (,g-end ,(if end
407  `(or ,end (length ,data))
408  `(length ,data))))
409  (declare (type octets ,data)
410  (type fixnum ,p ,g-end)
411  (type (unsigned-byte 8) ,elem))
412  (parsing-macrolet (,elem ,data ,p ,g-end)
413  ((skip-conditions (elem-var elems)
414  `(or ,@(loop for el in elems
415  if (and (consp el)
416  (eq (car el) 'not))
417  collect `(not (= ,(char-code (cadr el)) ,elem-var))
418  else
419  collect `(= ,(char-code el) ,elem-var))))
420  (%match-case (&rest cases)
421  (check-match-cases cases)
422  (setf cases
423  (loop for case in cases
424  if (stringp (car case))
425  collect (cons (babel:string-to-octets (car case))
426  (cdr case))
427  else
428  collect case))
429  `(prog1
430  (vector-case ,',elem (,',data)
431  ,@(if (find 'otherwise cases :key #'car :test #'eq)
432  cases
433  (append cases
434  '((otherwise (go :match-failed))))))
435  (when (eofp) (go :eof))))
436  (%match-i-case (&rest cases)
437  (check-match-cases cases)
438  (setf cases
439  (loop for case in cases
440  if (stringp (car case))
441  collect (cons (babel:string-to-octets (car case))
442  (cdr case))
443  else
444  collect case))
445  `(prog1
446  (vector-case ,',elem (,',data :case-insensitive t)
447  ,@(if (find 'otherwise cases :key #'car :test #'eq)
448  cases
449  (append cases
450  '((otherwise (go :match-failed))))))
451  (when (eofp) (go :eof))))
452  (match-case
453  (&rest cases)
454  `(tagbody-with-match-failed ,',elem (%match-case ,@cases)))
455  (match-i-case
456  (&rest cases)
457  `(tagbody-with-match-failed ,',elem (%match-i-case ,@cases))))
458  (block ,body-block
459  (tagbody
460  (when (eofp)
461  (go :eof))
462  (setq ,elem (aref ,data ,p))
463  (return-from ,body-block (progn ,@body))
464  :match-failed
465  (error 'match-failed :elem ,elem)
466  :eof)))))))
467 
468 (defmacro with-vector-parsing ((data &key (start 0) end) &body body &environment env)
469  (let ((data-type (variable-type* data env)))
470  (case data-type
471  (string `(with-string-parsing (,data :start ,start :end ,end) ,@body))
472  (octets `(macrolet ((get-elem (form) `(code-char ,form))
473  (subseq* (data start &optional end)
474  `(babel:octets-to-string ,data :start ,start :end ,end)))
475  (with-octets-parsing (,data :start ,start :end ,end) ,@body)))
476  (otherwise (once-only (data)
477  `(etypecase ,data
478  (string (with-string-parsing (,data :start ,start :end ,end) ,@body))
479  (octets (macrolet ((get-elem (form) `(code-char ,form))
480  (subseq* (data start &optional end)
481  `(babel:octets-to-string ,data :start ,start :end ,end)))
482  (with-octets-parsing (,data :start ,start :end ,end) ,@body)))))))))