changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/io/xsubseq.lisp

changeset 357: 7c1383c08493
child: a37b1d3371fc
author: Richard Westhaver <ellis@rwest.io>
date: Tue, 21 May 2024 22:20:29 -0400
permissions: -rw-r--r--
description: port xsubseq, proc-parse. work on http and clap
1 ;;; io/xsubseq.lisp --- Subseq Optimizations
2 
3 ;; This is ported from Fukamachi's XSUBSEQ
4 
5 ;;; Code:
6 (defpackage io/xsubseq
7  (:use :cl)
8  (:import-from :sb-cltl2 :variable-information)
9  (:import-from :std/type :octet-vector)
10  (:export :xsubseq
11  :octet-xsubseq
12  :string-xsubseq
13  :concatenated-xsubseqs
14  :null-concatenated-xsubseqs
15  :octet-concatenated-xsubseqs
16  :string-concatenated-xsubseqs
17  :make-concatenated-xsubseqs
18  :xlength
19  :xnconc
20  :xnconcf
21  :coerce-to-sequence
22  :coerce-to-string
23  :with-xsubseqs))
24 (in-package :io/xsubseq)
25 
26 (defstruct (xsubseq (:constructor make-xsubseq (data start &optional (end (length data))
27  &aux (len (- end start)))))
28  (data nil)
29  (start 0 :type integer)
30  (end 0 :type integer)
31  (len 0 :type integer))
32 
33 (defstruct (octet-xsubseq (:include xsubseq)
34  (:constructor make-octet-xsubseq (data start &optional (end (length data))
35  &aux (len (- end start))))))
36 
37 (defstruct (string-xsubseq (:include xsubseq)
38  (:constructor make-string-xsubseq (data start &optional (end (length data))
39  &aux (len (- end start))))))
40 
41 (defstruct (concatenated-xsubseqs (:constructor %make-concatenated-xsubseqs))
42  (len 0 :type integer)
43  (last nil :type list)
44  (children nil :type list))
45 
46 (defun make-concatenated-xsubseqs (&rest children)
47  (if (null children)
48  (make-null-concatenated-xsubseqs)
49  (%make-concatenated-xsubseqs :children children
50  :last (last children)
51  :len (reduce #'+
52  children
53  :key #'xsubseq-len
54  :initial-value 0))))
55 
56 (defstruct (null-concatenated-xsubseqs (:include concatenated-xsubseqs)))
57 
58 (defstruct (octet-concatenated-xsubseqs (:include concatenated-xsubseqs)))
59 
60 (defstruct (string-concatenated-xsubseqs (:include concatenated-xsubseqs)))
61 
62 (defun xsubseq (data start &optional (end (length data)))
63  (typecase data
64  (octet-vector (make-octet-xsubseq data start end))
65  (string (make-string-xsubseq data start end))
66  (t (make-xsubseq data start end))))
67 
68 #+(or sbcl openmcl cmu allegro)
69 (define-compiler-macro xsubseq (&whole form &environment env data start &optional end)
70  (let ((type (cond
71  ((constantp data) (type-of data))
72  ((and (symbolp data)
73  (assoc 'type (nth-value 2 (variable-information data env)))))
74  ((and (listp data)
75  (eq (car data) 'make-string))
76  'string)
77  ((and (listp data)
78  (eq (car data) 'the)
79  (cadr data)))
80  ((and (listp data)
81  (eq (car data) 'make-array)
82  (null (cadr (member :adjustable data)))
83  (null (cadr (member :fill-pointer data)))
84  (cadr (member :element-type data))))))
85  (g-data (gensym "DATA")))
86  (if (null type)
87  form
88  (cond
89  ((subtypep type 'octet-vector) `(let ((,g-data ,data))
90  (make-octet-xsubseq ,g-data ,start ,(or end `(length ,g-data)))))
91  ((subtypep type 'string) `(let ((,g-data ,data))
92  (make-string-xsubseq ,g-data ,start ,(or end `(length ,g-data)))))
93  (t form)))))
94 
95 (defun %xnconc2 (seq1 seq2)
96  (flet ((seq-values (seq)
97  (if (concatenated-xsubseqs-p seq)
98  (values (concatenated-xsubseqs-children seq)
99  (concatenated-xsubseqs-last seq)
100  (concatenated-xsubseqs-len seq))
101  (let ((children (list seq)))
102  (values children children
103  (xsubseq-len seq))))))
104  (macrolet ((make-concatenated (type seq1 seq2)
105  `(multiple-value-bind (children last len)
106  (seq-values ,seq2)
107  (,(cond
108  ((eq type 'octet-vector) 'make-octet-concatenated-xsubseqs)
109  ((eq type 'string) 'make-string-concatenated-xsubseqs)
110  (t '%make-concatenated-xsubseqs))
111  :len (+ (xsubseq-len ,seq1) len)
112  :children (cons ,seq1 children)
113  :last last))))
114  (etypecase seq1
115  (null-concatenated-xsubseqs seq2)
116  (concatenated-xsubseqs
117  (multiple-value-bind (children last len)
118  (seq-values seq2)
119  (if (concatenated-xsubseqs-last seq1)
120  (progn
121  (rplacd (concatenated-xsubseqs-last seq1)
122  children)
123  (setf (concatenated-xsubseqs-last seq1) last)
124  (incf (concatenated-xsubseqs-len seq1) len))
125  ;; empty concatenated-xsubseqs
126  (progn
127  (setf (concatenated-xsubseqs-children seq1) children
128  (concatenated-xsubseqs-len seq1) len
129  (concatenated-xsubseqs-last seq1) last)))
130  seq1))
131  (octet-xsubseq
132  (make-concatenated octet-vector seq1 seq2))
133  (string-xsubseq
134  (make-concatenated string seq1 seq2))
135  (xsubseq (make-concatenated t seq1 seq2))))))
136 
137 (defun xnconc (subseq &rest more-subseqs)
138  (reduce #'%xnconc2 more-subseqs :initial-value subseq))
139 
140 (define-modify-macro xnconcf (subseq &rest more-subseqs) xnconc)
141 
142 (defun xlength (seq)
143  (etypecase seq
144  (xsubseq (xsubseq-len seq))
145  (concatenated-xsubseqs (concatenated-xsubseqs-len seq))))
146 
147 (defun coerce-to-sequence (seq)
148  (etypecase seq
149  (octet-concatenated-xsubseqs (octet-concatenated-xsubseqs-to-sequence seq))
150  (string-concatenated-xsubseqs (string-concatenated-xsubseqs-to-sequence seq))
151  (concatenated-xsubseqs (concatenated-xsubseqs-to-sequence seq))
152  (xsubseq (xsubseq-to-sequence seq))))
153 
154 #+(or sbcl openmcl cmu allegro)
155 (define-compiler-macro coerce-to-sequence (&whole form &environment env seq)
156  (let ((type (cond
157  ((constantp seq) (type-of seq))
158  ((and (symbolp seq)
159  (assoc 'type (nth-value 2 (variable-information seq env)))))
160  ((and (listp seq)
161  (eq (car seq) 'the)
162  (cadr seq))))))
163  (if (null type)
164  form
165  (cond
166  ((subtypep type 'octet-concatenated-xsubseqs) `(octet-concatenated-xsubseqs-to-sequence ,seq))
167  ((subtypep type 'string-concatenated-xsubseqs) `(string-concatenated-xsubseqs-to-sequence ,seq))
168  ((subtypep type 'concatenated-xsubseqs) `(concatenated-xsubseqs-to-sequence ,seq))
169  ((subtypep type 'xsubseq) `(xsubseq-to-sequence ,seq))
170  (t form)))))
171 
172 (defun coerce-to-string (seq)
173  (etypecase seq
174  (null-concatenated-xsubseqs "")
175  (octet-concatenated-xsubseqs (octet-concatenated-xsubseqs-to-string seq))
176  (string-concatenated-xsubseqs (string-concatenated-xsubseqs-to-sequence seq))
177  (octet-xsubseq (octet-xsubseq-to-string seq))
178  (string-xsubseq (xsubseq-to-sequence seq))))
179 
180 #+(or sbcl openmcl cmu allegro)
181 (define-compiler-macro coerce-to-string (&whole form &environment env seq)
182  (let ((type (cond
183  ((constantp seq) (type-of seq))
184  ((and (symbolp seq)
185  (assoc 'type (nth-value 2 (variable-information seq env)))))
186  ((and (listp seq)
187  (eq (car seq) 'the)
188  (cadr seq))))))
189  (if (null type)
190  form
191  (cond
192  ((subtypep type 'octet-concatenated-xsubseqs) `(octet-concatenated-xsubseqs-to-string ,seq))
193  ((subtypep type 'string-concatenated-xsubseqs) `(string-concatenated-xsubseqs-to-sequence ,seq))
194  ((subtypep type 'octet-xsubseq) `(octet-xsubseq-to-string ,seq))
195  ((subtypep type 'string-xsubseq) `(xsubseq-to-sequence ,seq))
196  (t form)))))
197 
198 (defun xsubseq-to-sequence (seq)
199  (let ((result (make-array (xsubseq-len seq)
200  :element-type
201  (array-element-type (xsubseq-data seq)))))
202  (replace result (xsubseq-data seq)
203  :start2 (xsubseq-start seq)
204  :end2 (xsubseq-end seq))
205  result))
206 
207 (defun octet-xsubseq-to-string (seq)
208  (let ((result (make-array (xsubseq-len seq)
209  :element-type 'character)))
210  (declare (type simple-string result))
211  (let ((data (xsubseq-data seq))
212  (end (xsubseq-end seq)))
213  (do ((i (xsubseq-start seq) (1+ i))
214  (j 0 (1+ j)))
215  ((= i end) result)
216  (setf (aref result j)
217  (code-char
218  (the (unsigned-byte 8)
219  (aref (the octet-vector data) i))))))))
220 
221 (defun concatenated-xsubseqs-to-sequence (seq)
222  (let ((result (make-array (concatenated-xsubseqs-len seq)
223  :element-type
224  (array-element-type (xsubseq-data (car (concatenated-xsubseqs-children seq)))))))
225  (loop with current-pos = 0
226  for seq in (concatenated-xsubseqs-children seq)
227  do (replace result (xsubseq-data seq)
228  :start1 current-pos
229  :start2 (xsubseq-start seq)
230  :end2 (xsubseq-end seq))
231  (incf current-pos
232  (xsubseq-len seq)))
233  result))
234 
235 (defun octet-concatenated-xsubseqs-to-sequence (seq)
236  (let ((result (make-array (concatenated-xsubseqs-len seq)
237  :element-type '(unsigned-byte 8))))
238  (declare (type octet-vector result))
239  (loop with current-pos of-type integer = 0
240  for seq in (concatenated-xsubseqs-children seq)
241  do (replace result (the octet-vector (xsubseq-data seq))
242  :start1 current-pos
243  :start2 (xsubseq-start seq)
244  :end2 (xsubseq-end seq))
245  (incf current-pos
246  (xsubseq-len seq)))
247  result))
248 
249 (defun octet-concatenated-xsubseqs-to-string (seq)
250  (let ((result (make-array (concatenated-xsubseqs-len seq)
251  :element-type 'character)))
252  (declare (type simple-string result))
253  (loop with current-pos = 0
254  for seq in (concatenated-xsubseqs-children seq)
255  do (do ((i (xsubseq-start seq) (1+ i))
256  (j current-pos (1+ j)))
257  ((= i (xsubseq-end seq))
258  (setf current-pos j))
259  (setf (aref result j)
260  (code-char
261  (the (unsigned-byte 8)
262  (aref (the octet-vector (xsubseq-data seq)) i))))))
263  result))
264 
265 (defun string-concatenated-xsubseqs-to-sequence (seq)
266  (let ((result (make-string (concatenated-xsubseqs-len seq))))
267  (declare (type simple-string result))
268  (loop with current-pos of-type integer = 0
269  for seq in (concatenated-xsubseqs-children seq)
270  do (replace result (the simple-string (xsubseq-data seq))
271  :start1 current-pos
272  :start2 (xsubseq-start seq)
273  :end2 (xsubseq-end seq))
274  (incf current-pos
275  (xsubseq-len seq)))
276  result))
277 
278 (defmacro with-xsubseqs ((xsubseqs &key initial-value) &body body)
279  `(let ((,xsubseqs ,(or initial-value
280  `(make-null-concatenated-xsubseqs))))
281  ,@body
282 
283  (typecase ,xsubseqs
284  (null-concatenated-xsubseqs nil)
285  (xsubseq (xsubseq-to-sequence ,xsubseqs))
286  (t (concatenated-xsubseqs-to-sequence ,xsubseqs)))))