changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/std/num/parse.lisp

changeset 122: 4ba88cac5bc7
child: f3d814fb136a
author: ellis <ellis@rwest.io>
date: Sun, 24 Dec 2023 01:06:20 -0500
permissions: -rw-r--r--
description: num/parse, added DAT system, net/fetch, time/local, refactored trees
1 (in-package :std)
2 
3 (define-condition invalid-number (parse-error)
4  ((value :reader invalid-number-value
5  :initarg :value
6  :initform nil)
7  (reason :reader invalid-number-reason
8  :initarg :reason
9  :initform "Not specified"))
10  (:report (lambda (c s)
11  (format s "Invalid number: ~S [Reason: ~A]"
12  (invalid-number-value c)
13  (invalid-number-reason c)))))
14 
15 (declaim (type cons *white-space-characters*))
16 (defparameter *white-space-characters*
17  '(#\Space #\Tab #\Return #\Linefeed)
18  "A list of all of the whitespace characters.")
19 
20 (declaim (inline white-space-p))
21 (defun white-space-p (x)
22  "Is the given character a whitespace character?"
23  (declare (optimize (speed 3) (safety 0))
24  (type character x))
25  (and (find x *white-space-characters*) t))
26 
27 (declaim (inline parse-integer-and-places))
28 (defun parse-integer-and-places (string start end &key (radix 10))
29  "Parse an integer and return a 'parsed-integer'. This is an object
30  whose numerical value can be accessed with the function
31  number-value and whose length can be accessed with the function
32  place."
33  (declare (optimize (speed 3) (safety 1))
34  (type string string)
35  (type fixnum start end radix))
36  (multiple-value-bind (integer end-pos)
37  (if (= start end)
38  (values 0 0)
39  (parse-integer string
40  :start start
41  :end end
42  :radix radix))
43  ;; cl:parse-integer will consume trailing whitespace, thus end-pos may be
44  ;; larger than the number of digits. Instead of trimming whitespace
45  ;; beforehand we count it here
46  (let ((relevant-digits (- end-pos start
47  (loop :for pos :from (- end-pos 1) :downto start
48  :while (member (char string pos)
49  *white-space-characters*)
50  :count 1))))
51  (cons integer relevant-digits))))
52 
53 (defun parse-integers (string start end splitting-points &key (radix 10))
54  "Parse a string containing multiple integers where SPLITTING-POINTS
55  is a list of locations where each location is inbetween
56  consecutive integers. This will return a list of parsed-integers.
57  The last parsed-integer will have a negative value for its length."
58  (declare (optimize (speed 3) (safety 1))
59  (type string string)
60  (type fixnum start end radix))
61  (values-list (loop for left = start then (1+ right)
62  for point in splitting-points
63  for right = point
64  collect (parse-integer-and-places string
65  left
66  right
67  :radix radix)
68  into integers
69  finally (return
70  (nconc integers
71  (list
72  (parse-integer-and-places string
73  left
74  end
75  :radix radix
76  )))))))
77 
78 (declaim (inline number-value places))
79 (defun number-value (x) "Get the value of a parsed-integer." (car x))
80 (defun places (x) "Get the length of a parsed-integer." (cdr x))
81 
82 ;; Numbers which could've been parsed, but intentionally crippled not to:
83 ;; #xFF.AA
84 ;; #o12e3
85 
86 ;; Numbers which CL doesn't parse, but this does:
87 ;; #10r3.2
88 ;; #2r 11
89 
90 (defun parse-number (string &key (start 0) (end nil) (radix 10)
91  ((:float-format *read-default-float-format*)
92  *read-default-float-format*))
93  "Given a string, and start, end, and radix parameters, produce a number according to the syntax definitions in the Common Lisp Hyperspec."
94  (flet ((invalid-number (reason)
95  (error 'invalid-number
96  :value (subseq string start end)
97  :reason reason)))
98  (let ((end (or end (length string))))
99  (if (and (eql (char string start) #\#)
100  (member (char string (1+ start)) '(#\C #\c)))
101  (let ((\(-pos (position #\( string :start start :end end))
102  (\)-pos (position #\) string :start start :end end)))
103  (when (or (not \(-pos)
104  (not \)-pos)
105  (position #\( string :start (1+ \(-pos) :end end)
106  (position #\) string :start (1+ \)-pos) :end end))
107  (invalid-number "Mismatched/missing parenthesis"))
108  (let ((real-pos (position-if-not #'white-space-p string
109  :start (1+ \(-pos) :end \)-pos)))
110  (unless real-pos
111  (invalid-number "Missing real part"))
112  (let ((delimiting-space (position-if #'white-space-p string
113  :start (1+ real-pos)
114  :end \)-pos)))
115  (unless delimiting-space
116  (invalid-number "Missing imaginary part"))
117  (let ((img-pos (position-if-not #'white-space-p string
118  :start (1+ delimiting-space)
119  :end \)-pos)))
120  (unless img-pos
121  (invalid-number "Missing imaginary part"))
122  (let ((img-end-pos (position-if #'white-space-p string
123  :start (1+ img-pos)
124  :end \)-pos)))
125  (complex (parse-real-number string
126  :start real-pos
127  :end delimiting-space
128  :radix radix)
129  (parse-real-number string
130  :start img-pos
131  :end (or img-end-pos
132  \)-pos)
133  :radix radix)))))))
134  (parse-real-number string :start start :end end :radix radix)))))
135 
136 (defun parse-real-number (string &key (start 0) (end nil) (radix 10)
137  ((:float-format *read-default-float-format*)
138  *read-default-float-format*))
139  "Given a string, and start, end, and radix parameters, produce a number according to the syntax definitions in the Common Lisp Hyperspec -- except for complex numbers."
140  (let ((end (or end (length string))))
141  (case (char string start)
142  ((#\-)
143  (* -1 (parse-positive-real-number string
144  :start (1+ start)
145  :end end
146  :radix radix)))
147  ((#\+)
148  (parse-positive-real-number string
149  :start (1+ start)
150  :end end
151  :radix radix))
152  ((#\#)
153  (case (char string (1+ start))
154  ((#\x #\X)
155  (parse-real-number string
156  :start (+ start 2)
157  :end end
158  :radix 16))
159  ((#\b #\B)
160  (parse-real-number string
161  :start (+ start 2)
162  :end end
163  :radix 2))
164  ((#\o #\O)
165  (parse-real-number string
166  :start (+ start 2)
167  :end end
168  :radix 8))
169  (t (if (digit-char-p (char string (1+ start)))
170  (let ((r-pos (position #\r string
171  :start (1+ start)
172  :end end
173  :key #'char-downcase)))
174  (unless r-pos
175  (error 'invalid-number
176  :value (subseq string start end)
177  :reason "Missing R in #radixR"))
178  (parse-real-number string
179  :start (1+ r-pos)
180  :end end
181  :radix (parse-integer string
182  :start (1+ start)
183  :end r-pos)))))))
184  (t (parse-positive-real-number string
185  :start start
186  :end end
187  :radix radix)))))
188 
189 (defun base-for-exponent-marker (char)
190  "Return the base for an exponent-marker."
191  (case char
192  ((#\d #\D)
193  10.0d0)
194  ((#\e #\E)
195  (coerce 10 *read-default-float-format*))
196  ((#\f #\F)
197  10.0f0)
198  ((#\s #\S)
199  10.0s0)
200  ((#\l #\L)
201  10.0l0)))
202 
203 (defun make-float/frac (radix exp-marker whole-place frac-place exp-place)
204  "Create a float using EXP-MARKER as the exponent-marker and the
205  parsed-integers WHOLE-PLACE, FRAC-PLACE, and EXP-PLACE as the
206  integer part, fractional part, and exponent respectively."
207  (let* ((base (base-for-exponent-marker exp-marker))
208  (exp (expt base (number-value exp-place))))
209  (+ (* exp (number-value whole-place))
210  (/ (* exp (number-value frac-place))
211  (expt (float radix base)
212  (places frac-place))))))
213 
214 (defun make-float/whole (exp-marker whole-place exp-place)
215  "Create a float where EXP-MARKER is the exponent-marker and the
216  parsed-integers WHOLE-PLACE and EXP-PLACE as the integer part and
217  the exponent respectively."
218  (* (number-value whole-place)
219  (expt (base-for-exponent-marker exp-marker)
220  (number-value exp-place))))
221 
222 (defun parse-positive-real-number (string &key (start 0) (end nil) (radix 10)
223  ((:float-format *read-default-float-format*)
224  *read-default-float-format*))
225  "Given a string, and start, end, and radix parameters, produce a number according to the syntax definitions in the Common Lisp Hyperspec -- except for complex numbers and negative numbers."
226  (let ((end (or end (length string)))
227  (first-char (char string start)))
228  (flet ((invalid-number (reason)
229  (error 'invalid-number
230  :value (subseq string start end)
231  :reason reason)))
232  (when (position-if #'white-space-p string
233  :start (or (position-if-not #'white-space-p string
234  :start start
235  :end end)
236  0)
237  :end (position-if-not #'white-space-p string
238  :start start
239  :end end
240  :from-end t))
241  (invalid-number "Whitespace inside the number"))
242  (case first-char
243  ((#\-)
244  (invalid-number "Invalid usage of -"))
245  ((#\/)
246  (invalid-number "/ at beginning of number"))
247  ((#\d #\D #\e #\E #\l #\L #\f #\F #\s #\S)
248  (when (= radix 10)
249  (invalid-number "Exponent-marker at beginning of number"))))
250  (let (/-pos .-pos exp-pos exp-marker)
251  (loop for index from start below end
252  for char = (char string index)
253  do (case char
254  ((#\/)
255  (if /-pos
256  (invalid-number "Multiple /'s in number")
257  (setf /-pos index)))
258  ((#\.)
259  (if .-pos
260  (invalid-number "Multiple .'s in number")
261  (setf .-pos index)))
262  ((#\e #\E #\f #\F #\s #\S #\l #\L #\d #\D)
263  ;; We should only execute this if the base is
264  ;; not used for the given radix (ie the digit
265  ;; e is valid in base 15 and up).
266  (when (>= (+ 10
267  (- (char-code (char-upcase char))
268  (char-code #\A)))
269  radix)
270  (when exp-pos
271  (invalid-number
272  "Multiple exponent-markers in number"))
273  (setf exp-pos index)
274  (setf exp-marker (char-downcase char)))))
275  when (eql index (1- end))
276  do (case char
277  ((#\/)
278  (invalid-number "/ at end of number"))
279  ((#\d #\D #\e #\E #\s #\S #\l #\L #\f #\F)
280  (when (= radix 10)
281  (invalid-number "Exponent-marker at end of number")))))
282  (cond ((and /-pos .-pos)
283  (invalid-number "Both . and / cannot be present simultaneously"))
284  ((and /-pos exp-pos)
285  (invalid-number "Both an exponent-marker and / cannot be present simultaneously"))
286  ((and .-pos exp-pos)
287  (if (< exp-pos .-pos)
288  (invalid-number "Exponent-markers must occur after . in number")
289  (if (/= radix 10)
290  (invalid-number "Only decimal numbers can contain exponent-markers or decimal points")
291  (multiple-value-bind (whole-place frac-place exp-place)
292  (parse-integers string start end
293  (list .-pos exp-pos)
294  :radix radix)
295  (make-float/frac radix exp-marker whole-place frac-place exp-place)))))
296  (exp-pos
297  (if (/= radix 10)
298  (invalid-number "Only decimals can contain exponent-markers")
299  (multiple-value-bind (whole-place exp-place)
300  (parse-integers string start end
301  (list exp-pos)
302  :radix radix)
303  (make-float/whole exp-marker whole-place exp-place))))
304  (/-pos
305  (multiple-value-bind (numerator denominator)
306  (parse-integers string start end
307  (list /-pos)
308  :radix radix)
309  (if (>= (number-value denominator) 0)
310  (/ (number-value numerator)
311  (number-value denominator))
312  (invalid-number "Misplaced - sign"))))
313  (.-pos
314  (if (/= radix 10)
315  (invalid-number "Only decimal numbers can contain decimal points")
316  (multiple-value-bind (whole-part frac-part)
317  (parse-integers string start end
318  (list .-pos)
319  :radix 10)
320  (cond
321  ((minusp (places frac-part))
322  (if (and (zerop (number-value whole-part))
323  (zerop (places whole-part)))
324  (invalid-number "Only the . is present")
325  (number-value whole-part)))
326  ((>= (number-value frac-part) 0)
327  (coerce (+ (number-value whole-part)
328  (/ (number-value frac-part)
329  (expt 10 (places frac-part))))
330  *read-default-float-format*))
331  (t
332  (invalid-number "Misplaced - sign"))))))
333  (t
334  (values (parse-integer string
335  :start start
336  :end end
337  :radix radix))))))))