changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/std/num/parse.lisp

changeset 698: 96958d3eb5b0
parent: 0af47621fa8b
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
1 ;;; std/num/parse.lisp --- Number parsing functions
2 
3 ;;
4 
5 ;;; Code:
6 (in-package :std/num)
7 (declaim (optimize (speed 3) (safety 3)))
8 (define-condition invalid-number (parse-error)
9  ((value :reader invalid-number-value
10  :initarg :value
11  :initform nil)
12  (reason :reader invalid-number-reason
13  :initarg :reason
14  :initform "Not specified"))
15  (:report (lambda (c s)
16  (format s "Invalid number: ~S [Reason: ~A]"
17  (invalid-number-value c)
18  (invalid-number-reason c)))))
19 
20 (declaim (type (cons character) *white-space-characters*))
21 (defparameter *white-space-characters*
22  (list #\Space #\Tab #\Return #\Linefeed)
23  "A list of all of the whitespace characters.")
24 
25 (declaim (inline white-space-p))
26 (defun white-space-p (x)
27  "Is the given character a whitespace character?"
28  (declare (optimize (speed 3) (safety 0))
29  (type character x))
30  (and (find x *white-space-characters*) t))
31 
32 (declaim (inline parse-integer-and-places))
33 (defun parse-integer-and-places (string start end &key (radix 10))
34  "Parse an integer and return a 'parsed-integer'. This is an object
35  whose numerical value can be accessed with the function
36  number-value and whose length can be accessed with the function
37  place."
38  (declare (optimize (speed 3) (safety 1))
39  (type simple-string string)
40  (fixnum start end radix))
41  (multiple-value-bind (integer end-pos)
42  (if (= start end)
43  (values 0 0)
44  (parse-integer string
45  :start start
46  :end end
47  :radix radix))
48  (declare (fixnum integer end-pos))
49  ;; cl:parse-integer will consume trailing whitespace, thus end-pos may be
50  ;; larger than the number of digits. Instead of trimming whitespace
51  ;; beforehand we count it here
52  (let* ((count (loop for pos from (- end-pos 1) downto start
53  while (member (char string pos)
54  *white-space-characters*
55  :test 'char=)
56  :count 1))
57  (relevant-digits (the fixnum (- end-pos start count))))
58  (cons integer relevant-digits))))
59 
60 (defun parse-integers (string start end splitting-points &key (radix 10))
61  "Parse a string containing multiple integers where SPLITTING-POINTS
62  is a list of locations where each location is inbetween
63  consecutive integers. This will return a list of parsed-integers.
64  The last parsed-integer will have a negative value for its length."
65  (declare (optimize (speed 3) (safety 1))
66  (type simple-string string)
67  (fixnum start end radix)
68  (list splitting-points))
69  (values-list (loop for left = start then (1+ right)
70  for point in splitting-points
71  for right = point
72  collect (parse-integer-and-places string
73  left
74  right
75  :radix radix)
76  into integers
77  finally (return
78  (nconc integers
79  (list
80  (parse-integer-and-places string
81  left
82  end
83  :radix radix
84  )))))))
85 
86 (declaim (inline number-value places))
87 (defun number-value (x) "Get the value of a parsed-integer." (car x))
88 (defun places (x) "Get the length of a parsed-integer." (cdr x))
89 
90 ;; Numbers which could've been parsed, but intentionally crippled not to:
91 ;; #xFF.AA
92 ;; #o12e3
93 
94 ;; Numbers which CL doesn't parse, but this does:
95 ;; #10r3.2
96 ;; #2r 11
97 (defun parse-number (string &key (start 0) (end nil) (radix 10)
98  ((:float-format *read-default-float-format*)
99  *read-default-float-format*))
100  "Given a string, and start, end, and radix parameters, produce a number according to the syntax definitions in the Common Lisp Hyperspec."
101  (declare (type simple-string string))
102  (flet ((invalid-number (reason)
103  (error 'invalid-number
104  :value (subseq string start end)
105  :reason reason)))
106  (let ((end (or end (length string))))
107  (declare (fixnum start end radix))
108  (if (and (eql (char string start) #\#)
109  (member (char string (1+ start)) '(#\C #\c)))
110  (let ((\(-pos (position #\( string :start start :end end))
111  (\)-pos (position #\) string :start start :end end)))
112  (when (or (not \(-pos)
113  (not \)-pos)
114  (position #\( string :start (1+ \(-pos) :end end)
115  (position #\) string :start (1+ \)-pos) :end end))
116  (invalid-number "Mismatched/missing parenthesis"))
117 
118  (let ((real-pos (position-if-not #'white-space-p string
119  :start (1+ \(-pos) :end \)-pos)))
120  (unless real-pos
121  (invalid-number "Missing real part"))
122  (let ((delimiting-space (position-if #'white-space-p string
123  :start (1+ real-pos)
124  :end \)-pos)))
125  (unless delimiting-space
126  (invalid-number "Missing imaginary part"))
127  (let ((img-pos (position-if-not #'white-space-p string
128  :start (1+ delimiting-space)
129  :end \)-pos)))
130  (unless img-pos
131  (invalid-number "Missing imaginary part"))
132  (let ((img-end-pos (position-if #'white-space-p string
133  :start (1+ img-pos)
134  :end \)-pos)))
135  (complex (parse-real-number string
136  :start real-pos
137  :end delimiting-space
138  :radix radix)
139  (parse-real-number string
140  :start img-pos
141  :end (or img-end-pos
142  \)-pos)
143  :radix radix)))))))
144  (parse-real-number string :start start :end end :radix radix)))))
145 
146 (defun parse-real-number (string &key (start 0) (end nil) (radix 10)
147  ((:float-format *read-default-float-format*)
148  *read-default-float-format*))
149  "Given a string, and start, end, and radix parameters, produce a number according to the syntax definitions in the Common Lisp Hyperspec -- except for complex numbers."
150  (declare (simple-string string))
151  (let ((end (or end (length string))))
152  (case (char string start)
153  ((#\-)
154  (* -1 (the fixnum (parse-positive-real-number string
155  :start (1+ start)
156  :end end
157  :radix radix))))
158  ((#\+)
159  (parse-positive-real-number string
160  :start (1+ start)
161  :end end
162  :radix radix))
163  ((#\#)
164  (case (char string (1+ start))
165  ((#\x #\X)
166  (parse-real-number string
167  :start (+ start 2)
168  :end end
169  :radix 16))
170  ((#\b #\B)
171  (parse-real-number string
172  :start (+ start 2)
173  :end end
174  :radix 2))
175  ((#\o #\O)
176  (parse-real-number string
177  :start (+ start 2)
178  :end end
179  :radix 8))
180  (t (if (digit-char-p (char string (1+ start)))
181  (let ((r-pos (position #\r string
182  :start (1+ start)
183  :end end
184  :key #'char-downcase)))
185  (unless r-pos
186  (error 'invalid-number
187  :value (subseq string start end)
188  :reason "Missing R in #radixR"))
189  (parse-real-number string
190  :start (1+ r-pos)
191  :end end
192  :radix (parse-integer string
193  :start (1+ start)
194  :end r-pos)))))))
195  (t (parse-positive-real-number string
196  :start start
197  :end end
198  :radix radix)))))
199 
200 (defun base-for-exponent-marker (char)
201  "Return the base for an exponent-marker."
202  (case char
203  ((#\d #\D)
204  10.0d0)
205  ((#\e #\E)
206  (coerce 10 *read-default-float-format*))
207  ((#\f #\F)
208  10.0f0)
209  ((#\s #\S)
210  10.0s0)
211  ((#\l #\L)
212  10.0l0)))
213 
214 (defun make-float/frac (radix exp-marker whole-place frac-place exp-place)
215  "Create a float using EXP-MARKER as the exponent-marker and the
216  parsed-integers WHOLE-PLACE, FRAC-PLACE, and EXP-PLACE as the
217  integer part, fractional part, and exponent respectively."
218  (declare (fixnum radix))
219  (let* ((base (base-for-exponent-marker exp-marker))
220  (exp (expt base (number-value exp-place))))
221  (+ (* exp (number-value whole-place))
222  (/ (* exp (number-value frac-place))
223  (expt (float radix base)
224  (places frac-place))))))
225 
226 (defun make-float/whole (exp-marker whole-place exp-place)
227  "Create a float where EXP-MARKER is the exponent-marker and the
228  parsed-integers WHOLE-PLACE and EXP-PLACE as the integer part and
229  the exponent respectively."
230  (* (number-value whole-place)
231  (expt (base-for-exponent-marker exp-marker)
232  (number-value exp-place))))
233 
234 (defun parse-positive-real-number (string &key (start 0) (end nil) (radix 10)
235  ((:float-format *read-default-float-format*)
236  *read-default-float-format*))
237  "Given a string, and start, end, and radix parameters, produce a number according to the syntax definitions in the Common Lisp Hyperspec -- except for complex numbers and negative numbers."
238  (declare (simple-string string)
239  (fixnum radix))
240  (let ((end (or end (length string)))
241  (first-char (char string start)))
242  (flet ((invalid-number (reason)
243  (error 'invalid-number
244  :value (subseq string start end)
245  :reason reason)))
246  (when (position-if #'white-space-p string
247  :start (or (position-if-not #'white-space-p string
248  :start start
249  :end end)
250  0)
251  :end (position-if-not #'white-space-p string
252  :start start
253  :end end
254  :from-end t))
255  (invalid-number "Whitespace inside the number"))
256  (case first-char
257  ((#\-)
258  (invalid-number "Invalid usage of -"))
259  ((#\/)
260  (invalid-number "/ at beginning of number"))
261  ((#\d #\D #\e #\E #\l #\L #\f #\F #\s #\S)
262  (when (= radix 10)
263  (invalid-number "Exponent-marker at beginning of number"))))
264  (let (/-pos .-pos exp-pos exp-marker)
265  (loop for index from start below end
266  for char = (char string index)
267  do (case char
268  ((#\/)
269  (if /-pos
270  (invalid-number "Multiple /'s in number")
271  (setf /-pos index)))
272  ((#\.)
273  (if .-pos
274  (invalid-number "Multiple .'s in number")
275  (setf .-pos index)))
276  ((#\e #\E #\f #\F #\s #\S #\l #\L #\d #\D)
277  ;; We should only execute this if the base is
278  ;; not used for the given radix (ie the digit
279  ;; e is valid in base 15 and up).
280  (when (>= (+ 10
281  (- (char-code (char-upcase char))
282  (char-code #\A)))
283  radix)
284  (when exp-pos
285  (invalid-number
286  "Multiple exponent-markers in number"))
287  (setf exp-pos index)
288  (setf exp-marker (char-downcase char)))))
289  when (eql index (1- end))
290  do (case char
291  ((#\/)
292  (invalid-number "/ at end of number"))
293  ((#\d #\D #\e #\E #\s #\S #\l #\L #\f #\F)
294  (when (= radix 10)
295  (invalid-number "Exponent-marker at end of number")))))
296  (cond ((and /-pos .-pos)
297  (invalid-number "Both . and / cannot be present simultaneously"))
298  ((and /-pos exp-pos)
299  (invalid-number "Both an exponent-marker and / cannot be present simultaneously"))
300  ((and .-pos exp-pos)
301  (if (< exp-pos .-pos)
302  (invalid-number "Exponent-markers must occur after . in number")
303  (if (/= radix 10)
304  (invalid-number "Only decimal numbers can contain exponent-markers or decimal points")
305  (multiple-value-bind (whole-place frac-place exp-place)
306  (parse-integers string start end
307  (list .-pos exp-pos)
308  :radix radix)
309  (make-float/frac radix exp-marker whole-place frac-place exp-place)))))
310  (exp-pos
311  (if (/= radix 10)
312  (invalid-number "Only decimals can contain exponent-markers")
313  (multiple-value-bind (whole-place exp-place)
314  (parse-integers string start end
315  (list exp-pos)
316  :radix radix)
317  (make-float/whole exp-marker whole-place exp-place))))
318  (/-pos
319  (multiple-value-bind (numerator denominator)
320  (parse-integers string start end
321  (list /-pos)
322  :radix radix)
323  (if (>= (number-value denominator) 0)
324  (/ (number-value numerator)
325  (number-value denominator))
326  (invalid-number "Misplaced - sign"))))
327  (.-pos
328  (if (/= radix 10)
329  (invalid-number "Only decimal numbers can contain decimal points")
330  (multiple-value-bind (whole-part frac-part)
331  (parse-integers string start end
332  (list .-pos)
333  :radix 10)
334  (cond
335  ((minusp (places frac-part))
336  (if (and (zerop (number-value whole-part))
337  (zerop (places whole-part)))
338  (invalid-number "Only the . is present")
339  (number-value whole-part)))
340  ((>= (number-value frac-part) 0)
341  (coerce (+ (number-value whole-part)
342  (/ (number-value frac-part)
343  (expt 10 (places frac-part))))
344  *read-default-float-format*))
345  (t
346  (invalid-number "Misplaced - sign"))))))
347  (t
348  (values (parse-integer string
349  :start start
350  :end end
351  :radix radix))))))))