core:lisp/std/num/parse.lisp

changelog shortlog graph tags branches changeset files revisions annotate raw help
Mercurial > core / lisp/std/num/parse.lisp

changeset 122:	4ba88cac5bc7
child:	f3d814fb136a
author:	ellis <ellis@rwest.io>
date:	Sun, 24 Dec 2023 01:06:20 -0500
permissions:	-rw-r--r--
description:	num/parse, added DAT system, net/fetch, time/local, refactored trees
     1 (in-package :std)
     2 
     3 (define-condition invalid-number (parse-error)
     4   ((value :reader invalid-number-value
     5           :initarg :value
     6           :initform nil)
     7    (reason :reader invalid-number-reason
     8            :initarg :reason
     9            :initform "Not specified"))
    10   (:report (lambda (c s)
    11              (format s "Invalid number: ~S [Reason: ~A]"
    12                      (invalid-number-value c)
    13                      (invalid-number-reason c)))))
    14 
    15 (declaim (type cons *white-space-characters*))
    16 (defparameter *white-space-characters*
    17   '(#\Space #\Tab #\Return #\Linefeed)
    18   "A list of all of the whitespace characters.")
    19 
    20 (declaim (inline white-space-p))
    21 (defun white-space-p (x)
    22   "Is the given character a whitespace character?"
    23   (declare (optimize (speed 3) (safety 0))
    24            (type character x))
    25   (and (find x *white-space-characters*) t))
    26 
    27 (declaim (inline parse-integer-and-places))
    28 (defun parse-integer-and-places (string start end &key (radix 10))
    29   "Parse an integer and return a 'parsed-integer'. This is an object
    30    whose numerical value can be accessed with the function
    31    number-value and whose length can be accessed with the function
    32    place."
    33   (declare (optimize (speed 3) (safety 1))
    34            (type string string)
    35            (type fixnum start end radix))
    36   (multiple-value-bind (integer end-pos)
    37       (if (= start end)
    38           (values 0 0)
    39           (parse-integer string
    40                          :start start
    41                          :end end
    42                          :radix radix))
    43     ;; cl:parse-integer will consume trailing whitespace, thus end-pos may be
    44     ;; larger than the number of digits. Instead of trimming whitespace
    45     ;; beforehand we count it here
    46     (let ((relevant-digits (- end-pos start
    47                               (loop :for pos :from (- end-pos 1) :downto start
    48                                  :while (member (char string pos)
    49                                                 *white-space-characters*)
    50                                  :count 1))))
    51       (cons integer relevant-digits))))
    52 
    53 (defun parse-integers (string start end splitting-points &key (radix 10))
    54   "Parse a string containing multiple integers where SPLITTING-POINTS
    55    is a list of locations where each location is inbetween
    56    consecutive integers. This will return a list of parsed-integers.
    57    The last parsed-integer will have a negative value for its length."
    58   (declare (optimize (speed 3) (safety 1))
    59            (type string string)
    60            (type fixnum start end radix))
    61   (values-list (loop for left = start then (1+ right)
    62                      for point in splitting-points
    63                      for right = point
    64                      collect (parse-integer-and-places string
    65                                                        left
    66                                                        right
    67                                                        :radix radix)
    68                      into integers
    69                      finally (return
    70                                (nconc integers
    71                                       (list
    72                                        (parse-integer-and-places string
    73                                                                  left
    74                                                                  end
    75                                                                  :radix radix
    76                                                                  )))))))
    77 
    78 (declaim (inline number-value places))
    79 (defun number-value (x) "Get the value of a parsed-integer." (car x))
    80 (defun places (x) "Get the length of a parsed-integer." (cdr x))
    81 
    82 ;; Numbers which could've been parsed, but intentionally crippled not to:
    83 ;; #xFF.AA
    84 ;; #o12e3
    85 
    86 ;; Numbers which CL doesn't parse, but this does:
    87 ;; #10r3.2
    88 ;; #2r  11
    89 
    90 (defun parse-number (string &key (start 0) (end nil) (radix 10)
    91                                  ((:float-format *read-default-float-format*)
    92                                   *read-default-float-format*))
    93   "Given a string, and start, end, and radix parameters, produce a number according to the syntax definitions in the Common Lisp Hyperspec."
    94   (flet ((invalid-number (reason)
    95            (error 'invalid-number
    96                   :value (subseq string start end)
    97                   :reason reason)))
    98     (let ((end (or end (length string))))
    99       (if (and (eql (char string start) #\#)
   100                (member (char string (1+ start)) '(#\C #\c)))
   101           (let ((\(-pos (position #\( string :start start :end end))
   102                 (\)-pos (position #\) string :start start :end end)))
   103             (when (or (not \(-pos)
   104                       (not \)-pos)
   105                       (position #\( string :start (1+ \(-pos) :end end)
   106                       (position #\) string :start (1+ \)-pos) :end end))
   107               (invalid-number "Mismatched/missing parenthesis"))
   108             (let ((real-pos (position-if-not #'white-space-p string
   109                                              :start (1+ \(-pos) :end \)-pos)))
   110               (unless real-pos
   111                 (invalid-number "Missing real part"))
   112               (let ((delimiting-space (position-if #'white-space-p string
   113                                                    :start (1+ real-pos)
   114                                                    :end \)-pos)))
   115                 (unless delimiting-space
   116                   (invalid-number "Missing imaginary part"))
   117                 (let ((img-pos (position-if-not #'white-space-p string
   118                                                 :start (1+ delimiting-space)
   119                                                 :end \)-pos)))
   120                   (unless img-pos
   121                     (invalid-number "Missing imaginary part"))
   122                   (let ((img-end-pos (position-if #'white-space-p string
   123                                                   :start (1+ img-pos)
   124                                                   :end \)-pos)))
   125                     (complex (parse-real-number string
   126                                                 :start real-pos
   127                                                 :end delimiting-space
   128                                                 :radix radix)
   129                              (parse-real-number string
   130                                                 :start img-pos
   131                                                 :end (or img-end-pos
   132                                                          \)-pos)
   133                                                 :radix radix)))))))
   134           (parse-real-number string :start start :end end :radix radix)))))
   135 
   136 (defun parse-real-number (string &key (start 0) (end nil) (radix 10)
   137                                       ((:float-format *read-default-float-format*)
   138                                        *read-default-float-format*))
   139   "Given a string, and start, end, and radix parameters, produce a number according to the syntax definitions in the Common Lisp Hyperspec -- except for complex numbers."
   140   (let ((end (or end (length string))))
   141     (case (char string start)
   142       ((#\-)
   143        (* -1 (parse-positive-real-number string
   144                                          :start (1+ start)
   145                                          :end end
   146                                          :radix radix)))
   147       ((#\+)
   148        (parse-positive-real-number string
   149                                    :start (1+ start)
   150                                    :end end
   151                                    :radix radix))
   152       ((#\#)
   153        (case (char string (1+ start))
   154          ((#\x #\X)
   155           (parse-real-number string
   156                              :start (+ start 2)
   157                              :end end
   158                              :radix 16))
   159          ((#\b #\B)
   160           (parse-real-number string
   161                              :start (+ start 2)
   162                              :end end
   163                              :radix 2))
   164          ((#\o #\O)
   165           (parse-real-number string
   166                              :start (+ start 2)
   167                              :end end
   168                              :radix 8))
   169          (t (if (digit-char-p (char string (1+ start)))
   170                 (let ((r-pos (position #\r string
   171                                        :start (1+ start)
   172                                        :end end
   173                                        :key #'char-downcase)))
   174                   (unless r-pos
   175                     (error 'invalid-number
   176                            :value (subseq string start end)
   177                            :reason "Missing R in #radixR"))
   178                   (parse-real-number string
   179                                      :start (1+ r-pos)
   180                                      :end end
   181                                      :radix (parse-integer string
   182                                                            :start (1+ start)
   183                                                            :end r-pos)))))))
   184       (t (parse-positive-real-number string
   185                                      :start start
   186                                      :end end
   187                                      :radix radix)))))
   188 
   189 (defun base-for-exponent-marker (char)
   190   "Return the base for an exponent-marker."
   191   (case char
   192     ((#\d #\D)
   193      10.0d0)
   194     ((#\e #\E)
   195      (coerce 10 *read-default-float-format*))
   196     ((#\f #\F)
   197      10.0f0)
   198     ((#\s #\S)
   199      10.0s0)
   200     ((#\l #\L)
   201      10.0l0)))
   202 
   203 (defun make-float/frac (radix exp-marker whole-place frac-place exp-place)
   204   "Create a float using EXP-MARKER as the exponent-marker and the
   205    parsed-integers WHOLE-PLACE, FRAC-PLACE, and EXP-PLACE as the
   206    integer part, fractional part, and exponent respectively."
   207   (let* ((base (base-for-exponent-marker exp-marker))
   208          (exp (expt base (number-value exp-place))))
   209     (+ (* exp (number-value whole-place))
   210        (/ (* exp (number-value frac-place))
   211           (expt (float radix base)
   212                 (places frac-place))))))
   213 
   214 (defun make-float/whole (exp-marker whole-place exp-place)
   215   "Create a float where EXP-MARKER is the exponent-marker and the
   216    parsed-integers WHOLE-PLACE and EXP-PLACE as the integer part and
   217    the exponent respectively."
   218   (* (number-value whole-place)
   219      (expt (base-for-exponent-marker exp-marker)
   220            (number-value exp-place))))
   221 
   222 (defun parse-positive-real-number (string &key (start 0) (end nil) (radix 10)
   223                                                ((:float-format *read-default-float-format*)
   224                                                 *read-default-float-format*))
   225   "Given a string, and start, end, and radix parameters, produce a number according to the syntax definitions in the Common Lisp Hyperspec -- except for complex numbers and negative numbers."
   226   (let ((end (or end (length string)))
   227         (first-char (char string start)))
   228     (flet ((invalid-number (reason)
   229              (error 'invalid-number
   230                     :value (subseq string start end)
   231                     :reason reason)))
   232       (when (position-if #'white-space-p string
   233                          :start (or (position-if-not #'white-space-p string
   234                                                      :start start
   235                                                      :end end)
   236                                     0)
   237                          :end   (position-if-not #'white-space-p string
   238                                                  :start start
   239                                                  :end end
   240                                                  :from-end t))
   241         (invalid-number "Whitespace inside the number"))
   242       (case first-char
   243         ((#\-)
   244          (invalid-number "Invalid usage of -"))
   245         ((#\/)
   246          (invalid-number "/ at beginning of number"))
   247         ((#\d #\D #\e #\E #\l #\L #\f #\F #\s #\S)
   248          (when (= radix 10)
   249            (invalid-number "Exponent-marker at beginning of number"))))
   250       (let (/-pos .-pos exp-pos exp-marker)
   251         (loop for index from start below end
   252               for char = (char string index)
   253               do (case char
   254                    ((#\/)
   255                     (if /-pos
   256                         (invalid-number "Multiple /'s in number")
   257                         (setf /-pos index)))
   258                    ((#\.)
   259                     (if .-pos
   260                         (invalid-number "Multiple .'s in number")
   261                         (setf .-pos index)))
   262                    ((#\e #\E #\f #\F #\s #\S #\l #\L #\d #\D)
   263                     ;; We should only execute this if the base is
   264                     ;; not used for the given radix (ie the digit
   265                     ;; e is valid in base 15 and up).
   266                     (when (>= (+ 10
   267                                  (- (char-code (char-upcase char))
   268                                     (char-code #\A)))
   269                               radix)
   270                       (when exp-pos
   271                         (invalid-number
   272                          "Multiple exponent-markers in number"))
   273                       (setf exp-pos index)
   274                       (setf exp-marker (char-downcase char)))))
   275               when (eql index (1- end))
   276               do (case char
   277                    ((#\/)
   278                     (invalid-number "/ at end of number"))
   279                    ((#\d #\D #\e #\E #\s #\S #\l #\L #\f #\F)
   280                     (when (= radix 10)
   281                       (invalid-number "Exponent-marker at end of number")))))
   282         (cond ((and /-pos .-pos)
   283                (invalid-number "Both . and / cannot be present simultaneously"))
   284               ((and /-pos exp-pos)
   285                (invalid-number "Both an exponent-marker and / cannot be present simultaneously"))
   286               ((and .-pos exp-pos)
   287                (if (< exp-pos .-pos)
   288                    (invalid-number "Exponent-markers must occur after . in number")
   289                    (if (/= radix 10)
   290                        (invalid-number "Only decimal numbers can contain exponent-markers or decimal points")
   291                        (multiple-value-bind (whole-place frac-place exp-place)
   292                            (parse-integers string start end
   293                                            (list .-pos exp-pos)
   294                                            :radix radix)
   295                          (make-float/frac radix exp-marker whole-place frac-place exp-place)))))
   296               (exp-pos
   297                (if (/= radix 10)
   298                    (invalid-number "Only decimals can contain exponent-markers")
   299                    (multiple-value-bind (whole-place exp-place)
   300                        (parse-integers string start end
   301                                        (list exp-pos)
   302                                        :radix radix)
   303                      (make-float/whole exp-marker whole-place exp-place))))
   304               (/-pos
   305                (multiple-value-bind (numerator denominator)
   306                    (parse-integers string start end
   307                                    (list /-pos)
   308                                    :radix radix)
   309                  (if (>= (number-value denominator) 0)
   310                      (/ (number-value numerator)
   311                         (number-value denominator))
   312                      (invalid-number "Misplaced - sign"))))
   313               (.-pos
   314                (if (/= radix 10)
   315                    (invalid-number "Only decimal numbers can contain decimal points")
   316                    (multiple-value-bind (whole-part frac-part)
   317                        (parse-integers string start end
   318                                        (list .-pos)
   319                                        :radix 10)
   320                      (cond
   321                        ((minusp (places frac-part))
   322                         (if (and (zerop (number-value whole-part))
   323                                  (zerop (places whole-part)))
   324                             (invalid-number "Only the . is present")
   325                             (number-value whole-part)))
   326                        ((>= (number-value frac-part) 0)
   327                         (coerce (+ (number-value whole-part)
   328                                    (/ (number-value frac-part)
   329                                       (expt 10 (places frac-part))))
   330                                 *read-default-float-format*))
   331                        (t
   332                         (invalid-number "Misplaced - sign"))))))
   333               (t
   334                (values (parse-integer string
   335                                       :start start
   336                                       :end end
   337                                       :radix radix))))))))