changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / rust/lib/sxp/src/err.rs

changeset 698: 96958d3eb5b0
parent: 3d78bed56188
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
1 //! err.rs --- When serializing or deserializing SXP goes wrong...
2 
3 use crate::io;
4 use alloc::{
5  boxed::Box,
6  string::{String, ToString},
7 };
8 use core::{
9  fmt::{self, Debug, Display},
10  result,
11  str::FromStr,
12 };
13 use serde::{de, ser};
14 #[cfg(feature = "std")]
15 use std::error;
16 #[cfg(feature = "std")]
17 use std::io::ErrorKind;
18 
19 pub type Result<T> = result::Result<T, Error>;
20 
21 /// This type represents all possible errors that can occur when serializing or
22 /// deserializing SXP data.
23 pub struct Error {
24  /// This `Box` allows us to keep the size of `Error` as small as possible. A
25  /// larger `Error` type was substantially slower due to all the functions
26  /// that pass around `Result<T, Error>`.
27  err: Box<ErrorImpl>,
28 }
29 
30 impl Error {
31  /// One-based line number at which the error was detected.
32  ///
33  /// Characters in the first line of the input (before the first newline
34  /// character) are in line 1.
35  pub fn line(&self) -> usize {
36  self.err.line
37  }
38 
39  // TODO 2023-07-14: I kinda want to change this to Zero-based, as
40  // I'm pretty sure that's the default functionality of emacs (with
41  // cursor at '(point-min)' the position is '(:line 1 :col 0)'..
42  /// One-based column number at which the error was detected.
43  ///
44  /// The first character in the input and any characters immediately
45  /// following a newline character are in column 1.
46  ///
47  /// Note that errors may occur in column 0, for example if a read from an
48  /// I/O stream fails immediately following a previously read newline
49  /// character.
50  pub fn column(&self) -> usize {
51  self.err.column
52  }
53 
54  /// Categorizes the cause of this error.
55  ///
56  /// - `Category::Io` - failure to read or write bytes on an I/O stream
57  /// - `Category::Syntax` - input that is not syntactically valid SXP
58  /// - `Category::Data` - input data that is semantically incorrect
59  /// - `Category::Eof` - unexpected end of the input data
60  pub fn classify(&self) -> Category {
61  match self.err.code {
62  ErrorCode::Message(_) => Category::Data,
63  ErrorCode::Io(_) => Category::Io,
64  ErrorCode::EofWhileParsingList
65  | ErrorCode::EofWhileParsingObject
66  | ErrorCode::EofWhileParsingString
67  | ErrorCode::EofWhileParsingValue => Category::Eof,
68  ErrorCode::ExpectedListEnd
69  | ErrorCode::ExpectedSomeSymbol
70  | ErrorCode::ExpectedSomeValue
71  | ErrorCode::ExpectedDoubleQuote
72  | ErrorCode::InvalidEscape
73  | ErrorCode::InvalidNumber
74  | ErrorCode::NumberOutOfRange
75  | ErrorCode::InvalidUnicodeCodePoint
76  | ErrorCode::ControlCharacterWhileParsingString
77  | ErrorCode::KeyMustBeASymbol
78  | ErrorCode::LoneLeadingSurrogateInHexEscape
79  | ErrorCode::TrailingCharacters
80  | ErrorCode::UnexpectedEndOfHexEscape
81  | ErrorCode::RecursionLimitExceeded => Category::Syntax,
82  }
83  }
84 
85  /// Returns true if this error was caused by a failure to read or write
86  /// bytes on an I/O stream.
87  pub fn is_io(&self) -> bool {
88  self.classify() == Category::Io
89  }
90 
91  /// Returns true if this error was caused by input that was not
92  /// syntactically valid SXP.
93  pub fn is_syntax(&self) -> bool {
94  self.classify() == Category::Syntax
95  }
96 
97  /// Returns true if this error was caused by input data that was
98  /// semantically incorrect.
99  ///
100  /// For example, SXP containing a number is semantically incorrect when the
101  /// type being deserialized into holds a String.
102  pub fn is_data(&self) -> bool {
103  self.classify() == Category::Data
104  }
105 
106  /// Returns true if this error was caused by prematurely reaching the end of
107  /// the input data.
108  ///
109  /// Callers that process streaming input may be interested in retrying the
110  /// deserialization once more data is available.
111  pub fn is_eof(&self) -> bool {
112  self.classify() == Category::Eof
113  }
114 
115  /// The kind reported by the underlying standard library I/O error, if this
116  /// error was caused by a failure to read or write bytes on an I/O stream.
117  #[cfg(feature = "std")]
118  pub fn io_error_kind(&self) -> Option<ErrorKind> {
119  if let ErrorCode::Io(io_error) = &self.err.code {
120  Some(io_error.kind())
121  } else {
122  None
123  }
124  }
125 }
126 
/// Coarse classification of what caused a `sxp::Error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Category {
  /// Bytes could not be read from or written to an I/O stream.
  Io,

  /// The input was not syntactically valid SXP.
  Syntax,

  /// The input data was semantically incorrect.
  ///
  /// For example, SXP containing a number is semantically incorrect when the
  /// type being deserialized into holds a String.
  Data,

  /// The end of the input data was reached prematurely.
  ///
  /// Callers that process streaming input may be interested in retrying the
  /// deserialization once more data is available.
  Eof,
}
149 
#[cfg(feature = "std")]
#[allow(clippy::fallible_impl_from)]
impl From<Error> for io::Error {
  /// Turns a `sxp::Error` into an `io::Error`.
  ///
  /// A wrapped I/O error is handed back as-is. SXP syntax and data errors
  /// become `InvalidData` I/O errors, and EOF errors become `UnexpectedEof`
  /// I/O errors; in both cases the original error is kept as the payload.
  fn from(j: Error) -> Self {
    let kind = match j.classify() {
      Category::Io => {
        // `classify` reports `Io` only for `ErrorCode::Io`, so the inner
        // error can be unwrapped and returned directly.
        return match j.err.code {
          ErrorCode::Io(inner) => inner,
          _ => unreachable!(),
        };
      }
      Category::Syntax | Category::Data => ErrorKind::InvalidData,
      Category::Eof => ErrorKind::UnexpectedEof,
    };
    io::Error::new(kind, j)
  }
}
171 
172 struct ErrorImpl {
173  code: ErrorCode,
174  line: usize,
175  column: usize,
176 }
177 
178 #[allow(dead_code)]
179 pub(crate) enum ErrorCode {
180  /// Catchall for syntax error messages
181  Message(Box<str>),
182  /// Some I/O error occurred while serializing or deserializing.
183  Io(io::Error),
184  /// EOF while parsing a list.
185  EofWhileParsingList,
186  /// EOF while parsing an object.
187  EofWhileParsingObject,
188  /// EOF while parsing a string.
189  EofWhileParsingString,
190  /// EOF while parsing a SXP value.
191  EofWhileParsingValue,
192  /// Expected this character to be ')'.
193  ExpectedListEnd,
194  /// Expected to parse a symbol.
195  ExpectedSomeSymbol,
196  /// Expected this character to start a SXP value.
197  ExpectedSomeValue,
198  /// Expected this character to be '"'.
199  ExpectedDoubleQuote,
200  /// Invalid hex escape code.
201  InvalidEscape,
202  /// Invalid number.
203  InvalidNumber,
204  /// Number is bigger than the maximum value of its type.
205  NumberOutOfRange,
206  /// Invalid unicode code point.
207  InvalidUnicodeCodePoint,
208  /// Control character found while parsing a string.
209  ControlCharacterWhileParsingString,
210  /// Object key is not a symbol.
211  KeyMustBeASymbol,
212  /// Lone leading surrogate in hex escape.
213  LoneLeadingSurrogateInHexEscape,
214  /// SXP has non-whitespace trailing characters after the value.
215  TrailingCharacters,
216  /// Unexpected end of hex escape.
217  UnexpectedEndOfHexEscape,
218  /// Encountered nesting of SXP maps and arrays more than 128 layers deep.
219  RecursionLimitExceeded,
220 }
221 
222 impl Error {
223  #[cold]
224  pub(crate) fn syntax(code: ErrorCode, line: usize, column: usize) -> Self {
225  Error {
226  err: Box::new(ErrorImpl { code, line, column }),
227  }
228  }
229 
230  #[doc(hidden)]
231  #[cold]
232  pub fn io(error: io::Error) -> Self {
233  Error {
234  err: Box::new(ErrorImpl {
235  code: ErrorCode::Io(error),
236  line: 0,
237  column: 0,
238  }),
239  }
240  }
241 
242  // #[cold]
243  // pub(crate) fn fix_position<F>(self, f: F) -> Self
244  // where
245  // F: FnOnce(ErrorCode) -> Error,
246  // {
247  // if self.err.line == 0 {
248  // f(self.err.code)
249  // } else {
250  // self
251  // }
252  // }
253 }
254 
255 impl Display for ErrorCode {
256  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
257  match self {
258  ErrorCode::Message(msg) => f.write_str(msg),
259  ErrorCode::Io(err) => Display::fmt(err, f),
260  ErrorCode::EofWhileParsingList => f.write_str("EOF while parsing a list"),
261  ErrorCode::EofWhileParsingObject => {
262  f.write_str("EOF while parsing an object")
263  }
264  ErrorCode::EofWhileParsingString => {
265  f.write_str("EOF while parsing a string")
266  }
267  ErrorCode::EofWhileParsingValue => {
268  f.write_str("EOF while parsing a value")
269  }
270  ErrorCode::ExpectedListEnd => f.write_str("expected `)`"),
271  ErrorCode::ExpectedSomeSymbol => f.write_str("expected symbol"),
272  ErrorCode::ExpectedSomeValue => f.write_str("expected value"),
273  ErrorCode::ExpectedDoubleQuote => f.write_str("expected `\"`"),
274  ErrorCode::InvalidEscape => f.write_str("invalid escape"),
275  ErrorCode::InvalidNumber => f.write_str("invalid number"),
276  ErrorCode::NumberOutOfRange => f.write_str("number out of range"),
277  ErrorCode::InvalidUnicodeCodePoint => {
278  f.write_str("invalid unicode code point")
279  }
280  ErrorCode::ControlCharacterWhileParsingString => f.write_str(
281  "control character (\\u0000-\\u001F) found while parsing a string",
282  ),
283  ErrorCode::KeyMustBeASymbol => f.write_str("key must be a symbol"),
284  ErrorCode::LoneLeadingSurrogateInHexEscape => {
285  f.write_str("lone leading surrogate in hex escape")
286  }
287  ErrorCode::TrailingCharacters => f.write_str("trailing characters"),
288  ErrorCode::UnexpectedEndOfHexEscape => {
289  f.write_str("unexpected end of hex escape")
290  }
291  ErrorCode::RecursionLimitExceeded => {
292  f.write_str("recursion limit exceeded")
293  }
294  }
295  }
296 }
297 
298 impl serde::de::StdError for Error {
299  #[cfg(feature = "std")]
300  fn source(&self) -> Option<&(dyn error::Error + 'static)> {
301  match &self.err.code {
302  ErrorCode::Io(err) => err.source(),
303  _ => None,
304  }
305  }
306 }
307 
308 impl Display for Error {
309  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
310  Display::fmt(&*self.err, f)
311  }
312 }
313 
314 impl Display for ErrorImpl {
315  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
316  if self.line == 0 {
317  Display::fmt(&self.code, f)
318  } else {
319  write!(
320  f,
321  "{} at line {} column {}",
322  self.code, self.line, self.column
323  )
324  }
325  }
326 }
327 
328 // Remove two layers of verbosity from the debug representation. Humans often
329 // end up seeing this representation because it is what unwrap() shows.
330 impl Debug for Error {
331  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
332  write!(
333  f,
334  "(error {:?} (:line {} :column {}))",
335  self.err.code.to_string(),
336  self.err.line,
337  self.err.column
338  )
339  }
340 }
341 
342 impl de::Error for Error {
343  #[cold]
344  fn custom<T: Display>(msg: T) -> Error {
345  make_error(msg.to_string())
346  }
347 
348  #[cold]
349  fn invalid_type(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self {
350  if let de::Unexpected::Unit = unexp {
351  Error::custom(format_args!("(:invalid-type nil :expected {})", exp))
352  } else {
353  Error::custom(format_args!("(:invalid-type {} :expected {})", unexp, exp))
354  }
355  }
356 }
357 
358 impl ser::Error for Error {
359  #[cold]
360  fn custom<T: Display>(msg: T) -> Error {
361  make_error(msg.to_string())
362  }
363 }
364 
365 // Parse our own error message that looks like "{} at line {} column {}" to work
366 // around erased-serde round-tripping the error through de::Error::custom.
367 fn make_error(mut msg: String) -> Error {
368  let (line, column) = parse_line_col(&mut msg).unwrap_or((0, 0));
369  Error {
370  err: Box::new(ErrorImpl {
371  code: ErrorCode::Message(msg.into_boxed_str()),
372  line,
373  column,
374  }),
375  }
376 }
377 
378 fn parse_line_col(msg: &mut String) -> Option<(usize, usize)> {
379  let start_of_suffix = match msg.rfind(" at line ") {
380  Some(index) => index,
381  None => return None,
382  };
383 
384  // Find start and end of line number.
385  let start_of_line = start_of_suffix + " at line ".len();
386  let mut end_of_line = start_of_line;
387  while starts_with_digit(&msg[end_of_line..]) {
388  end_of_line += 1;
389  }
390 
391  if !msg[end_of_line..].starts_with(" column ") {
392  return None;
393  }
394 
395  // Find start and end of column number.
396  let start_of_column = end_of_line + " column ".len();
397  let mut end_of_column = start_of_column;
398  while starts_with_digit(&msg[end_of_column..]) {
399  end_of_column += 1;
400  }
401 
402  if end_of_column < msg.len() {
403  return None;
404  }
405 
406  // Parse numbers.
407  let line = match usize::from_str(&msg[start_of_line..end_of_line]) {
408  Ok(line) => line,
409  Err(_) => return None,
410  };
411  let column = match usize::from_str(&msg[start_of_column..end_of_column]) {
412  Ok(column) => column,
413  Err(_) => return None,
414  };
415 
416  msg.truncate(start_of_suffix);
417  Some((line, column))
418 }
419 
/// Returns `true` when the first byte of `slice` is an ASCII digit; an empty
/// slice yields `false`.
fn starts_with_digit(slice: &str) -> bool {
  slice
    .bytes()
    .next()
    .map_or(false, |byte| byte.is_ascii_digit())
}