changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / rust/lib/sxp/src/tok.rs

changeset 189: 3d78bed56188
parent: 0ccbbd142694
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 02 Feb 2024 19:47:03 -0500
permissions: -rw-r--r--
description: apply clippy fixes
1 //! tok.rs --- SXP Tokens
2 /// all possible Tokens. Note that we do not explicitly declare a
3 /// 'nil' variant. The modern rules of S-expressions are implicit in
4 /// the SXP specification.
5 use crate::{fmt::WriteFormatter, Error};
6 use serde::{
7  de::{self, Unexpected, Visitor},
8  forward_to_deserialize_any, Deserialize, Deserializer, Serialize, Serializer,
9 };
10 use std::{
11  fmt::{self, Debug, Display},
12  hash::{Hash, Hasher},
13  io::{self, Write},
14  str::FromStr,
15 };
16 
/// A lexical token of the SXP format, as collected from the output of a
/// type which implements Read+Format.
///
/// Payload-carrying variants own their text as a `String`; numbers are kept
/// in textual form at this level.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum Token {
  /// Start-of-list delimiter (parsed from `(`).
  ListStart,
  /// End-of-list delimiter (parsed from `)`).
  ListEnd,
  /// A symbol, stored verbatim.
  Sym(String),
  /// A string, stored without its surrounding double quotes.
  Str(String),
  /// A number, stored as the text it was read from.
  Num(String),
}
27 
28 impl Display for Token {
29  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
30  match self {
31  Token::ListStart => f.write_str("( "),
32  Token::ListEnd => f.write_str(")\n"),
33  Token::Sym(s) => f.write_str(&format!("{} ", &s)),
34  Token::Str(s) => f.write_str(&format!("\"{}\" ", &s)),
35  Token::Num(n) => Display::fmt(&n, f),
36  }
37  }
38 }
39 
40 impl FromStr for Token {
41  type Err = Error;
42  fn from_str(s: &str) -> Result<Self, Self::Err> {
43  match s {
44  "(" => Ok(Token::ListStart),
45  ")" => Ok(Token::ListEnd),
46  s => {
47  if s.starts_with('\"') {
48  Ok(Token::Str(s.trim_matches('"').to_owned()))
49  } else if s.chars().next().unwrap().is_numeric() {
50  Ok(Token::Num(s.to_owned()))
51  } else {
52  Ok(Token::Sym(s.to_owned()))
53  }
54  }
55  }
56  }
57 }
58 
/// Potential Number: the concrete representation behind a number.
///
/// Exactly one of an unsigned integer, a signed integer or a float.
#[derive(Clone, Copy)]
pub enum PotNum {
  /// Integer held as `u64`.
  PosInt(u64),
  /// Integer held as `i64`.
  NegInt(i64),
  /// Floating point value held as `f64`.
  Float(f64),
  // Complex,Rational,
}
67 
68 impl PartialEq for PotNum {
69  fn eq(&self, other: &Self) -> bool {
70  match (self, other) {
71  (Self::PosInt(a), Self::PosInt(b)) => a == b,
72  (Self::NegInt(a), Self::NegInt(b)) => a == b,
73  (Self::Float(a), Self::Float(b)) => a == b,
74  _ => false,
75  }
76  }
77 }
78 
79 impl Eq for PotNum {}
80 
81 impl Hash for PotNum {
82  fn hash<H: Hasher>(&self, h: &mut H) {
83  match *self {
84  PotNum::PosInt(i) => i.hash(h),
85  PotNum::NegInt(i) => i.hash(h),
86  PotNum::Float(f) => {
87  if f == 0.0f64 {
88  // use same hash for +/-0.0
89  0.0f64.to_bits().hash(h);
90  } else {
91  f.to_bits().hash(h);
92  }
93  }
94  }
95  }
96 }
97 
/// String: newtype around an owned `String`.
#[derive(Clone, Eq, Hash, PartialEq)]
pub struct Str(std::string::String);
/// Symbol: newtype around an owned `String`.
#[derive(Clone, Eq, Hash, PartialEq)]
pub struct Sym(std::string::String);
104 /// Number
105 #[derive(Clone, PartialEq, Eq, Hash)]
106 pub struct Num(PotNum);
107 
108 impl Num {
109  /// Returns true if the `Number` is an integer between `i64::MIN` and
110  /// `i64::MAX`.
111  ///
112  /// For any Number on which `is_i64` returns true, `as_i64` is guaranteed to
113  /// return the integer value.
114  #[inline]
115  pub fn is_i64(&self) -> bool {
116  // #[cfg(not(feature = "arbitrary_precision"))]
117  match self.0 {
118  PotNum::PosInt(v) => v <= i64::max_value() as u64,
119  PotNum::NegInt(_) => true,
120  PotNum::Float(_) => false,
121  }
122  // #[cfg(feature = "arbitrary_precision")]
123  // self.as_i64().is_some()
124  }
125 
126  /// Returns true if the `Number` is an integer between zero and `u64::MAX`.
127  ///
128  /// For any Number on which `is_u64` returns true, `as_u64` is guaranteed to
129  /// return the integer value.
130  #[inline]
131  pub fn is_u64(&self) -> bool {
132  // #[cfg(not(feature = "arbitrary_precision"))]
133  match self.0 {
134  PotNum::PosInt(_) => true,
135  PotNum::NegInt(_) | PotNum::Float(_) => false,
136  }
137  // #[cfg(feature = "arbitrary_precision")]
138  // self.as_u64().is_some()
139  }
140 
141  /// Returns true if the `Number` can be represented by f64.
142  ///
143  /// For any Number on which `is_f64` returns true, `as_f64` is guaranteed to
144  /// return the floating point value.
145  ///
146  /// Currently this function returns true if and only if both `is_i64` and
147  /// `is_u64` return false but this is not a guarantee in the future.
148  #[inline]
149  pub fn is_f64(&self) -> bool {
150  // #[cfg(not(feature = "arbitrary_precision"))]
151  match self.0 {
152  PotNum::Float(_) => true,
153  PotNum::PosInt(_) | PotNum::NegInt(_) => false,
154  }
155  // #[cfg(feature = "arbitrary_precision")]
156  // {
157  // for c in self.n.chars() {
158  // if c == '.' || c == 'e' || c == 'E' {
159  // return self.n.parse::<f64>().ok().map_or(false,
160  // f64::is_finite); }
161  // }
162  // false
163  // }
164  }
165 
166  /// If the `Number` is an integer, represent it as i64 if possible. Returns
167  /// None otherwise.
168  #[inline]
169  pub fn as_i64(&self) -> Option<i64> {
170  // #[cfg(not(feature = "arbitrary_precision"))]
171  match self.0 {
172  PotNum::PosInt(n) => {
173  if n <= i64::max_value() as u64 {
174  Some(n as i64)
175  } else {
176  None
177  }
178  }
179  PotNum::NegInt(n) => Some(n),
180  PotNum::Float(_) => None,
181  }
182  // #[cfg(feature = "arbitrary_precision")]
183  // self.n.parse().ok()
184  }
185 
186  /// If the `Number` is an integer, represent it as u64 if possible. Returns
187  /// None otherwise.
188  #[inline]
189  pub fn as_u64(&self) -> Option<u64> {
190  // #[cfg(not(feature = "arbitrary_precision"))]
191  match self.0 {
192  PotNum::PosInt(n) => Some(n),
193  PotNum::NegInt(_) | PotNum::Float(_) => None,
194  }
195  // #[cfg(feature = "arbitrary_precision")]
196  // self.0.parse().ok()
197  }
198 
199  /// Represents the number as f64 if possible. Returns None otherwise.
200  #[inline]
201  pub fn as_f64(&self) -> Option<f64> {
202  // #[cfg(not(feature = "arbitrary_precision"))]
203  match self.0 {
204  PotNum::PosInt(n) => Some(n as f64),
205  PotNum::NegInt(n) => Some(n as f64),
206  PotNum::Float(n) => Some(n),
207  }
208  // #[cfg(feature = "arbitrary_precision")]
209  // self.n.parse::<f64>().ok().filter(|float| float.is_finite())
210  }
211 
212  /// Converts a finite `f64` to a `Number`. Infinite or NaN values are not SXP
213  /// numbers.
214  #[inline]
215  pub fn from_f64(f: f64) -> Option<Num> {
216  if f.is_finite() {
217  let n = PotNum::Float(f);
218  // {
219  // #[cfg(not(feature = "arbitrary_precision"))]
220  //#[cfg(feature = "arbitrary_precision")]
221  // {
222  // ryu::Buffer::new().format_finite(f).to_owned()
223  // }
224  // };
225  Some(Num(n))
226  } else {
227  None
228  }
229  }
230 
231  pub fn as_f32(&self) -> Option<f32> {
232  // #[cfg(not(feature = "arbitrary_precision"))]
233  match self.0 {
234  PotNum::PosInt(n) => Some(n as f32),
235  PotNum::NegInt(n) => Some(n as f32),
236  PotNum::Float(n) => Some(n as f32),
237  }
238  // #[cfg(feature = "arbitrary_precision")]
239  // self.n.parse::<f32>().ok().filter(|float| float.is_finite())
240  }
241 
242  pub fn from_f32(f: f32) -> Option<Num> {
243  if f.is_finite() {
244  let n = PotNum::Float(f as f64);
245  // {
246  // #[cfg(not(feature = "arbitrary_precision"))]
247  // #[cfg(feature = "arbitrary_precision")]
248  // {
249  // ryu::Buffer::new().format_finite(f).to_owned()
250  // }
251  //};
252  Some(Num(n))
253  } else {
254  None
255  }
256  }
257 }
258 
259 impl Display for Num {
260  fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
261  match self.0 {
262  PotNum::PosInt(u) => formatter.write_str(itoa::Buffer::new().format(u)),
263  PotNum::NegInt(i) => formatter.write_str(itoa::Buffer::new().format(i)),
264  PotNum::Float(f) => {
265  formatter.write_str(ryu::Buffer::new().format_finite(f))
266  }
267  }
268  }
269 }
270 
271 impl Debug for Num {
272  #[inline]
273  fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
274  write!(formatter, "NUM({})", self)
275  }
276 }
277 
278 impl Serialize for Num {
279  fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
280  where
281  S: Serializer,
282  {
283  match self.0 {
284  PotNum::PosInt(u) => serializer.serialize_u64(u),
285  PotNum::NegInt(i) => serializer.serialize_i64(i),
286  PotNum::Float(f) => serializer.serialize_f64(f),
287  }
288  }
289 }
290 
291 impl<'de> Deserialize<'de> for Num {
292  #[inline]
293  fn deserialize<D: Deserializer<'de>>(
294  deserializer: D,
295  ) -> Result<Num, D::Error> {
296  struct NumVisitor;
297  impl<'de> Visitor<'de> for NumVisitor {
298  type Value = Num;
299  fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
300  formatter.write_str("a SXP number")
301  }
302  #[inline]
303  fn visit_i64<E>(self, value: i64) -> Result<Num, E> {
304  Ok(value.into())
305  }
306  #[inline]
307  fn visit_u64<E>(self, value: u64) -> Result<Num, E> {
308  Ok(value.into())
309  }
310  #[inline]
311  fn visit_f64<E: de::Error>(self, value: f64) -> Result<Num, E> {
312  Num::from_f64(value)
313  .ok_or_else(|| de::Error::custom("not a SXP number"))
314  }
315  }
316  deserializer.deserialize_any(NumVisitor)
317  }
318 }
319 
// Expands to the `deserialize_any` method of a `Deserializer` impl whose
// `self.0` is a `PotNum`: the visitor is handed the primitive matching the
// stored variant. The `owned` and `ref` entry points expand to the same
// method; the bracketed tokens passed to `@expand` are accepted but unused.
macro_rules! deserialize_any {
  (@expand [$($unused:tt)*]) => {
    #[inline]
    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Error>
    where
      V: Visitor<'de>,
    {
      match self.0 {
        PotNum::Float(f) => visitor.visit_f64(f),
        PotNum::NegInt(i) => visitor.visit_i64(i),
        PotNum::PosInt(u) => visitor.visit_u64(u),
      }
    }
  };

  (owned) => {
    deserialize_any!(@expand [n]);
  };

  (ref) => {
    deserialize_any!(@expand [n.clone()]);
  };
}
343 
// Expands to a typed `deserialize_*` method that simply defers to
// `deserialize_any`. The `$visit` identifier is accepted but not used by
// the expansion. The method is only compiled when the
// `arbitrary_precision` feature is disabled.
macro_rules! deserialize_number {
  ($deserialize:ident => $visit:ident) => {
    #[cfg(not(feature = "arbitrary_precision"))]
    fn $deserialize<V>(self, visitor: V) -> Result<V::Value, Error>
    where
      V: Visitor<'de>,
    {
      self.deserialize_any(visitor)
    }
  };
}
355 
356 impl<'de> Deserializer<'de> for Num {
357  type Error = Error;
358 
359  deserialize_any!(owned);
360 
361  deserialize_number!(deserialize_i8 => visit_i8);
362  deserialize_number!(deserialize_i16 => visit_i16);
363  deserialize_number!(deserialize_i32 => visit_i32);
364  deserialize_number!(deserialize_i64 => visit_i64);
365  deserialize_number!(deserialize_i128 => visit_i128);
366  deserialize_number!(deserialize_u8 => visit_u8);
367  deserialize_number!(deserialize_u16 => visit_u16);
368  deserialize_number!(deserialize_u32 => visit_u32);
369  deserialize_number!(deserialize_u64 => visit_u64);
370  deserialize_number!(deserialize_u128 => visit_u128);
371  deserialize_number!(deserialize_f32 => visit_f32);
372  deserialize_number!(deserialize_f64 => visit_f64);
373 
374  forward_to_deserialize_any! {
375  bool char str string bytes byte_buf option unit unit_struct
376  newtype_struct seq tuple tuple_struct map struct enum identifier
377  ignored_any
378  }
379 }
380 
381 impl<'de, 'a> Deserializer<'de> for &'a Num {
382  type Error = Error;
383 
384  deserialize_any!(ref);
385 
386  deserialize_number!(deserialize_i8 => visit_i8);
387  deserialize_number!(deserialize_i16 => visit_i16);
388  deserialize_number!(deserialize_i32 => visit_i32);
389  deserialize_number!(deserialize_i64 => visit_i64);
390  deserialize_number!(deserialize_i128 => visit_i128);
391  deserialize_number!(deserialize_u8 => visit_u8);
392  deserialize_number!(deserialize_u16 => visit_u16);
393  deserialize_number!(deserialize_u32 => visit_u32);
394  deserialize_number!(deserialize_u64 => visit_u64);
395  deserialize_number!(deserialize_u128 => visit_u128);
396  deserialize_number!(deserialize_f32 => visit_f32);
397  deserialize_number!(deserialize_f64 => visit_f64);
398 
399  forward_to_deserialize_any! {
400  bool char str string bytes byte_buf option unit unit_struct
401  newtype_struct seq tuple tuple_struct map struct enum identifier
402  ignored_any
403  }
404 }
405 
// Implements `From<$ty> for Num` for each listed unsigned integer type.
// The value is widened to `u64` and stored as `PotNum::PosInt`.
macro_rules! impl_from_unsigned {
  ($($ty:ty),*) => {
    $(
      impl From<$ty> for Num {
        #[inline]
        fn from(u: $ty) -> Self {
          Num(PotNum::PosInt(u as u64))
        }
      }
    )*
  };
}
421 
// Implements `From<$ty> for Num` for each listed signed integer type.
// Negative values are stored as `PotNum::NegInt`; zero and positive values
// are stored as `PotNum::PosInt`.
macro_rules! impl_from_signed {
  ($($ty:ty),*) => {
    $(
      impl From<$ty> for Num {
        #[inline]
        fn from(i: $ty) -> Self {
          if i < 0 {
            Num(PotNum::NegInt(i as i64))
          } else {
            Num(PotNum::PosInt(i as u64))
          }
        }
      }
    )*
  };
}
441 
442 impl_from_unsigned!(u8, u16, u32, u64, usize);
443 impl_from_signed!(i8, i16, i32, i64, isize);
444 
445 impl Num {
446  #[cold]
447  pub fn unexpected(&self) -> Unexpected {
448  match self.0 {
449  PotNum::PosInt(u) => Unexpected::Unsigned(u),
450  PotNum::NegInt(i) => Unexpected::Signed(i),
451  PotNum::Float(f) => Unexpected::Float(f),
452  }
453  }
454 }
455 
/// A character escape code, expressed as a type instead of a raw byte.
pub enum CharEscape {
  /// Escaped double quote `"`.
  Quote,
  /// Escaped reverse solidus (backslash) `\`.
  ReverseSolidus,
  /// Escaped solidus (forward slash) `/`.
  Solidus,
  /// Escaped backspace (usually written `\b`).
  Backspace,
  /// Escaped form feed (usually written `\f`).
  FormFeed,
  /// Escaped line feed (usually written `\n`).
  LineFeed,
  /// Escaped carriage return (usually written `\r`).
  CarriageReturn,
  /// Escaped tab (usually written `\t`).
  Tab,
  /// Escaped ASCII control character (usually written `\u00XX`, where
  /// `XX` are two hex characters); carries the original byte.
  AsciiControl(u8),
}
478 
479 impl CharEscape {
480  #[inline]
481  fn from_escape_table(escape: u8, byte: u8) -> CharEscape {
482  match escape {
483  self::BB => CharEscape::Backspace,
484  self::TT => CharEscape::Tab,
485  self::NN => CharEscape::LineFeed,
486  self::FF => CharEscape::FormFeed,
487  self::RR => CharEscape::CarriageReturn,
488  self::QU => CharEscape::Quote,
489  self::BS => CharEscape::ReverseSolidus,
490  self::UU => CharEscape::AsciiControl(byte),
491  _ => unreachable!(),
492  }
493  }
494 }
495 
496 pub fn format_escaped_str<W, F>(
497  writer: &mut W,
498  formatter: &mut F,
499  value: &str,
500 ) -> io::Result<()>
501 where
502  W: ?Sized + Write,
503  F: ?Sized + WriteFormatter,
504 {
505  e!(formatter.begin_string(writer));
506  e!(format_escaped_str_contents(writer, formatter, value));
507  formatter.end_string(writer)
508 }
509 
510 pub fn format_escaped_str_contents<W, F>(
511  writer: &mut W,
512  formatter: &mut F,
513  value: &str,
514 ) -> io::Result<()>
515 where
516  W: ?Sized + io::Write,
517  F: ?Sized + WriteFormatter,
518 {
519  let bytes = value.as_bytes();
520 
521  let mut start = 0;
522 
523  for (i, &byte) in bytes.iter().enumerate() {
524  let escape = ESCAPE[byte as usize];
525  if escape == 0 {
526  continue;
527  }
528 
529  if start < i {
530  e!(formatter.write_string_fragment(writer, &value[start..i]));
531  }
532 
533  let char_escape = CharEscape::from_escape_table(escape, byte);
534  e!(formatter.write_char_escape(writer, char_escape));
535 
536  start = i + 1;
537  }
538 
539  if start == bytes.len() {
540  return Ok(());
541  }
542 
543  formatter.write_string_fragment(writer, &value[start..])
544 }
545 
// Escape codes stored in `ESCAPE`. Each is the character that follows the
// backslash in the escaped form; `UU` marks the generic `\u00XX` form and
// `__` (zero) marks a byte that is not escaped.
const BB: u8 = b'b'; // \x08
const TT: u8 = b't'; // \x09
const NN: u8 = b'n'; // \x0A
const FF: u8 = b'f'; // \x0C
const RR: u8 = b'r'; // \x0D
const QU: u8 = b'"'; // \x22
const BS: u8 = b'\\'; // \x5C
const UU: u8 = b'u'; // \x00...\x1F except the ones above
const __: u8 = 0;

// Builds the `ESCAPE` table at compile time.
const fn build_escape_table() -> [u8; 256] {
  let mut table = [__; 256];

  // Every byte in \x00...\x1F starts out with the generic `\u00XX` code ...
  let mut byte = 0;
  while byte < 0x20 {
    table[byte] = UU;
    byte += 1;
  }

  // ... and the bytes with a dedicated short escape are then overridden.
  table[0x08] = BB;
  table[0x09] = TT;
  table[0x0A] = NN;
  table[0x0C] = FF;
  table[0x0D] = RR;
  table[0x22] = QU;
  table[0x5C] = BS;

  table
}

// Lookup table of escape sequences. A value of b'x' at index i means that byte
// i is escaped as "\x" in SXP. A value of 0 means that byte i is not escaped.
static ESCAPE: [u8; 256] = build_escape_table();