From 7d82e46fd72673bfdd42e58054a235e3a0cef974 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juhani=20Krekel=C3=A4?=
Date: Sun, 14 May 2023 03:04:47 +0300
Subject: [PATCH] Implement parsing

---
Reviewer notes (text in this area is ignored by git-am):
- src/parse.rs adds parse(), which consumes the tokens produced by
  tokenize() as alternating number/unit pairs and reports anything else
  through ParseError.
- tokenize() keeps whitespace and '.' inside a number token (trimming only
  the ends), so "10 000" and "10.0.1" each arrive at parse() as one Number
  token and are validated there.

 src/lib.rs   |   6 +-
 src/main.rs  |  10 +-
 src/parse.rs | 288 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 297 insertions(+), 7 deletions(-)
 create mode 100644 src/parse.rs

diff --git a/src/lib.rs b/src/lib.rs
index 42089ba..3eb2031 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,6 +1,6 @@
-mod units;
 mod conversions;
-
-pub use units::{NonMetric, NonMetricQuantity};
+mod parse;
+mod units;
 
 pub use conversions::convert;
+pub use parse::{parse, ParseError};
diff --git a/src/main.rs b/src/main.rs
index d3f6a74..513f0d8 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,9 @@
-use metrify::{NonMetric, NonMetricQuantity};
-use metrify::convert;
+use metrify::parse;
 
 fn main() {
-    let quantity = NonMetricQuantity { amount: 6.0, unit: NonMetric::Foot };
-    dbg!(convert(quantity));
+    let parsed = parse("12 stones 1 pound");
+    match parsed {
+        Ok(parsed) => println!("{parsed:?}"),
+        Err(err) => eprintln!("{err:?}"),
+    }
 }
diff --git a/src/parse.rs b/src/parse.rs
new file mode 100644
index 0000000..c6308bb
--- /dev/null
+++ b/src/parse.rs
@@ -0,0 +1,288 @@
+use crate::units::{NonMetric, NonMetricQuantity};
+
+enum Expect {
+    Number,
+    Unit,
+}
+
+#[derive(Debug, PartialEq)]
+pub enum ParseError {
+    NotValidNumber(String),
+    UnexpectedUnit(String),
+    UnknownUnit(String),
+    ExpectedUnit,
+}
+
+pub fn parse(input: &str) -> Result<Vec<NonMetricQuantity>, ParseError> {
+    let mut quantities = Vec::new();
+    let mut state = Expect::Number;
+    let mut amount = None;
+
+    for token in tokenize(input) {
+        match (&state, token) {
+            (Expect::Number, Token::Number(number)) => {
+                let number = match number.trim().parse() {
+                    Ok(number) => number,
+                    Err(_) => {
+                        return Err(ParseError::NotValidNumber(number));
+                    }
+                };
+                amount = Some(number);
+                state = Expect::Unit;
+            }
+            (Expect::Number, Token::Unit(unit)) => {
+                return Err(ParseError::UnexpectedUnit(unit));
+            }
+            (Expect::Unit, Token::Number(_)) => {
+                unreachable!("token stream can't contain two numbers in a row");
+            }
+            (Expect::Unit, Token::Unit(unit)) => {
+                let unit = match parse_unit(&unit) {
+                    Some(unit) => unit,
+                    None => {
+                        return Err(ParseError::UnknownUnit(unit));
+                    }
+                };
+                let quantity = NonMetricQuantity {
+                    amount: amount.take().expect("must have read a number to be in this state"),
+                    unit: unit,
+                };
+                quantities.push(quantity);
+                state = Expect::Number;
+            }
+        }
+    }
+
+    match state {
+        Expect::Number => {},
+        Expect::Unit => {
+            return Err(ParseError::ExpectedUnit);
+        }
+    }
+
+    Ok(quantities)
+}
+
+fn parse_unit(input: &str) -> Option<NonMetric> {
+    match input {
+        // Length
+        "inch" => Some(NonMetric::Inch),
+        "inches" => Some(NonMetric::Inch),
+        "in" => Some(NonMetric::Inch),
+        "\"" => Some(NonMetric::Inch),
+        "″" => Some(NonMetric::Inch),
+
+        "foot" => Some(NonMetric::Foot),
+        "feet" => Some(NonMetric::Foot),
+        "ft" => Some(NonMetric::Foot),
+        "'" => Some(NonMetric::Foot),
+        "′" => Some(NonMetric::Foot),
+
+        "yard" => Some(NonMetric::Yard),
+        "yards" => Some(NonMetric::Yard),
+        "yd" => Some(NonMetric::Yard),
+
+        "mile" => Some(NonMetric::Mile),
+        "miles" => Some(NonMetric::Mile),
+        "mi" => Some(NonMetric::Mile),
+        "m" => Some(NonMetric::Mile),
+
+        // Weight
+        "ounce" => Some(NonMetric::Ounce),
+        "ounces" => Some(NonMetric::Ounce),
+        "oz" => Some(NonMetric::Ounce),
+
+        "pound" => Some(NonMetric::Pound),
+        "pounds" => Some(NonMetric::Pound),
+        "lb" => Some(NonMetric::Pound),
+        "lbs" => Some(NonMetric::Pound),
+        "#" => Some(NonMetric::Pound),
+
+        "stone" => Some(NonMetric::Stone),
+        "stones" => Some(NonMetric::Stone),
+        "st" => Some(NonMetric::Stone),
+
+        _ => None,
+    }
+}
+
+#[derive(Debug, PartialEq)]
+enum Token {
+    Number(String),
+    Unit(String),
+}
+
+enum TokState {
+    Neutral,
+    Number,
+    Unit,
+}
+
+fn tokenize(input: &str) -> Vec<Token> {
+    let mut tokens = Vec::new();
+    let mut token = String::new();
+    let mut state = TokState::Neutral;
+
+    for c in input.chars() {
+        match state {
+            TokState::Neutral => {
+                if c.is_ascii_digit() || c == '-' {
+                    token.push(c);
+                    state = TokState::Number;
+                } else if !c.is_whitespace() {
+                    token.push(c);
+                    state = TokState::Unit;
+                }
+            }
+            TokState::Number => {
+                if c.is_ascii_digit() ||
+                   c.is_whitespace() ||
+                   c == '.' {
+                    token.push(c);
+                } else {
+                    tokens.push(Token::Number(token.trim().to_string()));
+                    state = TokState::Unit;
+                    token = String::new();
+                    token.push(c);
+                }
+            }
+            TokState::Unit => {
+                if c.is_ascii_digit() || c == '-' {
+                    tokens.push(Token::Unit(token));
+                    state = TokState::Number;
+                    token = String::new();
+                    token.push(c);
+                }
+                else if !c.is_whitespace() {
+                    token.push(c);
+                } else {
+                    tokens.push(Token::Unit(token));
+                    state = TokState::Neutral;
+                    token = String::new();
+                }
+            }
+        }
+    }
+
+    match state {
+        TokState::Neutral => { assert!(token.len() == 0); }
+        TokState::Number => { tokens.push(Token::Number(token.trim().to_string())); }
+        TokState::Unit => { tokens.push(Token::Unit(token)); }
+    }
+
+    tokens
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn parsing() {
+        assert_eq!(parse(""), Ok(vec![]));
+        assert_eq!(parse("5 ft"), Ok(vec![
+            NonMetricQuantity { amount: 5.0, unit: NonMetric::Foot },
+        ]));
+        assert_eq!(parse("5 ft 8 in"), Ok(vec![
+            NonMetricQuantity { amount: 5.0, unit: NonMetric::Foot },
+            NonMetricQuantity { amount: 8.0, unit: NonMetric::Inch },
+        ]));
+
+        assert_eq!(parse("12.0."), Err(ParseError::NotValidNumber("12.0.".to_string())));
+        assert_eq!(parse("ft"), Err(ParseError::UnexpectedUnit("ft".to_string())));
+        assert_eq!(parse("5 tf"), Err(ParseError::UnknownUnit("tf".to_string())));
+        assert_eq!(parse("12"), Err(ParseError::ExpectedUnit));
+    }
+
+    #[test]
+    fn units() {
+        // Length
+        assert_eq!(parse_unit("inch"), Some(NonMetric::Inch));
+        assert_eq!(parse_unit("inches"), Some(NonMetric::Inch));
+        assert_eq!(parse_unit("in"), Some(NonMetric::Inch));
+        assert_eq!(parse_unit("\""), Some(NonMetric::Inch));
+        assert_eq!(parse_unit("″"), Some(NonMetric::Inch));
+
+        assert_eq!(parse_unit("foot"), Some(NonMetric::Foot));
+        assert_eq!(parse_unit("feet"), Some(NonMetric::Foot));
+        assert_eq!(parse_unit("ft"), Some(NonMetric::Foot));
+        assert_eq!(parse_unit("'"), Some(NonMetric::Foot));
+        assert_eq!(parse_unit("′"), Some(NonMetric::Foot));
+
+        assert_eq!(parse_unit("yard"), Some(NonMetric::Yard));
+        assert_eq!(parse_unit("yards"), Some(NonMetric::Yard));
+        assert_eq!(parse_unit("yd"), Some(NonMetric::Yard));
+
+        assert_eq!(parse_unit("mile"), Some(NonMetric::Mile));
+        assert_eq!(parse_unit("miles"), Some(NonMetric::Mile));
+        assert_eq!(parse_unit("mi"), Some(NonMetric::Mile));
+        assert_eq!(parse_unit("m"), Some(NonMetric::Mile));
+
+        // Weight
+        assert_eq!(parse_unit("ounce"), Some(NonMetric::Ounce));
+        assert_eq!(parse_unit("ounces"), Some(NonMetric::Ounce));
+        assert_eq!(parse_unit("oz"), Some(NonMetric::Ounce));
+
+        assert_eq!(parse_unit("pound"), Some(NonMetric::Pound));
+        assert_eq!(parse_unit("pounds"), Some(NonMetric::Pound));
+        assert_eq!(parse_unit("lb"), Some(NonMetric::Pound));
+        assert_eq!(parse_unit("lbs"), Some(NonMetric::Pound));
+        assert_eq!(parse_unit("#"), Some(NonMetric::Pound));
+
+        assert_eq!(parse_unit("stone"), Some(NonMetric::Stone));
+        assert_eq!(parse_unit("stones"), Some(NonMetric::Stone));
+        assert_eq!(parse_unit("st"), Some(NonMetric::Stone));
+
+        // Unknown unit
+        assert_eq!(parse_unit("hutenosa"), None);
+    }
+
+    #[test]
+    fn tokens() {
+        assert_eq!(tokenize(""), vec![]);
+        assert_eq!(tokenize("10"), vec![Token::Number("10".to_string())]);
+        assert_eq!(tokenize(" 10 "), vec![Token::Number("10".to_string())]);
+        assert_eq!(tokenize("10 000"), vec![Token::Number("10 000".to_string())]);
+        assert_eq!(tokenize("10.0.1"), vec![Token::Number("10.0.1".to_string())]);
+        assert_eq!(tokenize("ft"), vec![Token::Unit("ft".to_string())]);
+        assert_eq!(
+            tokenize("10 ft"),
+            vec![
+                Token::Number("10".to_string()),
+                Token::Unit("ft".to_string()),
+            ]
+        );
+        assert_eq!(
+            tokenize("ft in"),
+            vec![
+                Token::Unit("ft".to_string()),
+                Token::Unit("in".to_string()),
+            ]
+        );
+        assert_eq!(
+            tokenize("5 ft 7 in"),
+            vec![
+                Token::Number("5".to_string()),
+                Token::Unit("ft".to_string()),
+                Token::Number("7".to_string()),
+                Token::Unit("in".to_string()),
+            ]
+        );
+        assert_eq!(
+            tokenize("5\"7'"),
+            vec![
+                Token::Number("5".to_string()),
+                Token::Unit("\"".to_string()),
+                Token::Number("7".to_string()),
+                Token::Unit("'".to_string()),
+            ]
+        );
+        assert_eq!(
+            tokenize(" 2.2lbs "),
+            vec![
+                Token::Number("2.2".to_string()),
+                Token::Unit("lbs".to_string()),
+            ]
+        );
+    }
+}