use crate::units::{NonMetric, NonMetricQuantity}; enum Expect { Number, Unit, } #[derive(Debug, PartialEq)] pub enum ParseError { NotValidNumber(String), UnexpectedUnit(String), UnknownUnit(String), ExpectedUnit, } pub fn parse(input: &str) -> Result, ParseError> { let mut quantities = Vec::new(); let mut state = Expect::Number; let mut amount = None; for token in tokenize(input) { match (&state, token) { (Expect::Number, Token::Number(number)) => { let number = match parse_number(&number) { Some(number) => number, None => { return Err(ParseError::NotValidNumber(number)); } }; amount = Some(number); state = Expect::Unit; } (Expect::Number, Token::Unit(unit)) => { return Err(ParseError::UnexpectedUnit(unit)); } (Expect::Unit, Token::Number(_)) => { unreachable!("token stream can't contain two numbers in a row"); } (Expect::Unit, Token::Unit(unit)) => { let unit = match parse_unit(&unit) { Some(unit) => unit, None => { return Err(ParseError::UnknownUnit(unit)); } }; let quantity = NonMetricQuantity { amount: amount.take().expect("must have read a number to be in this state"), unit: unit, }; quantities.push(quantity); state = Expect::Number; } } } match state { Expect::Number => {}, Expect::Unit => { return Err(ParseError::ExpectedUnit); } } Ok(quantities) } fn parse_number(input: &str) -> Option { let no_whitespace: String = input.chars().filter(|c| !c.is_whitespace()).collect(); no_whitespace.parse().ok() } fn parse_unit(input: &str) -> Option { match input { // Length "inch" => Some(NonMetric::Inch), "inches" => Some(NonMetric::Inch), "in" => Some(NonMetric::Inch), "\"" => Some(NonMetric::Inch), "″" => Some(NonMetric::Inch), "foot" => Some(NonMetric::Foot), "feet" => Some(NonMetric::Foot), "ft" => Some(NonMetric::Foot), "'" => Some(NonMetric::Foot), "′" => Some(NonMetric::Foot), "yard" => Some(NonMetric::Yard), "yards" => Some(NonMetric::Yard), "yd" => Some(NonMetric::Yard), "mile" => Some(NonMetric::Mile), "miles" => Some(NonMetric::Mile), "mi" => Some(NonMetric::Mile), "m" => Some(NonMetric::Mile), // Weight "ounce" => Some(NonMetric::Ounce), "ounces" => Some(NonMetric::Ounce), "oz" => Some(NonMetric::Ounce), "pound" => Some(NonMetric::Pound), "pounds" => Some(NonMetric::Pound), "lb" => Some(NonMetric::Pound), "lbs" => Some(NonMetric::Pound), "#" => Some(NonMetric::Pound), "stone" => Some(NonMetric::Stone), "stones" => Some(NonMetric::Stone), "st" => Some(NonMetric::Stone), _ => None, } } #[derive(Debug, PartialEq)] enum Token { Number(String), Unit(String), } enum TokState { Neutral, Number, Unit, } fn tokenize(input: &str) -> Vec { let mut tokens = Vec::new(); let mut token = String::new(); let mut state = TokState::Neutral; for c in input.chars() { match state { TokState::Neutral => { if c.is_ascii_digit() || c == '-' { token.push(c); state = TokState::Number; } else if !c.is_whitespace() { token.push(c); state = TokState::Unit; } } TokState::Number => { if c.is_ascii_digit() || c.is_whitespace() || c == '.' { token.push(c); } else { tokens.push(Token::Number(token.trim().to_string())); state = TokState::Unit; token = String::new(); token.push(c); } } TokState::Unit => { if c.is_ascii_digit() || c == '-' { tokens.push(Token::Unit(token)); state = TokState::Number; token = String::new(); token.push(c); } else if !c.is_whitespace() { token.push(c); } else { tokens.push(Token::Unit(token)); state = TokState::Neutral; token = String::new(); } } } } match state { TokState::Neutral => { assert!(token.len() == 0); } TokState::Number => { tokens.push(Token::Number(token.trim().to_string())); } TokState::Unit => { tokens.push(Token::Unit(token)); } } tokens } #[cfg(test)] mod test { use super::*; #[test] fn parsing() { assert_eq!(parse(""), Ok(vec![])); assert_eq!(parse("5 ft"), Ok(vec![ NonMetricQuantity { amount: 5.0, unit: NonMetric::Foot }, ])); assert_eq!(parse("5 ft 8 in"), Ok(vec![ NonMetricQuantity { amount: 5.0, unit: NonMetric::Foot }, NonMetricQuantity { amount: 8.0, unit: NonMetric::Inch }, ])); assert_eq!(parse("20 000 lbs"), Ok(vec![ NonMetricQuantity { amount: 20_000.0, unit: NonMetric::Pound }, ])); assert_eq!(parse("12.0."), Err(ParseError::NotValidNumber("12.0.".to_string()))); assert_eq!(parse("ft"), Err(ParseError::UnexpectedUnit("ft".to_string()))); assert_eq!(parse("5 tf"), Err(ParseError::UnknownUnit("tf".to_string()))); assert_eq!(parse("12"), Err(ParseError::ExpectedUnit)); } #[test] fn numbers() { assert_eq!(parse_number(""), None); assert_eq!(parse_number("1"), Some(1.0)); assert_eq!(parse_number("1.0"), Some(1.0)); assert_eq!(parse_number("0.1"), Some(0.1)); assert_eq!(parse_number("0.1."), None); assert_eq!(parse_number("-10"), Some(-10.0)); assert_eq!(parse_number("10\t00\u{1680}000"), Some(10_00_000.0)); } #[test] fn units() { // Length assert_eq!(parse_unit("inch"), Some(NonMetric::Inch)); assert_eq!(parse_unit("inches"), Some(NonMetric::Inch)); assert_eq!(parse_unit("in"), Some(NonMetric::Inch)); assert_eq!(parse_unit("\""), Some(NonMetric::Inch)); assert_eq!(parse_unit("″"), Some(NonMetric::Inch)); assert_eq!(parse_unit("foot"), Some(NonMetric::Foot)); assert_eq!(parse_unit("feet"), Some(NonMetric::Foot)); assert_eq!(parse_unit("ft"), Some(NonMetric::Foot)); assert_eq!(parse_unit("'"), Some(NonMetric::Foot)); assert_eq!(parse_unit("′"), Some(NonMetric::Foot)); assert_eq!(parse_unit("yard"), Some(NonMetric::Yard)); assert_eq!(parse_unit("yards"), Some(NonMetric::Yard)); assert_eq!(parse_unit("yd"), Some(NonMetric::Yard)); assert_eq!(parse_unit("mile"), Some(NonMetric::Mile)); assert_eq!(parse_unit("miles"), Some(NonMetric::Mile)); assert_eq!(parse_unit("mi"), Some(NonMetric::Mile)); assert_eq!(parse_unit("m"), Some(NonMetric::Mile)); // Weight assert_eq!(parse_unit("ounce"), Some(NonMetric::Ounce)); assert_eq!(parse_unit("ounces"), Some(NonMetric::Ounce)); assert_eq!(parse_unit("oz"), Some(NonMetric::Ounce)); assert_eq!(parse_unit("pound"), Some(NonMetric::Pound)); assert_eq!(parse_unit("pounds"), Some(NonMetric::Pound)); assert_eq!(parse_unit("lb"), Some(NonMetric::Pound)); assert_eq!(parse_unit("lbs"), Some(NonMetric::Pound)); assert_eq!(parse_unit("#"), Some(NonMetric::Pound)); assert_eq!(parse_unit("stone"), Some(NonMetric::Stone)); assert_eq!(parse_unit("stones"), Some(NonMetric::Stone)); assert_eq!(parse_unit("st"), Some(NonMetric::Stone)); // Unknown unit assert_eq!(parse_unit("hutenosa"), None); } #[test] fn tokens() { assert_eq!(tokenize(""), vec![]); assert_eq!(tokenize("10"), vec![Token::Number("10".to_string())]); assert_eq!(tokenize(" 10 "), vec![Token::Number("10".to_string())]); assert_eq!(tokenize("10 000"), vec![Token::Number("10 000".to_string())]); assert_eq!(tokenize("10\t000"), vec![Token::Number("10\t000".to_string())]); assert_eq!(tokenize("10\u{1680}000"), vec![Token::Number("10\u{1680}000".to_string())]); assert_eq!(tokenize("10.0.1"), vec![Token::Number("10.0.1".to_string())]); assert_eq!(tokenize("ft"), vec![Token::Unit("ft".to_string())]); assert_eq!( tokenize("10 ft"), vec![ Token::Number("10".to_string()), Token::Unit("ft".to_string()), ] ); assert_eq!( tokenize("ft in"), vec![ Token::Unit("ft".to_string()), Token::Unit("in".to_string()), ] ); assert_eq!( tokenize("5 ft 7 in"), vec![ Token::Number("5".to_string()), Token::Unit("ft".to_string()), Token::Number("7".to_string()), Token::Unit("in".to_string()), ] ); assert_eq!( tokenize("5\"7'"), vec![ Token::Number("5".to_string()), Token::Unit("\"".to_string()), Token::Number("7".to_string()), Token::Unit("'".to_string()), ] ); assert_eq!( tokenize(" 2.2lbs "), vec![ Token::Number("2.2".to_string()), Token::Unit("lbs".to_string()), ] ); } }