metrify/src/parse.rs

289 lines
7.4 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use crate::units::{NonMetric, NonMetricQuantity};
/// Kind of token [`parse`] requires next while walking the token stream.
enum Expect {
    /// The next token has to be a number (the amount of a quantity).
    Number,
    /// A number has been read; the next token has to be its unit.
    Unit,
}
/// Reasons why [`parse`] can reject its input.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so callers can
/// print it directly or propagate it as a boxed error.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// A number token could not be converted to a numeric amount.
    /// Carries the offending text.
    NotValidNumber(String),
    /// A unit appeared where a number was expected. Carries the unit text.
    UnexpectedUnit(String),
    /// A unit token did not match any known unit spelling. Carries the unit text.
    UnknownUnit(String),
    /// The input ended right after a number, with no unit following it.
    ExpectedUnit,
}

impl std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Payloads are printed with `{:?}` so that whitespace or quote
        // characters inside the offending text stay visible.
        match self {
            ParseError::NotValidNumber(text) => write!(f, "not a valid number: {:?}", text),
            ParseError::UnexpectedUnit(unit) => {
                write!(f, "unexpected unit {:?}: expected a number first", unit)
            }
            ParseError::UnknownUnit(unit) => write!(f, "unknown unit: {:?}", unit),
            ParseError::ExpectedUnit => write!(f, "expected a unit after the last number"),
        }
    }
}

impl std::error::Error for ParseError {}
/// Parses text such as `"5 ft 8 in"` into a list of non-metric quantities.
///
/// The input is split with `tokenize` and must then consist of alternating
/// number and unit tokens, starting with a number. Each number/unit pair
/// becomes one `NonMetricQuantity`, in input order. Empty input yields an
/// empty list.
///
/// # Errors
///
/// * `ParseError::NotValidNumber` - a number token could not be parsed.
/// * `ParseError::UnexpectedUnit` - a unit appeared where a number was expected.
/// * `ParseError::UnknownUnit` - `parse_unit` did not recognise a unit token.
/// * `ParseError::ExpectedUnit` - the input ended after a number.
///
/// # Panics
///
/// Only if the tokenizer emits two number tokens in a row, or a unit is
/// processed without a preceding amount; both are treated as internal
/// invariant violations.
pub fn parse(input: &str) -> Result<Vec<NonMetricQuantity>, ParseError> {
    let mut parsed = Vec::new();
    let mut expecting = Expect::Number;
    // Amount read from the most recent number token, waiting for its unit.
    let mut pending_amount = None;

    for token in tokenize(input) {
        // Every arm either bails out or yields the state for the next token.
        expecting = match (expecting, token) {
            (Expect::Number, Token::Number(text)) => {
                let value = text
                    .trim()
                    .parse()
                    .map_err(|_| ParseError::NotValidNumber(text))?;
                pending_amount = Some(value);
                Expect::Unit
            }
            (Expect::Number, Token::Unit(name)) => {
                return Err(ParseError::UnexpectedUnit(name));
            }
            (Expect::Unit, Token::Number(_)) => {
                unreachable!("token stream can't contain two numbers in a row")
            }
            (Expect::Unit, Token::Unit(name)) => {
                let unit = parse_unit(&name).ok_or_else(|| ParseError::UnknownUnit(name))?;
                let amount = pending_amount
                    .take()
                    .expect("must have read a number to be in this state");
                parsed.push(NonMetricQuantity { amount, unit });
                Expect::Number
            }
        };
    }

    // A trailing number without a unit is an error.
    match expecting {
        Expect::Number => Ok(parsed),
        Expect::Unit => Err(ParseError::ExpectedUnit),
    }
}
/// Maps the textual spelling of a unit to its `NonMetric` variant.
///
/// Matching is exact and case-sensitive. Returns `None` for any spelling
/// that is not listed, including the empty string.
///
/// Besides the ASCII `"` and `'` shorthands, inches and feet are accepted in
/// their typographic forms: the double prime / prime (U+2033 / U+2032) and
/// the right double / single quotation marks (U+201D / U+2019). Those
/// literals are written as `\u{...}` escapes so that they cannot be confused
/// with, or silently replaced by, their ASCII look-alikes.
fn parse_unit(input: &str) -> Option<NonMetric> {
    let unit = match input {
        // Length
        "inch" | "inches" | "in" | "\"" => NonMetric::Inch,
        // U+2033 DOUBLE PRIME, U+201D RIGHT DOUBLE QUOTATION MARK
        "\u{2033}" | "\u{201D}" => NonMetric::Inch,
        "foot" | "feet" | "ft" | "'" => NonMetric::Foot,
        // U+2032 PRIME, U+2019 RIGHT SINGLE QUOTATION MARK
        "\u{2032}" | "\u{2019}" => NonMetric::Foot,
        "yard" | "yards" | "yd" => NonMetric::Yard,
        // A bare "m" is read as miles here.
        "mile" | "miles" | "mi" | "m" => NonMetric::Mile,
        // Weight
        "ounce" | "ounces" | "oz" => NonMetric::Ounce,
        "pound" | "pounds" | "lb" | "lbs" | "#" => NonMetric::Pound,
        "stone" | "stones" | "st" => NonMetric::Stone,
        _ => return None,
    };
    Some(unit)
}
/// A lexical piece produced by `tokenize`.
#[derive(Debug, PartialEq)]
enum Token {
    /// Raw text of a number; not yet validated as a numeric value.
    Number(String),
    /// Raw text of a unit; not yet looked up.
    Unit(String),
}
/// Internal state of the character-level scanner in `tokenize`.
enum TokState {
    /// Between tokens; nothing has been buffered yet.
    Neutral,
    /// Currently collecting the characters of a number.
    Number,
    /// Currently collecting the characters of a unit.
    Unit,
}
/// Splits `input` into alternating runs of number text and unit text.
///
/// Scanning rules:
///
/// * Between tokens, whitespace is skipped. An ASCII digit or `-` starts a
///   number; any other character starts a unit.
/// * A number keeps absorbing ASCII digits, `.` and whitespace. Any other
///   character ends it and becomes the first character of a unit. Number
///   text is trimmed before it is emitted, so `"10 000"` stays one token
///   while the trailing space in `"10 ft"` is dropped.
/// * A unit ends at whitespace, or at an ASCII digit or `-`, which
///   immediately starts the next number.
///
/// No validation happens here: `"10.0.1"` is emitted as a single number token.
fn tokenize(input: &str) -> Vec<Token> {
    let mut out = Vec::new();
    let mut buf = String::new();
    let mut mode = TokState::Neutral;

    for ch in input.chars() {
        let starts_number = ch.is_ascii_digit() || ch == '-';

        // Each arm handles `ch` and yields the state for the next character.
        mode = match mode {
            TokState::Neutral if starts_number => {
                buf.push(ch);
                TokState::Number
            }
            TokState::Neutral if ch.is_whitespace() => TokState::Neutral,
            TokState::Neutral => {
                buf.push(ch);
                TokState::Unit
            }

            TokState::Number if ch.is_ascii_digit() || ch.is_whitespace() || ch == '.' => {
                buf.push(ch);
                TokState::Number
            }
            TokState::Number => {
                // `ch` cannot belong to the number, so it opens a unit.
                out.push(Token::Number(buf.trim().to_string()));
                buf.clear();
                buf.push(ch);
                TokState::Unit
            }

            TokState::Unit if starts_number => {
                out.push(Token::Unit(std::mem::take(&mut buf)));
                buf.push(ch);
                TokState::Number
            }
            TokState::Unit if ch.is_whitespace() => {
                out.push(Token::Unit(std::mem::take(&mut buf)));
                TokState::Neutral
            }
            TokState::Unit => {
                buf.push(ch);
                TokState::Unit
            }
        };
    }

    // Flush whatever was still being collected when the input ran out.
    match mode {
        TokState::Neutral => assert!(buf.is_empty()),
        TokState::Number => out.push(Token::Number(buf.trim().to_string())),
        TokState::Unit => out.push(Token::Unit(buf)),
    }
    out
}
/// Unit tests for the tokenizer, the unit lookup and the full parser.
#[cfg(test)]
mod test {
    use super::*;

    /// End-to-end behaviour of `parse`, including every error variant.
    #[test]
    fn parsing() {
        assert_eq!(parse(""), Ok(vec![]));
        assert_eq!(
            parse("5 ft"),
            Ok(vec![NonMetricQuantity { amount: 5.0, unit: NonMetric::Foot }])
        );
        assert_eq!(
            parse("5 ft 8 in"),
            Ok(vec![
                NonMetricQuantity { amount: 5.0, unit: NonMetric::Foot },
                NonMetricQuantity { amount: 8.0, unit: NonMetric::Inch },
            ])
        );
        assert_eq!(parse("12.0."), Err(ParseError::NotValidNumber("12.0.".to_string())));
        assert_eq!(parse("ft"), Err(ParseError::UnexpectedUnit("ft".to_string())));
        assert_eq!(parse("5 tf"), Err(ParseError::UnknownUnit("tf".to_string())));
        assert_eq!(parse("12"), Err(ParseError::ExpectedUnit));
    }

    /// Every accepted unit spelling, plus rejected inputs.
    #[test]
    fn units() {
        // Length
        assert_eq!(parse_unit("inch"), Some(NonMetric::Inch));
        assert_eq!(parse_unit("inches"), Some(NonMetric::Inch));
        assert_eq!(parse_unit("in"), Some(NonMetric::Inch));
        assert_eq!(parse_unit("\""), Some(NonMetric::Inch));
        // U+2033 DOUBLE PRIME and U+201D RIGHT DOUBLE QUOTATION MARK, escaped
        // so they cannot be mistaken for the ASCII quote.
        assert_eq!(parse_unit("\u{2033}"), Some(NonMetric::Inch));
        assert_eq!(parse_unit("\u{201D}"), Some(NonMetric::Inch));
        assert_eq!(parse_unit("foot"), Some(NonMetric::Foot));
        assert_eq!(parse_unit("feet"), Some(NonMetric::Foot));
        assert_eq!(parse_unit("ft"), Some(NonMetric::Foot));
        assert_eq!(parse_unit("'"), Some(NonMetric::Foot));
        // U+2032 PRIME and U+2019 RIGHT SINGLE QUOTATION MARK.
        assert_eq!(parse_unit("\u{2032}"), Some(NonMetric::Foot));
        assert_eq!(parse_unit("\u{2019}"), Some(NonMetric::Foot));
        assert_eq!(parse_unit("yard"), Some(NonMetric::Yard));
        assert_eq!(parse_unit("yards"), Some(NonMetric::Yard));
        assert_eq!(parse_unit("yd"), Some(NonMetric::Yard));
        assert_eq!(parse_unit("mile"), Some(NonMetric::Mile));
        assert_eq!(parse_unit("miles"), Some(NonMetric::Mile));
        assert_eq!(parse_unit("mi"), Some(NonMetric::Mile));
        assert_eq!(parse_unit("m"), Some(NonMetric::Mile));
        // Weight
        assert_eq!(parse_unit("ounce"), Some(NonMetric::Ounce));
        assert_eq!(parse_unit("ounces"), Some(NonMetric::Ounce));
        assert_eq!(parse_unit("oz"), Some(NonMetric::Ounce));
        assert_eq!(parse_unit("pound"), Some(NonMetric::Pound));
        assert_eq!(parse_unit("pounds"), Some(NonMetric::Pound));
        assert_eq!(parse_unit("lb"), Some(NonMetric::Pound));
        assert_eq!(parse_unit("lbs"), Some(NonMetric::Pound));
        assert_eq!(parse_unit("#"), Some(NonMetric::Pound));
        assert_eq!(parse_unit("stone"), Some(NonMetric::Stone));
        assert_eq!(parse_unit("stones"), Some(NonMetric::Stone));
        assert_eq!(parse_unit("st"), Some(NonMetric::Stone));
        // Unknown unit
        assert_eq!(parse_unit("hutenosa"), None);
        // The empty string is not a unit.
        assert_eq!(parse_unit(""), None);
    }

    /// Token boundaries produced by `tokenize`.
    #[test]
    fn tokens() {
        assert_eq!(tokenize(""), vec![]);
        assert_eq!(tokenize("10"), vec![Token::Number("10".to_string())]);
        assert_eq!(tokenize(" 10 "), vec![Token::Number("10".to_string())]);
        assert_eq!(tokenize("10 000"), vec![Token::Number("10 000".to_string())]);
        assert_eq!(tokenize("10.0.1"), vec![Token::Number("10.0.1".to_string())]);
        assert_eq!(tokenize("ft"), vec![Token::Unit("ft".to_string())]);
        assert_eq!(
            tokenize("10 ft"),
            vec![
                Token::Number("10".to_string()),
                Token::Unit("ft".to_string()),
            ]
        );
        assert_eq!(
            tokenize("ft in"),
            vec![
                Token::Unit("ft".to_string()),
                Token::Unit("in".to_string()),
            ]
        );
        assert_eq!(
            tokenize("5 ft 7 in"),
            vec![
                Token::Number("5".to_string()),
                Token::Unit("ft".to_string()),
                Token::Number("7".to_string()),
                Token::Unit("in".to_string()),
            ]
        );
        assert_eq!(
            tokenize("5\"7'"),
            vec![
                Token::Number("5".to_string()),
                Token::Unit("\"".to_string()),
                Token::Number("7".to_string()),
                Token::Unit("'".to_string()),
            ]
        );
        assert_eq!(
            tokenize(" 2.2lbs "),
            vec![
                Token::Number("2.2".to_string()),
                Token::Unit("lbs".to_string()),
            ]
        );
    }
}