Last active
March 1, 2020 04:07
-
-
Save anirudhb/377dfc5f6514eb2ce3d8146acaa8003b to your computer and use it in GitHub Desktop.
Format string parser in Rust!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! Just for prototyping. I'll move this to lib later I promise! | |
// Pulls in IResult | |
use nom::IResult; | |
// Pulls in some useful pre-defined parsers. | |
use nom::{ | |
bytes::complete::{tag, take_while, take_while_m_n}, | |
character::{ | |
complete::{alpha1, digit1}, | |
is_alphabetic, is_digit, | |
}, | |
sequence::tuple, | |
}; | |
// Explanation: | |
// Takes a string to parse | |
// and spits out two new strings: the part that it parsed, and the part | |
// left over. If it fails it will return an Err as opposed to an Ok. | |
// ^ That applies to all parsers that return something with 'IResult' | |
// | |
// 'tag' is a pre-defined parser that simply parses a constant string. | |
// Note: the first '&str' in 'IResult<&str, &str>' is the remaining | |
// input, the other type can be anything else! | |
// 'tuple' chains together multiple parsers at once. | |
// 'alpha1' matches one or more letters. | |
// 'digit1' matches one or more digits.
// 'is_alphabetic' checks if a byte (u8, not char) is an ASCII letter.
// 'is_digit' checks if a byte (u8, not char) is an ASCII digit.
/// This parses the single character '{' | |
fn open_brace(i: &str) -> IResult<&str, &str> { tag("{")(i) } | |
/// This parses the single character '}' | |
fn close_brace(i: &str) -> IResult<&str, &str> { tag("}")(i) } | |
/// This parses the single character ':' | |
fn colon(i: &str) -> IResult<&str, &str> { tag(":")(i) } | |
/// Returns `true` only for the underscore character `_`.
fn is_underscore(c: char) -> bool
{
    c == '_'
}

/// Returns `true` if `c` may appear anywhere in an identifier: an ASCII
/// letter, an ASCII digit, or `_`.
fn is_ident_char(c: char) -> bool
{
    // The test is done on the `char` itself. Casting to `u8` first would
    // truncate non-ASCII characters (U+0130 has low byte 0x30, i.e. '0') and
    // wrongly accept them.
    is_ident_begin_char(c) || c.is_ascii_digit()
}

/// Returns `true` if `c` may start an identifier: an ASCII letter or `_`.
fn is_ident_begin_char(c: char) -> bool
{
    // Same reasoning as in `is_ident_char`: U+0141 has low byte 0x41 ('A'),
    // so a `c as u8` cast would let it through.
    c.is_ascii_alphabetic() || is_underscore(c)
}
/// This parses identifiers: | |
/// one letter then | |
/// zero or more letters/numbers/'_'s | |
/// The return type is String not &str since we have to join two strings | |
/// together. | |
fn ident(i: &str) -> IResult<&str, String> | |
{ | |
let (remaining, (a, rest)) = tuple(( | |
take_while_m_n(1, 1, is_ident_begin_char), | |
take_while(is_ident_char), | |
))(i)?; | |
// Add a and rest together | |
let full = format!("{}{}", a, rest); | |
Ok((remaining, full)) | |
} | |
/// This parses a full format string! | |
/// See, it returns a FormatString not a &str! | |
fn format_string(i: &str) -> IResult<&str, FormatString> | |
{ | |
// P.S. '_' discards the value in that position | |
let (remaining, (_, name, _, bytes, format_specifier, _)) = tuple( | |
(open_brace, ident, colon, digit1, alpha1, close_brace), | |
)(i)?; | |
// Now we have to parse the number-string into a number | |
// We panic if it fails since it should never fail | |
// (that is what 'unwrap' does) | |
let bytes_num = bytes.parse::<u64>().unwrap(); | |
// Now we create our struct and respond with success! | |
// 'to_owned' makes a copy of a string. | |
let fs = FormatString { | |
name, /* to_owned not necessary since 'ident' already returns | |
* String */ | |
bytes: bytes_num, | |
format_specifier: format_specifier.to_owned(), | |
}; | |
return Ok((remaining, fs)); | |
} | |
/// One parsed format string, e.g. `{size:4u}` describes a 4-byte (32-bit)
/// unsigned number called `size`.
///
/// `Debug` lets the struct be printed with `{:?}` / `{:#?}`; `Clone`,
/// `PartialEq` and `Eq` let parse results be copied and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
struct FormatString
{
    /// Identifier before the colon (`size` in the example).
    name: String,
    /// Number after the colon (`4` in the example), a size in bytes.
    bytes: u64,
    /// Letters following the number (`u` in the example).
    format_specifier: String,
}
// Return type is a catch all for any kind of error | |
fn main() -> std::result::Result<(), Box<dyn std::error::Error>> | |
{ | |
// Running the parser on three different strings | |
// The '?' makes it so if there was an error in the function, | |
// we simply return from main with that error. | |
// Now it is time to test! | |
// We will just test with the input provided. | |
// Read input to parse and bail out if error | |
let mut i = String::new(); | |
std::io::stdin().read_line(&mut i)?; | |
// Parse it and print if possible, otherwise print the error | |
match format_string(&i) | |
{ | |
Ok((_, fs)) => | |
{ | |
println!("yayyyyy it parsed!"); | |
println!("{:#?}", fs); | |
}, | |
Err(e) => | |
{ | |
println!("Aw it failed to parse :("); | |
println!("{:#?}", e); | |
}, | |
} | |
// No error (because we caught it!) | |
Ok(()) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment