Last active
May 25, 2022 09:53
-
-
Save PoignardAzur/49fb0b37a4d24d14952d65c59aa4779d to your computer and use it in GitHub Desktop.
An attempt to write a parser for Venial capable of consuming a list of Rust expressions without actually building a tree. It ended up being more trouble than it was worth.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| use crate::{parse_utils::consume_stuff_until, types::Expression}; | |
| use proc_macro2::{Delimiter, Spacing, TokenTree}; | |
| use std::iter::Peekable; | |
/// Peekable iterator over a `proc_macro2` token stream; every parsing
/// helper in this module consumes tokens through this alias.
type TokenIter = Peekable<proc_macro2::token_stream::IntoIter>;
| // TODO - write expect_punct_next, expect_ident_next macros | |
| // eg -> expect_punct_next(tokens, '|' | ','); | |
| // TODO - explain assumptions | |
| // assumption: this is an expression and not a statement | |
| // TODO - document better | |
| // TODO - test | |
| // TODO - loop labels | |
| // TODO - async, unsafe, outer attributes | |
| // TODO - 'as' casts | |
/// Classifies the token most recently consumed by `consume_expression`,
/// so the parser can decide whether the *next* token should begin a new
/// expression or continue / separate the current one.
#[derive(Clone, Copy, Debug, PartialEq)]
enum PrevToken {
    /// Start of the stream; nothing consumed yet.
    None,
    /// A complete expression fragment (ident, literal, group, `?`, ...).
    Expr,
    /// An infix operator or other separator; an expression must follow.
    Separator,
    /// A prefix construct (`return`, `loop`, a closure header, ...).
    Prefix,
    /// A `::` path separator (a `<...>` turbofish may follow).
    DoubleColon,
    /// `if` / `while` / `match` — a condition/scrutinee expression follows.
    TestKeyword,
    /// `let xxx =` / `for xxx in` — the bound expression follows.
    DeclPattern,
}
| fn expect_expression_after(prev_token: PrevToken) -> bool { | |
| matches!( | |
| prev_token, | |
| PrevToken::Separator | |
| | PrevToken::Prefix | |
| | PrevToken::TestKeyword | |
| | PrevToken::DeclPattern | |
| | PrevToken::None | |
| ) | |
| } | |
| fn consume_decl_pattern(tokens: &mut TokenIter) -> Vec<TokenTree> { | |
| let mut output_tokens = Vec::new(); | |
| loop { | |
| let token = tokens.next(); | |
| match &token { | |
| Some(TokenTree::Punct(punct)) if punct.as_char() == '=' => { | |
| output_tokens.push(token.unwrap()); | |
| break; | |
| } | |
| Some(TokenTree::Ident(ident)) if ident == "in" => { | |
| output_tokens.push(token.unwrap()); | |
| break; | |
| } | |
| Some(_) => { | |
| output_tokens.push(token.unwrap()); | |
| } | |
| None => panic!("cannot parse declaration pattern: unexpected end of token stream"), | |
| }; | |
| } | |
| output_tokens | |
| } | |
/// Consumes a closure's parameter list (`|...|`) and, if present, its
/// explicit return type (`-> Type ...`), returning the consumed tokens.
///
/// Panics if the stream does not start with `|` (the caller must have
/// peeked it), or if the stream ends before the closing `|`.
fn consume_closure_params_ret(tokens: &mut TokenIter) -> Vec<TokenTree> {
    let mut output_tokens = Vec::new();
    // Opening '|' — guaranteed by the caller, hence `unreachable!`.
    let token = tokens.next();
    match &token {
        Some(TokenTree::Punct(punct)) if punct.as_char() == '|' => {
            output_tokens.push(token.unwrap());
        }
        _ => unreachable!(),
    }
    // Parameter list: everything up to and including the closing '|'.
    // NOTE(review): a '|' occurring *inside* the parameter list (e.g. in
    // an or-pattern) would end this loop early — TODO confirm callers
    // never hit that case.
    loop {
        let token = tokens.next();
        match &token {
            Some(TokenTree::Punct(punct)) if punct.as_char() == '|' => {
                output_tokens.push(token.unwrap());
                break;
            }
            Some(_) => {
                output_tokens.push(token.unwrap());
            }
            None => panic!("cannot parse closure: unexpected end of token stream"),
        };
    }
    // Optional explicit return type.
    // This code assumes that the only joint token starting with '-' is '->'.
    match tokens.peek() {
        Some(TokenTree::Punct(punct))
            if punct.as_char() == '-' && punct.spacing() == Spacing::Joint =>
        {
            // Consume '-' then assert the '>' that must follow it.
            output_tokens.push(tokens.next().unwrap());
            let next_token = tokens.next().unwrap();
            let next_punct = match &next_token {
                TokenTree::Punct(punct) => punct.clone(),
                _ => panic!("unexpected token {:?}", next_token),
            };
            assert!(next_punct.as_char() == '>');
            output_tokens.push(next_token);
            // Consume the return type, stopping at the brace-delimited
            // closure body. NOTE(review): whether the brace group itself is
            // consumed depends on consume_stuff_until's third argument —
            // confirm against its definition.
            let mut return_type = consume_stuff_until(
                tokens,
                |token| match token {
                    TokenTree::Group(group) if group.delimiter() == Delimiter::Brace => true,
                    _ => false,
                },
                true,
            );
            output_tokens.append(&mut return_type);
        }
        _ => (),
    };
    output_tokens
}
| // Consumes tokens until a comma is reached, except in | |
| // various corner cases related to expression syntax. | |
| // eg consume_expression(...) will consume all | |
| // of `a + |b, c| d, e::<F, G>(), h,` except for the last comma | |
// Consumes tokens until a comma is reached, except in
// various corner cases related to expression syntax.
// eg consume_expression(...) will consume all
// of `a + |b, c| d, e::<F, G>(), h,` except for the last comma
//
// Implemented as a small state machine: `prev_token` records the category
// of the last token consumed, which disambiguates tokens (`|`, `<`, `..`,
// `&`, ...) that mean different things in expression vs. operator position.
pub(crate) fn consume_expression(tokens: &mut TokenIter) -> Expression {
    let mut output_tokens = Vec::new();
    let mut prev_token = PrevToken::None;
    loop {
        let token = tokens.peek();
        prev_token = match &token {
            // `::` — consume both colons; a turbofish may follow.
            Some(TokenTree::Punct(punct))
                if punct.as_char() == ':' && punct.spacing() == Spacing::Joint =>
            {
                output_tokens.push(tokens.next().unwrap());
                let next_token = tokens.next().unwrap();
                let next_punct = match &next_token {
                    TokenTree::Punct(punct) => punct,
                    _ => panic!("unexpected token {:?}", next_token),
                };
                assert!(next_punct.as_char() == ':');
                output_tokens.push(next_token);
                PrevToken::DoubleColon
            }
            // NOTE(review): this guard requires the *previous* token to be a
            // complete expression, i.e. it fires for `&`/`*`/`-`/`!` in infix
            // position only. A stream *starting* with `&mut x` falls through
            // to the generic punct arm, and the `mut` assertion below then
            // fails. Confirm the guard isn't meant to be
            // `expect_expression_after(prev_token)`.
            Some(TokenTree::Punct(punct))
                if matches!(punct.as_char(), '&' | '*' | '-' | '!')
                    && matches!(prev_token, PrevToken::Expr) =>
            {
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Prefix
            }
            Some(TokenTree::Ident(ident)) if ident == "mut" => {
                // 'mut' keyword is only valid in expression after '&'
                assert!(matches!(prev_token, PrevToken::Prefix));
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Prefix
            }
            Some(TokenTree::Punct(punct)) if punct.as_char() == '?' => {
                // '?' is only valid after expression
                assert!(matches!(prev_token, PrevToken::Expr));
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Expr
            }
            // `loop { ... }` — the block is consumed by a later iteration.
            Some(TokenTree::Ident(ident)) if ident == "loop" => {
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Prefix
            }
            Some(TokenTree::Ident(ident))
                if ident == "if" || ident == "while" || ident == "match" =>
            {
                output_tokens.push(tokens.next().unwrap());
                // We continue parsing. The parse loop will first consume the condition,
                // then the block, and continue as normal.
                PrevToken::TestKeyword
            }
            // `for <pattern> in <expr>` — delegate the pattern up to `in`.
            Some(TokenTree::Ident(ident)) if ident == "for" => {
                output_tokens.push(tokens.next().unwrap());
                let mut decl_contents = consume_decl_pattern(tokens);
                output_tokens.append(&mut decl_contents);
                PrevToken::DeclPattern
            }
            Some(TokenTree::Ident(ident)) if ident == "let" => {
                // 'let' in expressions is only valid in 'if let' or 'while let'.
                assert!(matches!(prev_token, PrevToken::TestKeyword));
                let mut decl_contents = consume_decl_pattern(tokens);
                output_tokens.append(&mut decl_contents);
                PrevToken::DeclPattern
            }
            Some(TokenTree::Ident(ident)) if ident == "return" || ident == "yield" => {
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Prefix
            }
            Some(TokenTree::Ident(ident)) if ident == "break" => {
                output_tokens.push(tokens.next().unwrap());
                // Check for loop label: proc_macro2 tokenizes `'label` as a
                // `'` punct followed by an ident.
                let next_token = tokens.peek();
                match next_token {
                    Some(TokenTree::Punct(punct)) if punct.as_char() == '\'' => {
                        // Consume loop label
                        output_tokens.push(tokens.next().unwrap());
                        assert!(matches!(tokens.peek().unwrap(), TokenTree::Ident(_)));
                        output_tokens.push(tokens.next().unwrap());
                    }
                    _ => (),
                }
                PrevToken::Prefix
            }
            // Same shape as the `break` arm above.
            Some(TokenTree::Ident(ident)) if ident == "continue" => {
                output_tokens.push(tokens.next().unwrap());
                // Check for loop label
                let next_token = tokens.peek();
                match next_token {
                    Some(TokenTree::Punct(punct)) if punct.as_char() == '\'' => {
                        // Consume loop label
                        output_tokens.push(tokens.next().unwrap());
                        assert!(matches!(tokens.peek().unwrap(), TokenTree::Ident(_)));
                        output_tokens.push(tokens.next().unwrap());
                    }
                    _ => (),
                }
                PrevToken::Prefix
            }
            // See "NOTES ON RANGE_EXPR" for details
            Some(TokenTree::Punct(punct))
                if punct.as_char() == '.' && punct.spacing() == Spacing::Joint =>
            {
                // First '.' of a `..` / `..=` / `...` range operator.
                output_tokens.push(tokens.next().unwrap());
                let next_token = tokens.next().unwrap();
                let next_punct = match &next_token {
                    TokenTree::Punct(punct) if punct.as_char() == '.' => punct.clone(),
                    _ => panic!("unexpected token {:?}", next_token),
                };
                output_tokens.push(next_token);
                if next_punct.spacing() == Spacing::Joint {
                    // Three-character form: the third punct must be '.' (`...`)
                    // or '=' (`..=`).
                    let next_token = tokens.next().unwrap();
                    let next_punct = match &next_token {
                        TokenTree::Punct(punct) => punct.clone(),
                        _ => panic!("unexpected token {:?}", next_token),
                    };
                    output_tokens.push(next_token);
                    assert!(next_punct.as_char() == '.' || next_punct.as_char() == '=');
                    PrevToken::Separator
                } else if expect_expression_after(prev_token) {
                    // Range in expression position is treated as a prefix.
                    PrevToken::Prefix
                } else {
                    // Range after an expression is treated as infix/suffix.
                    PrevToken::Separator
                }
            }
            Some(TokenTree::Ident(ident)) if ident == "async" => {
                // 'async' keyword always starts a closure expression or a block
                assert!(expect_expression_after(prev_token));
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Prefix
            }
            Some(TokenTree::Ident(ident)) if ident == "move" => {
                // 'move' keyword always starts a closure expression
                assert!(expect_expression_after(prev_token));
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Prefix
            }
            // '|' in expression position starts a closure; consume the whole
            // `|params| -> Ret` header. (In operator position it falls
            // through to the generic punct arm as bitwise-or.)
            Some(TokenTree::Punct(punct))
                if punct.as_char() == '|' && expect_expression_after(prev_token) =>
            {
                let mut closure_header = consume_closure_params_ret(tokens);
                output_tokens.append(&mut closure_header);
                PrevToken::Prefix
            }
            // `<...>` generic arguments (typically after `::` turbofish).
            // NOTE(review): relies on consume_stuff_until matching the
            // closing '>' — confirm against its definition.
            Some(TokenTree::Punct(punct))
                if punct.as_char() == '<'
                    && (expect_expression_after(prev_token)
                        || prev_token == PrevToken::DoubleColon) =>
            {
                let mut turbofish_contents = consume_stuff_until(tokens, |_| true, false);
                output_tokens.append(&mut turbofish_contents);
                PrevToken::Expr
            }
            // A top-level comma ends the expression; the comma is left in
            // the stream for the caller.
            Some(TokenTree::Punct(punct)) if punct.as_char() == ',' => break,
            None => break,
            // Any other punctuation: a (possibly multi-character joint)
            // operator, treated as a single separator.
            Some(TokenTree::Punct(punct)) => {
                let punct = punct.clone();
                output_tokens.push(tokens.next().unwrap());
                // Except for all cases covered above, we treat joint punctuations
                // as a single separator
                if punct.spacing() == Spacing::Joint {
                    let next_token = tokens.next().unwrap();
                    output_tokens.push(next_token.clone());
                    match next_token {
                        TokenTree::Punct(punct) if punct.spacing() == Spacing::Joint => {
                            output_tokens.push(tokens.next().unwrap());
                        }
                        _ => (),
                    }
                }
                PrevToken::Separator
            }
            // Any other identifier: path segment, variable, or macro name.
            Some(TokenTree::Ident(_)) => {
                output_tokens.push(tokens.next().unwrap());
                // Check for macros
                let next_token = tokens.peek();
                match next_token {
                    Some(TokenTree::Punct(punct)) if punct.as_char() == '!' => {
                        // Consume macro call
                        output_tokens.push(tokens.next().unwrap());
                        assert!(matches!(tokens.peek().unwrap(), TokenTree::Group(_)));
                        output_tokens.push(tokens.next().unwrap());
                    }
                    _ => (),
                }
                // TODO - WAIT NONNONONONO
                // NOTE(review): the author flagged this arm as suspect (the
                // TODO above is original). Keywords not matched by earlier
                // arms (e.g. `else`) also land here and get classified as a
                // complete expression — verify before relying on this.
                PrevToken::Expr
            }
            // A delimited group (parens, brackets, braces) is a complete
            // sub-expression from this parser's point of view.
            Some(TokenTree::Group(_)) => {
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Expr
            }
            Some(TokenTree::Literal(_)) => {
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Expr
            }
        };
    }
    Expression {
        tokens: output_tokens,
    }
}
| // NOTES ON RANGE_EXPR | |
| // | |
| // Range expressions are a bit of a weird case, in that the | |
| // range operator '..' can either be a prefix, a suffix, an | |
| // infix, or *an expression on its own*. | |
| // | |
| // In practice though, if a range is found where an expression | |
| // would be expected, it will be treated as a prefix, *unless* | |
| // it's at the end of the token stream, in which case it will | |
| // be treated as an expression. | |
| // | |
| // Eg the following lines are valid rust syntax: | |
| // ..; | |
| // 0 + ..; | |
| // (..) + 0; | |
| // (..) + ..; | |
| // .. .. .. .. 0; | |
| // | |
| // But the following is invalid syntax: | |
| // | |
| // .. + 0; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment