Skip to content

Instantly share code, notes, and snippets.

@PoignardAzur
Last active May 25, 2022 09:53
Show Gist options
  • Select an option

  • Save PoignardAzur/49fb0b37a4d24d14952d65c59aa4779d to your computer and use it in GitHub Desktop.

Select an option

Save PoignardAzur/49fb0b37a4d24d14952d65c59aa4779d to your computer and use it in GitHub Desktop.
An attempt to write a parser for Venial capable of consuming a list of Rust expressions without actually building a tree. It ended up being more trouble than it was worth.
use crate::{parse_utils::consume_stuff_until, types::Expression};
use proc_macro2::{Delimiter, Spacing, TokenTree};
use std::iter::Peekable;
/// Peekable iterator over a `proc_macro2` token stream; every decision
/// point in this parser needs at most one token of lookahead.
type TokenIter = Peekable<proc_macro2::token_stream::IntoIter>;
// TODO - write expect_punct_next, expect_ident_next macros
// eg -> expect_punct_next(tokens, '|' | ',');
// TODO - explain assumptions
// assumption: this is an expression and not a statement
// TODO - document better
// TODO - test
// TODO - loop labels
// TODO - async, unsafe, outer attributes
// TODO - 'as' casts
/// Classification of the most recently consumed token, used by
/// `consume_expression` to decide whether the next token should start a
/// new (sub-)expression or continue/terminate the current one.
#[derive(Clone, Copy, Debug, PartialEq)]
enum PrevToken {
    /// Nothing consumed yet (start of the expression).
    None,
    /// A token that can end a complete sub-expression (ident, literal,
    /// group, `?`, turbofish, ...).
    Expr,
    /// Infix operator or other punctuation; an expression is expected next.
    Separator,
    /// A prefix construct (`return`, `loop`, a closure header, ...).
    Prefix,
    /// `::`
    DoubleColon,
    /// `if` / `while` / `match`
    TestKeyword,
    /// `let xxx =` / `for xxx in`
    DeclPattern,
}
/// Returns `true` when, given the classification of the token just
/// consumed, the next token is expected to begin an expression (rather
/// than continue or terminate one).
fn expect_expression_after(prev_token: PrevToken) -> bool {
    match prev_token {
        // Nothing yet, a separator, a prefix construct, a test keyword, or a
        // declaration pattern: all of these must be followed by an expression.
        PrevToken::None
        | PrevToken::Separator
        | PrevToken::Prefix
        | PrevToken::TestKeyword
        | PrevToken::DeclPattern => true,
        // After a complete expression or a `::`, the next token continues
        // the current expression instead.
        PrevToken::Expr | PrevToken::DoubleColon => false,
    }
}
/// Consumes and returns all tokens up to and including the terminator of a
/// declaration pattern: the `=` of a `let` binding or the `in` of a `for`
/// loop. The expression after the terminator is left in the stream.
///
/// Panics if the stream ends before a terminator is found.
fn consume_decl_pattern(tokens: &mut TokenIter) -> Vec<TokenTree> {
    let mut pattern = Vec::new();
    loop {
        let token = match tokens.next() {
            Some(token) => token,
            None => panic!("cannot parse declaration pattern: unexpected end of token stream"),
        };
        // `=` (let) and `in` (for) both end the pattern, and are included
        // in the returned tokens.
        let is_terminator = match &token {
            TokenTree::Punct(punct) => punct.as_char() == '=',
            TokenTree::Ident(ident) => ident == "in",
            _ => false,
        };
        pattern.push(token);
        if is_terminator {
            break;
        }
    }
    pattern
}
/// Consumes a closure's parameter list `|...|` and, if present, its
/// `-> ReturnType` annotation, stopping before the brace-delimited body
/// (when there is one). Returns the consumed tokens.
///
/// The caller must have peeked an opening `|`; panics if the stream ends
/// inside the parameter list.
fn consume_closure_params_ret(tokens: &mut TokenIter) -> Vec<TokenTree> {
    let mut header = Vec::new();

    // Opening '|' — guaranteed by the caller.
    let opening = tokens.next();
    if !matches!(&opening, Some(TokenTree::Punct(punct)) if punct.as_char() == '|') {
        unreachable!()
    }
    header.push(opening.unwrap());

    // Everything up to and including the closing '|'.
    loop {
        let token = match tokens.next() {
            Some(token) => token,
            None => panic!("cannot parse closure: unexpected end of token stream"),
        };
        let is_closing = matches!(&token, TokenTree::Punct(punct) if punct.as_char() == '|');
        header.push(token);
        if is_closing {
            break;
        }
    }

    // This code assumes that the only joint token starting with '-' is '->'.
    if let Some(TokenTree::Punct(punct)) = tokens.peek() {
        if punct.as_char() == '-' && punct.spacing() == Spacing::Joint {
            header.push(tokens.next().unwrap());
            let gt_token = tokens.next().unwrap();
            match &gt_token {
                TokenTree::Punct(punct) => assert!(punct.as_char() == '>'),
                _ => panic!("unexpected token {:?}", gt_token),
            }
            header.push(gt_token);
            // The return type runs until the brace-delimited closure body.
            let mut return_type = consume_stuff_until(
                tokens,
                |token| matches!(token, TokenTree::Group(group) if group.delimiter() == Delimiter::Brace),
                true,
            );
            header.append(&mut return_type);
        }
    }
    header
}
/// Consumes tokens until a comma is reached, except in
/// various corner cases related to expression syntax.
/// eg consume_expression(...) will consume all
/// of `a + |b, c| d, e::<F, G>(), h,` except for the last comma
///
/// Implemented as a single-pass state machine: `prev_token` records how the
/// previously consumed token was classified, which disambiguates tokens such
/// as `|` (closure header vs bit-or), `<` (turbofish vs less-than) and `..`
/// (see "NOTES ON RANGE_EXPR" at the bottom of this file). No expression
/// tree is built; the raw tokens are returned as an `Expression`.
pub(crate) fn consume_expression(tokens: &mut TokenIter) -> Expression {
    let mut output_tokens = Vec::new();
    let mut prev_token = PrevToken::None;
    loop {
        // Peek first so the `,` / end-of-stream arms can leave the token alone.
        let token = tokens.peek();
        prev_token = match &token {
            // A joint ':' is assumed to always be the start of a `::`.
            Some(TokenTree::Punct(punct))
                if punct.as_char() == ':' && punct.spacing() == Spacing::Joint =>
            {
                output_tokens.push(tokens.next().unwrap());
                let next_token = tokens.next().unwrap();
                let next_punct = match &next_token {
                    TokenTree::Punct(punct) => punct,
                    _ => panic!("unexpected token {:?}", next_token),
                };
                assert!(next_punct.as_char() == ':');
                output_tokens.push(next_token);
                PrevToken::DoubleColon
            }
            // NOTE(review): this arm classifies '&'/'*'/'-'/'!' as *prefix* only
            // when the previous token was a complete expression — i.e. exactly
            // when they act as binary/suffix operators. In actual prefix position
            // (after a separator), they fall through to the generic punct arm and
            // become Separator, so e.g. `a + &mut b` reaches the 'mut' arm with
            // prev_token == Separator and trips its assert. The guard looks
            // inverted — confirm intent before reusing this code.
            Some(TokenTree::Punct(punct))
                if matches!(punct.as_char(), '&' | '*' | '-' | '!')
                    && matches!(prev_token, PrevToken::Expr) =>
            {
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Prefix
            }
            Some(TokenTree::Ident(ident)) if ident == "mut" => {
                // 'mut' keyword is only valid in expression after '&'
                assert!(matches!(prev_token, PrevToken::Prefix));
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Prefix
            }
            Some(TokenTree::Punct(punct)) if punct.as_char() == '?' => {
                // '?' is only valid after expression
                assert!(matches!(prev_token, PrevToken::Expr));
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Expr
            }
            // 'loop' is treated as a prefix; its block is consumed by the
            // Group arm below on the next iteration.
            Some(TokenTree::Ident(ident)) if ident == "loop" => {
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Prefix
            }
            Some(TokenTree::Ident(ident))
                if ident == "if" || ident == "while" || ident == "match" =>
            {
                output_tokens.push(tokens.next().unwrap());
                // We continue parsing. The parse loop will first consume the condition,
                // then the block, and continue as normal.
                PrevToken::TestKeyword
            }
            Some(TokenTree::Ident(ident)) if ident == "for" => {
                output_tokens.push(tokens.next().unwrap());
                // Consume `<pattern> in`; the iterated expression follows.
                let mut decl_contents = consume_decl_pattern(tokens);
                output_tokens.append(&mut decl_contents);
                PrevToken::DeclPattern
            }
            Some(TokenTree::Ident(ident)) if ident == "let" => {
                // 'let' in expressions is only valid in 'if let' or 'while let'.
                assert!(matches!(prev_token, PrevToken::TestKeyword));
                // Note: unlike the 'for' arm above, 'let' itself is not pushed
                // here — it is still in the stream (only peeked) and gets swept
                // up by consume_decl_pattern along with the pattern and the '='.
                let mut decl_contents = consume_decl_pattern(tokens);
                output_tokens.append(&mut decl_contents);
                PrevToken::DeclPattern
            }
            Some(TokenTree::Ident(ident)) if ident == "return" || ident == "yield" => {
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Prefix
            }
            Some(TokenTree::Ident(ident)) if ident == "break" => {
                output_tokens.push(tokens.next().unwrap());
                // Check for loop label
                // (treated here as a '\'' punct followed by an ident)
                let next_token = tokens.peek();
                match next_token {
                    Some(TokenTree::Punct(punct)) if punct.as_char() == '\'' => {
                        // Consume loop label
                        output_tokens.push(tokens.next().unwrap());
                        assert!(matches!(tokens.peek().unwrap(), TokenTree::Ident(_)));
                        output_tokens.push(tokens.next().unwrap());
                    }
                    _ => (),
                }
                PrevToken::Prefix
            }
            Some(TokenTree::Ident(ident)) if ident == "continue" => {
                output_tokens.push(tokens.next().unwrap());
                // Check for loop label
                let next_token = tokens.peek();
                match next_token {
                    Some(TokenTree::Punct(punct)) if punct.as_char() == '\'' => {
                        // Consume loop label
                        output_tokens.push(tokens.next().unwrap());
                        assert!(matches!(tokens.peek().unwrap(), TokenTree::Ident(_)));
                        output_tokens.push(tokens.next().unwrap());
                    }
                    _ => (),
                }
                PrevToken::Prefix
            }
            // See "NOTES ON RANGE_EXPR" for details
            Some(TokenTree::Punct(punct))
                if punct.as_char() == '.' && punct.spacing() == Spacing::Joint =>
            {
                output_tokens.push(tokens.next().unwrap());
                let next_token = tokens.next().unwrap();
                let next_punct = match &next_token {
                    TokenTree::Punct(punct) if punct.as_char() == '.' => punct.clone(),
                    _ => panic!("unexpected token {:?}", next_token),
                };
                output_tokens.push(next_token);
                if next_punct.spacing() == Spacing::Joint {
                    // A third joint punct: the operator is `..=` or `...`.
                    let next_token = tokens.next().unwrap();
                    let next_punct = match &next_token {
                        TokenTree::Punct(punct) => punct.clone(),
                        _ => panic!("unexpected token {:?}", next_token),
                    };
                    output_tokens.push(next_token);
                    assert!(next_punct.as_char() == '.' || next_punct.as_char() == '=');
                    PrevToken::Separator
                } else if expect_expression_after(prev_token) {
                    // `..` in expression position is treated as a prefix.
                    PrevToken::Prefix
                } else {
                    // `..` after an expression is an infix/suffix.
                    PrevToken::Separator
                }
            }
            Some(TokenTree::Ident(ident)) if ident == "async" => {
                // 'async' keyword always starts a closure expression or a block
                assert!(expect_expression_after(prev_token));
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Prefix
            }
            Some(TokenTree::Ident(ident)) if ident == "move" => {
                // 'move' keyword always starts a closure expression
                assert!(expect_expression_after(prev_token));
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Prefix
            }
            // '|' in expression position starts a closure header; otherwise it
            // falls through to the generic punct arm (bit-or / closure-or).
            Some(TokenTree::Punct(punct))
                if punct.as_char() == '|' && expect_expression_after(prev_token) =>
            {
                let mut closure_header = consume_closure_params_ret(tokens);
                output_tokens.append(&mut closure_header);
                PrevToken::Prefix
            }
            // '<' after `::` (or in expression position) is treated as the
            // start of a generic-argument list rather than less-than.
            Some(TokenTree::Punct(punct))
                if punct.as_char() == '<'
                    && (expect_expression_after(prev_token)
                        || prev_token == PrevToken::DoubleColon) =>
            {
                let mut turbofish_contents = consume_stuff_until(tokens, |_| true, false);
                output_tokens.append(&mut turbofish_contents);
                PrevToken::Expr
            }
            // A top-level comma ends the expression; it is left in the stream.
            Some(TokenTree::Punct(punct)) if punct.as_char() == ',' => break,
            None => break,
            Some(TokenTree::Punct(punct)) => {
                let punct = punct.clone();
                output_tokens.push(tokens.next().unwrap());
                // Except for all cases covered above, we treat joint punctuations
                // as a single separator
                if punct.spacing() == Spacing::Joint {
                    let next_token = tokens.next().unwrap();
                    output_tokens.push(next_token.clone());
                    // Consume at most one more punct, i.e. operators up to
                    // three puncts long (eg `<<=`).
                    match next_token {
                        TokenTree::Punct(punct) if punct.spacing() == Spacing::Joint => {
                            output_tokens.push(tokens.next().unwrap());
                        }
                        _ => (),
                    }
                }
                PrevToken::Separator
            }
            Some(TokenTree::Ident(_)) => {
                output_tokens.push(tokens.next().unwrap());
                // Check for macros
                let next_token = tokens.peek();
                match next_token {
                    Some(TokenTree::Punct(punct)) if punct.as_char() == '!' => {
                        // Consume macro call
                        output_tokens.push(tokens.next().unwrap());
                        assert!(matches!(tokens.peek().unwrap(), TokenTree::Group(_)));
                        output_tokens.push(tokens.next().unwrap());
                    }
                    _ => (),
                }
                // TODO - WAIT NONNONONONO
                // NOTE(review): the TODO above is the original author's —
                // presumably a bare ident is not always a complete expression
                // (eg a path segment before `::`); confirm before relying on this.
                PrevToken::Expr
            }
            Some(TokenTree::Group(_)) => {
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Expr
            }
            Some(TokenTree::Literal(_)) => {
                output_tokens.push(tokens.next().unwrap());
                PrevToken::Expr
            }
        };
    }
    Expression {
        tokens: output_tokens,
    }
}
// NOTES ON RANGE_EXPR
//
// Range expressions are a bit of a weird case, in that the
// range operator '..' can either be a prefix, a suffix, an
// infix, or *an expression on its own*.
//
// In practice though, if a range is found where an expression
// would be expected, it will be treated as a prefix, *unless*
// it's at the end of the token stream, in which case it will
// be treated as an expression.
//
// Eg the following lines are valid rust syntax:
// ..;
// 0 + ..;
// (..) + 0;
// (..) + ..;
// .. .. .. .. 0;
//
// But the following is invalid syntax:
//
// .. + 0;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment