Split up parser

The structure mostly follows the ast structure, with some slight
changes. Each parser submodule documents which ast submodule it
corresponds to.

This parser is not yet complete, and I have yet to go through its
modules one-by-one to fix and complete them.
This commit is contained in:
Joscha 2022-11-18 20:20:37 +01:00
parent 037a0f69a3
commit a559966c1d
7 changed files with 511 additions and 419 deletions

View file

@ -1,425 +1,16 @@
mod basic;
mod expr;
mod lit;
mod suffix;
mod table_constr;
mod var;
use chumsky::prelude::*;
use chumsky::text::Character;
use crate::ast::{
Expr, Ident, Lit, NumLit, NumLitStr, Space, StringLit, TableConstr, TableConstrElem, TableLit,
TableLitElem,
};
use crate::span::{HasSpan, Span};
use crate::ast::Expr;
type Error = Simple<char, Span>;
// This would probably look a lot nicer with type_alias_impl_trait:
// https://github.com/rust-lang/rust/issues/63063
fn space() -> impl Parser<char, Space, Error = Error> {
// TODO Parse comments
text::whitespace().map_with_span(|(), span| Space {
comment: vec![],
span,
})
}
fn ident() -> impl Parser<char, Ident, Error = Error> {
// TODO Forbid keywords
text::ident().map_with_span(|name, span| Ident { name, span })
}
fn num_lit_str_radix(radix: u32) -> impl Parser<char, (i64, NumLitStr), Error = Error> + Clone {
// Minimum amount of digits required to represent i64::MAX. The rest of this
// code assumes that any value that can be represented using this amount of
// digits fits into an u64.
let max_digits = match radix {
2 => 63,
10 => 19,
16 => 16,
_ => panic!("unsupported radix"),
};
// Representations of i64::MAX.
let max_value = match radix {
2 => "0b_1111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111",
10 => "9_223_372_036_854_775_807",
16 => "0x_7fff_ffff_ffff_ffff",
_ => panic!("unsupported radix"),
};
let constructor = match radix {
2 => NumLitStr::Bin,
10 => NumLitStr::Dec,
16 => NumLitStr::Hex,
_ => panic!("unsupported radix"),
};
filter(move |c: &char| c.is_digit(radix) || *c == '_')
.repeated()
.at_least(1)
.collect::<String>()
.try_map(move |s, span| {
let digits = s.chars().filter(|c| *c != '_').collect::<String>();
if digits.is_empty() {
let msg = "integer literal needs to contain at least one digit";
return Err(Simple::custom(span, msg));
} else if digits.len() > max_digits {
let msg = format!("integer literal too large, the maximum value is {max_value}");
return Err(Simple::custom(span, msg));
}
let value = u64::from_str_radix(&digits, radix).unwrap();
if value <= i64::MAX as u64 {
Ok((value as i64, constructor(s)))
} else {
let msg = format!("integer literal too large, the maximum value is {max_value}");
Err(Simple::custom(span, msg))
}
})
}
fn num_lit() -> impl Parser<char, NumLit, Error = Error> + Clone {
(just("0b").ignore_then(num_lit_str_radix(2)))
.or(just("0x").ignore_then(num_lit_str_radix(16)))
.or(num_lit_str_radix(10))
.map_with_span(|(value, str), span| NumLit { value, str, span })
}
fn string_lit() -> impl Parser<char, StringLit, Error = Error> {
// TODO Parse string literals
filter(|_| false).map(|_| unreachable!())
}
fn table_lit_elem(
expr: impl Parser<char, Expr, Error = Error> + Clone,
) -> impl Parser<char, TableLitElem, Error = Error> {
let positional = expr
.clone()
.map(|value| TableLitElem::Positional(Box::new(value)));
let named = ident()
.then(space())
.then_ignore(just(':'))
.then(space())
.then(expr)
.map(|(((name, s0), s1), value)| TableLitElem::Named {
name,
s0,
s1,
value: Box::new(value),
});
named.or(positional)
}
fn table_lit(
expr: impl Parser<char, Expr, Error = Error> + Clone,
) -> impl Parser<char, TableLit, Error = Error> {
let elem = space()
.then(table_lit_elem(expr))
.then(space())
.map(|((s0, elem), s1)| (s0, elem, s1));
let trailing_comma = just(",").ignore_then(space()).or_not();
let elems = elem.separated_by(just(",")).then(trailing_comma);
just("'{")
.ignore_then(elems)
.then_ignore(just("}"))
.map_with_span(|(elems, trailing_comma), span| TableLit {
elems,
trailing_comma,
span,
})
}
fn lit(
expr: impl Parser<char, Expr, Error = Error> + Clone,
) -> impl Parser<char, Lit, Error = Error> {
let nil = text::keyword("nil").map_with_span(|_, span| Lit::Nil(span));
let r#true = text::keyword("true").map_with_span(|_, span| Lit::Bool(true, span));
let r#false = text::keyword("false").map_with_span(|_, span| Lit::Bool(false, span));
let num = num_lit().map(Lit::Num);
let string = string_lit().map(Lit::String);
let table = table_lit(expr).map(Lit::Table);
nil.or(r#true).or(r#false).or(num).or(string).or(table)
}
fn table_constr_elem(
expr: impl Parser<char, Expr, Error = Error> + Clone,
) -> impl Parser<char, TableConstrElem, Error = Error> {
let lit = table_lit_elem(expr.clone()).map(TableConstrElem::Lit);
let indexed = just("[")
.ignore_then(space())
.then(expr.clone())
.then(space())
.then_ignore(just("]"))
.then(space())
.then_ignore(just(":"))
.then(space())
.then(expr)
.map_with_span(
|(((((s0, index), s1), s2), s3), value), span| TableConstrElem::Indexed {
s0,
index: Box::new(index),
s1,
s2,
s3,
value: Box::new(value),
span,
},
);
indexed.or(lit)
}
fn table_constr(
expr: impl Parser<char, Expr, Error = Error> + Clone,
) -> impl Parser<char, TableConstr, Error = Error> {
let elem = space()
.then(table_constr_elem(expr))
.then(space())
.map(|((s0, elem), s1)| (s0, elem, s1));
let trailing_comma = just(",").ignore_then(space()).or_not();
let elems = elem.separated_by(just(",")).then(trailing_comma);
just("{")
.ignore_then(elems)
.then_ignore(just("}"))
.map_with_span(|(elems, trailing_comma), span| TableConstr {
elems,
trailing_comma,
span,
})
}
fn atom_paren(
expr: impl Parser<char, Expr, Error = Error> + Clone,
) -> impl Parser<char, Expr, Error = Error> {
just("(")
.ignore_then(space())
.then(expr)
.then(space())
.then_ignore(just(")"))
.map_with_span(|((s0, inner), s1), span| Expr::Paren {
s0,
inner: Box::new(inner),
s1,
span,
})
}
fn atom_var(
expr: impl Parser<char, Expr, Error = Error> + Clone,
) -> impl Parser<char, Expr, Error = Error> {
just("[")
.ignore_then(space())
.then(expr)
.then(space())
.then_ignore(just("]"))
.map_with_span(|((s0, index), s1), span| Expr::Var {
s0,
index: Box::new(index),
s1,
span,
})
}
fn atom_var_assign(
expr: impl Parser<char, Expr, Error = Error> + Clone,
) -> impl Parser<char, Expr, Error = Error> {
just("[")
.ignore_then(space())
.then(expr.clone())
.then(space())
.then_ignore(just("]"))
.then(space())
.then_ignore(just("="))
.then(space())
.then(expr)
.map_with_span(
|(((((s0, index), s1), s2), s3), value), span| Expr::VarAssign {
s0,
index: Box::new(index),
s1,
s2,
s3,
value: Box::new(value),
span,
},
)
}
fn atom_var_ident_assign(
expr: impl Parser<char, Expr, Error = Error> + Clone,
) -> impl Parser<char, Expr, Error = Error> {
ident()
.then(space())
.then_ignore(just("="))
.then(space())
.then(expr)
.map(|(((name, s0), s1), value)| Expr::VarIdentAssign {
name,
s0,
s1,
value: Box::new(value),
})
}
fn atom(
expr: impl Parser<char, Expr, Error = Error> + Clone,
) -> impl Parser<char, Expr, Error = Error> {
let lit = lit(expr.clone()).map(Expr::Lit);
let paren = atom_paren(expr.clone());
let table_constr = table_constr(expr.clone()).map(Expr::TableConstr);
let var = atom_var(expr.clone());
let var_ident = ident().map(Expr::VarIdent);
let var_assign = atom_var_assign(expr.clone());
let var_ident_assign = atom_var_ident_assign(expr);
lit.or(paren)
.or(table_constr)
.or(var_assign)
.or(var)
.or(var_ident_assign)
.or(var_ident)
}
enum Suffix {
/// See [`Expr::Field`].
Field {
s0: Space,
s1: Space,
index: Box<Expr>,
s2: Space,
span: Span,
},
/// See [`Expr::FieldIdent`].
FieldIdent { s0: Space, s1: Space, ident: Ident },
/// See [`Expr::FieldAssign`].
FieldAssign {
s0: Space,
s1: Space,
index: Box<Expr>,
s2: Space,
s3: Space,
s4: Space,
value: Box<Expr>,
},
/// See [`Expr::FieldIdentAssign`].
FieldIdentAssign {
s0: Space,
s1: Space,
ident: Ident,
s2: Space,
s3: Space,
value: Box<Expr>,
},
}
impl Suffix {
fn into_expr(self, expr: Expr) -> Expr {
let expr = Box::new(expr);
match self {
Suffix::Field {
s0,
s1,
index,
s2,
span,
} => Expr::Field {
span: expr.span().join(span),
expr,
s0,
s1,
index,
s2,
},
Suffix::FieldIdent { s0, s1, ident } => Expr::FieldIdent {
expr,
s0,
s1,
ident,
},
Suffix::FieldAssign {
s0,
s1,
index,
s2,
s3,
s4,
value,
} => Expr::FieldAssign {
expr,
s0,
s1,
index,
s2,
s3,
s4,
value,
},
Suffix::FieldIdentAssign {
s0,
s1,
ident,
s2,
s3,
value,
} => Expr::FieldIdentAssign {
expr,
s0,
s1,
ident,
s2,
s3,
value,
},
}
}
}
fn suffix_field(
expr: impl Parser<char, Expr, Error = Error> + Clone,
) -> impl Parser<char, Suffix, Error = Error> {
space()
.then_ignore(just("["))
.then(space())
.then(expr)
.then(space())
.then_ignore(just("]"))
.map_with_span(|(((s0, s1), index), s2), span| Suffix::Field {
s0,
s1,
index: Box::new(index),
s2,
span,
})
}
fn suffixed(
expr: impl Parser<char, Expr, Error = Error> + Clone,
) -> impl Parser<char, Expr, Error = Error> {
let suffix_field = suffix_field(expr.clone());
let suffix = suffix_field;
atom(expr)
.then(suffix.repeated())
.foldl(|expr, suffix| suffix.into_expr(expr))
}
fn expr(
expr: impl Parser<char, Expr, Error = Error> + Clone,
) -> impl Parser<char, Expr, Error = Error> {
suffixed(expr)
}
use self::basic::Error;
pub fn parser() -> impl Parser<char, Expr, Error = Error> {
recursive(expr).padded().then_ignore(end())
recursive(expr::expr).padded().then_ignore(end())
}