tada/src/parser/lit.rs

193 lines
6.3 KiB
Rust

//! Corresponds to `ast::lit`.
use chumsky::prelude::*;
use crate::ast::{
Expr, Ident, Lit, NumLit, NumLitStr, Space, StringLit, StringLitElem, TableLit, TableLitElem,
};
use crate::builtin::Builtin;
use super::basic::{EParser, Error};
/// Parses a builtin literal: a single quote followed by the name of a builtin,
/// for example `'get` or `'setraw`.
///
/// Yields the [`Builtin`] value associated with the matched name.
fn builtin_lit() -> impl Parser<char, Builtin, Error = Error> {
    // Each recognised builtin name, paired with the value it stands for.
    let name = choice((
        text::keyword("get").to(Builtin::Get),
        text::keyword("set").to(Builtin::Set),
        text::keyword("getraw").to(Builtin::GetRaw),
        text::keyword("setraw").to(Builtin::SetRaw),
        text::keyword("getmeta").to(Builtin::GetMeta),
        text::keyword("setmeta").to(Builtin::SetMeta),
        text::keyword("scope").to(Builtin::Scope),
        text::keyword("arg").to(Builtin::Arg),
        text::keyword("destructure").to(Builtin::Destructure),
    ));

    // The leading quote is required but contributes nothing to the output.
    just('\'').ignore_then(name)
}
/// Parses the digits of an integer literal written in the given `radix`.
///
/// Any radix prefix (such as `0b` or `0x`) must already have been consumed by
/// the caller. Digits may be freely interleaved with `_` separators.
///
/// On success the parser yields the literal's value together with the text
/// exactly as written (separators included), wrapped in the [`NumLitStr`]
/// variant that corresponds to `radix`.
///
/// A custom parse error is reported when the text contains no digit at all
/// (only underscores) or when the value is greater than `i64::MAX`. Leading
/// zeros do not count towards the size of the literal, so e.g.
/// `0000000000000000000001` is accepted as `1`.
///
/// # Panics
///
/// Panics if `radix` is not 2, 10 or 16.
fn num_lit_str_radix(radix: u32) -> impl Parser<char, (i64, NumLitStr), Error = Error> {
    // Representations of i64::MAX, used in the "too large" error message.
    let max_value = match radix {
        2 => "0b_1111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111",
        10 => "9_223_372_036_854_775_807",
        16 => "0x_7fff_ffff_ffff_ffff",
        _ => panic!("unsupported radix"),
    };

    let constructor = match radix {
        2 => NumLitStr::Bin,
        10 => NumLitStr::Dec,
        16 => NumLitStr::Hex,
        _ => panic!("unsupported radix"),
    };

    filter(move |c: &char| c.is_digit(radix) || *c == '_')
        .repeated()
        .at_least(1)
        .collect::<String>()
        .try_map(move |s, span| {
            // Separators carry no value; only the digits are converted.
            let digits = s.chars().filter(|c| *c != '_').collect::<String>();
            if digits.is_empty() {
                let msg = "integer literal needs to contain at least one digit";
                return Err(Simple::custom(span, msg));
            }

            // `digits` is non-empty, unsigned and contains only characters
            // that are valid in `radix`, so the only way the conversion can
            // fail is overflow. Unlike a check on the number of digits, this
            // does not reject small values written with many leading zeros.
            match i64::from_str_radix(&digits, radix) {
                Ok(value) => Ok((value, constructor(s))),
                Err(_) => {
                    let msg =
                        format!("integer literal too large, the maximum value is {max_value}");
                    Err(Simple::custom(span, msg))
                }
            }
        })
}
/// Parses an integer literal in binary (`0b` prefix), hexadecimal (`0x`
/// prefix) or decimal (no prefix) notation.
///
/// The resulting [`NumLit`] carries the parsed value, the digit text as
/// produced by `num_lit_str_radix`, and the span of the whole literal
/// including its prefix.
fn num_lit() -> impl Parser<char, NumLit, Error = Error> {
    let binary = just("0b").ignore_then(num_lit_str_radix(2));
    let hexadecimal = just("0x").ignore_then(num_lit_str_radix(16));
    let decimal = num_lit_str_radix(10);

    // The prefixed forms are attempted before the plain decimal form.
    binary
        .or(hexadecimal)
        .or(decimal)
        .map_with_span(|(value, str), span| NumLit { value, str, span })
}
/// Parses a single element of a string literal's contents.
///
/// An element is one of:
/// - a non-empty run of plain characters (anything except a backslash, a
///   double quote, a tab, a carriage return or a newline),
/// - a unicode escape of the form `\u{...}` with one to six hex digits,
/// - one of the escapes `\\`, `\"`, `\t`, `\r` or `\n`.
fn string_lit_elem() -> impl Parser<char, StringLitElem, Error = Error> {
    // Fixed two-character escape sequences.
    let backslash = just("\\\\").to(StringLitElem::Backslash);
    let double_quote = just("\\\"").to(StringLitElem::DoubleQuote);
    let tab = just("\\t").to(StringLitElem::Tab);
    let carriage_return = just("\\r").to(StringLitElem::CarriageReturn);
    let newline = just("\\n").to(StringLitElem::Newline);

    // The largest unicode code point, 10ffff, is six hex digits long, so no
    // more than six digits are accepted.
    let code_point = filter(char::is_ascii_hexdigit)
        .repeated()
        .at_least(1)
        .at_most(6)
        .collect::<String>()
        .try_map(|hex, span| {
            // One to six hex digits always fit into a u32.
            let n = u32::from_str_radix(&hex, 16).unwrap();
            let msg = "not a valid unicode code point";
            // Not every u32 is a `char`; report those that are not.
            let c: char = n.try_into().map_err(|_| Simple::custom(span, msg))?;
            Ok(c)
        });
    let unicode = just("\\u{")
        .ignore_then(code_point)
        .then_ignore(just('}'))
        .map(StringLitElem::Unicode);

    // Characters that may not appear unescaped inside a string literal end a
    // plain run.
    let plain = filter(|c: &char| !['\\', '"', '\t', '\r', '\n'].contains(c))
        .repeated()
        .at_least(1)
        .collect::<String>()
        .map(StringLitElem::Plain);

    plain
        .or(unicode)
        .or(backslash)
        .or(double_quote)
        .or(tab)
        .or(carriage_return)
        .or(newline)
}
/// Parses a string literal: zero or more string elements enclosed in double
/// quotes.
///
/// The span stored in the resulting [`StringLit`] covers the whole literal,
/// quotes included.
fn string_lit() -> impl Parser<char, StringLit, Error = Error> {
    let contents = string_lit_elem().repeated();
    let quoted = contents.delimited_by(just('"'), just('"'));

    quoted.map_with_span(|parts, span| StringLit { elems: parts, span })
}
/// Builds the parser for a single element of a table literal.
///
/// Two forms are recognised, tried in this order:
/// - named: `ident`, `space`, `:`, `space`, `expr`, producing
///   [`TableLitElem::Named`] with both pieces of whitespace preserved;
/// - positional: a bare `expr`, producing [`TableLitElem::Positional`].
pub fn table_lit_elem(
    space: EParser<Space>,
    ident: EParser<Ident>,
    expr: EParser<Expr>,
) -> EParser<TableLitElem> {
    // `name: value`
    let named = ident
        .then(space.clone())
        .then_ignore(just(':'))
        .then(space)
        .then(expr.clone())
        .map_with_span(|(((name, s0), s1), value), span| {
            let value = Box::new(value);
            TableLitElem::Named {
                name,
                s0,
                s1,
                value,
                span,
            }
        });

    // `value`
    let positional = expr.map(|value| TableLitElem::Positional(Box::new(value)));

    named.or(positional).boxed()
}
/// Parses a table literal: `'{`, a comma-separated list of elements, an
/// optional trailing comma, and a closing `}`.
///
/// Every element is stored together with the whitespace found before and
/// after it. If a trailing comma is present, the whitespace that follows it
/// is stored in `trailing_comma`.
fn table_lit(
    space: EParser<Space>,
    table_lit_elem: EParser<TableLitElem>,
) -> impl Parser<char, TableLit, Error = Error> {
    // Optional `,` after the last element, followed by whitespace.
    let trailing_comma = just(',').ignore_then(space.clone()).or_not();

    // One element, surrounded by whitespace on both sides.
    let elem = space
        .clone()
        .then(table_lit_elem)
        .then(space)
        .map(|((before, item), after)| (before, item, after));

    let body = elem.separated_by(just(',')).then(trailing_comma);

    just("'{")
        .ignore_then(body)
        .then_ignore(just('}'))
        .map_with_span(|(elems, trailing_comma), span| TableLit {
            elems,
            trailing_comma,
            span,
        })
}
/// Builds the parser for any literal.
///
/// The alternatives are tried in this order: `nil`, `true`, `false`, builtin
/// literals, number literals, string literals and table literals. `space` and
/// `table_lit_elem` are only used for parsing table literals.
pub fn lit(space: EParser<Space>, table_lit_elem: EParser<TableLitElem>) -> EParser<Lit> {
    // Keyword literals.
    let nil_lit = text::keyword("nil").map_with_span(|_, span| Lit::Nil(span));
    let true_lit = text::keyword("true").map_with_span(|_, span| Lit::Bool(true, span));
    let false_lit = text::keyword("false").map_with_span(|_, span| Lit::Bool(false, span));

    // Literals with their own dedicated parsers.
    let builtin = builtin_lit().map_with_span(|builtin, span| Lit::Builtin(builtin, span));
    let number = num_lit().map(Lit::Num);
    let string = string_lit().map(Lit::String);
    let table = table_lit(space, table_lit_elem).map(Lit::Table);

    nil_lit
        .or(true_lit)
        .or(false_lit)
        .or(builtin)
        .or(number)
        .or(string)
        .or(table)
        .boxed()
}