diff --git a/src/parser.rs b/src/parser.rs index 770f24f..6f00075 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,425 +1,16 @@ +mod basic; +mod expr; +mod lit; +mod suffix; +mod table_constr; +mod var; + use chumsky::prelude::*; -use chumsky::text::Character; -use crate::ast::{ - Expr, Ident, Lit, NumLit, NumLitStr, Space, StringLit, TableConstr, TableConstrElem, TableLit, - TableLitElem, -}; -use crate::span::{HasSpan, Span}; +use crate::ast::Expr; -type Error = Simple; - -// This would probably look a lot nicer with type_alias_impl_trait: -// https://github.com/rust-lang/rust/issues/63063 - -fn space() -> impl Parser { - // TODO Parse comments - text::whitespace().map_with_span(|(), span| Space { - comment: vec![], - span, - }) -} - -fn ident() -> impl Parser { - // TODO Forbid keywords - text::ident().map_with_span(|name, span| Ident { name, span }) -} - -fn num_lit_str_radix(radix: u32) -> impl Parser + Clone { - // Minimum amount of digits required to represent i64::MAX. The rest of this - // code assumes that any value that can be represented using this amount of - // digits fits into an u64. - let max_digits = match radix { - 2 => 63, - 10 => 19, - 16 => 16, - _ => panic!("unsupported radix"), - }; - - // Representations of i64::MAX. - let max_value = match radix { - 2 => "0b_1111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111", - 10 => "9_223_372_036_854_775_807", - 16 => "0x_7fff_ffff_ffff_ffff", - _ => panic!("unsupported radix"), - }; - - let constructor = match radix { - 2 => NumLitStr::Bin, - 10 => NumLitStr::Dec, - 16 => NumLitStr::Hex, - _ => panic!("unsupported radix"), - }; - - filter(move |c: &char| c.is_digit(radix) || *c == '_') - .repeated() - .at_least(1) - .collect::() - .try_map(move |s, span| { - let digits = s.chars().filter(|c| *c != '_').collect::(); - if digits.is_empty() { - let msg = "integer literal needs to contain at least one digit"; - return Err(Simple::custom(span, msg)); - } else if digits.len() > max_digits { - let msg = format!("integer literal too large, the maximum value is {max_value}"); - return Err(Simple::custom(span, msg)); - } - - let value = u64::from_str_radix(&digits, radix).unwrap(); - if value <= i64::MAX as u64 { - Ok((value as i64, constructor(s))) - } else { - let msg = format!("integer literal too large, the maximum value is {max_value}"); - Err(Simple::custom(span, msg)) - } - }) -} - -fn num_lit() -> impl Parser + Clone { - (just("0b").ignore_then(num_lit_str_radix(2))) - .or(just("0x").ignore_then(num_lit_str_radix(16))) - .or(num_lit_str_radix(10)) - .map_with_span(|(value, str), span| NumLit { value, str, span }) -} - -fn string_lit() -> impl Parser { - // TODO Parse string literals - filter(|_| false).map(|_| unreachable!()) -} - -fn table_lit_elem( - expr: impl Parser + Clone, -) -> impl Parser { - let positional = expr - .clone() - .map(|value| TableLitElem::Positional(Box::new(value))); - - let named = ident() - .then(space()) - .then_ignore(just(':')) - .then(space()) - .then(expr) - .map(|(((name, s0), s1), value)| TableLitElem::Named { - name, - s0, - s1, - value: Box::new(value), - }); - - named.or(positional) -} - -fn table_lit( - expr: impl Parser + Clone, -) -> impl Parser { - let elem = space() - .then(table_lit_elem(expr)) - .then(space()) - .map(|((s0, elem), s1)| (s0, elem, s1)); - - let trailing_comma = just(",").ignore_then(space()).or_not(); - - let elems = elem.separated_by(just(",")).then(trailing_comma); - - just("'{") - .ignore_then(elems) - .then_ignore(just("}")) - .map_with_span(|(elems, trailing_comma), span| TableLit { - elems, - trailing_comma, - span, - }) -} - -fn lit( - expr: impl Parser + Clone, -) -> impl Parser { - let nil = text::keyword("nil").map_with_span(|_, span| Lit::Nil(span)); - let r#true = text::keyword("true").map_with_span(|_, span| Lit::Bool(true, span)); - let r#false = text::keyword("false").map_with_span(|_, span| Lit::Bool(false, span)); - let num = num_lit().map(Lit::Num); - let string = string_lit().map(Lit::String); - let table = table_lit(expr).map(Lit::Table); - - nil.or(r#true).or(r#false).or(num).or(string).or(table) -} - -fn table_constr_elem( - expr: impl Parser + Clone, -) -> impl Parser { - let lit = table_lit_elem(expr.clone()).map(TableConstrElem::Lit); - - let indexed = just("[") - .ignore_then(space()) - .then(expr.clone()) - .then(space()) - .then_ignore(just("]")) - .then(space()) - .then_ignore(just(":")) - .then(space()) - .then(expr) - .map_with_span( - |(((((s0, index), s1), s2), s3), value), span| TableConstrElem::Indexed { - s0, - index: Box::new(index), - s1, - s2, - s3, - value: Box::new(value), - span, - }, - ); - - indexed.or(lit) -} - -fn table_constr( - expr: impl Parser + Clone, -) -> impl Parser { - let elem = space() - .then(table_constr_elem(expr)) - .then(space()) - .map(|((s0, elem), s1)| (s0, elem, s1)); - - let trailing_comma = just(",").ignore_then(space()).or_not(); - - let elems = elem.separated_by(just(",")).then(trailing_comma); - - just("{") - .ignore_then(elems) - .then_ignore(just("}")) - .map_with_span(|(elems, trailing_comma), span| TableConstr { - elems, - trailing_comma, - span, - }) -} - -fn atom_paren( - expr: impl Parser + Clone, -) -> impl Parser { - just("(") - .ignore_then(space()) - .then(expr) - .then(space()) - .then_ignore(just(")")) - .map_with_span(|((s0, inner), s1), span| Expr::Paren { - s0, - inner: Box::new(inner), - s1, - span, - }) -} - -fn atom_var( - expr: impl Parser + Clone, -) -> impl Parser { - just("[") - .ignore_then(space()) - .then(expr) - .then(space()) - .then_ignore(just("]")) - .map_with_span(|((s0, index), s1), span| Expr::Var { - s0, - index: Box::new(index), - s1, - span, - }) -} - -fn atom_var_assign( - expr: impl Parser + Clone, -) -> impl Parser { - just("[") - .ignore_then(space()) - .then(expr.clone()) - .then(space()) - .then_ignore(just("]")) - .then(space()) - .then_ignore(just("=")) - .then(space()) - .then(expr) - .map_with_span( - |(((((s0, index), s1), s2), s3), value), span| Expr::VarAssign { - s0, - index: Box::new(index), - s1, - s2, - s3, - value: Box::new(value), - span, - }, - ) -} - -fn atom_var_ident_assign( - expr: impl Parser + Clone, -) -> impl Parser { - ident() - .then(space()) - .then_ignore(just("=")) - .then(space()) - .then(expr) - .map(|(((name, s0), s1), value)| Expr::VarIdentAssign { - name, - s0, - s1, - value: Box::new(value), - }) -} - -fn atom( - expr: impl Parser + Clone, -) -> impl Parser { - let lit = lit(expr.clone()).map(Expr::Lit); - let paren = atom_paren(expr.clone()); - let table_constr = table_constr(expr.clone()).map(Expr::TableConstr); - let var = atom_var(expr.clone()); - let var_ident = ident().map(Expr::VarIdent); - let var_assign = atom_var_assign(expr.clone()); - let var_ident_assign = atom_var_ident_assign(expr); - - lit.or(paren) - .or(table_constr) - .or(var_assign) - .or(var) - .or(var_ident_assign) - .or(var_ident) -} - -enum Suffix { - /// See [`Expr::Field`]. - Field { - s0: Space, - s1: Space, - index: Box, - s2: Space, - span: Span, - }, - - /// See [`Expr::FieldIdent`]. - FieldIdent { s0: Space, s1: Space, ident: Ident }, - - /// See [`Expr::FieldAssign`]. - FieldAssign { - s0: Space, - s1: Space, - index: Box, - s2: Space, - s3: Space, - s4: Space, - value: Box, - }, - - /// See [`Expr::FieldIdentAssign`]. - FieldIdentAssign { - s0: Space, - s1: Space, - ident: Ident, - s2: Space, - s3: Space, - value: Box, - }, -} - -impl Suffix { - fn into_expr(self, expr: Expr) -> Expr { - let expr = Box::new(expr); - match self { - Suffix::Field { - s0, - s1, - index, - s2, - span, - } => Expr::Field { - span: expr.span().join(span), - expr, - s0, - s1, - index, - s2, - }, - Suffix::FieldIdent { s0, s1, ident } => Expr::FieldIdent { - expr, - s0, - s1, - ident, - }, - Suffix::FieldAssign { - s0, - s1, - index, - s2, - s3, - s4, - value, - } => Expr::FieldAssign { - expr, - s0, - s1, - index, - s2, - s3, - s4, - value, - }, - Suffix::FieldIdentAssign { - s0, - s1, - ident, - s2, - s3, - value, - } => Expr::FieldIdentAssign { - expr, - s0, - s1, - ident, - s2, - s3, - value, - }, - } - } -} - -fn suffix_field( - expr: impl Parser + Clone, -) -> impl Parser { - space() - .then_ignore(just("[")) - .then(space()) - .then(expr) - .then(space()) - .then_ignore(just("]")) - .map_with_span(|(((s0, s1), index), s2), span| Suffix::Field { - s0, - s1, - index: Box::new(index), - s2, - span, - }) -} - -fn suffixed( - expr: impl Parser + Clone, -) -> impl Parser { - let suffix_field = suffix_field(expr.clone()); - - let suffix = suffix_field; - - atom(expr) - .then(suffix.repeated()) - .foldl(|expr, suffix| suffix.into_expr(expr)) -} - -fn expr( - expr: impl Parser + Clone, -) -> impl Parser { - suffixed(expr) -} +use self::basic::Error; pub fn parser() -> impl Parser { - recursive(expr).padded().then_ignore(end()) + recursive(expr::expr).padded().then_ignore(end()) } diff --git a/src/parser/basic.rs b/src/parser/basic.rs new file mode 100644 index 0000000..971d558 --- /dev/null +++ b/src/parser/basic.rs @@ -0,0 +1,23 @@ +//! Corresponds to `ast::basic`. + +use chumsky::prelude::*; + +use crate::ast::{Ident, Space}; +use crate::span::Span; + +pub type Error = Simple; + +// TODO https://github.com/rust-lang/rust/issues/63063 + +pub fn space() -> impl Parser { + // TODO Parse comments + text::whitespace().map_with_span(|(), span| Space { + comment: vec![], + span, + }) +} + +pub fn ident() -> impl Parser { + // TODO Forbid keywords + text::ident().map_with_span(|name, span| Ident { name, span }) +} diff --git a/src/parser/expr.rs b/src/parser/expr.rs new file mode 100644 index 0000000..0c0af2e --- /dev/null +++ b/src/parser/expr.rs @@ -0,0 +1,44 @@ +//! Corresponds to `ast::expr`. + +use chumsky::prelude::*; + +use crate::ast::Expr; + +use super::basic::{space, Error}; +use super::lit::lit; +use super::suffix::suffixed; +use super::table_constr::table_constr; +use super::var::var; + +fn atom_paren( + expr: impl Parser + Clone, +) -> impl Parser { + just("(") + .ignore_then(space()) + .then(expr) + .then(space()) + .then_ignore(just(")")) + .map_with_span(|((s0, inner), s1), span| Expr::Paren { + s0, + inner: Box::new(inner), + s1, + span, + }) +} + +fn atom( + expr: impl Parser + Clone, +) -> impl Parser { + let lit = lit(expr.clone()).map(Expr::Lit); + let paren = atom_paren(expr.clone()); + let table_constr = table_constr(expr.clone()).map(Expr::TableConstr); + let var = var(expr).map(Expr::Var); + + lit.or(paren).or(table_constr).or(var) +} + +pub fn expr( + expr: impl Parser + Clone, +) -> impl Parser { + suffixed(atom(expr.clone()), expr) +} diff --git a/src/parser/lit.rs b/src/parser/lit.rs new file mode 100644 index 0000000..29b7ab5 --- /dev/null +++ b/src/parser/lit.rs @@ -0,0 +1,127 @@ +//! Corresponds to `ast::lit`. + +use chumsky::prelude::*; + +use crate::ast::{Expr, Lit, NumLit, NumLitStr, StringLit, TableLit, TableLitElem}; + +use super::basic::{ident, space, Error}; + +fn num_lit_str_radix(radix: u32) -> impl Parser + Clone { + // Minimum amount of digits required to represent i64::MAX. The rest of this + // code assumes that any value that can be represented using this amount of + // digits fits into an u64. + let max_digits = match radix { + 2 => 63, + 10 => 19, + 16 => 16, + _ => panic!("unsupported radix"), + }; + + // Representations of i64::MAX. + let max_value = match radix { + 2 => "0b_1111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111", + 10 => "9_223_372_036_854_775_807", + 16 => "0x_7fff_ffff_ffff_ffff", + _ => panic!("unsupported radix"), + }; + + let constructor = match radix { + 2 => NumLitStr::Bin, + 10 => NumLitStr::Dec, + 16 => NumLitStr::Hex, + _ => panic!("unsupported radix"), + }; + + filter(move |c: &char| c.is_digit(radix) || *c == '_') + .repeated() + .at_least(1) + .collect::() + .try_map(move |s, span| { + let digits = s.chars().filter(|c| *c != '_').collect::(); + if digits.is_empty() { + let msg = "integer literal needs to contain at least one digit"; + return Err(Simple::custom(span, msg)); + } else if digits.len() > max_digits { + let msg = format!("integer literal too large, the maximum value is {max_value}"); + return Err(Simple::custom(span, msg)); + } + + let value = u64::from_str_radix(&digits, radix).unwrap(); + if value <= i64::MAX as u64 { + Ok((value as i64, constructor(s))) + } else { + let msg = format!("integer literal too large, the maximum value is {max_value}"); + Err(Simple::custom(span, msg)) + } + }) +} + +pub fn num_lit() -> impl Parser + Clone { + (just("0b").ignore_then(num_lit_str_radix(2))) + .or(just("0x").ignore_then(num_lit_str_radix(16))) + .or(num_lit_str_radix(10)) + .map_with_span(|(value, str), span| NumLit { value, str, span }) +} + +pub fn string_lit() -> impl Parser { + // TODO Parse string literals + filter(|_| false).map(|_| unreachable!()) +} + +pub fn table_lit_elem( + expr: impl Parser + Clone, +) -> impl Parser { + let positional = expr + .clone() + .map(|value| TableLitElem::Positional(Box::new(value))); + + let named = ident() + .then(space()) + .then_ignore(just(':')) + .then(space()) + .then(expr) + .map_with_span(|(((name, s0), s1), value), span| TableLitElem::Named { + name, + s0, + s1, + value: Box::new(value), + span, + }); + + named.or(positional) +} + +pub fn table_lit( + expr: impl Parser + Clone, +) -> impl Parser { + let elem = space() + .then(table_lit_elem(expr)) + .then(space()) + .map(|((s0, elem), s1)| (s0, elem, s1)); + + let trailing_comma = just(",").ignore_then(space()).or_not(); + + let elems = elem.separated_by(just(",")).then(trailing_comma); + + just("'{") + .ignore_then(elems) + .then_ignore(just("}")) + .map_with_span(|(elems, trailing_comma), span| TableLit { + elems, + trailing_comma, + span, + }) +} + +pub fn lit( + expr: impl Parser + Clone, +) -> impl Parser { + let nil = text::keyword("nil").map_with_span(|_, span| Lit::Nil(span)); + let r#true = text::keyword("true").map_with_span(|_, span| Lit::Bool(true, span)); + let r#false = text::keyword("false").map_with_span(|_, span| Lit::Bool(false, span)); + let num = num_lit().map(Lit::Num); + let string = string_lit().map(Lit::String); + let table = table_lit(expr).map(Lit::Table); + + nil.or(r#true).or(r#false).or(num).or(string).or(table) +} diff --git a/src/parser/suffix.rs b/src/parser/suffix.rs new file mode 100644 index 0000000..584bbfa --- /dev/null +++ b/src/parser/suffix.rs @@ -0,0 +1,162 @@ +//! Corresponds to `ast::call` and `ast::field`. + +use chumsky::prelude::*; + +use crate::ast::{Call, Expr, Field, Ident, Space, TableConstr}; +use crate::span::{HasSpan, Span}; + +use super::basic::{space, Error}; + +enum Suffix { + CallArg { + s0: Space, + s1: Space, + arg: Box, + s2: Space, + }, + + CallNoArg { + s0: Space, + s1: Space, + }, + + CallConstr { + s0: Space, + constr: TableConstr, + }, + + FieldAccess { + s0: Space, + s1: Space, + index: Box, + s2: Space, + }, + + FieldAssign { + s0: Space, + s1: Space, + index: Box, + s2: Space, + s3: Space, + s4: Space, + value: Box, + }, + + FieldAccessIdent { + s0: Space, + s1: Space, + ident: Ident, + }, + + FieldAssignIdent { + s0: Space, + s1: Space, + ident: Ident, + s2: Space, + s3: Space, + value: Box, + }, +} + +impl Suffix { + fn into_expr(self, span: Span, expr: Expr) -> Expr { + let expr = Box::new(expr); + match self { + Suffix::CallArg { s0, s1, arg, s2 } => Expr::Call(Call::Arg { + expr, + s0, + s1, + arg, + s2, + span, + }), + Suffix::CallNoArg { s0, s1 } => Expr::Call(Call::NoArg { expr, s0, s1, span }), + Suffix::CallConstr { s0, constr } => Expr::Call(Call::Constr { + expr, + s0, + constr, + span, + }), + Suffix::FieldAccess { s0, s1, index, s2 } => Expr::Field(Field::Access { + expr, + s0, + s1, + index, + s2, + span, + }), + Suffix::FieldAssign { + s0, + s1, + index, + s2, + s3, + s4, + value, + } => Expr::Field(Field::Assign { + expr, + s0, + s1, + index, + s2, + s3, + s4, + value, + span, + }), + Suffix::FieldAccessIdent { s0, s1, ident } => Expr::Field(Field::AccessIdent { + expr, + s0, + s1, + ident, + span, + }), + Suffix::FieldAssignIdent { + s0, + s1, + ident, + s2, + s3, + value, + } => Expr::Field(Field::AssignIdent { + expr, + s0, + s1, + ident, + s2, + s3, + value, + span, + }), + } + } +} + +fn suffix_field_access( + expr: impl Parser + Clone, +) -> impl Parser { + space() + .then_ignore(just("[")) + .then(space()) + .then(expr) + .then(space()) + .then_ignore(just("]")) + .map(|(((s0, s1), index), s2)| Suffix::FieldAccess { + s0, + s1, + index: Box::new(index), + s2, + }) +} + +pub fn suffixed( + atom: impl Parser, + expr: impl Parser + Clone, +) -> impl Parser { + let field_access = suffix_field_access(expr.clone()); + + let suffix = field_access.map_with_span(|suffix, span| (suffix, span)); + + atom.then(suffix.repeated()) + .foldl(|expr, (suffix, span)| suffix.into_expr(expr.span().join(span), expr)) +} diff --git a/src/parser/table_constr.rs b/src/parser/table_constr.rs new file mode 100644 index 0000000..1b70048 --- /dev/null +++ b/src/parser/table_constr.rs @@ -0,0 +1,59 @@ +//! Corresponds to `ast::table_constr`. + +use chumsky::prelude::*; + +use crate::ast::{Expr, TableConstr, TableConstrElem}; + +use super::basic::{space, Error}; +use super::lit::table_lit_elem; + +pub fn table_constr_elem( + expr: impl Parser + Clone, +) -> impl Parser { + let lit = table_lit_elem(expr.clone()).map(TableConstrElem::Lit); + + let indexed = just("[") + .ignore_then(space()) + .then(expr.clone()) + .then(space()) + .then_ignore(just("]")) + .then(space()) + .then_ignore(just(":")) + .then(space()) + .then(expr) + .map_with_span( + |(((((s0, index), s1), s2), s3), value), span| TableConstrElem::Indexed { + s0, + index: Box::new(index), + s1, + s2, + s3, + value: Box::new(value), + span, + }, + ); + + indexed.or(lit) +} + +pub fn table_constr( + expr: impl Parser + Clone, +) -> impl Parser { + let elem = space() + .then(table_constr_elem(expr)) + .then(space()) + .map(|((s0, elem), s1)| (s0, elem, s1)); + + let trailing_comma = just(",").ignore_then(space()).or_not(); + + let elems = elem.separated_by(just(",")).then(trailing_comma); + + just("{") + .ignore_then(elems) + .then_ignore(just("}")) + .map_with_span(|(elems, trailing_comma), span| TableConstr { + elems, + trailing_comma, + span, + }) +} diff --git a/src/parser/var.rs b/src/parser/var.rs new file mode 100644 index 0000000..a46e624 --- /dev/null +++ b/src/parser/var.rs @@ -0,0 +1,86 @@ +//! Corresponds to `ast::var`. + +use chumsky::prelude::*; + +use crate::ast::{Expr, Var}; + +use super::basic::{ident, space, Error}; + +fn var_access( + expr: impl Parser + Clone, +) -> impl Parser { + just("[") + .ignore_then(space()) + .then(expr) + .then(space()) + .then_ignore(just("]")) + .map_with_span(|((s0, index), s1), span| Var::Access { + s0, + index: Box::new(index), + s1, + span, + }) +} + +fn var_assign( + expr: impl Parser + Clone, +) -> impl Parser { + let local = text::keyword("local").ignore_then(space()).or_not(); + + local + .then_ignore(just("[")) + .then(space()) + .then(expr.clone()) + .then(space()) + .then_ignore(just("]")) + .then(space()) + .then_ignore(just("=")) + .then(space()) + .then(expr) + .map_with_span( + |((((((local, s0), index), s1), s2), s3), value), span| Var::Assign { + local, + s0, + index: Box::new(index), + s1, + s2, + s3, + value: Box::new(value), + span, + }, + ) +} + +fn var_assign_ident( + expr: impl Parser + Clone, +) -> impl Parser { + let local = text::keyword("local").ignore_then(space()).or_not(); + + local + .then(ident()) + .then(space()) + .then_ignore(just("=")) + .then(space()) + .then(expr) + .map_with_span( + |((((local, name), s0), s1), value), span| Var::AssignIdent { + local, + name, + s0, + s1, + value: Box::new(value), + span, + }, + ) +} + +pub fn var( + expr: impl Parser + Clone, +) -> impl Parser { + let access = var_access(expr.clone()); + let assign = var_assign(expr.clone()); + let access_ident = ident().map(Var::AccessIdent); + let assign_ident = var_assign_ident(expr); + + assign.or(access).or(assign_ident).or(access_ident) +}