From a559966c1d908e0f939be1e369143f22b507796a Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 18 Nov 2022 20:20:37 +0100 Subject: [PATCH] Split up parser The structure mostly follows the ast structure, with some slight changes. Each parser submodule documents which ast submodule it corresponds to. This parser is not yet complete, and I have yet to go through its modules one-by-one to fix and complete them. --- src/parser.rs | 429 +------------------------------------ src/parser/basic.rs | 23 ++ src/parser/expr.rs | 44 ++++ src/parser/lit.rs | 127 +++++++++++ src/parser/suffix.rs | 162 ++++++++++++++ src/parser/table_constr.rs | 59 +++++ src/parser/var.rs | 86 ++++++++ 7 files changed, 511 insertions(+), 419 deletions(-) create mode 100644 src/parser/basic.rs create mode 100644 src/parser/expr.rs create mode 100644 src/parser/lit.rs create mode 100644 src/parser/suffix.rs create mode 100644 src/parser/table_constr.rs create mode 100644 src/parser/var.rs diff --git a/src/parser.rs b/src/parser.rs index 770f24f..6f00075 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,425 +1,16 @@ +mod basic; +mod expr; +mod lit; +mod suffix; +mod table_constr; +mod var; + use chumsky::prelude::*; -use chumsky::text::Character; -use crate::ast::{ - Expr, Ident, Lit, NumLit, NumLitStr, Space, StringLit, TableConstr, TableConstrElem, TableLit, - TableLitElem, -}; -use crate::span::{HasSpan, Span}; +use crate::ast::Expr; -type Error = Simple; - -// This would probably look a lot nicer with type_alias_impl_trait: -// https://github.com/rust-lang/rust/issues/63063 - -fn space() -> impl Parser { - // TODO Parse comments - text::whitespace().map_with_span(|(), span| Space { - comment: vec![], - span, - }) -} - -fn ident() -> impl Parser { - // TODO Forbid keywords - text::ident().map_with_span(|name, span| Ident { name, span }) -} - -fn num_lit_str_radix(radix: u32) -> impl Parser + Clone { - // Minimum amount of digits required to represent i64::MAX. The rest of this - // code assumes that any value that can be represented using this amount of - // digits fits into an u64. - let max_digits = match radix { - 2 => 63, - 10 => 19, - 16 => 16, - _ => panic!("unsupported radix"), - }; - - // Representations of i64::MAX. - let max_value = match radix { - 2 => "0b_1111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111", - 10 => "9_223_372_036_854_775_807", - 16 => "0x_7fff_ffff_ffff_ffff", - _ => panic!("unsupported radix"), - }; - - let constructor = match radix { - 2 => NumLitStr::Bin, - 10 => NumLitStr::Dec, - 16 => NumLitStr::Hex, - _ => panic!("unsupported radix"), - }; - - filter(move |c: &char| c.is_digit(radix) || *c == '_') - .repeated() - .at_least(1) - .collect::() - .try_map(move |s, span| { - let digits = s.chars().filter(|c| *c != '_').collect::(); - if digits.is_empty() { - let msg = "integer literal needs to contain at least one digit"; - return Err(Simple::custom(span, msg)); - } else if digits.len() > max_digits { - let msg = format!("integer literal too large, the maximum value is {max_value}"); - return Err(Simple::custom(span, msg)); - } - - let value = u64::from_str_radix(&digits, radix).unwrap(); - if value <= i64::MAX as u64 { - Ok((value as i64, constructor(s))) - } else { - let msg = format!("integer literal too large, the maximum value is {max_value}"); - Err(Simple::custom(span, msg)) - } - }) -} - -fn num_lit() -> impl Parser + Clone { - (just("0b").ignore_then(num_lit_str_radix(2))) - .or(just("0x").ignore_then(num_lit_str_radix(16))) - .or(num_lit_str_radix(10)) - .map_with_span(|(value, str), span| NumLit { value, str, span }) -} - -fn string_lit() -> impl Parser { - // TODO Parse string literals - filter(|_| false).map(|_| unreachable!()) -} - -fn table_lit_elem( - expr: impl Parser + Clone, -) -> impl Parser { - let positional = expr - .clone() - .map(|value| TableLitElem::Positional(Box::new(value))); - - let named = ident() - .then(space()) - .then_ignore(just(':')) - .then(space()) - .then(expr) - .map(|(((name, s0), s1), value)| TableLitElem::Named { - name, - s0, - s1, - value: Box::new(value), - }); - - named.or(positional) -} - -fn table_lit( - expr: impl Parser + Clone, -) -> impl Parser { - let elem = space() - .then(table_lit_elem(expr)) - .then(space()) - .map(|((s0, elem), s1)| (s0, elem, s1)); - - let trailing_comma = just(",").ignore_then(space()).or_not(); - - let elems = elem.separated_by(just(",")).then(trailing_comma); - - just("'{") - .ignore_then(elems) - .then_ignore(just("}")) - .map_with_span(|(elems, trailing_comma), span| TableLit { - elems, - trailing_comma, - span, - }) -} - -fn lit( - expr: impl Parser + Clone, -) -> impl Parser { - let nil = text::keyword("nil").map_with_span(|_, span| Lit::Nil(span)); - let r#true = text::keyword("true").map_with_span(|_, span| Lit::Bool(true, span)); - let r#false = text::keyword("false").map_with_span(|_, span| Lit::Bool(false, span)); - let num = num_lit().map(Lit::Num); - let string = string_lit().map(Lit::String); - let table = table_lit(expr).map(Lit::Table); - - nil.or(r#true).or(r#false).or(num).or(string).or(table) -} - -fn table_constr_elem( - expr: impl Parser + Clone, -) -> impl Parser { - let lit = table_lit_elem(expr.clone()).map(TableConstrElem::Lit); - - let indexed = just("[") - .ignore_then(space()) - .then(expr.clone()) - .then(space()) - .then_ignore(just("]")) - .then(space()) - .then_ignore(just(":")) - .then(space()) - .then(expr) - .map_with_span( - |(((((s0, index), s1), s2), s3), value), span| TableConstrElem::Indexed { - s0, - index: Box::new(index), - s1, - s2, - s3, - value: Box::new(value), - span, - }, - ); - - indexed.or(lit) -} - -fn table_constr( - expr: impl Parser + Clone, -) -> impl Parser { - let elem = space() - .then(table_constr_elem(expr)) - .then(space()) - .map(|((s0, elem), s1)| (s0, elem, s1)); - - let trailing_comma = just(",").ignore_then(space()).or_not(); - - let elems = elem.separated_by(just(",")).then(trailing_comma); - - just("{") - .ignore_then(elems) - .then_ignore(just("}")) - .map_with_span(|(elems, trailing_comma), span| TableConstr { - elems, - trailing_comma, - span, - }) -} - -fn atom_paren( - expr: impl Parser + Clone, -) -> impl Parser { - just("(") - .ignore_then(space()) - .then(expr) - .then(space()) - .then_ignore(just(")")) - .map_with_span(|((s0, inner), s1), span| Expr::Paren { - s0, - inner: Box::new(inner), - s1, - span, - }) -} - -fn atom_var( - expr: impl Parser + Clone, -) -> impl Parser { - just("[") - .ignore_then(space()) - .then(expr) - .then(space()) - .then_ignore(just("]")) - .map_with_span(|((s0, index), s1), span| Expr::Var { - s0, - index: Box::new(index), - s1, - span, - }) -} - -fn atom_var_assign( - expr: impl Parser + Clone, -) -> impl Parser { - just("[") - .ignore_then(space()) - .then(expr.clone()) - .then(space()) - .then_ignore(just("]")) - .then(space()) - .then_ignore(just("=")) - .then(space()) - .then(expr) - .map_with_span( - |(((((s0, index), s1), s2), s3), value), span| Expr::VarAssign { - s0, - index: Box::new(index), - s1, - s2, - s3, - value: Box::new(value), - span, - }, - ) -} - -fn atom_var_ident_assign( - expr: impl Parser + Clone, -) -> impl Parser { - ident() - .then(space()) - .then_ignore(just("=")) - .then(space()) - .then(expr) - .map(|(((name, s0), s1), value)| Expr::VarIdentAssign { - name, - s0, - s1, - value: Box::new(value), - }) -} - -fn atom( - expr: impl Parser + Clone, -) -> impl Parser { - let lit = lit(expr.clone()).map(Expr::Lit); - let paren = atom_paren(expr.clone()); - let table_constr = table_constr(expr.clone()).map(Expr::TableConstr); - let var = atom_var(expr.clone()); - let var_ident = ident().map(Expr::VarIdent); - let var_assign = atom_var_assign(expr.clone()); - let var_ident_assign = atom_var_ident_assign(expr); - - lit.or(paren) - .or(table_constr) - .or(var_assign) - .or(var) - .or(var_ident_assign) - .or(var_ident) -} - -enum Suffix { - /// See [`Expr::Field`]. - Field { - s0: Space, - s1: Space, - index: Box, - s2: Space, - span: Span, - }, - - /// See [`Expr::FieldIdent`]. - FieldIdent { s0: Space, s1: Space, ident: Ident }, - - /// See [`Expr::FieldAssign`]. - FieldAssign { - s0: Space, - s1: Space, - index: Box, - s2: Space, - s3: Space, - s4: Space, - value: Box, - }, - - /// See [`Expr::FieldIdentAssign`]. - FieldIdentAssign { - s0: Space, - s1: Space, - ident: Ident, - s2: Space, - s3: Space, - value: Box, - }, -} - -impl Suffix { - fn into_expr(self, expr: Expr) -> Expr { - let expr = Box::new(expr); - match self { - Suffix::Field { - s0, - s1, - index, - s2, - span, - } => Expr::Field { - span: expr.span().join(span), - expr, - s0, - s1, - index, - s2, - }, - Suffix::FieldIdent { s0, s1, ident } => Expr::FieldIdent { - expr, - s0, - s1, - ident, - }, - Suffix::FieldAssign { - s0, - s1, - index, - s2, - s3, - s4, - value, - } => Expr::FieldAssign { - expr, - s0, - s1, - index, - s2, - s3, - s4, - value, - }, - Suffix::FieldIdentAssign { - s0, - s1, - ident, - s2, - s3, - value, - } => Expr::FieldIdentAssign { - expr, - s0, - s1, - ident, - s2, - s3, - value, - }, - } - } -} - -fn suffix_field( - expr: impl Parser + Clone, -) -> impl Parser { - space() - .then_ignore(just("[")) - .then(space()) - .then(expr) - .then(space()) - .then_ignore(just("]")) - .map_with_span(|(((s0, s1), index), s2), span| Suffix::Field { - s0, - s1, - index: Box::new(index), - s2, - span, - }) -} - -fn suffixed( - expr: impl Parser + Clone, -) -> impl Parser { - let suffix_field = suffix_field(expr.clone()); - - let suffix = suffix_field; - - atom(expr) - .then(suffix.repeated()) - .foldl(|expr, suffix| suffix.into_expr(expr)) -} - -fn expr( - expr: impl Parser + Clone, -) -> impl Parser { - suffixed(expr) -} +use self::basic::Error; pub fn parser() -> impl Parser { - recursive(expr).padded().then_ignore(end()) + recursive(expr::expr).padded().then_ignore(end()) } diff --git a/src/parser/basic.rs b/src/parser/basic.rs new file mode 100644 index 0000000..971d558 --- /dev/null +++ b/src/parser/basic.rs @@ -0,0 +1,23 @@ +//! Corresponds to `ast::basic`. + +use chumsky::prelude::*; + +use crate::ast::{Ident, Space}; +use crate::span::Span; + +pub type Error = Simple; + +// TODO https://github.com/rust-lang/rust/issues/63063 + +pub fn space() -> impl Parser { + // TODO Parse comments + text::whitespace().map_with_span(|(), span| Space { + comment: vec![], + span, + }) +} + +pub fn ident() -> impl Parser { + // TODO Forbid keywords + text::ident().map_with_span(|name, span| Ident { name, span }) +} diff --git a/src/parser/expr.rs b/src/parser/expr.rs new file mode 100644 index 0000000..0c0af2e --- /dev/null +++ b/src/parser/expr.rs @@ -0,0 +1,44 @@ +//! Corresponds to `ast::expr`. + +use chumsky::prelude::*; + +use crate::ast::Expr; + +use super::basic::{space, Error}; +use super::lit::lit; +use super::suffix::suffixed; +use super::table_constr::table_constr; +use super::var::var; + +fn atom_paren( + expr: impl Parser + Clone, +) -> impl Parser { + just("(") + .ignore_then(space()) + .then(expr) + .then(space()) + .then_ignore(just(")")) + .map_with_span(|((s0, inner), s1), span| Expr::Paren { + s0, + inner: Box::new(inner), + s1, + span, + }) +} + +fn atom( + expr: impl Parser + Clone, +) -> impl Parser { + let lit = lit(expr.clone()).map(Expr::Lit); + let paren = atom_paren(expr.clone()); + let table_constr = table_constr(expr.clone()).map(Expr::TableConstr); + let var = var(expr).map(Expr::Var); + + lit.or(paren).or(table_constr).or(var) +} + +pub fn expr( + expr: impl Parser + Clone, +) -> impl Parser { + suffixed(atom(expr.clone()), expr) +} diff --git a/src/parser/lit.rs b/src/parser/lit.rs new file mode 100644 index 0000000..29b7ab5 --- /dev/null +++ b/src/parser/lit.rs @@ -0,0 +1,127 @@ +//! Corresponds to `ast::lit`. + +use chumsky::prelude::*; + +use crate::ast::{Expr, Lit, NumLit, NumLitStr, StringLit, TableLit, TableLitElem}; + +use super::basic::{ident, space, Error}; + +fn num_lit_str_radix(radix: u32) -> impl Parser + Clone { + // Minimum amount of digits required to represent i64::MAX. The rest of this + // code assumes that any value that can be represented using this amount of + // digits fits into an u64. + let max_digits = match radix { + 2 => 63, + 10 => 19, + 16 => 16, + _ => panic!("unsupported radix"), + }; + + // Representations of i64::MAX. + let max_value = match radix { + 2 => "0b_1111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111", + 10 => "9_223_372_036_854_775_807", + 16 => "0x_7fff_ffff_ffff_ffff", + _ => panic!("unsupported radix"), + }; + + let constructor = match radix { + 2 => NumLitStr::Bin, + 10 => NumLitStr::Dec, + 16 => NumLitStr::Hex, + _ => panic!("unsupported radix"), + }; + + filter(move |c: &char| c.is_digit(radix) || *c == '_') + .repeated() + .at_least(1) + .collect::() + .try_map(move |s, span| { + let digits = s.chars().filter(|c| *c != '_').collect::(); + if digits.is_empty() { + let msg = "integer literal needs to contain at least one digit"; + return Err(Simple::custom(span, msg)); + } else if digits.len() > max_digits { + let msg = format!("integer literal too large, the maximum value is {max_value}"); + return Err(Simple::custom(span, msg)); + } + + let value = u64::from_str_radix(&digits, radix).unwrap(); + if value <= i64::MAX as u64 { + Ok((value as i64, constructor(s))) + } else { + let msg = format!("integer literal too large, the maximum value is {max_value}"); + Err(Simple::custom(span, msg)) + } + }) +} + +pub fn num_lit() -> impl Parser + Clone { + (just("0b").ignore_then(num_lit_str_radix(2))) + .or(just("0x").ignore_then(num_lit_str_radix(16))) + .or(num_lit_str_radix(10)) + .map_with_span(|(value, str), span| NumLit { value, str, span }) +} + +pub fn string_lit() -> impl Parser { + // TODO Parse string literals + filter(|_| false).map(|_| unreachable!()) +} + +pub fn table_lit_elem( + expr: impl Parser + Clone, +) -> impl Parser { + let positional = expr + .clone() + .map(|value| TableLitElem::Positional(Box::new(value))); + + let named = ident() + .then(space()) + .then_ignore(just(':')) + .then(space()) + .then(expr) + .map_with_span(|(((name, s0), s1), value), span| TableLitElem::Named { + name, + s0, + s1, + value: Box::new(value), + span, + }); + + named.or(positional) +} + +pub fn table_lit( + expr: impl Parser + Clone, +) -> impl Parser { + let elem = space() + .then(table_lit_elem(expr)) + .then(space()) + .map(|((s0, elem), s1)| (s0, elem, s1)); + + let trailing_comma = just(",").ignore_then(space()).or_not(); + + let elems = elem.separated_by(just(",")).then(trailing_comma); + + just("'{") + .ignore_then(elems) + .then_ignore(just("}")) + .map_with_span(|(elems, trailing_comma), span| TableLit { + elems, + trailing_comma, + span, + }) +} + +pub fn lit( + expr: impl Parser + Clone, +) -> impl Parser { + let nil = text::keyword("nil").map_with_span(|_, span| Lit::Nil(span)); + let r#true = text::keyword("true").map_with_span(|_, span| Lit::Bool(true, span)); + let r#false = text::keyword("false").map_with_span(|_, span| Lit::Bool(false, span)); + let num = num_lit().map(Lit::Num); + let string = string_lit().map(Lit::String); + let table = table_lit(expr).map(Lit::Table); + + nil.or(r#true).or(r#false).or(num).or(string).or(table) +} diff --git a/src/parser/suffix.rs b/src/parser/suffix.rs new file mode 100644 index 0000000..584bbfa --- /dev/null +++ b/src/parser/suffix.rs @@ -0,0 +1,162 @@ +//! Corresponds to `ast::call` and `ast::field`. + +use chumsky::prelude::*; + +use crate::ast::{Call, Expr, Field, Ident, Space, TableConstr}; +use crate::span::{HasSpan, Span}; + +use super::basic::{space, Error}; + +enum Suffix { + CallArg { + s0: Space, + s1: Space, + arg: Box, + s2: Space, + }, + + CallNoArg { + s0: Space, + s1: Space, + }, + + CallConstr { + s0: Space, + constr: TableConstr, + }, + + FieldAccess { + s0: Space, + s1: Space, + index: Box, + s2: Space, + }, + + FieldAssign { + s0: Space, + s1: Space, + index: Box, + s2: Space, + s3: Space, + s4: Space, + value: Box, + }, + + FieldAccessIdent { + s0: Space, + s1: Space, + ident: Ident, + }, + + FieldAssignIdent { + s0: Space, + s1: Space, + ident: Ident, + s2: Space, + s3: Space, + value: Box, + }, +} + +impl Suffix { + fn into_expr(self, span: Span, expr: Expr) -> Expr { + let expr = Box::new(expr); + match self { + Suffix::CallArg { s0, s1, arg, s2 } => Expr::Call(Call::Arg { + expr, + s0, + s1, + arg, + s2, + span, + }), + Suffix::CallNoArg { s0, s1 } => Expr::Call(Call::NoArg { expr, s0, s1, span }), + Suffix::CallConstr { s0, constr } => Expr::Call(Call::Constr { + expr, + s0, + constr, + span, + }), + Suffix::FieldAccess { s0, s1, index, s2 } => Expr::Field(Field::Access { + expr, + s0, + s1, + index, + s2, + span, + }), + Suffix::FieldAssign { + s0, + s1, + index, + s2, + s3, + s4, + value, + } => Expr::Field(Field::Assign { + expr, + s0, + s1, + index, + s2, + s3, + s4, + value, + span, + }), + Suffix::FieldAccessIdent { s0, s1, ident } => Expr::Field(Field::AccessIdent { + expr, + s0, + s1, + ident, + span, + }), + Suffix::FieldAssignIdent { + s0, + s1, + ident, + s2, + s3, + value, + } => Expr::Field(Field::AssignIdent { + expr, + s0, + s1, + ident, + s2, + s3, + value, + span, + }), + } + } +} + +fn suffix_field_access( + expr: impl Parser + Clone, +) -> impl Parser { + space() + .then_ignore(just("[")) + .then(space()) + .then(expr) + .then(space()) + .then_ignore(just("]")) + .map(|(((s0, s1), index), s2)| Suffix::FieldAccess { + s0, + s1, + index: Box::new(index), + s2, + }) +} + +pub fn suffixed( + atom: impl Parser, + expr: impl Parser + Clone, +) -> impl Parser { + let field_access = suffix_field_access(expr.clone()); + + let suffix = field_access.map_with_span(|suffix, span| (suffix, span)); + + atom.then(suffix.repeated()) + .foldl(|expr, (suffix, span)| suffix.into_expr(expr.span().join(span), expr)) +} diff --git a/src/parser/table_constr.rs b/src/parser/table_constr.rs new file mode 100644 index 0000000..1b70048 --- /dev/null +++ b/src/parser/table_constr.rs @@ -0,0 +1,59 @@ +//! Corresponds to `ast::table_constr`. + +use chumsky::prelude::*; + +use crate::ast::{Expr, TableConstr, TableConstrElem}; + +use super::basic::{space, Error}; +use super::lit::table_lit_elem; + +pub fn table_constr_elem( + expr: impl Parser + Clone, +) -> impl Parser { + let lit = table_lit_elem(expr.clone()).map(TableConstrElem::Lit); + + let indexed = just("[") + .ignore_then(space()) + .then(expr.clone()) + .then(space()) + .then_ignore(just("]")) + .then(space()) + .then_ignore(just(":")) + .then(space()) + .then(expr) + .map_with_span( + |(((((s0, index), s1), s2), s3), value), span| TableConstrElem::Indexed { + s0, + index: Box::new(index), + s1, + s2, + s3, + value: Box::new(value), + span, + }, + ); + + indexed.or(lit) +} + +pub fn table_constr( + expr: impl Parser + Clone, +) -> impl Parser { + let elem = space() + .then(table_constr_elem(expr)) + .then(space()) + .map(|((s0, elem), s1)| (s0, elem, s1)); + + let trailing_comma = just(",").ignore_then(space()).or_not(); + + let elems = elem.separated_by(just(",")).then(trailing_comma); + + just("{") + .ignore_then(elems) + .then_ignore(just("}")) + .map_with_span(|(elems, trailing_comma), span| TableConstr { + elems, + trailing_comma, + span, + }) +} diff --git a/src/parser/var.rs b/src/parser/var.rs new file mode 100644 index 0000000..a46e624 --- /dev/null +++ b/src/parser/var.rs @@ -0,0 +1,86 @@ +//! Corresponds to `ast::var`. + +use chumsky::prelude::*; + +use crate::ast::{Expr, Var}; + +use super::basic::{ident, space, Error}; + +fn var_access( + expr: impl Parser + Clone, +) -> impl Parser { + just("[") + .ignore_then(space()) + .then(expr) + .then(space()) + .then_ignore(just("]")) + .map_with_span(|((s0, index), s1), span| Var::Access { + s0, + index: Box::new(index), + s1, + span, + }) +} + +fn var_assign( + expr: impl Parser + Clone, +) -> impl Parser { + let local = text::keyword("local").ignore_then(space()).or_not(); + + local + .then_ignore(just("[")) + .then(space()) + .then(expr.clone()) + .then(space()) + .then_ignore(just("]")) + .then(space()) + .then_ignore(just("=")) + .then(space()) + .then(expr) + .map_with_span( + |((((((local, s0), index), s1), s2), s3), value), span| Var::Assign { + local, + s0, + index: Box::new(index), + s1, + s2, + s3, + value: Box::new(value), + span, + }, + ) +} + +fn var_assign_ident( + expr: impl Parser + Clone, +) -> impl Parser { + let local = text::keyword("local").ignore_then(space()).or_not(); + + local + .then(ident()) + .then(space()) + .then_ignore(just("=")) + .then(space()) + .then(expr) + .map_with_span( + |((((local, name), s0), s1), value), span| Var::AssignIdent { + local, + name, + s0, + s1, + value: Box::new(value), + span, + }, + ) +} + +pub fn var( + expr: impl Parser + Clone, +) -> impl Parser { + let access = var_access(expr.clone()); + let assign = var_assign(expr.clone()); + let access_ident = ident().map(Var::AccessIdent); + let assign_ident = var_assign_ident(expr); + + assign.or(access).or(assign_ident).or(access_ident) +}