From 6533c9dcf7bc00587d5f22927c99cb69575818f7 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 20 Nov 2022 20:25:39 +0100 Subject: [PATCH] Handle things separated by things differently I noticed that programs like '{} would parse correctly while '{ } would expect an inner element. This was because the leading space was actually part of the element parser, which is a violation of the (as of yet unspoken) rule that parsers should not parse surrounding whitespace. Because whitespace was treated differently from everywhere else and because this implementation was wrong, I decided to reimplement it, abstracting the concept of things separated by other things with optional trailing things. I did this in such a way that surrounding whitespace is not touched. --- src/ast/basic.rs | 20 ++++++++++++++++++++ src/ast/lit.rs | 10 ++++++---- src/ast/program.rs | 10 +++++----- src/ast/table_constr.rs | 10 ++++++---- src/ast/table_destr.rs | 12 +++++++----- src/parser/basic.rs | 26 +++++++++++++++++++++++++- src/parser/lit.rs | 26 +++++++++++--------------- src/parser/program.rs | 21 ++++++++++----------- src/parser/table_constr.rs | 29 +++++++++++++---------------- src/parser/table_destr.rs | 29 +++++++++++++---------------- 10 files changed, 116 insertions(+), 77 deletions(-) diff --git a/src/ast/basic.rs b/src/ast/basic.rs index e472e39..1194111 100644 --- a/src/ast/basic.rs +++ b/src/ast/basic.rs @@ -47,3 +47,23 @@ impl HasSpan for Ident { self.span } } + +#[derive(Debug, Clone)] +pub enum Separated { + Empty(Span), + NonEmpty { + first_elem: E, + last_elems: Vec<(S1, E)>, + trailing: Option, + span: Span, + }, +} + +impl HasSpan for Separated { + fn span(&self) -> Span { + match self { + Separated::Empty(span) => *span, + Separated::NonEmpty { span, .. 
} => *span, + } + } +} diff --git a/src/ast/lit.rs b/src/ast/lit.rs index 299bc1c..c079955 100644 --- a/src/ast/lit.rs +++ b/src/ast/lit.rs @@ -3,7 +3,7 @@ use std::fmt; use crate::builtin::Builtin; use crate::span::{HasSpan, Span}; -use super::{Expr, Ident, Space}; +use super::{Expr, Ident, Separated, Space}; #[derive(Clone)] pub enum NumLitStr { @@ -126,11 +126,13 @@ impl HasSpan for TableLitElem { } /// `'{ a, foo: b }` +/// +/// Structure: `'{ s0 elems s1 }` #[derive(Debug, Clone)] pub struct TableLit { - pub elems: Vec<(Space, TableLitElem, Space)>, - /// `Some` if there is a trailing comma, `None` otherwise. - pub trailing_comma: Option, + pub s0: Space, + pub elems: Separated, + pub s1: Space, pub span: Span, } diff --git a/src/ast/program.rs b/src/ast/program.rs index 5a198d0..1a219f5 100644 --- a/src/ast/program.rs +++ b/src/ast/program.rs @@ -1,6 +1,6 @@ use crate::span::{HasSpan, Span}; -use super::{Expr, Space, TableLitElem}; +use super::{Expr, Separated, Space, TableLitElem}; #[derive(Debug, Clone)] pub enum Program { @@ -12,12 +12,12 @@ pub enum Program { span: Span, }, - /// Structure: `s0 module elems trailing_comma` + /// Structure: `s0 module s1 elems s2` Module { s0: Space, - elems: Vec<(Space, TableLitElem, Space)>, - /// `Some` if there is a trailing comma, `None` otherwise. 
- trailing_comma: Option, + s1: Space, + elems: Separated, + s2: Space, span: Span, }, } diff --git a/src/ast/table_constr.rs b/src/ast/table_constr.rs index 0b699cc..bf6b517 100644 --- a/src/ast/table_constr.rs +++ b/src/ast/table_constr.rs @@ -1,6 +1,6 @@ use crate::span::{HasSpan, Span}; -use super::{Expr, Space, TableLitElem}; +use super::{Expr, Separated, Space, TableLitElem}; #[derive(Debug, Clone)] pub enum TableConstrElem { @@ -31,11 +31,13 @@ impl HasSpan for TableConstrElem { } /// `{ a, b, foo: c, [d]: e }` +/// +/// Structure: `{ s0 elems s1 }` #[derive(Debug, Clone)] pub struct TableConstr { - pub elems: Vec<(Space, TableConstrElem, Space)>, - /// `Some` if there is a trailing comma, `None` otherwise. - pub trailing_comma: Option, + pub s0: Space, + pub elems: Separated, + pub s1: Space, pub span: Span, } diff --git a/src/ast/table_destr.rs b/src/ast/table_destr.rs index 56c8d36..3d90fb3 100644 --- a/src/ast/table_destr.rs +++ b/src/ast/table_destr.rs @@ -1,6 +1,6 @@ use crate::span::{HasSpan, Span}; -use super::{Expr, Ident, Space}; +use super::{Expr, Ident, Separated, Space}; // TODO Make table patterns recursive @@ -30,12 +30,14 @@ impl HasSpan for TablePatternElem { } } -/// `'{ foo, bar: baz }` +/// `{ foo, bar: baz }` +/// +/// Structure: `{ s0 elems s1 }` #[derive(Debug, Clone)] pub struct TablePattern { - pub elems: Vec<(Space, TablePatternElem, Space)>, - /// `Some` if there is a trailing comma, `None` otherwise. 
- pub trailing_comma: Option, + pub s0: Space, + pub elems: Separated, + pub s1: Space, pub span: Span, } diff --git a/src/parser/basic.rs b/src/parser/basic.rs index 8d7ba1d..2a65c68 100644 --- a/src/parser/basic.rs +++ b/src/parser/basic.rs @@ -3,7 +3,7 @@ use chumsky::prelude::*; use chumsky::text::Character; -use crate::ast::{Ident, Line, Space}; +use crate::ast::{Ident, Line, Separated, Space}; use crate::span::Span; pub type Error = Simple; @@ -58,3 +58,27 @@ pub fn ident() -> EParser { pub fn local(space: EParser) -> EParser> { text::keyword("local").ignore_then(space).or_not().boxed() } + +// This function is more of a utility function. Because of this and to keep the +// code nicer, I have decided that the rules specified in the `parser` module +// don't apply to it. +pub fn separated_by( + elem: impl Parser + Clone + 'static, + separator: impl Parser + 'static, + trailing_separator: impl Parser + 'static, +) -> EParser> { + elem.clone() + .then(separator.then(elem).repeated()) + .then(trailing_separator.or_not()) + .or_not() + .map_with_span(|s, span| match s { + Some(((first_elem, last_elems), trailing)) => Separated::NonEmpty { + first_elem, + last_elems, + trailing, + span, + }, + None => Separated::Empty(span), + }) + .boxed() +} diff --git a/src/parser/lit.rs b/src/parser/lit.rs index 7d04e1f..0bdf12a 100644 --- a/src/parser/lit.rs +++ b/src/parser/lit.rs @@ -7,7 +7,7 @@ use crate::ast::{ }; use crate::builtin::Builtin; -use super::basic::{EParser, Error}; +use super::basic::{separated_by, EParser, Error}; fn builtin_lit() -> impl Parser { just('\'').ignore_then(choice(( @@ -154,22 +154,18 @@ fn table_lit( space: EParser, table_lit_elem: EParser, ) -> impl Parser { - let elem = space + let separator = space.clone().then_ignore(just(',')).then(space.clone()); + let trailing_separator = space.clone().then_ignore(just(',')); + + space .clone() - .then(table_lit_elem) - .then(space.clone()) - .map(|((s0, elem), s1)| (s0, elem, s1)); - - let 
trailing_comma = just(',').ignore_then(space).or_not(); - - let elems = elem.separated_by(just(',')).then(trailing_comma); - - just("'{") - .ignore_then(elems) - .then_ignore(just('}')) - .map_with_span(|(elems, trailing_comma), span| TableLit { + .then(separated_by(table_lit_elem, separator, trailing_separator)) + .then(space) + .delimited_by(just("'{"), just('}')) + .map_with_span(|((s0, elems), s1), span| TableLit { + s0, elems, - trailing_comma, + s1, span, }) } diff --git a/src/parser/program.rs b/src/parser/program.rs index 26e12fd..0b46f94 100644 --- a/src/parser/program.rs +++ b/src/parser/program.rs @@ -4,7 +4,7 @@ use chumsky::prelude::*; use crate::ast::{Expr, Program, Space, TableLitElem}; -use super::basic::EParser; +use super::basic::{separated_by, EParser}; pub fn program( space: EParser, @@ -17,20 +17,19 @@ pub fn program( .then(space.clone()) .map_with_span(|((s0, expr), s1), span| Program::Expr { s0, expr, s1, span }); - let elem = space - .clone() - .then(table_lit_elem) - .then(space.clone()) - .map(|((s0, elem), s1)| (s0, elem, s1)); - let trailing_comma = just(',').ignore_then(space.clone()).or_not(); + let separator = space.clone().then_ignore(just(',')).then(space.clone()); + let trailing_separator = space.clone().then_ignore(just(',')); let module = space + .clone() .then_ignore(text::keyword("module")) - .then(elem.separated_by(just(','))) - .then(trailing_comma) - .map_with_span(|((s0, elems), trailing_comma), span| Program::Module { + .then(space.clone()) + .then(separated_by(table_lit_elem, separator, trailing_separator)) + .then(space.clone()) + .map_with_span(|(((s0, s1), elems), s2), span| Program::Module { s0, + s1, elems, - trailing_comma, + s2, span, }); diff --git a/src/parser/table_constr.rs b/src/parser/table_constr.rs index ac0bb04..fd6aefd 100644 --- a/src/parser/table_constr.rs +++ b/src/parser/table_constr.rs @@ -4,13 +4,13 @@ use chumsky::prelude::*; use crate::ast::{Expr, Space, TableConstr, TableConstrElem, 
TableLitElem}; -use super::basic::{EParser, Error}; +use super::basic::{separated_by, EParser, Error}; fn table_constr_elem( space: EParser, table_lit_elem: EParser, expr: EParser, -) -> impl Parser { +) -> impl Parser + Clone { let lit = table_lit_elem.map(TableConstrElem::Lit); let indexed = just('[') @@ -42,22 +42,19 @@ pub fn table_constr( table_lit_elem: EParser, expr: EParser, ) -> EParser { - let elem = space + let elem = table_constr_elem(space.clone(), table_lit_elem, expr); + let separator = space.clone().then_ignore(just(',')).then(space.clone()); + let trailing_separator = space.clone().then_ignore(just(',')); + + space .clone() - .then(table_constr_elem(space.clone(), table_lit_elem, expr)) - .then(space.clone()) - .map(|((s0, elem), s1)| (s0, elem, s1)); - - let trailing_comma = just(',').ignore_then(space).or_not(); - - let elems = elem.separated_by(just(',')).then(trailing_comma); - - just('{') - .ignore_then(elems) - .then_ignore(just('}')) - .map_with_span(|(elems, trailing_comma), span| TableConstr { + .then(separated_by(elem, separator, trailing_separator)) + .then(space) + .delimited_by(just('{'), just('}')) + .map_with_span(|((s0, elems), s1), span| TableConstr { + s0, elems, - trailing_comma, + s1, span, }) .boxed() diff --git a/src/parser/table_destr.rs b/src/parser/table_destr.rs index dfde71e..4fb5eda 100644 --- a/src/parser/table_destr.rs +++ b/src/parser/table_destr.rs @@ -4,12 +4,12 @@ use chumsky::prelude::*; use crate::ast::{Expr, Ident, Space, TableDestr, TablePattern, TablePatternElem}; -use super::basic::{EParser, Error}; +use super::basic::{separated_by, EParser, Error}; fn table_pattern_elem( space: EParser, ident: EParser, -) -> impl Parser { +) -> impl Parser + Clone { let positional = ident.clone().map(TablePatternElem::Positional); let named = ident @@ -30,22 +30,19 @@ fn table_pattern_elem( } pub fn table_pattern(space: EParser, ident: EParser) -> EParser { - let elem = space + let elem = table_pattern_elem(space.clone(), 
ident); + let separator = space.clone().then_ignore(just(',')).then(space.clone()); + let trailing_separator = space.clone().then_ignore(just(',')); + + space .clone() - .then(table_pattern_elem(space.clone(), ident)) - .then(space.clone()) - .map(|((s0, elem), s1)| (s0, elem, s1)); - - let trailing_comma = just(',').ignore_then(space).or_not(); - - let elems = elem.separated_by(just(',')).then(trailing_comma); - - just('{') - .ignore_then(elems) - .then_ignore(just('}')) - .map_with_span(|(elems, trailing_comma), span| TablePattern { + .then(separated_by(elem, separator, trailing_separator)) + .then(space) + .delimited_by(just('{'), just('}')) + .map_with_span(|((s0, elems), s1), span| TablePattern { + s0, elems, - trailing_comma, + s1, span, }) .boxed()