From b009a9c4ec36082478dd1e26f8709df23d34339c Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 20 Nov 2022 20:25:39 +0100 Subject: [PATCH 1/4] Handle things separated by things differently I noticed that programs like '{} would parse correctly while '{ } would expect an inner element. This was because the leading space was actually part of the element parser, which is a violation of the (as of yet unspoken) rule that parsers should not parse surrounding whitespace. Because whitespace whas treated differently from everywhere else and because this implementation was wrong, I decided to reimplement it, abstracting the concept of things separated by other things with optional trailing things. I did this in such a way that surrounding whitespace is not touched. --- src/ast/basic.rs | 20 ++++++++++++++++++++ src/ast/lit.rs | 10 ++++++---- src/ast/program.rs | 10 +++++----- src/ast/table_constr.rs | 10 ++++++---- src/ast/table_destr.rs | 12 +++++++----- src/parser/basic.rs | 26 +++++++++++++++++++++++++- src/parser/lit.rs | 26 +++++++++++--------------- src/parser/program.rs | 21 ++++++++++----------- src/parser/table_constr.rs | 29 +++++++++++++---------------- src/parser/table_destr.rs | 29 +++++++++++++---------------- 10 files changed, 116 insertions(+), 77 deletions(-) diff --git a/src/ast/basic.rs b/src/ast/basic.rs index e472e39..1194111 100644 --- a/src/ast/basic.rs +++ b/src/ast/basic.rs @@ -47,3 +47,23 @@ impl HasSpan for Ident { self.span } } + +#[derive(Debug, Clone)] +pub enum Separated { + Empty(Span), + NonEmpty { + first_elem: E, + last_elems: Vec<(S1, E)>, + trailing: Option, + span: Span, + }, +} + +impl HasSpan for Separated { + fn span(&self) -> Span { + match self { + Separated::Empty(span) => *span, + Separated::NonEmpty { span, .. } => *span, + } + } +} diff --git a/src/ast/lit.rs b/src/ast/lit.rs index 299bc1c..c079955 100644 --- a/src/ast/lit.rs +++ b/src/ast/lit.rs @@ -3,7 +3,7 @@ use std::fmt; use crate::builtin::Builtin; use crate::span::{HasSpan, Span}; -use super::{Expr, Ident, Space}; +use super::{Expr, Ident, Separated, Space}; #[derive(Clone)] pub enum NumLitStr { @@ -126,11 +126,13 @@ impl HasSpan for TableLitElem { } /// `'{ a, foo: b }` +/// +/// Structure: `'{ s0 elems s1 }` #[derive(Debug, Clone)] pub struct TableLit { - pub elems: Vec<(Space, TableLitElem, Space)>, - /// `Some` if there is a trailing comma, `None` otherwise. - pub trailing_comma: Option, + pub s0: Space, + pub elems: Separated, + pub s1: Space, pub span: Span, } diff --git a/src/ast/program.rs b/src/ast/program.rs index 5a198d0..1a219f5 100644 --- a/src/ast/program.rs +++ b/src/ast/program.rs @@ -1,6 +1,6 @@ use crate::span::{HasSpan, Span}; -use super::{Expr, Space, TableLitElem}; +use super::{Expr, Separated, Space, TableLitElem}; #[derive(Debug, Clone)] pub enum Program { @@ -12,12 +12,12 @@ pub enum Program { span: Span, }, - /// Structure: `s0 module elems trailing_comma` + /// Structure: `s0 module s1 elems s2` Module { s0: Space, - elems: Vec<(Space, TableLitElem, Space)>, - /// `Some` if there is a trailing comma, `None` otherwise. - trailing_comma: Option, + s1: Space, + elems: Separated, + s2: Space, span: Span, }, } diff --git a/src/ast/table_constr.rs b/src/ast/table_constr.rs index 0b699cc..741a30c 100644 --- a/src/ast/table_constr.rs +++ b/src/ast/table_constr.rs @@ -1,6 +1,6 @@ use crate::span::{HasSpan, Span}; -use super::{Expr, Space, TableLitElem}; +use super::{Expr, Separated, Space, TableLitElem}; #[derive(Debug, Clone)] pub enum TableConstrElem { @@ -31,11 +31,13 @@ impl HasSpan for TableConstrElem { } /// `{ a, b, foo: c, [d]: e }` +/// +/// Structure: `{ s0 elems s1 }` #[derive(Debug, Clone)] pub struct TableConstr { - pub elems: Vec<(Space, TableConstrElem, Space)>, - /// `Some` if there is a trailing comma, `None` otherwise. - pub trailing_comma: Option, + pub s0: Space, + pub elems: Separated, + pub s1: Space, pub span: Span, } diff --git a/src/ast/table_destr.rs b/src/ast/table_destr.rs index 56c8d36..9cbf46a 100644 --- a/src/ast/table_destr.rs +++ b/src/ast/table_destr.rs @@ -1,6 +1,6 @@ use crate::span::{HasSpan, Span}; -use super::{Expr, Ident, Space}; +use super::{Expr, Ident, Separated, Space}; // TODO Make table patterns recursive @@ -30,12 +30,14 @@ impl HasSpan for TablePatternElem { } } -/// `'{ foo, bar: baz }` +/// `{ foo, bar: baz }` +/// +/// Structure: `{ s0 elems s1 }` #[derive(Debug, Clone)] pub struct TablePattern { - pub elems: Vec<(Space, TablePatternElem, Space)>, - /// `Some` if there is a trailing comma, `None` otherwise. - pub trailing_comma: Option, + pub s0: Space, + pub elems: Separated, + pub s1: Space, pub span: Span, } diff --git a/src/parser/basic.rs b/src/parser/basic.rs index 8d7ba1d..2a65c68 100644 --- a/src/parser/basic.rs +++ b/src/parser/basic.rs @@ -3,7 +3,7 @@ use chumsky::prelude::*; use chumsky::text::Character; -use crate::ast::{Ident, Line, Space}; +use crate::ast::{Ident, Line, Separated, Space}; use crate::span::Span; pub type Error = Simple; @@ -58,3 +58,27 @@ pub fn ident() -> EParser { pub fn local(space: EParser) -> EParser> { text::keyword("local").ignore_then(space).or_not().boxed() } + +// This function is more of a utility function. Because of this and to keep the +// code nicer, I have decided that the rules specified in the `parser` module +// don't apply to it. +pub fn separated_by( + elem: impl Parser + Clone + 'static, + separator: impl Parser + 'static, + trailing_separator: impl Parser + 'static, +) -> EParser> { + elem.clone() + .then(separator.then(elem).repeated()) + .then(trailing_separator.or_not()) + .or_not() + .map_with_span(|s, span| match s { + Some(((first_elem, last_elems), trailing)) => Separated::NonEmpty { + first_elem, + last_elems, + trailing, + span, + }, + None => Separated::Empty(span), + }) + .boxed() +} diff --git a/src/parser/lit.rs b/src/parser/lit.rs index 7d04e1f..0bdf12a 100644 --- a/src/parser/lit.rs +++ b/src/parser/lit.rs @@ -7,7 +7,7 @@ use crate::ast::{ }; use crate::builtin::Builtin; -use super::basic::{EParser, Error}; +use super::basic::{separated_by, EParser, Error}; fn builtin_lit() -> impl Parser { just('\'').ignore_then(choice(( @@ -154,22 +154,18 @@ fn table_lit( space: EParser, table_lit_elem: EParser, ) -> impl Parser { - let elem = space + let separator = space.clone().then_ignore(just(',')).then(space.clone()); + let trailing_separator = space.clone().then_ignore(just(',')); + + space .clone() - .then(table_lit_elem) - .then(space.clone()) - .map(|((s0, elem), s1)| (s0, elem, s1)); - - let trailing_comma = just(',').ignore_then(space).or_not(); - - let elems = elem.separated_by(just(',')).then(trailing_comma); - - just("'{") - .ignore_then(elems) - .then_ignore(just('}')) - .map_with_span(|(elems, trailing_comma), span| TableLit { + .then(separated_by(table_lit_elem, separator, trailing_separator)) + .then(space) + .delimited_by(just("'{"), just('}')) + .map_with_span(|((s0, elems), s1), span| TableLit { + s0, elems, - trailing_comma, + s1, span, }) } diff --git a/src/parser/program.rs b/src/parser/program.rs index 26e12fd..0b46f94 100644 --- a/src/parser/program.rs +++ b/src/parser/program.rs @@ -4,7 +4,7 @@ use chumsky::prelude::*; use crate::ast::{Expr, Program, Space, TableLitElem}; -use super::basic::EParser; +use super::basic::{separated_by, EParser}; pub fn program( space: EParser, @@ -17,20 +17,19 @@ pub fn program( .then(space.clone()) .map_with_span(|((s0, expr), s1), span| Program::Expr { s0, expr, s1, span }); - let elem = space - .clone() - .then(table_lit_elem) - .then(space.clone()) - .map(|((s0, elem), s1)| (s0, elem, s1)); - let trailing_comma = just(',').ignore_then(space.clone()).or_not(); + let separator = space.clone().then_ignore(just(',')).then(space.clone()); + let trailing_separator = space.clone().then_ignore(just(',')); let module = space + .clone() .then_ignore(text::keyword("module")) - .then(elem.separated_by(just(','))) - .then(trailing_comma) - .map_with_span(|((s0, elems), trailing_comma), span| Program::Module { + .then(space.clone()) + .then(separated_by(table_lit_elem, separator, trailing_separator)) + .then(space.clone()) + .map_with_span(|(((s0, s1), elems), s2), span| Program::Module { s0, + s1, elems, - trailing_comma, + s2, span, }); diff --git a/src/parser/table_constr.rs b/src/parser/table_constr.rs index ac0bb04..fd6aefd 100644 --- a/src/parser/table_constr.rs +++ b/src/parser/table_constr.rs @@ -4,13 +4,13 @@ use chumsky::prelude::*; use crate::ast::{Expr, Space, TableConstr, TableConstrElem, TableLitElem}; -use super::basic::{EParser, Error}; +use super::basic::{separated_by, EParser, Error}; fn table_constr_elem( space: EParser, table_lit_elem: EParser, expr: EParser, -) -> impl Parser { +) -> impl Parser + Clone { let lit = table_lit_elem.map(TableConstrElem::Lit); let indexed = just('[') @@ -42,22 +42,19 @@ pub fn table_constr( table_lit_elem: EParser, expr: EParser, ) -> EParser { - let elem = space + let elem = table_constr_elem(space.clone(), table_lit_elem, expr); + let separator = space.clone().then_ignore(just(',')).then(space.clone()); + let trailing_separator = space.clone().then_ignore(just(',')); + + space .clone() - .then(table_constr_elem(space.clone(), table_lit_elem, expr)) - .then(space.clone()) - .map(|((s0, elem), s1)| (s0, elem, s1)); - - let trailing_comma = just(',').ignore_then(space).or_not(); - - let elems = elem.separated_by(just(',')).then(trailing_comma); - - just('{') - .ignore_then(elems) - .then_ignore(just('}')) - .map_with_span(|(elems, trailing_comma), span| TableConstr { + .then(separated_by(elem, separator, trailing_separator)) + .then(space) + .delimited_by(just('{'), just('}')) + .map_with_span(|((s0, elems), s1), span| TableConstr { + s0, elems, - trailing_comma, + s1, span, }) .boxed() diff --git a/src/parser/table_destr.rs b/src/parser/table_destr.rs index dfde71e..4fb5eda 100644 --- a/src/parser/table_destr.rs +++ b/src/parser/table_destr.rs @@ -4,12 +4,12 @@ use chumsky::prelude::*; use crate::ast::{Expr, Ident, Space, TableDestr, TablePattern, TablePatternElem}; -use super::basic::{EParser, Error}; +use super::basic::{separated_by, EParser, Error}; fn table_pattern_elem( space: EParser, ident: EParser, -) -> impl Parser { +) -> impl Parser + Clone { let positional = ident.clone().map(TablePatternElem::Positional); let named = ident @@ -30,22 +30,19 @@ fn table_pattern_elem( } pub fn table_pattern(space: EParser, ident: EParser) -> EParser { - let elem = space + let elem = table_pattern_elem(space.clone(), ident); + let separator = space.clone().then_ignore(just(',')).then(space.clone()); + let trailing_separator = space.clone().then_ignore(just(',')); + + space .clone() - .then(table_pattern_elem(space.clone(), ident)) - .then(space.clone()) - .map(|((s0, elem), s1)| (s0, elem, s1)); - - let trailing_comma = just(',').ignore_then(space).or_not(); - - let elems = elem.separated_by(just(',')).then(trailing_comma); - - just('{') - .ignore_then(elems) - .then_ignore(just('}')) - .map_with_span(|(elems, trailing_comma), span| TablePattern { + .then(separated_by(elem, separator, trailing_separator)) + .then(space) + .delimited_by(just('{'), just('}')) + .map_with_span(|((s0, elems), s1), span| TablePattern { + s0, elems, - trailing_comma, + s1, span, }) .boxed() From 969570fc4b193ccd6ec822188dcc43a4d7d1393c Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 20 Nov 2022 20:30:09 +0100 Subject: [PATCH 2/4] Make rule about consuming surrounding whitespace --- src/parser.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser.rs b/src/parser.rs index 72d26c6..1650d97 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,6 +2,7 @@ //! //! # Rules //! +//! - Parsers must not consume surrounding whitespace. //! - Public parser functions must return [`basic::EParser`]. //! - Public parser functions must receive public subparsers via their arguments. //! - Each public parser function must be called exactly once, inside this file. From b21619dabdce636add1ad9c03125e23179cd5dff Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 20 Nov 2022 20:32:14 +0100 Subject: [PATCH 3/4] Pretty print programs partially --- src/main.rs | 12 ++++-------- src/pretty.rs | 10 +--------- src/pretty/program.rs | 23 +++++++++++++++++++++++ 3 files changed, 28 insertions(+), 17 deletions(-) create mode 100644 src/pretty/program.rs diff --git a/src/main.rs b/src/main.rs index 57f1850..ee5f73a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,6 @@ use std::fs; use std::path::PathBuf; -use ::pretty::{Pretty, RcAllocator}; use chumsky::Parser as _; use clap::Parser; @@ -48,17 +47,14 @@ fn main() -> anyhow::Result<()> { let stream = span::stream_from_str(&content); match parser::parser().parse(stream) { Ok(program) => { - println!("Successful parse"); - let doc = program.pretty(&RcAllocator); let mut out = vec![]; - doc.render(100, &mut out)?; - let str = String::from_utf8(out)?; - println!("{str}"); + program.to_doc().render(100, &mut out)?; + println!("{}", String::from_utf8(out)?); } Err(errs) => { - println!("Parsing failed"); + eprintln!("Parsing failed"); for err in errs { - println!("{err:?}"); + eprintln!("{err:?}"); } } } diff --git a/src/pretty.rs b/src/pretty.rs index 1d8e288..3847c62 100644 --- a/src/pretty.rs +++ b/src/pretty.rs @@ -1,9 +1 @@ -use pretty::{DocAllocator, DocBuilder, Pretty}; - -use crate::ast::Program; - -impl<'a, A: DocAllocator<'a>> Pretty<'a, A> for Program { - fn pretty(self, allocator: &'a A) -> DocBuilder<'a, A, ()> { - allocator.text("Hello world") - } -} +mod program; diff --git a/src/pretty/program.rs b/src/pretty/program.rs new file mode 100644 index 0000000..60a5ca0 --- /dev/null +++ b/src/pretty/program.rs @@ -0,0 +1,23 @@ +use pretty::RcDoc; + +use crate::ast::Program; + +impl Program { + pub fn to_doc(&self) -> RcDoc { + match self { + Program::Expr { + s0, + expr, + s1, + span: _, + } => RcDoc::nil(), + Program::Module { + s0, + s1, + elems, + s2, + span: _, + } => RcDoc::text("module"), + } + } +} From a18532a23ad9c68266a5f45ce6d7569458df436b Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 20 Nov 2022 20:33:29 +0100 Subject: [PATCH 4/4] Start pretty printing comments --- src/pretty/basic.rs | 86 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 src/pretty/basic.rs diff --git a/src/pretty/basic.rs b/src/pretty/basic.rs new file mode 100644 index 0000000..c3e6dcc --- /dev/null +++ b/src/pretty/basic.rs @@ -0,0 +1,86 @@ +use std::mem; + +use pretty::{DocAllocator, DocBuilder, Pretty, RcAllocator, RcDoc}; + +use crate::ast::{Ident, Line, Space}; + +pub enum Group<'a> { + EmptyLine, + CommentBlock(Vec<&'a str>), +} + +/// Group comments and deduplicate empty lines +fn group_comments(lines: &[Line]) -> Vec { + let mut result = vec![]; + + let mut current_block = vec![]; + let mut last_line_was_empty = false; + for line in lines { + match line { + Line::Empty if last_line_was_empty => {} + Line::Empty => { + last_line_was_empty = true; + if !current_block.is_empty() { + result.push(Group::CommentBlock(mem::take(&mut current_block))); + } + result.push(Group::EmptyLine); + } + Line::Comment(comment) => { + last_line_was_empty = false; + current_block.push(comment); + } + } + } + + if !current_block.is_empty() { + result.push(Group::CommentBlock(current_block)); + } + + result +} + +fn remove_leading_empty_line(groups: &mut Vec) { + if let Some(Group::EmptyLine) = groups.first() { + groups.remove(0); + } +} + +fn remove_trailing_empty_line(groups: &mut Vec) { + if let Some(Group::EmptyLine) = groups.last() { + groups.pop(); + } +} + +fn comment_group_to_doc(comment: Vec<&str>) -> RcDoc { + RcAllocator + .concat(comment.into_iter().map(|c| { + RcDoc::text("# ") + .append(RcDoc::text(c)) + .append(RcDoc::hardline()) + })) + .indent(0) + .into_doc() +} + +impl Space { + /// Format as document for inline use, prefering nil if possible. + pub fn to_doc_inline_nospace(&self) -> RcDoc { + RcDoc::nil() + } + + /// Format as document, prefering a single space if possible. + pub fn to_doc_inline_space(&self) -> RcDoc { + RcDoc::space() + } + + /// Format as document, prefering a newline if possible. + pub fn to_doc_newline(&self) -> RcDoc { + RcDoc::line() + } +} + +impl Ident { + pub fn to_doc(&self) -> RcDoc { + RcDoc::text(&self.name) + } +}