From b009a9c4ec36082478dd1e26f8709df23d34339c Mon Sep 17 00:00:00 2001
From: Joscha <joscha@plugh.de>
Date: Sun, 20 Nov 2022 20:25:39 +0100
Subject: [PATCH 1/4] Handle things separated by things differently

I noticed that programs like '{} would parse correctly while '{ } would
expect an inner element. This was because the leading space was actually
part of the element parser, which is a violation of the (as of yet
unspoken) rule that parsers should not parse surrounding whitespace.

Because whitespace whas treated differently from everywhere else and
because this implementation was wrong, I decided to reimplement it,
abstracting the concept of things separated by other things with
optional trailing things. I did this in such a way that surrounding
whitespace is not touched.
---
 src/ast/basic.rs           | 20 ++++++++++++++++++++
 src/ast/lit.rs             | 10 ++++++----
 src/ast/program.rs         | 10 +++++-----
 src/ast/table_constr.rs    | 10 ++++++----
 src/ast/table_destr.rs     | 12 +++++++-----
 src/parser/basic.rs        | 26 +++++++++++++++++++++++++-
 src/parser/lit.rs          | 26 +++++++++++---------------
 src/parser/program.rs      | 21 ++++++++++-----------
 src/parser/table_constr.rs | 29 +++++++++++++----------------
 src/parser/table_destr.rs  | 29 +++++++++++++----------------
 10 files changed, 116 insertions(+), 77 deletions(-)
diff --git a/src/ast/basic.rs b/src/ast/basic.rs
index e472e39..1194111 100644
--- a/src/ast/basic.rs
+++ b/src/ast/basic.rs
@@ -47,3 +47,23 @@ impl HasSpan for Ident {
         self.span
     }
 }
+
+#[derive(Debug, Clone)]
+pub enum Separated<E, S1, S2> {
+    Empty(Span),
+    NonEmpty {
+        first_elem: E,
+        last_elems: Vec<(S1, E)>,
+        trailing: Option<S2>,
+        span: Span,
+    },
+}
+
+impl<E, S1, S2> HasSpan for Separated<E, S1, S2> {
+    fn span(&self) -> Span {
+        match self {
+            Separated::Empty(span) => *span,
+            Separated::NonEmpty { span, .. } => *span,
+        }
+    }
+}
diff --git a/src/ast/lit.rs b/src/ast/lit.rs
index 299bc1c..c079955 100644
--- a/src/ast/lit.rs
+++ b/src/ast/lit.rs
@@ -3,7 +3,7 @@ use std::fmt;
 use crate::builtin::Builtin;
 use crate::span::{HasSpan, Span};
 
-use super::{Expr, Ident, Space};
+use super::{Expr, Ident, Separated, Space};
 
 #[derive(Clone)]
 pub enum NumLitStr {
@@ -126,11 +126,13 @@ impl HasSpan for TableLitElem {
 }
 
 /// `'{ a, foo: b }`
+///
+/// Structure: `'{ s0 elems s1 }`
 #[derive(Debug, Clone)]
 pub struct TableLit {
-    pub elems: Vec<(Space, TableLitElem, Space)>,
-    /// `Some` if there is a trailing comma, `None` otherwise.
-    pub trailing_comma: Option<Space>,
+    pub s0: Space,
+    pub elems: Separated<TableLitElem, (Space, Space), Space>,
+    pub s1: Space,
     pub span: Span,
 }
 
diff --git a/src/ast/program.rs b/src/ast/program.rs
index 5a198d0..1a219f5 100644
--- a/src/ast/program.rs
+++ b/src/ast/program.rs
@@ -1,6 +1,6 @@
 use crate::span::{HasSpan, Span};
 
-use super::{Expr, Space, TableLitElem};
+use super::{Expr, Separated, Space, TableLitElem};
 
 #[derive(Debug, Clone)]
 pub enum Program {
@@ -12,12 +12,12 @@ pub enum Program {
         span: Span,
     },
 
-    /// Structure: `s0 module elems trailing_comma`
+    /// Structure: `s0 module s1 elems s2`
     Module {
         s0: Space,
-        elems: Vec<(Space, TableLitElem, Space)>,
-        /// `Some` if there is a trailing comma, `None` otherwise.
-        trailing_comma: Option<Space>,
+        s1: Space,
+        elems: Separated<TableLitElem, (Space, Space), Space>,
+        s2: Space,
         span: Span,
     },
 }
diff --git a/src/ast/table_constr.rs b/src/ast/table_constr.rs
index 0b699cc..741a30c 100644
--- a/src/ast/table_constr.rs
+++ b/src/ast/table_constr.rs
@@ -1,6 +1,6 @@
 use crate::span::{HasSpan, Span};
 
-use super::{Expr, Space, TableLitElem};
+use super::{Expr, Separated, Space, TableLitElem};
 
 #[derive(Debug, Clone)]
 pub enum TableConstrElem {
@@ -31,11 +31,13 @@ impl HasSpan for TableConstrElem {
 }
 
 /// `{ a, b, foo: c, [d]: e }`
+/// 
+/// Structure: `{ s0 elems s1 }`
 #[derive(Debug, Clone)]
 pub struct TableConstr {
-    pub elems: Vec<(Space, TableConstrElem, Space)>,
-    /// `Some` if there is a trailing comma, `None` otherwise.
-    pub trailing_comma: Option<Space>,
+    pub s0: Space,
+    pub elems: Separated<TableConstrElem, (Space, Space), Space>,
+    pub s1: Space,
     pub span: Span,
 }
 
diff --git a/src/ast/table_destr.rs b/src/ast/table_destr.rs
index 56c8d36..9cbf46a 100644
--- a/src/ast/table_destr.rs
+++ b/src/ast/table_destr.rs
@@ -1,6 +1,6 @@
 use crate::span::{HasSpan, Span};
 
-use super::{Expr, Ident, Space};
+use super::{Expr, Ident, Separated, Space};
 
 // TODO Make table patterns recursive
 
@@ -30,12 +30,14 @@ impl HasSpan for TablePatternElem {
     }
 }
 
-/// `'{ foo, bar: baz }`
+/// `{ foo, bar: baz }`
+/// 
+/// Structure: `{ s0 elems s1 }`
 #[derive(Debug, Clone)]
 pub struct TablePattern {
-    pub elems: Vec<(Space, TablePatternElem, Space)>,
-    /// `Some` if there is a trailing comma, `None` otherwise.
-    pub trailing_comma: Option<Space>,
+    pub s0: Space,
+    pub elems: Separated<TablePatternElem, (Space, Space), Space>,
+    pub s1: Space,
     pub span: Span,
 }
 
diff --git a/src/parser/basic.rs b/src/parser/basic.rs
index 8d7ba1d..2a65c68 100644
--- a/src/parser/basic.rs
+++ b/src/parser/basic.rs
@@ -3,7 +3,7 @@
 use chumsky::prelude::*;
 use chumsky::text::Character;
 
-use crate::ast::{Ident, Line, Space};
+use crate::ast::{Ident, Line, Separated, Space};
 use crate::span::Span;
 
 pub type Error = Simple<char, Span>;
@@ -58,3 +58,27 @@ pub fn ident() -> EParser<Ident> {
 pub fn local(space: EParser<Space>) -> EParser<Option<Space>> {
     text::keyword("local").ignore_then(space).or_not().boxed()
 }
+
+// This function is more of a utility function. Because of this and to keep the
+// code nicer, I have decided that the rules specified in the `parser` module
+// don't apply to it.
+pub fn separated_by<E: 'static, S1: 'static, S2: 'static>(
+    elem: impl Parser<char, E, Error = Error> + Clone + 'static,
+    separator: impl Parser<char, S1, Error = Error> + 'static,
+    trailing_separator: impl Parser<char, S2, Error = Error> + 'static,
+) -> EParser<Separated<E, S1, S2>> {
+    elem.clone()
+        .then(separator.then(elem).repeated())
+        .then(trailing_separator.or_not())
+        .or_not()
+        .map_with_span(|s, span| match s {
+            Some(((first_elem, last_elems), trailing)) => Separated::NonEmpty {
+                first_elem,
+                last_elems,
+                trailing,
+                span,
+            },
+            None => Separated::Empty(span),
+        })
+        .boxed()
+}
diff --git a/src/parser/lit.rs b/src/parser/lit.rs
index 7d04e1f..0bdf12a 100644
--- a/src/parser/lit.rs
+++ b/src/parser/lit.rs
@@ -7,7 +7,7 @@ use crate::ast::{
 };
 use crate::builtin::Builtin;
 
-use super::basic::{EParser, Error};
+use super::basic::{separated_by, EParser, Error};
 
 fn builtin_lit() -> impl Parser<char, Builtin, Error = Error> {
     just('\'').ignore_then(choice((
@@ -154,22 +154,18 @@ fn table_lit(
     space: EParser<Space>,
     table_lit_elem: EParser<TableLitElem>,
 ) -> impl Parser<char, TableLit, Error = Error> {
-    let elem = space
+    let separator = space.clone().then_ignore(just(',')).then(space.clone());
+    let trailing_separator = space.clone().then_ignore(just(','));
+
+    space
         .clone()
-        .then(table_lit_elem)
-        .then(space.clone())
-        .map(|((s0, elem), s1)| (s0, elem, s1));
-
-    let trailing_comma = just(',').ignore_then(space).or_not();
-
-    let elems = elem.separated_by(just(',')).then(trailing_comma);
-
-    just("'{")
-        .ignore_then(elems)
-        .then_ignore(just('}'))
-        .map_with_span(|(elems, trailing_comma), span| TableLit {
+        .then(separated_by(table_lit_elem, separator, trailing_separator))
+        .then(space)
+        .delimited_by(just("'{"), just('}'))
+        .map_with_span(|((s0, elems), s1), span| TableLit {
+            s0,
             elems,
-            trailing_comma,
+            s1,
             span,
         })
 }
diff --git a/src/parser/program.rs b/src/parser/program.rs
index 26e12fd..0b46f94 100644
--- a/src/parser/program.rs
+++ b/src/parser/program.rs
@@ -4,7 +4,7 @@ use chumsky::prelude::*;
 
 use crate::ast::{Expr, Program, Space, TableLitElem};
 
-use super::basic::EParser;
+use super::basic::{separated_by, EParser};
 
 pub fn program(
     space: EParser<Space>,
@@ -17,20 +17,19 @@ pub fn program(
         .then(space.clone())
         .map_with_span(|((s0, expr), s1), span| Program::Expr { s0, expr, s1, span });
 
-    let elem = space
-        .clone()
-        .then(table_lit_elem)
-        .then(space.clone())
-        .map(|((s0, elem), s1)| (s0, elem, s1));
-    let trailing_comma = just(',').ignore_then(space.clone()).or_not();
+    let separator = space.clone().then_ignore(just(',')).then(space.clone());
+    let trailing_separator = space.clone().then_ignore(just(','));
     let module = space
+        .clone()
         .then_ignore(text::keyword("module"))
-        .then(elem.separated_by(just(',')))
-        .then(trailing_comma)
-        .map_with_span(|((s0, elems), trailing_comma), span| Program::Module {
+        .then(space.clone())
+        .then(separated_by(table_lit_elem, separator, trailing_separator))
+        .then(space.clone())
+        .map_with_span(|(((s0, s1), elems), s2), span| Program::Module {
             s0,
+            s1,
             elems,
-            trailing_comma,
+            s2,
             span,
         });
 
diff --git a/src/parser/table_constr.rs b/src/parser/table_constr.rs
index ac0bb04..fd6aefd 100644
--- a/src/parser/table_constr.rs
+++ b/src/parser/table_constr.rs
@@ -4,13 +4,13 @@ use chumsky::prelude::*;
 
 use crate::ast::{Expr, Space, TableConstr, TableConstrElem, TableLitElem};
 
-use super::basic::{EParser, Error};
+use super::basic::{separated_by, EParser, Error};
 
 fn table_constr_elem(
     space: EParser<Space>,
     table_lit_elem: EParser<TableLitElem>,
     expr: EParser<Expr>,
-) -> impl Parser<char, TableConstrElem, Error = Error> {
+) -> impl Parser<char, TableConstrElem, Error = Error> + Clone {
     let lit = table_lit_elem.map(TableConstrElem::Lit);
 
     let indexed = just('[')
@@ -42,22 +42,19 @@ pub fn table_constr(
     table_lit_elem: EParser<TableLitElem>,
     expr: EParser<Expr>,
 ) -> EParser<TableConstr> {
-    let elem = space
+    let elem = table_constr_elem(space.clone(), table_lit_elem, expr);
+    let separator = space.clone().then_ignore(just(',')).then(space.clone());
+    let trailing_separator = space.clone().then_ignore(just(','));
+
+    space
         .clone()
-        .then(table_constr_elem(space.clone(), table_lit_elem, expr))
-        .then(space.clone())
-        .map(|((s0, elem), s1)| (s0, elem, s1));
-
-    let trailing_comma = just(',').ignore_then(space).or_not();
-
-    let elems = elem.separated_by(just(',')).then(trailing_comma);
-
-    just('{')
-        .ignore_then(elems)
-        .then_ignore(just('}'))
-        .map_with_span(|(elems, trailing_comma), span| TableConstr {
+        .then(separated_by(elem, separator, trailing_separator))
+        .then(space)
+        .delimited_by(just('{'), just('}'))
+        .map_with_span(|((s0, elems), s1), span| TableConstr {
+            s0,
             elems,
-            trailing_comma,
+            s1,
             span,
         })
         .boxed()
diff --git a/src/parser/table_destr.rs b/src/parser/table_destr.rs
index dfde71e..4fb5eda 100644
--- a/src/parser/table_destr.rs
+++ b/src/parser/table_destr.rs
@@ -4,12 +4,12 @@ use chumsky::prelude::*;
 
 use crate::ast::{Expr, Ident, Space, TableDestr, TablePattern, TablePatternElem};
 
-use super::basic::{EParser, Error};
+use super::basic::{separated_by, EParser, Error};
 
 fn table_pattern_elem(
     space: EParser<Space>,
     ident: EParser<Ident>,
-) -> impl Parser<char, TablePatternElem, Error = Error> {
+) -> impl Parser<char, TablePatternElem, Error = Error> + Clone {
     let positional = ident.clone().map(TablePatternElem::Positional);
 
     let named = ident
@@ -30,22 +30,19 @@ fn table_pattern_elem(
 }
 
 pub fn table_pattern(space: EParser<Space>, ident: EParser<Ident>) -> EParser<TablePattern> {
-    let elem = space
+    let elem = table_pattern_elem(space.clone(), ident);
+    let separator = space.clone().then_ignore(just(',')).then(space.clone());
+    let trailing_separator = space.clone().then_ignore(just(','));
+
+    space
         .clone()
-        .then(table_pattern_elem(space.clone(), ident))
-        .then(space.clone())
-        .map(|((s0, elem), s1)| (s0, elem, s1));
-
-    let trailing_comma = just(',').ignore_then(space).or_not();
-
-    let elems = elem.separated_by(just(',')).then(trailing_comma);
-
-    just('{')
-        .ignore_then(elems)
-        .then_ignore(just('}'))
-        .map_with_span(|(elems, trailing_comma), span| TablePattern {
+        .then(separated_by(elem, separator, trailing_separator))
+        .then(space)
+        .delimited_by(just('{'), just('}'))
+        .map_with_span(|((s0, elems), s1), span| TablePattern {
+            s0,
             elems,
-            trailing_comma,
+            s1,
             span,
         })
         .boxed()

From 969570fc4b193ccd6ec822188dcc43a4d7d1393c Mon Sep 17 00:00:00 2001
From: Joscha <joscha@plugh.de>
Date: Sun, 20 Nov 2022 20:30:09 +0100
Subject: [PATCH 2/4] Make rule about consuming surrounding whitespace

---
 src/parser.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/parser.rs b/src/parser.rs
index 72d26c6..1650d97 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -2,6 +2,7 @@
 //!
 //! # Rules
 //!
+//! - Parsers must not consume surrounding whitespace.
 //! - Public parser functions must return [`basic::EParser`].
 //! - Public parser functions must receive public subparsers via their arguments.
 //! - Each public parser function must be called exactly once, inside this file.

From b21619dabdce636add1ad9c03125e23179cd5dff Mon Sep 17 00:00:00 2001
From: Joscha <joscha@plugh.de>
Date: Sun, 20 Nov 2022 20:32:14 +0100
Subject: [PATCH 3/4] Pretty print programs partially

---
 src/main.rs           | 12 ++++--------
 src/pretty.rs         | 10 +---------
 src/pretty/program.rs | 23 +++++++++++++++++++++++
 3 files changed, 28 insertions(+), 17 deletions(-)
 create mode 100644 src/pretty/program.rs

diff --git a/src/main.rs b/src/main.rs
index 57f1850..ee5f73a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,6 @@
 use std::fs;
 use std::path::PathBuf;
 
-use ::pretty::{Pretty, RcAllocator};
 use chumsky::Parser as _;
 use clap::Parser;
 
@@ -48,17 +47,14 @@ fn main() -> anyhow::Result<()> {
             let stream = span::stream_from_str(&content);
             match parser::parser().parse(stream) {
                 Ok(program) => {
-                    println!("Successful parse");
-                    let doc = program.pretty(&RcAllocator);
                     let mut out = vec![];
-                    doc.render(100, &mut out)?;
-                    let str = String::from_utf8(out)?;
-                    println!("{str}");
+                    program.to_doc().render(100, &mut out)?;
+                    println!("{}", String::from_utf8(out)?);
                 }
                 Err(errs) => {
-                    println!("Parsing failed");
+                    eprintln!("Parsing failed");
                     for err in errs {
-                        println!("{err:?}");
+                        eprintln!("{err:?}");
                     }
                 }
             }
diff --git a/src/pretty.rs b/src/pretty.rs
index 1d8e288..3847c62 100644
--- a/src/pretty.rs
+++ b/src/pretty.rs
@@ -1,9 +1 @@
-use pretty::{DocAllocator, DocBuilder, Pretty};
-
-use crate::ast::Program;
-
-impl<'a, A: DocAllocator<'a>> Pretty<'a, A> for Program {
-    fn pretty(self, allocator: &'a A) -> DocBuilder<'a, A, ()> {
-        allocator.text("Hello world")
-    }
-}
+mod program;
diff --git a/src/pretty/program.rs b/src/pretty/program.rs
new file mode 100644
index 0000000..60a5ca0
--- /dev/null
+++ b/src/pretty/program.rs
@@ -0,0 +1,23 @@
+use pretty::RcDoc;
+
+use crate::ast::Program;
+
+impl Program {
+    pub fn to_doc(&self) -> RcDoc {
+        match self {
+            Program::Expr {
+                s0,
+                expr,
+                s1,
+                span: _,
+            } => RcDoc::nil(),
+            Program::Module {
+                s0,
+                s1,
+                elems,
+                s2,
+                span: _,
+            } => RcDoc::text("module"),
+        }
+    }
+}

From a18532a23ad9c68266a5f45ce6d7569458df436b Mon Sep 17 00:00:00 2001
From: Joscha <joscha@plugh.de>
Date: Sun, 20 Nov 2022 20:33:29 +0100
Subject: [PATCH 4/4] Start pretty printing comments

---
 src/pretty/basic.rs | 86 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 src/pretty/basic.rs

diff --git a/src/pretty/basic.rs b/src/pretty/basic.rs
new file mode 100644
index 0000000..c3e6dcc
--- /dev/null
+++ b/src/pretty/basic.rs
@@ -0,0 +1,86 @@
+use std::mem;
+
+use pretty::{DocAllocator, DocBuilder, Pretty, RcAllocator, RcDoc};
+
+use crate::ast::{Ident, Line, Space};
+
+pub enum Group<'a> {
+    EmptyLine,
+    CommentBlock(Vec<&'a str>),
+}
+
+/// Group comments and deduplicate empty lines
+fn group_comments(lines: &[Line]) -> Vec<Group> {
+    let mut result = vec![];
+
+    let mut current_block = vec![];
+    let mut last_line_was_empty = false;
+    for line in lines {
+        match line {
+            Line::Empty if last_line_was_empty => {}
+            Line::Empty => {
+                last_line_was_empty = true;
+                if !current_block.is_empty() {
+                    result.push(Group::CommentBlock(mem::take(&mut current_block)));
+                }
+                result.push(Group::EmptyLine);
+            }
+            Line::Comment(comment) => {
+                last_line_was_empty = false;
+                current_block.push(comment);
+            }
+        }
+    }
+
+    if !current_block.is_empty() {
+        result.push(Group::CommentBlock(current_block));
+    }
+
+    result
+}
+
+fn remove_leading_empty_line(groups: &mut Vec<Group>) {
+    if let Some(Group::EmptyLine) = groups.first() {
+        groups.remove(0);
+    }
+}
+
+fn remove_trailing_empty_line(groups: &mut Vec<Group>) {
+    if let Some(Group::EmptyLine) = groups.last() {
+        groups.pop();
+    }
+}
+
+fn comment_group_to_doc(comment: Vec<&str>) -> RcDoc {
+    RcAllocator
+        .concat(comment.into_iter().map(|c| {
+            RcDoc::text("# ")
+                .append(RcDoc::text(c))
+                .append(RcDoc::hardline())
+        }))
+        .indent(0)
+        .into_doc()
+}
+
+impl Space {
+    /// Format as document for inline use, prefering nil if possible.
+    pub fn to_doc_inline_nospace(&self) -> RcDoc {
+        RcDoc::nil()
+    }
+
+    /// Format as document, prefering a single space if possible.
+    pub fn to_doc_inline_space(&self) -> RcDoc {
+        RcDoc::space()
+    }
+
+    /// Format as document, prefering a newline if possible.
+    pub fn to_doc_newline(&self) -> RcDoc {
+        RcDoc::line()
+    }
+}
+
+impl Ident {
+    pub fn to_doc(&self) -> RcDoc {
+        RcDoc::text(&self.name)
+    }
+}