Switch to pest-generated parser

This commit is contained in:
Joscha 2021-11-18 02:49:38 +01:00
parent 9c9e5764f2
commit 1e58672e21
9 changed files with 262 additions and 250 deletions

View file

@ -75,7 +75,7 @@ pub struct WeekdaySpec {
#[derive(Debug)]
pub enum IntVar {
/// `j`, see https://en.wikipedia.org/wiki/Julian_day
/// `j`, see <https://en.wikipedia.org/wiki/Julian_day>
JulianDay,
/// `y`
Year,
@ -85,7 +85,7 @@ pub enum IntVar {
YearLength,
/// `yd`, day of the year
YearDay,
/// `yD`, day of the year starting from the end
/// `Yd`, day of the year starting from the end
///
/// Equal to `yl - yd + 1`
YearDayReverse,
@ -93,7 +93,7 @@ pub enum IntVar {
///
/// Equal to `((yd - 1) / 7) + 1`
YearWeek,
/// `yw`, 1 during the last 7 days of the year, 2 during the previous etc.
/// `Yw`, 1 during the last 7 days of the year, 2 during the previous etc.
///
/// Equal to `((Yd - 1) / 7) + 1`
YearWeekReverse,
@ -103,7 +103,7 @@ pub enum IntVar {
MonthLength,
/// `d` or `md`, day of the month
MonthDay,
/// `D` or `mD`, day of the month starting from the end
/// `D` or `Md`, day of the month starting from the end
///
/// Equal to `ml - md + 1`
MonthDayReverse,
@ -111,7 +111,7 @@ pub enum IntVar {
///
/// Equal to `((md - 1) / 7) + 1`
MonthWeek,
/// `mW`, 1 during the last 7 days of the month, 2 during the previous etc.
/// `Mw`, 1 during the last 7 days of the month, 2 during the previous etc.
///
/// Equal to `((Md - 1) / 7) + 1`
MonthWeekReverse,

View file

@ -1,7 +1,11 @@
use std::fs;
use std::path::PathBuf;
use pest::Parser;
use structopt::StructOpt;
use parse::{MyParser, Rule};
mod commands;
mod parse;
@ -11,10 +15,10 @@ pub struct Opt {
file: PathBuf,
}
// Program entry point. Two signatures appear below because this hunk lists
// lines of both the previous and the updated `main` side by side.
fn main() {
fn main() -> anyhow::Result<()> {
// Parse the command-line options and dump them for debugging.
let opt = Opt::from_args();
println!("{:#?}", opt);
let commands = parse::parse(&opt.file);
println!("{:#?}", commands);
// Read the whole input file; an I/O error is propagated to the caller.
let content = fs::read_to_string(&opt.file)?;
// Run the pest-generated parser starting at the `file` rule.
// NOTE(review): `.next().unwrap()` assumes a successful parse always yields
// at least one pair — confirm against pest's `Pairs` documentation.
let parsed = MyParser::parse(Rule::file, &content)?.next().unwrap();
println!("{:#?}", parsed);
Ok(())
}

View file

@ -1,18 +1,3 @@
use std::fs;
use std::path::Path;
use crate::commands::Command;
use self::line::parse_lines;
mod error;
mod line;
mod parser;
/// Reads `file`, splits its content into lines via `parse_lines` and prints
/// the result for debugging.
///
/// # Errors
///
/// Returns an error if the file cannot be read or if `parse_lines` fails.
///
/// # Panics
///
/// Converting the lines into commands is not implemented yet, so this
/// function always panics through `todo!()` once the lines were printed.
pub fn parse(file: &Path) -> anyhow::Result<Vec<Command>> {
    let lines = parse_lines(&fs::read_to_string(file)?)?;
    println!("{:#?}", lines);
    todo!()
}
/// Parser type whose implementation is derived by `pest_derive` from the
/// grammar file named in the `grammar` attribute below.
#[derive(pest_derive::Parser)]
#[grammar = "parse/grammar.pest"]
pub struct MyParser;

View file

@ -1,32 +0,0 @@
use std::error;
/// A parse failure together with the line it was reported for.
///
/// Displayed as `line {line}: {reason}`.
///
/// NOTE(review): `Box<dyn error::Error>` carries no `Send + Sync` bound, so
/// this type is neither `Send` nor `Sync`. Converting it into an
/// `anyhow::Error` with `?` needs both — confirm this is intended.
#[derive(Debug, thiserror::Error)]
#[error("line {line}: {reason}")]
pub struct ParseError {
/// Line number the error refers to, exactly as supplied by the caller.
line: usize,
/// The underlying cause, boxed so that any error type can be stored.
reason: Box<dyn error::Error>,
}
impl ParseError {
    /// Creates a `ParseError` for `line`, boxing `reason` as its cause.
    #[must_use]
    pub fn new(line: usize, reason: impl error::Error + 'static) -> Self {
        Self {
            line,
            reason: Box::new(reason),
        }
    }

    /// Shorthand for `Err(ParseError::new(line, reason))`.
    ///
    /// The success type `T` is chosen by the caller, which lets this be used
    /// directly as the tail expression of any function returning
    /// `Result<T, ParseError>`.
    ///
    /// There is deliberately no `#[must_use]` here: `Result` is already
    /// marked `#[must_use]`, so repeating the attribute adds nothing.
    pub fn pack<T>(line: usize, reason: impl error::Error + 'static) -> Result<T, Self> {
        Err(Self::new(line, reason))
    }
}
/// Extension trait that lets an error value be tagged with a line number by
/// calling `.at(line)` on it.
pub trait ToParseError: error::Error + 'static + Sized {
/// Wraps `self` in a [`ParseError`] reported for `line`.
#[must_use]
fn at(self, line: usize) -> ParseError {
ParseError::new(line, self)
}
}
// Blanket impl: every `'static` error type gets `at` through the default method.
impl<E: error::Error + 'static> ToParseError for E {}

105
src/parse/grammar.pest Normal file
View file

@ -0,0 +1,105 @@
// End of a line: a line break or the end of the input. Silent (`_`), so it
// produces no pair of its own.
eol = _{ NEWLINE | EOI }
// Implicit whitespace that pest may skip between the parts of non-atomic
// rules. The `!eol` guard keeps it from consuming line breaks.
WHITESPACE = _{ !eol ~ WHITE_SPACE }
// Everything up to (not including) the end of the line; at least one character.
rest = { (!eol ~ ANY)+ }
// Text after a command keyword: one explicit whitespace character, the rest of
// the line, and the line end.
title = { WHITESPACE ~ rest ~ eol }
// Fixed-width numeric fields. Atomic (`@`), so no implicit whitespace is
// allowed between the digits.
year = @{ ASCII_DIGIT{4} }
month = @{ ASCII_DIGIT{2} }
day = @{ ASCII_DIGIT{2} }
// Calendar date written as `YYYY-MM-DD`. Compound-atomic (`$`): no implicit
// whitespace, but `year`, `month` and `day` still produce their own pairs.
datum = ${ year ~ "-" ~ month ~ "-" ~ day }
// Like `datum`, but the year may be replaced by a literal `?`.
bdatum = ${ (year | "?") ~ "-" ~ month ~ "-" ~ day }
hour = @{ ASCII_DIGIT{2} }
minute = @{ ASCII_DIGIT{2} }
// Time of day written as `HH:MM`.
time = ${ hour ~ ":" ~ minute }
// Three-letter weekday abbreviation.
weekday = { "mon" | "tue" | "wed" | "thu" | "fri" | "sat" | "sun" }
// Sign followed by optional digits; a bare `+` or `-` is accepted.
amount = { ("+" | "-") ~ ASCII_DIGIT* }
// One rule per unit in which a delta step can be expressed.
delta_weekdays = { amount ~ weekday }
delta_years = { amount ~ ("y" | "Y") }
delta_months = { amount ~ ("m" | "M") }
delta_days = { amount ~ "d" }
delta_weeks = { amount ~ "w" }
delta_hours = { amount ~ "h" }
// NOTE(review): `delta_months` also accepts a lowercase `m` and is tried
// earlier in `delta`, so this alternative looks unreachable — confirm which
// suffix minutes are meant to use.
delta_minutes = { amount ~ "m" }
// One or more delta steps; the alternatives are tried in the order listed.
delta = { (delta_weekdays | delta_years | delta_months | delta_days | delta_weeks | delta_hours | delta_minutes)+ }
// An expression wrapped in parentheses.
paren_expr = { "(" ~ expr ~ ")" }
// Unsigned integer literal; atomic, so the digits must be adjacent.
number = @{ ASCII_DIGIT+ }
// Boolean literal.
boolean = { "true" | "false" }
// Names that can be used as variables inside expressions.
//
// pest's choice operator is ordered: the first alternative that matches wins
// and the later ones are never tried. A name must therefore be listed before
// any other name that is a prefix of it: `mon` before `m`, `iyl` before `iy`,
// and the two-letter `y*` / `m*` names before the bare `y` / `m`.
variable = {
    "j"
  | "yl" | "yd" | "Yd" | "yw" | "Yw" | "y"
  | "mon" | "tue" | "wed" | "thu" | "fri" | "sat" | "sun"
  | "ml" | "md" | "Md" | "mw" | "Mw" | "m"
  | "d" | "D"
  | "iyl" | "iy"
  | "wd"
  | "e"
  | "isWeekday" | "isWeekend" | "isLeapYear"
}
// Smallest unit of an expression.
term = { paren_expr | number | boolean | variable }
// Binary operators. The two-character operators `<=` and `>=` are listed
// before `<` and `>` so that ordered choice does not stop at the shorter one.
op = {
"+" | "-" | "*" | "/" | "%"
| "=" | "!="
| "<=" | "<" | ">=" | ">"
| "&" | "|" | "^"
}
// Flat sequence of terms joined by operators; the grammar itself encodes no
// operator precedence.
expr = { term ~ (op ~ term)* }
// Date spec anchored at a concrete calendar date, optionally shifted by a
// delta and given a time.
date_fixed_start = { datum ~ delta? ~ time? }
// End of the range: a full date again, only a delta, or only a time.
date_fixed_end = { datum ~ delta? ~ time? | delta ~ time? | time }
// Repetition interval given after `;`.
date_fixed_repeat = { delta }
date_fixed = { date_fixed_start ~ ("--" ~ date_fixed_end)? ~ (";" ~ date_fixed_repeat)? }
// Date spec driven by `*` or a parenthesised expression instead of a fixed date.
date_expr_start = { ("*" | paren_expr) ~ delta? ~ time? }
date_expr_end = { delta ~ time? | time }
date_expr = { date_expr_start ~ ("--" ~ date_expr_end)? }
// Date spec anchored at a weekday name.
date_weekday_start = { weekday ~ delta? ~ time? }
date_weekday_end = { weekday ~ delta? ~ time? | delta ~ time? | time }
date_weekday = { date_weekday_start ~ ("--" ~ date_weekday_end)? }
// Keyword lines. Each is non-atomic (`!`), so implicit whitespace is allowed
// between the keyword and its arguments.
date = !{ "DATE" ~ (date_fixed | date_expr | date_weekday) ~ eol }
bdate = !{ "BDATE" ~ bdatum ~ eol }
from = !{ "FROM" ~ datum ~ eol }
until = !{ "UNTIL" ~ datum ~ eol }
except = !{ "EXCEPT" ~ datum ~ eol }
// Date and time in parentheses, used by `done`.
donedate = { "(" ~ datum ~ time ~ ")" }
// NOTE(review): both the date and the parenthesised timestamp are optional,
// so a bare `DONE` line is accepted — confirm this is intended.
done = !{ "DONE" ~ datum? ~ donedate? ~ eol }
// I need to use `NEWLINE` (not `eol`) for the empty line here. Otherwise, the
// parser gets into an endless loop at the `EOI`, since `indented_line*` can
// appear at the end of the file and would just repeatedly match the empty
// string.
// One line of a description: either an empty line, or a line that starts with
// whitespace followed by some text.
indented_line = { NEWLINE | WHITESPACE ~ rest ~ eol }
// Zero or more description lines following a command.
description = { indented_line* }
// A task: keyword, title, any number of keyword lines, then a description.
task = {
"TASK"
~ title
~ (date | from | until | except | done)*
~ description
}
// A note; accepts the same keyword lines as `task`.
note = {
"NOTE"
~ title
~ (date | from | until | except | done)*
~ description
}
// A birthday: exactly one `BDATE` line after the title.
birthday = {
"BIRTHDAY"
~ title
~ bdate
~ description
}
command = { task | note | birthday }
// Top-level rule: optional leading blank lines, then commands up to the end
// of the input. Compound-atomic (`$`), so no implicit whitespace is skipped
// here or in the rules below it until a non-atomic (`!`) rule is entered.
file = ${ SOI ~ NEWLINE* ~ command* ~ EOI }

View file

@ -1,102 +0,0 @@
use std::result;
use chrono::NaiveDate;
use crate::commands::{BirthdaySpec, Done, Spec};
use super::error::ParseError;
/// One classified line of the input, as produced by `parse_line`.
#[derive(Debug)]
pub enum Line {
/// A line without any content. Also what the unfinished `DATE`, `BDATE` and
/// `DONE` stubs currently return.
Empty,
/// A line starting with a tab or a space; holds the complete, untrimmed line.
Indented(String),
/// `TASK` line; holds the trimmed text after the command name.
Task(String),
/// `NOTE` line; holds the trimmed text after the command name.
Note(String),
/// `BIRTHDAY` line; holds the trimmed text after the command name.
Birthday(String),
/// `DATE` line.
Date(Spec),
/// `BDATE` line.
BDate(BirthdaySpec),
/// `FROM` line.
From(NaiveDate),
/// `UNTIL` line.
Until(NaiveDate),
/// `EXCEPT` line.
Except(NaiveDate),
/// `DONE` line.
Done(Done),
}
/// Why a line could not be classified by `parse_line`.
#[derive(Debug, thiserror::Error)]
pub enum Reason {
/// The line is neither empty nor indented and contains no space separating
/// a command name from its body.
#[error("unknown format")]
UnknownFormat,
/// The command name (carried in the variant) is not one of the known keywords.
#[error("unknown command {0:?}")]
UnknownCommand(String),
/// The text after the command name is empty once trimmed.
#[error("empty command body")]
EmptyCommand,
}
type Result<T> = result::Result<T, ParseError>;
/// Classifies every line of `content`.
///
/// # Errors
///
/// Stops at the first line that cannot be classified and returns its error.
/// The line number stored in that error is 1-based, matching what editors
/// display, so the first line of the input is reported as line 1.
pub fn parse_lines(content: &str) -> Result<Vec<Line>> {
    content
        .lines()
        .enumerate()
        // `enumerate` counts from 0; shift to the 1-based numbering users expect.
        .map(|(index, text)| parse_line(index + 1, text))
        .collect()
}
/// Classifies a single line of input.
///
/// * An empty line becomes `Line::Empty`.
/// * A line starting with a tab or a space becomes `Line::Indented` and keeps
///   its full content.
/// * Anything else must be `NAME body`, split at the first space. The body is
///   trimmed and dispatched on `NAME`.
///
/// # Errors
///
/// * `Reason::UnknownFormat` if the line contains no space.
/// * `Reason::EmptyCommand` if the trimmed body is empty.
/// * `Reason::UnknownCommand` if `NAME` is not a known keyword.
///
/// All errors are tagged with `line`.
fn parse_line(line: usize, content: &str) -> Result<Line> {
    println!("Parsing line {:?}", content);

    if content.is_empty() {
        return Ok(Line::Empty);
    }
    if content.starts_with(|c: char| c == '\t' || c == ' ') {
        return Ok(Line::Indented(content.to_string()));
    }

    let (name, body) = match parse_command(content) {
        Some(parts) => parts,
        None => return ParseError::pack(line, Reason::UnknownFormat),
    };

    let body = body.trim();
    if body.is_empty() {
        return ParseError::pack(line, Reason::EmptyCommand);
    }

    match name {
        "TASK" => Ok(Line::Task(body.to_string())),
        "NOTE" => Ok(Line::Note(body.to_string())),
        "BIRTHDAY" => Ok(Line::Birthday(body.to_string())),
        "DATE" => parse_date(body),
        "BDATE" => parse_bdate(body),
        "FROM" => parse_datum(body).map(Line::From),
        "UNTIL" => parse_datum(body).map(Line::Until),
        "EXCEPT" => parse_datum(body).map(Line::Except),
        "DONE" => parse_done(body),
        unknown => ParseError::pack(line, Reason::UnknownCommand(unknown.to_string())),
    }
}
/// Splits `line` at its first space into `(name, content)`.
///
/// The separating space belongs to neither part. Returns `None` if `line`
/// contains no space at all.
fn parse_command(line: &str) -> Option<(&str, &str)> {
    line.split_once(' ')
}
/// Placeholder for parsing the body of a `DATE` line.
///
/// Not implemented yet: prints `s` for debugging and always returns
/// `Line::Empty`.
fn parse_date(s: &str) -> Result<Line> {
println!(" parsing date from {:?}", s);
Ok(Line::Empty) // TODO
}
/// Placeholder for parsing the body of a `BDATE` line.
///
/// Not implemented yet: prints `s` for debugging and always returns
/// `Line::Empty`.
fn parse_bdate(s: &str) -> Result<Line> {
println!(" parsing bdate from {:?}", s);
Ok(Line::Empty) // TODO
}
/// Placeholder for parsing a calendar date.
///
/// Not implemented yet: prints `s` for debugging and always returns the fixed
/// date 2015-03-14, whatever the input is.
fn parse_datum(s: &str) -> Result<NaiveDate> {
println!(" parsing datum from {:?}", s);
Ok(NaiveDate::from_ymd(2015, 3, 14)) // TODO
}
/// Placeholder for parsing the body of a `DONE` line.
///
/// Not implemented yet: prints `s` for debugging and always returns
/// `Line::Empty`.
fn parse_done(s: &str) -> Result<Line> {
println!(" parsing done from {:?}", s);
Ok(Line::Empty) // TODO
}

View file

@ -1,88 +0,0 @@
/// Cursor over a string slice that is consumed from front to back.
pub struct Parser<'d> {
/// The complete input.
data: &'d str,
/// Byte offset into `data` of the next character that has not been consumed.
index: usize,
}
/// Why a `Parser` operation failed. `rest` is a copy of the input that had
/// not been consumed when the failure occurred.
#[derive(Debug, thiserror::Error)]
pub enum Reason {
/// A specific character was required but something else (or nothing) followed.
#[error("expected character {expected:?} at {rest:?}")]
ExpectedChar { expected: char, rest: String },
/// A specific string was required but the input did not continue with it.
#[error("expected string {expected:?} at {rest:?}")]
ExpectedStr { expected: String, rest: String },
/// At least one whitespace character was required.
#[error("expected whitespace at {rest:?}")]
ExpectedWhitespace { rest: String },
}
impl<'d> Parser<'d> {
    /// Creates a parser positioned at the start of `data`.
    pub fn new(data: &'d str) -> Self {
        Self { data, index: 0 }
    }

    /// Returns the part of the input that has not been consumed yet.
    fn rest(&self) -> &'d str {
        &self.data[self.index..]
    }

    /// Returns the next character without consuming it, or `None` at the end
    /// of the input.
    pub fn peek(&self) -> Option<char> {
        self.rest().chars().next()
    }

    /// Consumes and returns the next character, or returns `None` at the end
    /// of the input.
    pub fn take(&mut self) -> Option<char> {
        let c = self.peek()?;
        // Advance by the encoded width so `index` stays on a char boundary.
        self.index += c.len_utf8();
        Some(c)
    }

    /// Consumes the next character if it equals `c`.
    ///
    /// # Errors
    ///
    /// Returns `Reason::ExpectedChar` and consumes nothing if the next
    /// character differs or the input is exhausted.
    pub fn take_exact(&mut self, c: char) -> Result<(), Reason> {
        if self.peek() == Some(c) {
            self.take();
            Ok(())
        } else {
            Err(Reason::ExpectedChar {
                expected: c,
                rest: self.rest().to_string(),
            })
        }
    }

    /// Consumes zero or more whitespace characters.
    pub fn take_any_whitespace(&mut self) {
        while matches!(self.peek(), Some(c) if c.is_whitespace()) {
            self.take();
        }
    }

    /// Consumes one or more whitespace characters.
    ///
    /// # Errors
    ///
    /// Returns `Reason::ExpectedWhitespace` and consumes nothing if the next
    /// character is not whitespace or the input is exhausted.
    pub fn take_some_whitespace(&mut self) -> Result<(), Reason> {
        match self.peek() {
            Some(c) if c.is_whitespace() => {
                self.take();
                self.take_any_whitespace();
                Ok(())
            }
            _ => Err(Reason::ExpectedWhitespace {
                rest: self.rest().to_string(),
            }),
        }
    }

    /// Returns whether the unconsumed input begins with `pattern`.
    pub fn starts_with(&self, pattern: &str) -> bool {
        // Must look at `rest()`, not `data`: checking the whole input would
        // keep matching text that has already been consumed.
        self.rest().starts_with(pattern)
    }

    /// Consumes `pattern` if the unconsumed input begins with it.
    ///
    /// # Errors
    ///
    /// Returns `Reason::ExpectedStr` and consumes nothing otherwise.
    pub fn take_starting_with(&mut self, pattern: &str) -> Result<(), Reason> {
        if self.starts_with(pattern) {
            self.index += pattern.len();
            Ok(())
        } else {
            Err(Reason::ExpectedStr {
                expected: pattern.to_string(),
                rest: self.rest().to_string(),
            })
        }
    }
}