Switch to pest-generated parser

This commit is contained in:
Joscha 2021-11-18 02:49:38 +01:00
parent 9c9e5764f2
commit 1e58672e21
9 changed files with 262 additions and 250 deletions

138
Cargo.lock generated
View file

@ -40,6 +40,39 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "block-buffer"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b"
dependencies = [
"block-padding",
"byte-tools",
"byteorder",
"generic-array",
]
[[package]]
name = "block-padding"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5"
dependencies = [
"byte-tools",
]
[[package]]
name = "byte-tools"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7"
[[package]]
name = "byteorder"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "chrono"
version = "0.4.19"
@ -68,6 +101,30 @@ dependencies = [
"vec_map",
]
[[package]]
name = "digest"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
dependencies = [
"generic-array",
]
[[package]]
name = "fake-simd"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed"
[[package]]
name = "generic-array"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffdf9f34f1447443d37393cc6c2b8313aebddcd96906caf34e54c68d8e57d7bd"
dependencies = [
"typenum",
]
[[package]]
name = "heck"
version = "0.3.3"
@ -98,6 +155,12 @@ version = "0.2.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a60553f9a9e039a333b4e9b20573b9e9b9c0bb3a11e201ccc48ef4283456d673"
[[package]]
name = "maplit"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]]
name = "num-integer"
version = "0.1.44"
@ -117,6 +180,55 @@ dependencies = [
"autocfg",
]
[[package]]
name = "opaque-debug"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
[[package]]
name = "pest"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53"
dependencies = [
"ucd-trie",
]
[[package]]
name = "pest_derive"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "833d1ae558dc601e9a60366421196a8d94bc0ac980476d0b67e1d0988d72b2d0"
dependencies = [
"pest",
"pest_generator",
]
[[package]]
name = "pest_generator"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99b8db626e31e5b81787b9783425769681b347011cc59471e33ea46d2ea0cf55"
dependencies = [
"pest",
"pest_meta",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "pest_meta"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54be6e404f5317079812fc8f9f5279de376d8856929e21c184ecf6bbd692a11d"
dependencies = [
"maplit",
"pest",
"sha-1",
]
[[package]]
name = "proc-macro-error"
version = "1.0.4"
@ -159,6 +271,18 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "sha-1"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7d94d0bede923b3cea61f3f1ff57ff8cdfd77b400fb8f9998949e0cf04163df"
dependencies = [
"block-buffer",
"digest",
"fake-simd",
"opaque-debug",
]
[[package]]
name = "strsim"
version = "0.8.0"
@ -246,10 +370,24 @@ version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"pest",
"pest_derive",
"structopt",
"thiserror",
]
[[package]]
name = "typenum"
version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b63708a265f51345575b27fe43f9500ad611579e764c79edbc2037b1121959ec"
[[package]]
name = "ucd-trie"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c"
[[package]]
name = "unicode-segmentation"
version = "1.8.0"

View file

@ -6,5 +6,7 @@ edition = "2018"
[dependencies]
anyhow = "1.0.45"
chrono = "0.4.19"
pest = "2.1.3"
pest_derive = "2.1.0"
structopt = "0.3.25"
thiserror = "1.0.30"

View file

@ -75,7 +75,7 @@ pub struct WeekdaySpec {
#[derive(Debug)]
pub enum IntVar {
/// `j`, see https://en.wikipedia.org/wiki/Julian_day
/// `j`, see <https://en.wikipedia.org/wiki/Julian_day>
JulianDay,
/// `y`
Year,
@ -85,7 +85,7 @@ pub enum IntVar {
YearLength,
/// `yd`, day of the year
YearDay,
/// `yD`, day of the year starting from the end
/// `Yd`, day of the year starting from the end
///
/// Equal to `yl - yd + 1`
YearDayReverse,
@ -93,7 +93,7 @@ pub enum IntVar {
///
/// Equal to `((yd - 1) / 7) + 1`
YearWeek,
/// `yw`, 1 during the last 7 days of the year, 2 during the previous etc.
/// `Yw`, 1 during the last 7 days of the year, 2 during the previous etc.
///
/// Equal to `((Yd - 1) / 7) + 1`
YearWeekReverse,
@ -103,7 +103,7 @@ pub enum IntVar {
MonthLength,
/// `d` or `md`, day of the month
MonthDay,
/// `D` or `mD`, day of the month starting from the end
/// `D` or `Md`, day of the month starting from the end
///
/// Equal to `ml - md + 1`
MonthDayReverse,
@ -111,7 +111,7 @@ pub enum IntVar {
///
/// Equal to `((md - 1) / 7) + 1`
MonthWeek,
/// `mW`, 1 during the last 7 days of the month, 2 during the previous etc.
/// `Mw`, 1 during the last 7 days of the month, 2 during the previous etc.
///
/// Equal to `((Md - 1) / 7) + 1`
MonthWeekReverse,

View file

@ -1,7 +1,11 @@
use std::fs;
use std::path::PathBuf;
use pest::Parser;
use structopt::StructOpt;
use parse::{MyParser, Rule};
mod commands;
mod parse;
@ -11,10 +15,10 @@ pub struct Opt {
file: PathBuf,
}
fn main() {
fn main() -> anyhow::Result<()> {
let opt = Opt::from_args();
println!("{:#?}", opt);
let commands = parse::parse(&opt.file);
println!("{:#?}", commands);
let content = fs::read_to_string(&opt.file)?;
let parsed = MyParser::parse(Rule::file, &content)?.next().unwrap();
println!("{:#?}", parsed);
Ok(())
}

View file

@ -1,18 +1,3 @@
use std::fs;
use std::path::Path;
use crate::commands::Command;
use self::line::parse_lines;
mod error;
mod line;
mod parser;
/// Reads `file` into memory and runs `parse_lines` over its content.
///
/// The line-level result is printed for debugging. Turning it into commands
/// is not implemented yet, so a successful read and line parse ends in
/// `todo!()`.
///
/// # Errors
/// Fails if the file cannot be read or if `parse_lines` rejects a line.
pub fn parse(file: &Path) -> anyhow::Result<Vec<Command>> {
    let text = fs::read_to_string(file)?;
    let parsed_lines = parse_lines(&text)?;
    println!("{:#?}", parsed_lines);
    todo!()
}
/// Parser derived by `pest_derive` from the grammar in `parse/grammar.pest`.
#[derive(pest_derive::Parser)]
#[grammar = "parse/grammar.pest"]
pub struct MyParser;

View file

@ -1,32 +0,0 @@
use std::error;
/// A parse failure annotated with the line it occurred on.
///
/// Displays as `line {line}: {reason}`.
#[derive(Debug, thiserror::Error)]
#[error("line {line}: {reason}")]
pub struct ParseError {
/// Line number the error is attributed to.
line: usize,
/// The underlying error, boxed so that any error type can be stored.
// NOTE(review): `Box<dyn Error>` is neither `Send` nor `Sync`, which likely
// prevents converting this type into `anyhow::Error` with `?` — confirm.
reason: Box<dyn error::Error>,
}
impl ParseError {
    /// Creates a `ParseError` attributed to `line`, boxing `reason` as its cause.
    #[must_use]
    pub fn new(line: usize, reason: impl error::Error + 'static) -> Self {
        Self {
            line,
            reason: Box::new(reason),
        }
    }

    /// Shorthand for `Err(ParseError::new(line, reason))`.
    ///
    /// Useful as the tail expression of a function that returns
    /// `Result<T, ParseError>`.
    ///
    /// # Errors
    /// Always returns `Err`.
    // No `#[must_use]` here: `Result` is already `#[must_use]`, so repeating
    // the attribute adds nothing (clippy: `double_must_use`).
    pub fn pack<T>(line: usize, reason: impl error::Error + 'static) -> Result<T, Self> {
        Err(Self::new(line, reason))
    }
}
/// Extension trait that turns an error into a [`ParseError`] via `.at(line)`.
pub trait ToParseError: error::Error + 'static + Sized {
/// Wraps `self` in a [`ParseError`] attributed to `line`.
#[must_use]
fn at(self, line: usize) -> ParseError {
ParseError::new(line, self)
}
}
// Blanket impl: every `'static` error type gets `at` without further code.
impl<E: error::Error + 'static> ToParseError for E {}

105
src/parse/grammar.pest Normal file
View file

@ -0,0 +1,105 @@
// End of a line: a line break or the end of the input. Silent (`_`), so it
// produces no pair of its own.
eol = _{ NEWLINE | EOI }
// pest's implicit-whitespace rule, restricted so it never consumes a line end.
WHITESPACE = _{ !eol ~ WHITE_SPACE }
// One or more characters up to (not including) the end of the line.
rest = { (!eol ~ ANY)+ }
// Whitespace followed by the rest of the line; used for the text after
// `TASK`, `NOTE` and `BIRTHDAY`.
title = { WHITESPACE ~ rest ~ eol }
// Date and time components. `@` makes these atomic: no implicit whitespace is
// accepted between the digits. Only the digit count is checked, not the range.
year = @{ ASCII_DIGIT{4} }
month = @{ ASCII_DIGIT{2} }
day = @{ ASCII_DIGIT{2} }
// A full date written as `YYYY-MM-DD`.
datum = ${ year ~ "-" ~ month ~ "-" ~ day }
// Like `datum`, but the year may be written as `?`.
bdatum = ${ (year | "?") ~ "-" ~ month ~ "-" ~ day }
hour = @{ ASCII_DIGIT{2} }
minute = @{ ASCII_DIGIT{2} }
// A time of day written as `HH:MM`.
time = ${ hour ~ ":" ~ minute }
// Three-letter lowercase weekday names.
weekday = { "mon" | "tue" | "wed" | "thu" | "fri" | "sat" | "sun" }
// A mandatory sign followed by optional digits; the digits may be omitted.
// NOTE(review): this rule is not atomic, so when reached from a non-atomic
// rule such as `date`, whitespace between the digits appears to be accepted —
// confirm whether that is intended.
amount = { ("+" | "-") ~ ASCII_DIGIT* }
// Each `delta_*` rule is an `amount` followed by a unit suffix.
delta_weekdays = { amount ~ weekday }
delta_years = { amount ~ ("y" | "Y") }
delta_months = { amount ~ ("m" | "M") }
delta_days = { amount ~ "d" }
delta_weeks = { amount ~ "w" }
delta_hours = { amount ~ "h" }
// NOTE(review): `delta_months` also accepts "m" and is tried earlier in
// `delta`, so with pest's ordered choice this rule looks unreachable —
// confirm the intended suffixes for months and minutes.
delta_minutes = { amount ~ "m" }
// One or more deltas in sequence.
delta = { (delta_weekdays | delta_years | delta_months | delta_days | delta_weeks | delta_hours | delta_minutes)+ }
// An expression wrapped in parentheses.
paren_expr = { "(" ~ expr ~ ")" }
// An unsigned integer literal (atomic: the digits must be contiguous).
number = @{ ASCII_DIGIT+ }
// A boolean literal.
boolean = { "true" | "false" }
// A named variable usable inside expressions.
//
// pest uses ordered choice: the first alternative that matches wins and later
// alternatives are never retried. A literal that has another literal as a
// prefix must therefore be listed before that prefix ("yl" before "y",
// "iyl" before "iy", "mon" before "m"). Otherwise the longer name can never
// be matched.
variable = {
    "j"
    | "yl" | "yd" | "Yd" | "yw" | "Yw" | "y"
    | "mon" | "tue" | "wed" | "thu" | "fri" | "sat" | "sun"
    | "ml" | "md" | "Md" | "mw" | "Mw" | "m"
    | "d" | "D"
    | "iyl" | "iy"
    | "wd"
    | "e"
    | "isWeekday" | "isWeekend" | "isLeapYear"
}
// The operands of an expression.
term = { paren_expr | number | boolean | variable }
// Binary operators. Two-character operators are listed before their
// one-character prefixes ("<=" before "<") because pest commits to the first
// alternative that matches.
op = {
"+" | "-" | "*" | "/" | "%"
| "=" | "!="
| "<=" | "<" | ">=" | ">"
| "&" | "|" | "^"
}
// A flat sequence of terms joined by operators; operator precedence is not
// encoded in the grammar.
expr = { term ~ (op ~ term)* }
// Date spec with a fixed calendar date: a start, an optional `--` end and an
// optional `;` repeat delta.
date_fixed_start = { datum ~ delta? ~ time? }
// The end may restate a date, or give only a delta and/or a time.
date_fixed_end = { datum ~ delta? ~ time? | delta ~ time? | time }
date_fixed_repeat = { delta }
date_fixed = { date_fixed_start ~ ("--" ~ date_fixed_end)? ~ (";" ~ date_fixed_repeat)? }
// Date spec driven by an expression: `*` or a parenthesized expression as the
// start, with an optional `--` end given as a delta and/or a time.
date_expr_start = { ("*" | paren_expr) ~ delta? ~ time? }
date_expr_end = { delta ~ time? | time }
date_expr = { date_expr_start ~ ("--" ~ date_expr_end)? }
// Date spec anchored on a weekday name, with an optional `--` end.
date_weekday_start = { weekday ~ delta? ~ time? }
date_weekday_end = { weekday ~ delta? ~ time? | delta ~ time? | time }
date_weekday = { date_weekday_start ~ ("--" ~ date_weekday_end)? }
// One-line sub-commands, each terminated by `eol`. `!` makes these rules
// non-atomic, so implicit WHITESPACE is accepted between their parts even when
// they are reached from the atomic `file` rule.
date = !{ "DATE" ~ (date_fixed | date_expr | date_weekday) ~ eol }
bdate = !{ "BDATE" ~ bdatum ~ eol }
from = !{ "FROM" ~ datum ~ eol }
until = !{ "UNTIL" ~ datum ~ eol }
except = !{ "EXCEPT" ~ datum ~ eol }
// A parenthesized date and time; only referenced by `done`.
donedate = { "(" ~ datum ~ time ~ ")" }
// `DONE`, optionally followed by a date and/or a parenthesized date and time.
done = !{ "DONE" ~ datum? ~ donedate? ~ eol }
// The empty-line alternative must use `NEWLINE` rather than `eol` here.
// Otherwise, the parser gets into an endless loop at the `EOI`, since
// `indented_line*` can appear at the end of the file and would just
// repeatedly match the empty string.
// Either an empty line or a line that starts with whitespace.
indented_line = { NEWLINE | WHITESPACE ~ rest ~ eol }
// Zero or more indented lines.
description = { indented_line* }
// `TASK`: a title, any number of DATE/FROM/UNTIL/EXCEPT/DONE lines in any
// order, then a description.
task = {
"TASK"
~ title
~ (date | from | until | except | done)*
~ description
}
// `NOTE`: same shape as `task`.
note = {
"NOTE"
~ title
~ (date | from | until | except | done)*
~ description
}
// `BIRTHDAY`: a title, exactly one BDATE line, then a description.
birthday = {
"BIRTHDAY"
~ title
~ bdate
~ description
}
// Any top-level command.
command = { task | note | birthday }
// Entry rule: optional leading blank lines, then commands up to the end of the
// input. `$` (compound-atomic) turns off implicit whitespace for this rule and
// the rules it calls, except those marked non-atomic with `!`.
file = ${ SOI ~ NEWLINE* ~ command* ~ EOI }

View file

@ -1,102 +0,0 @@
use std::result;
use chrono::NaiveDate;
use crate::commands::{BirthdaySpec, Done, Spec};
use super::error::ParseError;
/// The result of classifying a single line of input.
#[derive(Debug)]
pub enum Line {
/// A line with no content.
Empty,
/// A line starting with a tab or a space; holds the whole line unchanged.
Indented(String),
/// `TASK` line; holds the trimmed text after the command name.
Task(String),
/// `NOTE` line; holds the trimmed text after the command name.
Note(String),
/// `BIRTHDAY` line; holds the trimmed text after the command name.
Birthday(String),
/// Date specification (not produced by any parser in this file yet).
Date(Spec),
/// Birthday date specification (not produced by any parser in this file yet).
BDate(BirthdaySpec),
/// `FROM` line with its date.
From(NaiveDate),
/// `UNTIL` line with its date.
Until(NaiveDate),
/// `EXCEPT` line with its date.
Except(NaiveDate),
/// Completion record (not produced by any parser in this file yet).
Done(Done),
}
/// Reasons a line can be rejected while it is being classified.
#[derive(Debug, thiserror::Error)]
pub enum Reason {
/// The line is non-empty, not indented, and contains no space separating a
/// command name from its body.
#[error("unknown format")]
UnknownFormat,
/// The command name (the text before the first space) is not recognized.
#[error("unknown command {0:?}")]
UnknownCommand(String),
/// The text after the command name is empty once trimmed.
#[error("empty command body")]
EmptyCommand,
}
/// Result type used in this module, with [`ParseError`] as the error.
type Result<T> = result::Result<T, ParseError>;
/// Classifies every line of `content`, in order.
///
/// The input is split with [`str::lines`] and each line is passed to
/// `parse_line` together with its line number.
///
/// # Errors
/// Returns the [`ParseError`] of the first line that fails to parse. Line
/// numbers in errors are 1-based, matching what editors display.
pub fn parse_lines(content: &str) -> Result<Vec<Line>> {
    // Count from 1: a plain `enumerate()` would report the first line as
    // "line 0" in error messages.
    (1..)
        .zip(content.lines())
        .map(|(line, text)| parse_line(line, text))
        .collect()
}
/// Classifies one line of input.
///
/// * An empty line becomes [`Line::Empty`].
/// * A line starting with a tab or a space becomes [`Line::Indented`].
/// * Anything else must be `NAME body`; the body is trimmed and handed to the
///   handler for `NAME`.
///
/// # Errors
/// Returns a [`ParseError`] tagged with `line` if the line has no command
/// name/body separator, the body is empty, the command is unknown, or the
/// command's own parser fails.
fn parse_line(line: usize, content: &str) -> Result<Line> {
    println!("Parsing line {:?}", content);

    if content.is_empty() {
        return Ok(Line::Empty);
    }
    if matches!(content.chars().next(), Some('\t') | Some(' ')) {
        return Ok(Line::Indented(content.to_string()));
    }

    let (name, body) = match parse_command(content) {
        Some(parts) => parts,
        None => return ParseError::pack(line, Reason::UnknownFormat),
    };

    let body = body.trim();
    if body.is_empty() {
        return ParseError::pack(line, Reason::EmptyCommand);
    }

    match name {
        "TASK" => Ok(Line::Task(body.to_string())),
        "NOTE" => Ok(Line::Note(body.to_string())),
        "BIRTHDAY" => Ok(Line::Birthday(body.to_string())),
        "DATE" => parse_date(body),
        "BDATE" => parse_bdate(body),
        "FROM" => parse_datum(body).map(Line::From),
        "UNTIL" => parse_datum(body).map(Line::Until),
        "EXCEPT" => parse_datum(body).map(Line::Except),
        "DONE" => parse_done(body),
        unknown => ParseError::pack(line, Reason::UnknownCommand(unknown.to_string())),
    }
}
/// Splits `line` at its first space into a command name and the remaining text.
///
/// The separating space belongs to neither part. Returns `None` if `line`
/// contains no space.
fn parse_command(line: &str) -> Option<(&str, &str)> {
    line.split_once(' ')
}
/// Placeholder for parsing the body of a `DATE` command.
///
/// Currently only prints `s` for debugging and returns [`Line::Empty`].
fn parse_date(s: &str) -> Result<Line> {
println!(" parsing date from {:?}", s);
Ok(Line::Empty) // TODO
}
/// Placeholder for parsing the body of a `BDATE` command.
///
/// Currently only prints `s` for debugging and returns [`Line::Empty`].
fn parse_bdate(s: &str) -> Result<Line> {
println!(" parsing bdate from {:?}", s);
Ok(Line::Empty) // TODO
}
/// Placeholder for parsing a date.
///
/// Currently only prints `s` for debugging and returns the fixed date
/// 2015-03-14 regardless of the input.
fn parse_datum(s: &str) -> Result<NaiveDate> {
println!(" parsing datum from {:?}", s);
Ok(NaiveDate::from_ymd(2015, 3, 14)) // TODO
}
/// Placeholder for parsing the body of a `DONE` command.
///
/// Currently only prints `s` for debugging and returns [`Line::Empty`].
fn parse_done(s: &str) -> Result<Line> {
println!(" parsing done from {:?}", s);
Ok(Line::Empty) // TODO
}

View file

@ -1,88 +0,0 @@
/// A cursor over a borrowed string that consumes it from front to back.
pub struct Parser<'d> {
/// The complete input.
data: &'d str,
/// Byte offset into `data` of the first unconsumed character.
index: usize,
}
/// Reasons a [`Parser`] operation can fail.
///
/// `rest` always holds a copy of the input that was still unconsumed when the
/// failure occurred.
#[derive(Debug, thiserror::Error)]
pub enum Reason {
/// The next character was not the expected one (or the input had ended).
#[error("expected character {expected:?} at {rest:?}")]
ExpectedChar { expected: char, rest: String },
/// The input did not start with the expected string.
#[error("expected string {expected:?} at {rest:?}")]
ExpectedStr { expected: String, rest: String },
/// The next character was not whitespace (or the input had ended).
#[error("expected whitespace at {rest:?}")]
ExpectedWhitespace { rest: String },
}
impl<'d> Parser<'d> {
    /// Creates a parser positioned at the start of `data`.
    pub fn new(data: &'d str) -> Self {
        Self { data, index: 0 }
    }

    /// Returns the part of the input that has not been consumed yet.
    fn rest(&self) -> &'d str {
        &self.data[self.index..]
    }

    /// Returns the next character without consuming it, or `None` at the end
    /// of the input.
    pub fn peek(&self) -> Option<char> {
        self.rest().chars().next()
    }

    /// Consumes and returns the next character, or returns `None` at the end
    /// of the input.
    pub fn take(&mut self) -> Option<char> {
        let c = self.peek()?;
        // Advance by the character's UTF-8 width so that `index` always stays
        // on a character boundary.
        self.index += c.len_utf8();
        Some(c)
    }

    /// Consumes the next character if it equals `c`.
    ///
    /// # Errors
    /// Returns [`Reason::ExpectedChar`] and consumes nothing if the next
    /// character differs or the input has ended.
    pub fn take_exact(&mut self, c: char) -> Result<(), Reason> {
        if self.peek() == Some(c) {
            self.take();
            Ok(())
        } else {
            Err(Reason::ExpectedChar {
                expected: c,
                rest: self.rest().to_string(),
            })
        }
    }

    /// Consumes zero or more whitespace characters.
    pub fn take_any_whitespace(&mut self) {
        while self.peek().map_or(false, char::is_whitespace) {
            self.take();
        }
    }

    /// Consumes one or more whitespace characters.
    ///
    /// # Errors
    /// Returns [`Reason::ExpectedWhitespace`] and consumes nothing if the next
    /// character is not whitespace or the input has ended.
    pub fn take_some_whitespace(&mut self) -> Result<(), Reason> {
        match self.peek() {
            Some(c) if c.is_whitespace() => {
                self.take();
                self.take_any_whitespace();
                Ok(())
            }
            _ => Err(Reason::ExpectedWhitespace {
                rest: self.rest().to_string(),
            }),
        }
    }

    /// Returns whether the unconsumed input starts with `pattern`.
    pub fn starts_with(&self, pattern: &str) -> bool {
        // Check the unconsumed input, not `self.data`: the whole input only
        // coincides with the rest while nothing has been consumed yet.
        self.rest().starts_with(pattern)
    }

    /// Consumes `pattern` if the unconsumed input starts with it.
    ///
    /// # Errors
    /// Returns [`Reason::ExpectedStr`] and consumes nothing otherwise.
    pub fn take_starting_with(&mut self, pattern: &str) -> Result<(), Reason> {
        if self.starts_with(pattern) {
            self.index += pattern.len();
            Ok(())
        } else {
            Err(Reason::ExpectedStr {
                expected: pattern.to_string(),
                rest: self.rest().to_string(),
            })
        }
    }
}