diff --git a/.gitignore b/.gitignore index 1062418..4bfa550 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .idea/ *.iml +target/ diff --git a/pom.xml b/pom.xml index 50e30c8..8fa8ea2 100644 --- a/pom.xml +++ b/pom.xml @@ -8,4 +8,19 @@ composite-parse 1.0-SNAPSHOT + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + 11 + + + + + + diff --git a/src/main/java/de/plugh/compositeparse/Block.java b/src/main/java/de/plugh/compositeparse/Block.java new file mode 100644 index 0000000..5834950 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/Block.java @@ -0,0 +1,140 @@ +package de.plugh.compositeparse; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * A {@link Block} represents a single {@link Parser#parse(Block)} call. It helps implement back tracking and useful + * error messages. + *

+ * Whenever a {@link Parser}'s parse function is called, it creates a new {@link Block} and passes it along to all parse + * calls of its sub-parsers. It also registers the {@link Block} it created with its super-parser's {@link Block}. + *

+ * Each {@link Block} additionally saves the input's cursor position when it was created. This way, backtracking can be + * achieved by walking up the {@link Block} tree and setting the input's cursor position to a higher {@link Block}'s + * saved cursor position. + *

+ * In addition to that, each {@link Block} remembers a naming scheme that operates on the {@link Block}'s sub-blocks. + * This naming scheme is used to create a useful error message when a {@link ParseException} is thrown. + */ +public class Block { + + private static final int CONTEXT_LOOKBACK = 24; + private final int initialCursor; + private List subblocks; + private Function, String> namingScheme; + private StringInput input; + + private Block(Function, String> namingScheme, StringInput input) { + subblocks = new ArrayList<>(); + this.namingScheme = namingScheme; + + this.input = input; + initialCursor = input.getCursor(); + } + + /** + * Create a top-level block from an input {@link String}. + * + * @param text the input {@link String} + */ + public Block(String text) { + this(Block::alternative, new StringInput(text)); + } + + /** + * Create a new block as a sub-block of an existing block. + * + * @param superblock the block that this block is a child to + * @param namingScheme the naming scheme to use for {@link #getName()} + */ + Block(Block superblock, Function, String> namingScheme) { + this(namingScheme, superblock.input); + + superblock.register(this); + } + + /* + * Naming schemes + */ + + /** + * Use the name of the first sub-block (useful for sequential blocks). + * + * @param blocks a block's sub-blocks + * @return the first sub-block's name + */ + public static String first(List blocks) { + if (blocks.size() > 0) { + return blocks.get(0).getName(); + } else { + throw new BlockException("No subblocks found"); + } + } + + /** + * UCombine all sub-blocks' names using "or" (useful for optional parsers). 
+ * + * @param blocks a block's sub-blocks + * @return all sub-blocks' names, joined with "or" + */ + public static String alternative(List blocks) { + if (blocks.size() > 0) { + return blocks.stream().map(Block::getName).collect(Collectors.joining(" or ")); + } else { + throw new BlockException("No subblocks found"); + } + } + + /** + * Always return a constant name. + * + * @param name a block's sub-blocks + * @return the name + */ + public static Function, String> label(String name) { + return ignored -> name; + } + + private void register(Block subblock) { + subblocks.add(subblock); + } + + /** + * @return the input {@link StringInput} + */ + public StringInput getInput() { + return input; + } + + /** + * @return the name + */ + public String getName() { + return namingScheme.apply(subblocks); + } + + /** + * Reset the input {@link StringInput}'s cursor to this block's initial cursor position + */ + public void resetCursor() { + input.setCursor(initialCursor); + } + + /** + * @return a few characters from before this block's initial cursor position + */ + public String getContext() { + int currentCursor = input.getCursor(); + + input.setCursor(initialCursor); + String context = input.look(-CONTEXT_LOOKBACK); + + input.setCursor(currentCursor); + + return "..." + context; + } + +} diff --git a/src/main/java/de/plugh/compositeparse/BlockException.java b/src/main/java/de/plugh/compositeparse/BlockException.java new file mode 100644 index 0000000..2ca2e42 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/BlockException.java @@ -0,0 +1,18 @@ +package de.plugh.compositeparse; + +/** + * A {@link BlockException} is thrown when actions are executed on a malformed {@link Block} structure. + */ +@SuppressWarnings("serial") // This exception does not need to be serialised. +public class BlockException extends RuntimeException { + + /** + * Create a new {@link BlockException}. 
+ * + * @param reason what went wrong + */ + public BlockException(String reason) { + super(reason); + } + +} diff --git a/src/main/java/de/plugh/compositeparse/Pair.java b/src/main/java/de/plugh/compositeparse/Pair.java new file mode 100644 index 0000000..a9113fc --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/Pair.java @@ -0,0 +1,64 @@ +package de.plugh.compositeparse; + +/** + * A {@link Pair} is an immutable class representing a tuple with two elements. + * + * @param type of the first element + * @param type of the second element + */ +public class Pair { + + private final A first; + private final B second; + + /** + * Create a new {@link Pair} from two elements. + * + * @param first the first element + * @param second the second element + */ + public Pair(A first, B second) { + this.first = first; + this.second = second; + } + + /** + * @return the first element + */ + public A getFirst() { + return first; + } + + /** + * @return the second element + */ + public B getSecond() { + return second; + } + + @Override + public int hashCode() { + // Auto-generated by eclipse + final int prime = 31; + int result = 1; + result = prime * result + ((first == null) ? 0 : first.hashCode()); + result = prime * result + ((second == null) ? 
0 : second.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + // Auto-generated by eclipse, with small changes + if (this == obj) return true; + if (obj == null) return false; + if (getClass() != obj.getClass()) return false; + Pair other = (Pair) obj; + if (first == null) { + if (other.first != null) return false; + } else if (!first.equals(other.first)) return false; + if (second == null) { + return other.second == null; + } else return second.equals(other.second); + } + +} diff --git a/src/main/java/de/plugh/compositeparse/ParseException.java b/src/main/java/de/plugh/compositeparse/ParseException.java new file mode 100644 index 0000000..a977b67 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/ParseException.java @@ -0,0 +1,46 @@ +package de.plugh.compositeparse; + +/** + * This exception is thrown when a parser encounters incorrect input. It contains a bit of information about the + * failure: + *

+ * The name of the parser that failed. + *

+ * A few characters of context, ending at the position where the parser failed. + */ +@SuppressWarnings("serial") // This exception does not need to be serialised. +public class ParseException extends Exception { + + private final String name; + private final String context; + + /** + * Create a new {@link ParseException} at a block. + * + * @param block the block to take the extra information from + */ + public ParseException(Block block) { + name = block.getName(); + context = block.getContext(); + } + + /** + * @return the name + */ + public String getName() { + return name; + } + + /** + * @return the context + */ + public String getContext() { + return context; + } + + @Override + public String getMessage() { + return getContext() + "<- expected: " + getName(); + } + +} diff --git a/src/main/java/de/plugh/compositeparse/Parser.java b/src/main/java/de/plugh/compositeparse/Parser.java new file mode 100644 index 0000000..0057780 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/Parser.java @@ -0,0 +1,111 @@ +package de.plugh.compositeparse; + +import java.util.List; +import java.util.function.Function; + +/* + * THIS LIBRARY + * + * This parsing system was inspired by haskell's megaparsec library and aims to + * somewhat recreate the feel and flexibility of megaparsec in java. + * + * The main concept of this library is that parsers can be combined into bigger + * parsers through either sequential parsing or by passing parsers into + * constructors of other parsers. This happens during parsing and thus can + * depend on previously parsed input and/or the current state of the program. + * + * For combining Parsers to work properly, all Parsers are immutable. + * + * BLOCKS + * + * While parsing, the library builds up a structure of Blocks representing the + * structure of the Parsers which have already been tried. This structure serves + * a dual purpose: + * + * 1) Each block stores the StringReader's cursor position when it is created. 
+ * This allows for the Parsers to backtrack, should a branch fail. + * + * 2) Each block holds sub-blocks created by the parsers it consists of. When a + * ParseException is thrown, this information is used to figure out which syntax + * was expected at the point of failure. This allows for descriptive error + * messages which can be very useful in an interactive environment. + * + * A structure like this could not be constructed at compile time or cached, + * because it depends on the input that is being parsed: Depending on the input + * already parsed, a parser can decide to use different subparsers. Because of + * this, the structure is created while parsing is occurring. + * + * For more info, see the documentation for Block. + * + * COMBINING PARSERS + * + * The main method of combining parsers is by sequentially calling them one + * after the other. This is also the easiest way to collect results from the + * parsers. Loops and conditionals can also be used, as can previously parsed + * input. + * + * In some situations, there are multiple possible "branches" a parser could + * take. In those cases, the Options parser can try multiple different parsers, + * backtracking when one of them fails to try the next one. + * + * The Default parser can provide a default value in case a parser fails. + * + * The Repeat parser can repeat a parser a certain amount of times, with an + * optional separator parser in-between. + * + * One can also manually catch the ParseExceptions. In that case, the cursor + * position is reset (as if the parser that threw an exception never parsed any + * input) and another Parser can be used. + */ + +/** + * A {@link Parser} knows how to parse a specific bit of information. + *

+ * {@link Parser}s are usually created by combining multiple smaller parsers. For more information, see the introductory + * comment in the source file of this class. + * + * @param return type of the parser + */ +@FunctionalInterface +public interface Parser { + + /** + * @return the parser's naming scheme + */ + default Function, String> getNamingScheme() { + return Block::first; + } + + /** + * Parse a specific bit of information from the input. + *

+ * Do not override this method unless you know what you're doing! + *

+ * This is the function you usually want to call. + * + * @param block the calling parser's {@link Block} + * @return the information it parsed + * @throws ParseException if the input format was incorrect + */ + default T parse(Block block) throws ParseException { + Block subblock = new Block(block, getNamingScheme()); + try { + return read(subblock); + } catch (ParseException e) { + subblock.resetCursor(); + throw e; + } + } + + /** + * The implementation regarding how to parse the specific bit of information. + *

+ * This is the function you usually want to overwrite. + * + * @param block the calling parser's {@link Block} + * @return the information it parsed + * @throws ParseException if the input format was incorrect + */ + T read(Block block) throws ParseException; + +} diff --git a/src/main/java/de/plugh/compositeparse/StringInput.java b/src/main/java/de/plugh/compositeparse/StringInput.java new file mode 100644 index 0000000..3e1e7a9 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/StringInput.java @@ -0,0 +1,128 @@ +package de.plugh.compositeparse; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * The {@link StringInput} consists of a {@link String} and a cursor position on that {@link String}. + *

+ * It provides a convenient way to view a {@link String}, in addition to a few useful functions. + */ +public class StringInput { + + private String string; + private int cursor; + + /** + * Create a new {@link StringInput} over a {@link String}. + * + * @param string the content of the reader + */ + public StringInput(String string) { + this.string = string; + this.cursor = 0; + } + + private int clampCursor(int position, int delta) { + /* + * A cursor can have position string.length() because its position is + * interpreted as between the characters, not on the characters, similar to + * python's slicing. + * + * Examples, using "|" as the cursor position and "aabc" as the string: + * + * |aabc - The cursor is in position 0. + * + * aab|c - The cursor is in position 3. + * + * aabc| - The cursor is in position 4. + */ + + /* + * This prevents an overflow/underflow if somebody tries to look(), read() or + * move() with Integer.MIN_VALUE or Integer.MAX_VALUE (like I did while testing + * this). + */ + int minDelta = -position; + int maxDelta = string.length() - position; + return position + Math.max(minDelta, Math.min(maxDelta, delta)); + } + + /** + * @return the cursor position + */ + public int getCursor() { + return cursor; + } + + /** + * @param cursor the cursor position + */ + public void setCursor(int cursor) { + this.cursor = clampCursor(cursor, 0); + } + + /** + * Move the cursor a certain amount of characters relative to the cursor's current position. A positive amount moves + * forward (towards the end of the string), a negative moves backward (towards the beginning of the string). + * + * @param amount how many characters to move the cursor by + */ + public void move(int amount) { + setCursor(clampCursor(getCursor(), amount)); + } + + /** + * Read a certain amount of characters relative to the cursor's current position. A positive amount looks forward + * (towards the end of the string), a negative looks backward (towards the beginning of the string). 
+ * + * @param amount how many characters to look up + * @return the specified section of the string + */ + public String look(int amount) { + if (amount >= 0) { + return string.substring(cursor, clampCursor(cursor, amount)); + } else { + return string.substring(clampCursor(cursor, amount), cursor); + } + } + + /** + * Combines a {@link #look(int)} and a {@link #move(int)} operation. + * + * @param amount how many characters to look up and move + * @return the specified section of the string + */ + public String read(int amount) { + String result = look(amount); + move(amount); + return result; + } + + /** + * Match and {@link #read(int)} the regex passed, starting at the current cursor position. + *

+ * This returns everything from the current cursor position to the end of the match that was found, so make sure to + * anchor your regexes (using ^) unless you need all of that. + * + * @param regex the regular expression to use + * @return the string matched (or null, if no match was found) + */ + public String match(String regex) { + Pattern pattern = Pattern.compile(regex); + Matcher matcher = pattern.matcher(string.substring(cursor)); + if (matcher.find()) { + return read(matcher.end()); + } else { + return null; + } + } + + /** + * @return whether the whole input was consumed + */ + public boolean complete() { + return cursor >= string.length(); + } + +} diff --git a/src/main/java/de/plugh/compositeparse/parsers/BoundedInteger.java b/src/main/java/de/plugh/compositeparse/parsers/BoundedInteger.java new file mode 100644 index 0000000..9362179 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/parsers/BoundedInteger.java @@ -0,0 +1,122 @@ +package de.plugh.compositeparse.parsers; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; + +import java.util.List; +import java.util.function.Function; + +/** + * Parses an integer between a lower and upper bound. + */ +public class BoundedInteger implements Parser { + + /* + * @formatter:off + * + * Regex breakdown: + * + * ^ - Start at the cursor's current position + * [+-]? - Optionally match a sign in the beginning of the string + * \\d+ - Match as many digits as you can find, at least one + * + * @formatter:on + */ + private static final String INTEGER_REGEX = "^[+-]?\\d+"; + + private final int min; + private final int max; + + /** + * Parse an integer between min and max. The integer is of the format {@code [+-]}. 
+ * + * @param min minimum value of the integer + * @param max maximum value of the integer + * @see #between(int, int) + */ + public BoundedInteger(int min, int max) { + this.min = min; + this.max = max; + } + + /** + * Parse an integer. The integer is of the format {@code [+-]}. + */ + public BoundedInteger() { + this(Integer.MIN_VALUE, Integer.MAX_VALUE); + } + + /** + * Parse an integer >= min. + * + * @param min minimun size of the integer + * @return the {@link BoundedInteger} + */ + public static BoundedInteger atLeast(int min) { + return new BoundedInteger(min, Integer.MAX_VALUE); + } + + /** + * Parse an integer <= max. + * + * @param max maximum size of the integer + * @return the {@link BoundedInteger} + */ + public static BoundedInteger atMost(int max) { + return new BoundedInteger(Integer.MIN_VALUE, max); + } + + /** + * Parse an integer with min <= integer <= max. + * + * @param min minimun size of the integer + * @param max maximum size of the integer + * @return the {@link BoundedInteger} + */ + + public static BoundedInteger between(int min, int max) { + return new BoundedInteger(min, max); + } + + @Override + public Function, String> getNamingScheme() { + String description = "integer"; + + if (min > Integer.MIN_VALUE && max < Integer.MAX_VALUE) { + description += " (between " + min + " and " + max + ")"; + } else if (min > Integer.MIN_VALUE) { + description += " (at least " + min + ")"; + } else if (max < Integer.MAX_VALUE) { + description += " (at most " + max + ")"; + } + + return Block.label(description); + } + + @Override + public Integer read(Block block) throws ParseException { + String integerString; + try { + integerString = new Expression(INTEGER_REGEX).parse(block); + } catch (ParseException e) { + // Mask the regex parse exception with our own (the error messages are better + // this way) + throw new ParseException(block); + } + + int integer; + try { + integer = Integer.parseInt(integerString); + } catch (NumberFormatException e) { + 
throw new ParseException(block); + } + + if (integer < min || integer > max) { + throw new ParseException(block); + } + + return integer; + } + +} diff --git a/src/main/java/de/plugh/compositeparse/parsers/Constant.java b/src/main/java/de/plugh/compositeparse/parsers/Constant.java new file mode 100644 index 0000000..821a050 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/parsers/Constant.java @@ -0,0 +1,38 @@ +package de.plugh.compositeparse.parsers; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; + +import java.util.List; +import java.util.function.Function; + +/** + * Consumes no input and returns a constant value. + * + * @param return type of the parser + */ +public class Constant implements Parser { + + private T value; + + /** + * Create a new {@link Constant} parser. + * + * @param value the value to return + */ + public Constant(T value) { + this.value = value; + } + + @Override + public Function, String> getNamingScheme() { + return Block.label("constant"); + } + + @Override + public T read(Block block) throws ParseException { + return value; + } + +} diff --git a/src/main/java/de/plugh/compositeparse/parsers/Decision.java b/src/main/java/de/plugh/compositeparse/parsers/Decision.java new file mode 100644 index 0000000..65973ad --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/parsers/Decision.java @@ -0,0 +1,66 @@ +package de.plugh.compositeparse.parsers; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.Pair; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; + +/** + * Decide which parser to use from a list of "body" parsers and their "head"s. + *

+ * If a "head" parses successfully, the corresponding "body" parser must be successful, otherwise a parse exception is + * raised. If no "head" is successful, the {@link Decision} parser fails too. + * + * @param return type of the parser + */ +public class Decision implements Parser { + + private final List, Parser>> pairs; + + /** + * Create a new {@link Decision} parser from all passed arguments, which are "head"-"body" pairs. + * + * @param pairs multiple "head"-"body" pairs + */ + @SafeVarargs + public Decision(Pair, Parser>... pairs) { + this.pairs = new ArrayList<>(); + for (Pair, Parser> pair : pairs) { + this.pairs.add(pair); + } + } + + /** + * Create a new {@link Decision} parser from a list of "head"-"body" pairs. + * + * @param pairs a list of "head"-"body" pairs + */ + public Decision(List, Parser>> pairs) { + this.pairs = new ArrayList<>(pairs); + } + + @Override + public Function, String> getNamingScheme() { + return Block::alternative; + } + + @Override + public T read(Block block) throws ParseException { + for (Pair, Parser> pair : pairs) { + try { + pair.getFirst().parse(block); + } catch (ParseException e) { + continue; + } + + return pair.getSecond().parse(block); + } + + throw new ParseException(block); + } + +} diff --git a/src/main/java/de/plugh/compositeparse/parsers/Default.java b/src/main/java/de/plugh/compositeparse/parsers/Default.java new file mode 100644 index 0000000..4b8d6a3 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/parsers/Default.java @@ -0,0 +1,57 @@ +package de.plugh.compositeparse.parsers; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; + +import java.util.List; +import java.util.function.Function; + +/** + * Try a parser and return its value, or a default value if the parser fails. + *

+ * This parser will never throw a {@link ParseException}. + * + * @param return type of the parser + */ +public class Default implements Parser { + + private final T value; + private final Parser parser; + + /** + * Create a new {@link Default} parser. + * + * @param value the value to return + * @param parser the parser to try + */ + public Default(T value, Parser parser) { + this.value = value; + this.parser = parser; + } + + /** + * Create a new {@link Default} parser. + * + * @param parser the parser to try + */ + public Default(Parser parser) { + this(null, parser); + } + + @Override + public Function, String> getNamingScheme() { + // There is always a block 0 because of Default's read() implementation. + return blocks -> blocks.get(0).getName(); + } + + @Override + public T read(Block block) { + try { + return parser.parse(block); + } catch (ParseException ignored) { + return value; + } + } + +} diff --git a/src/main/java/de/plugh/compositeparse/parsers/EndOfInput.java b/src/main/java/de/plugh/compositeparse/parsers/EndOfInput.java new file mode 100644 index 0000000..baeb2f5 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/parsers/EndOfInput.java @@ -0,0 +1,49 @@ +package de.plugh.compositeparse.parsers; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; + +import java.util.List; +import java.util.function.Function; + +/** + * Returns a value when the end of the input has been reached, fails otherwise. + * + * @param return type of the parser + */ +public class EndOfInput implements Parser { + + private final T value; + + /** + * Create a new {@link EndOfInput} parser. + * + * @param value the value to return + */ + public EndOfInput(T value) { + this.value = value; + } + + /** + * Create a new {@link EndOfInput} parser that always returns {@code null}. 
+ */ + public EndOfInput() { + this(null); + } + + @Override + public Function, String> getNamingScheme() { + return Block.label("end of input"); + } + + @Override + public T read(Block block) throws ParseException { + if (block.getInput().complete()) { + return value; + } else { + throw new ParseException(block); + } + } + +} diff --git a/src/main/java/de/plugh/compositeparse/parsers/Expression.java b/src/main/java/de/plugh/compositeparse/parsers/Expression.java new file mode 100644 index 0000000..f39d0d0 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/parsers/Expression.java @@ -0,0 +1,45 @@ +package de.plugh.compositeparse.parsers; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; +import de.plugh.compositeparse.StringInput; + +import java.util.List; +import java.util.function.Function; + +/** + * Parse a regular expression from the input. + */ +public class Expression implements Parser { + + private final String regex; + + /** + * Create a new {@link Expression} parser. 
+ * + * @param regex the regular expression to use + * @see StringInput#match(String) + */ + public Expression(String regex) { + this.regex = regex; + } + + @Override + public Function, String> getNamingScheme() { + return Block.label("regex \"" + regex + "\""); + } + + @Override + public String read(Block block) throws ParseException { + StringInput input = block.getInput(); + String result = input.match(regex); + + if (result == null) { + throw new ParseException(block); + } + + return result; + } + +} diff --git a/src/main/java/de/plugh/compositeparse/parsers/Label.java b/src/main/java/de/plugh/compositeparse/parsers/Label.java new file mode 100644 index 0000000..7d29eec --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/parsers/Label.java @@ -0,0 +1,43 @@ +package de.plugh.compositeparse.parsers; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; + +import java.util.List; +import java.util.function.Function; + +/** + * Attaches a name to a parser. + *

+ * This can be useful for naming lambda parsers, or renaming existing parsers. + * + * @param return type of the parser + */ +public class Label implements Parser { + + private final String name; + private final Parser parser; + + /** + * Create a new {@link Label} parser. + * + * @param name the parser's new name + * @param parser the parser to rename + */ + public Label(String name, Parser parser) { + this.name = name; + this.parser = parser; + } + + @Override + public Function, String> getNamingScheme() { + return Block.label(name); + } + + @Override + public T read(Block block) throws ParseException { + return parser.parse(block); + } + +} diff --git a/src/main/java/de/plugh/compositeparse/parsers/Literal.java b/src/main/java/de/plugh/compositeparse/parsers/Literal.java new file mode 100644 index 0000000..25d06ee --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/parsers/Literal.java @@ -0,0 +1,85 @@ +package de.plugh.compositeparse.parsers; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; +import de.plugh.compositeparse.StringInput; + +import java.util.List; +import java.util.function.Function; + +/** + * Parses a string literal from the input (case sensitive). + *

+ * For more flexible input, see {@link Expression}. + * + * @param return type of the parser + */ +public class Literal implements Parser { + + /** + * A single space {@code " "} + */ + public static final Literal SPACE = new Literal<>(" "); + + /** + * A single comma {@code ","} + */ + public static final Literal COMMA = new Literal<>(","); + + /** + * A single semicolon {@code ";"} + */ + public static final Literal SEMICOLON = new Literal<>(";"); + + private final String literal; + private final T value; + + /** + * Consume a string literal from the input and return a value if successful. + * + * @param literal the literal to consume from the input + * @param value the value to return + */ + public Literal(String literal, T value) { + this.literal = literal; + this.value = value; + } + + /** + * Consume a string literal from the input + * + * @param literal the literal to consume from the input + */ + public Literal(String literal) { + this(literal, null); + } + + /** + * Create a {@link Literal} that returns (literally) the literal it consumes. + *

+ * Shorthand for new Literal(literal, literal) + * + * @param literal the literal to consume from the input + * @return the {@link Literal} + */ + public static Literal literally(String literal) { + return new Literal<>(literal, literal); + } + + @Override + public Function, String> getNamingScheme() { + return Block.label("\"" + literal + "\""); + } + + @Override + public T read(Block block) throws ParseException { + StringInput input = block.getInput(); + if (input.read(literal.length()).equals(literal)) { + return value; + } else { + throw new ParseException(block); + } + } + +} diff --git a/src/main/java/de/plugh/compositeparse/parsers/Options.java b/src/main/java/de/plugh/compositeparse/parsers/Options.java new file mode 100644 index 0000000..306eb21 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/parsers/Options.java @@ -0,0 +1,59 @@ +package de.plugh.compositeparse.parsers; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; + +/** + * Try a few parsers in order (backtracking if a parser fails) and return the result of the first successful parser. + * + * @param return type of the parser + */ +public class Options implements Parser { + + private final List> parsers; + + /** + * Create a new {@link Options} from all passed parsers. + * + * @param parsers the parsers to try. + */ + @SafeVarargs + public Options(Parser... parsers) { + this.parsers = new ArrayList<>(); + for (Parser parser : parsers) { + this.parsers.add(parser); + } + } + + /** + * Create a new {@link Options} from a list of parsers. + * + * @param parsers the parsers to try. 
+ */ + public Options(List> parsers) { + this.parsers = new ArrayList<>(parsers); + } + + @Override + public Function, String> getNamingScheme() { + return Block::alternative; + } + + @Override + public T read(Block block) throws ParseException { + for (Parser parser : parsers) { + try { + return parser.parse(block); + } catch (ParseException ignored) { + } + } + + throw new ParseException(block); + } + +} diff --git a/src/main/java/de/plugh/compositeparse/parsers/Repeat.java b/src/main/java/de/plugh/compositeparse/parsers/Repeat.java new file mode 100644 index 0000000..0f242d5 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/parsers/Repeat.java @@ -0,0 +1,209 @@ +package de.plugh.compositeparse.parsers; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; + +import java.util.ArrayList; +import java.util.List; + +/** + * Repeats a parser a certain amount of times and compiles the results in a {@link List}. + *

+ * Use another parser in-between the main parser to parse separators (e. g. commas).
+ *
+ * @param <T> return type of the parser
+ */
+public class Repeat<T> implements Parser<List<T>> {
+
+    private final Parser<?> separator;
+    private final Parser<T> parser;
+    private final int from;
+    private final int to;
+
+    /**
+     * Create a new {@link Repeat} parser.
+     *
+     * @param from minimum amount of repeats
+     * @param to maximum amount of repeats
+     * @param separator the parser that separates the main parser
+     * @param parser the parser to repeatedly use
+     */
+    public Repeat(int from, int to, Parser<?> separator, Parser<T> parser) {
+        // Just in case somebody enters incorrect values, attempt to interpret them as
+        // best as possible.
+        this.from = Math.max(0, Math.min(from, to));
+        this.to = Math.max(0, Math.max(from, to));
+
+        this.separator = separator;
+        this.parser = parser;
+    }
+
+    /**
+     * Create a new {@link Repeat} parser (without separators).
+     *
+     * @param from minimum amount of repeats
+     * @param to maximum amount of repeats
+     * @param parser the parser to repeatedly use
+     */
+    public Repeat(int from, int to, Parser<T> parser) {
+        this(from, to, null, parser);
+    }
+
+    /**
+     * Create a new {@link Repeat} parser that repeats zero or more times.
+     *
+     * @param separator the parser that separates the main parser
+     * @param parser the parser to repeatedly use
+     */
+    public Repeat(Parser<?> separator, Parser<T> parser) {
+        this(0, Integer.MAX_VALUE, separator, parser);
+    }
+
+    /**
+     * Create a new {@link Repeat} parser that repeats zero or more times (without separator).
+     *
+     * @param parser the parser to repeatedly use
+     */
+    public Repeat(Parser<T> parser) {
+        this(null, parser);
+    }
+
+    /**
+     * Repeat a parser at least {@code amount} times.
+     *
+     * @param <T> the {@link Repeat}'s type
+     * @param amount how often to repeat the parser
+     * @param separator the parser that separates the main parser
+     * @param parser the parser to repeatedly use
+     * @return the {@link Repeat}
+     */
+    public static <T> Repeat<T> atLeast(int amount, Parser<?> separator, Parser<T> parser) {
+        return new Repeat<>(amount, Integer.MAX_VALUE, separator, parser);
+    }
+
+    /**
+     * Repeat a parser at least {@code amount} times (without separator).
+     *
+     * @param <T> the {@link Repeat}'s type
+     * @param amount how often to repeat the parser
+     * @param parser the parser to repeatedly use
+     * @return the {@link Repeat}
+     */
+    public static <T> Repeat<T> atLeast(int amount, Parser<T> parser) {
+        return new Repeat<>(amount, Integer.MAX_VALUE, parser);
+    }
+
+    /**
+     * Repeat a parser at most {@code amount} times.
+     *
+     * @param <T> the {@link Repeat}'s type
+     * @param amount how often to repeat the parser
+     * @param separator the parser that separates the main parser
+     * @param parser the parser to repeatedly use
+     * @return the {@link Repeat}
+     */
+    public static <T> Repeat<T> atMost(int amount, Parser<?> separator, Parser<T> parser) {
+        return new Repeat<>(0, amount, separator, parser);
+    }
+
+    /**
+     * Repeat a parser at most {@code amount} times (without separator).
+     *
+     * @param <T> the {@link Repeat}'s type
+     * @param amount how often to repeat the parser
+     * @param parser the parser to repeatedly use
+     * @return the {@link Repeat}
+     */
+    public static <T> Repeat<T> atMost(int amount, Parser<T> parser) {
+        return new Repeat<>(0, amount, parser);
+    }
+
+    /**
+     * Repeat a parser exactly {@code amount} times.
+     *
+     * @param <T> the {@link Repeat}'s type
+     * @param amount how often to repeat the parser
+     * @param separator the parser that separates the main parser
+     * @param parser the parser to repeatedly use
+     * @return the {@link Repeat}
+     */
+    public static <T> Repeat<T> exactly(int amount, Parser<?> separator, Parser<T> parser) {
+        return new Repeat<>(amount, amount, separator, parser);
+    }
+
+    /**
+     * Repeat a parser exactly {@code amount} times (without separator).
+     *
+     * @param <T> the {@link Repeat}'s type
+     * @param amount how often to repeat the parser
+     * @param parser the parser to repeatedly use
+     * @return the {@link Repeat}
+     */
+    public static <T> Repeat<T> exactly(int amount, Parser<T> parser) {
+        return new Repeat<>(amount, amount, parser);
+    }
+
+    @Override
+    public List<T> read(Block block) throws ParseException {
+        List<T> results = new ArrayList<>();
+
+        if (from > 0) {
+            // The first element, not preceded by a "between"
+            results.add(parser.parse(block));
+
+            // All other elements, preceded by a "between"
+            results.addAll(parseRequired(block, from - 1));
+            results.addAll(parseOptional(block, to - from));
+        } else if (to > 0) {
+            // The first element, not preceded by a "between"
+            try {
+                results.add(parser.parse(block));
+            } catch (ParseException e) {
+                return results; // empty list
+            }
+
+            // All other elements, preceded by a "between"
+            results.addAll(parseOptional(block, to - 1)); // from == 0
+        }
+
+        return results;
+    }
+
+    private void parseBetween(Block block) throws ParseException {
+        if (separator != null) {
+            separator.parse(block);
+        }
+    }
+
+    private List<T> parseRequired(Block block, int amount) throws ParseException {
+        List<T> results = new ArrayList<>();
+
+        for (int i = 0; i < amount; i++) {
+            parseBetween(block);
+            results.add(parser.parse(block));
+        }
+
+        return results;
+    }
+
+    private List<T> parseOptional(Block block, int amount) {
+        List<T> results = new ArrayList<>();
+
+        for (int i = 0; i < amount; i++) {
+            try {
+                T result = ((Parser<T>) block2 -> {
+                    parseBetween(block2);
+                    return parser.parse(block2);
+                }).parse(block);
+
+                results.add(result);
+            } catch (ParseException e) {
+                break;
+            }
+        }
+
+        return results;
+    }
+
+}