From d1d31abb046dec4359ceb894dd1d25971be72d8a Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 19 Jun 2019 22:57:32 +0000 Subject: [PATCH] Add incomplete xml parser Doesn't adhere to any standards. Instead, it parses what I thought was valid xml in a haphazard way. Might still be an interesting thing to look at, to see how one would use the parser library. --- .../compositeparse/parsers/QuotedString.java | 53 +++++++++++++ .../compositeparse/xml/AttributesParser.java | 49 ++++++++++++ .../plugh/compositeparse/xml/CommentNode.java | 27 +++++++ .../compositeparse/xml/CommentNodeParser.java | 20 +++++ .../plugh/compositeparse/xml/ElementNode.java | 76 +++++++++++++++++++ .../compositeparse/xml/ElementNodeParser.java | 51 +++++++++++++ .../de/plugh/compositeparse/xml/Node.java | 11 +++ .../plugh/compositeparse/xml/NodeParser.java | 19 +++++ .../de/plugh/compositeparse/xml/TextNode.java | 27 +++++++ .../compositeparse/xml/TextNodeParser.java | 17 +++++ 10 files changed, 350 insertions(+) create mode 100644 src/main/java/de/plugh/compositeparse/parsers/QuotedString.java create mode 100644 src/main/java/de/plugh/compositeparse/xml/AttributesParser.java create mode 100644 src/main/java/de/plugh/compositeparse/xml/CommentNode.java create mode 100644 src/main/java/de/plugh/compositeparse/xml/CommentNodeParser.java create mode 100644 src/main/java/de/plugh/compositeparse/xml/ElementNode.java create mode 100644 src/main/java/de/plugh/compositeparse/xml/ElementNodeParser.java create mode 100644 src/main/java/de/plugh/compositeparse/xml/Node.java create mode 100644 src/main/java/de/plugh/compositeparse/xml/NodeParser.java create mode 100644 src/main/java/de/plugh/compositeparse/xml/TextNode.java create mode 100644 src/main/java/de/plugh/compositeparse/xml/TextNodeParser.java diff --git a/src/main/java/de/plugh/compositeparse/parsers/QuotedString.java b/src/main/java/de/plugh/compositeparse/parsers/QuotedString.java new file mode 100644 index 0000000..012a9e0 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/parsers/QuotedString.java @@ -0,0 +1,53 @@ +package de.plugh.compositeparse.parsers; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; + +import java.util.List; +import java.util.function.Function; + +public class QuotedString implements Parser { + + private final String quoteChar; + + public QuotedString(String quoteChar) { + this.quoteChar = quoteChar; + } + + public QuotedString() { + this("\""); + } + + @Override + public Function, String> getNamingScheme() { + return Block.label("quoted string"); + } + + @Override + public String read(Block block) throws ParseException { + Literal.literally(quoteChar).parse(block); + + StringBuilder result = new StringBuilder(); + boolean escaped = false; + while (true) { + String s = block.getInput().read(1); + + if (s.isEmpty()) { + throw new ParseException(block); + } else if (escaped) { + result.append(s); + escaped = false; + } else if (s.equals(quoteChar)) { + break; + } else if (s.equals("\\")) { + escaped = true; + } else { + result.append(s); + } + } + + return result.toString(); + } + +} diff --git a/src/main/java/de/plugh/compositeparse/xml/AttributesParser.java b/src/main/java/de/plugh/compositeparse/xml/AttributesParser.java new file mode 100644 index 0000000..2ab1129 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/xml/AttributesParser.java @@ -0,0 +1,49 @@ +package de.plugh.compositeparse.xml; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; +import de.plugh.compositeparse.parsers.Expression; +import de.plugh.compositeparse.parsers.Literal; +import de.plugh.compositeparse.parsers.QuotedString; +import de.plugh.compositeparse.parsers.Repeat; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class AttributesParser implements Parser> { + + private static final String REGEX_NAME = "[:a-zA-Z_][:a-zA-Z0-9_.-]*"; + + @Override + public Map read(Block block) throws ParseException { + List attributes = new Repeat<>(block1 -> { + Repeat.atLeast(1, Literal.literally(" ")).parse(block1); + + String name = new Expression(REGEX_NAME).parse(block1); + Literal.literally("=").parse(block1); + String value = new QuotedString().parse(block1); + + return new Attribute(name, value); + }).parse(block); + + Map attributeMap = new HashMap<>(); + attributes.forEach(attribute -> attributeMap.put(attribute.name, attribute.value)); + + return attributeMap; + } + + private class Attribute { + + final String name; + final String value; + + Attribute(String name, String value) { + this.name = name; + this.value = value; + } + + } + +} diff --git a/src/main/java/de/plugh/compositeparse/xml/CommentNode.java b/src/main/java/de/plugh/compositeparse/xml/CommentNode.java new file mode 100644 index 0000000..5ebe6b2 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/xml/CommentNode.java @@ -0,0 +1,27 @@ +package de.plugh.compositeparse.xml; + +public class CommentNode extends Node { + + private final String content; + + public CommentNode(String content) { + this.content = content; + } + + @Override + public String prettyPrint(String indent, boolean newline) { + if (newline) { + return indent + "" + "\n"; + } else { + return indent + ""; + } + } + + @Override + public String toString() { + return "CommentNode{" + + "content='" + content + '\'' + + '}'; + } + +} diff --git a/src/main/java/de/plugh/compositeparse/xml/CommentNodeParser.java b/src/main/java/de/plugh/compositeparse/xml/CommentNodeParser.java new file mode 100644 index 0000000..fbd1b12 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/xml/CommentNodeParser.java @@ -0,0 +1,20 @@ +package de.plugh.compositeparse.xml; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; +import de.plugh.compositeparse.parsers.Expression; +import de.plugh.compositeparse.parsers.Literal; + +public class CommentNodeParser implements Parser { + + @Override + public CommentNode read(Block block) throws ParseException { + Literal.literally("").parse(block); + + return new CommentNode(content); + } + +} diff --git a/src/main/java/de/plugh/compositeparse/xml/ElementNode.java b/src/main/java/de/plugh/compositeparse/xml/ElementNode.java new file mode 100644 index 0000000..5ddf637 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/xml/ElementNode.java @@ -0,0 +1,76 @@ +package de.plugh.compositeparse.xml; + +import java.util.List; +import java.util.Map; + +public class ElementNode extends Node { + + private final String name; + private final Map attributes; + private final List subnodes; + + public ElementNode(String name, Map attributes, + List subnodes) { + this.name = name; + this.attributes = attributes; + this.subnodes = subnodes; + } + + @Override + public String prettyPrint(String indent, boolean newline) { + StringBuilder result = new StringBuilder() + .append(indent) + .append("<") + .append(name); + + attributes.forEach((s, s2) -> { + result + .append(" ") + .append(s) + .append("=\"") + .append(s2) // TODO escape + .append("\""); + }); + + if (subnodes.isEmpty()) { + result.append("/>"); + } else { + result.append(">"); + + if (subnodes.size() == 1 && subnodes.get(0) instanceof TextNode) { + result.append(subnodes.get(0).prettyPrint("", false)); + + result + .append(""); + } else { + result.append("\n"); + + subnodes.forEach(node -> result.append(node.prettyPrint(indent + " ", true))); + + result + .append(indent) + .append(""); + } + } + + if (newline) { + result.append("\n"); + } + + return result.toString(); + } + + @Override + public String toString() { + return "ElementNode{" + + "name='" + name + '\'' + + ", attributes=" + attributes + + ", subnodes=" + subnodes + + '}'; + } + +} diff --git a/src/main/java/de/plugh/compositeparse/xml/ElementNodeParser.java b/src/main/java/de/plugh/compositeparse/xml/ElementNodeParser.java new file mode 100644 index 0000000..fd5ff3f --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/xml/ElementNodeParser.java @@ -0,0 +1,51 @@ +package de.plugh.compositeparse.xml; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.Pair; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; +import de.plugh.compositeparse.parsers.Decision; +import de.plugh.compositeparse.parsers.Expression; +import de.plugh.compositeparse.parsers.Literal; +import de.plugh.compositeparse.parsers.Repeat; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class ElementNodeParser implements Parser { + + private static final String REGEX_NAME = "[:a-zA-Z_][:a-zA-Z0-9_.-]*"; + + @Override + public ElementNode read(Block block) throws ParseException { + Literal.literally("<").parse(block); + String name = new Expression(REGEX_NAME).parse(block); + + Map attributes = new AttributesParser().parse(block); + + List subnodes = new Decision>( + // First, try to find a close tag + new Pair<>( + block1 -> { + new Repeat<>(Literal.literally(" ")).parse(block1); + Literal.literally("/>").parse(block1); + return null; + }, + block1 -> new ArrayList<>() + ), + // If that fails, actually parse the contents and the rest + new Pair<>( + Literal.literally(">"), + block1 -> { + List foundSubnodes = new Repeat<>(new NodeParser()).parse(block); + Literal.literally("").parse(block1); + return foundSubnodes; + } + ) + ).parse(block); + + return new ElementNode(name, attributes, subnodes); + } + +} diff --git a/src/main/java/de/plugh/compositeparse/xml/Node.java b/src/main/java/de/plugh/compositeparse/xml/Node.java new file mode 100644 index 0000000..a03dc89 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/xml/Node.java @@ -0,0 +1,11 @@ +package de.plugh.compositeparse.xml; + +public abstract class Node { + + public abstract String prettyPrint(String indent, boolean newline); + + public String prettyPrint() { + return prettyPrint("", false); + } + +} diff --git a/src/main/java/de/plugh/compositeparse/xml/NodeParser.java b/src/main/java/de/plugh/compositeparse/xml/NodeParser.java new file mode 100644 index 0000000..1be4849 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/xml/NodeParser.java @@ -0,0 +1,19 @@ +package de.plugh.compositeparse.xml; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; +import de.plugh.compositeparse.parsers.Options; + +public class NodeParser implements Parser { + + @Override + public Node read(Block block) throws ParseException { + return new Options<>( + block1 -> new CommentNodeParser().parse(block1), + block1 -> new ElementNodeParser().parse(block1), + block1 -> new TextNodeParser().parse(block1) + ).parse(block); + } + +} diff --git a/src/main/java/de/plugh/compositeparse/xml/TextNode.java b/src/main/java/de/plugh/compositeparse/xml/TextNode.java new file mode 100644 index 0000000..01d2604 --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/xml/TextNode.java @@ -0,0 +1,27 @@ +package de.plugh.compositeparse.xml; + +public class TextNode extends Node { + + private final String text; + + public TextNode(String text) { + this.text = text; + } + + @Override + public String prettyPrint(String indent, boolean newline) { + if (newline) { + return indent + text.trim() + "\n"; + } else { + return indent + text.trim(); + } + } + + @Override + public String toString() { + return "TextNode{" + + "text='" + text + '\'' + + '}'; + } + +} diff --git a/src/main/java/de/plugh/compositeparse/xml/TextNodeParser.java b/src/main/java/de/plugh/compositeparse/xml/TextNodeParser.java new file mode 100644 index 0000000..498133b --- /dev/null +++ b/src/main/java/de/plugh/compositeparse/xml/TextNodeParser.java @@ -0,0 +1,17 @@ +package de.plugh.compositeparse.xml; + +import de.plugh.compositeparse.Block; +import de.plugh.compositeparse.ParseException; +import de.plugh.compositeparse.Parser; +import de.plugh.compositeparse.parsers.Expression; + +public class TextNodeParser implements Parser { + + @Override + public TextNode read(Block block) throws ParseException { + String text = new Expression("^[^<]+").parse(block); + + return new TextNode(text); + } + +}