Add incomplete xml parser

Doesn't adhere to any standards. Instead, it parses what I thought was
valid xml in a haphazard way. Might still be an interesting thing to
look at, to see how one would use the parser library.
This commit is contained in:
Joscha 2019-06-19 22:57:32 +00:00
parent b864824308
commit d1d31abb04
10 changed files with 350 additions and 0 deletions

View file

@ -0,0 +1,53 @@
package de.plugh.compositeparse.parsers;
import de.plugh.compositeparse.Block;
import de.plugh.compositeparse.ParseException;
import de.plugh.compositeparse.Parser;
import java.util.List;
import java.util.function.Function;
/**
 * Parser for a string enclosed in a pair of quote tokens.
 *
 * <p>A backslash makes the following chunk literal: the backslash itself is
 * dropped and whatever is read next is copied to the result unchanged. No
 * escape sequences are translated.
 */
public class QuotedString implements Parser<String> {

    /** Token that opens and closes the string. */
    private final String quoteChar;

    /**
     * Creates a parser that uses the given token as the delimiter.
     *
     * @param quoteChar the opening and closing delimiter
     */
    public QuotedString(String quoteChar) {
        this.quoteChar = quoteChar;
    }

    /** Creates a parser for strings delimited by double quotes. */
    public QuotedString() {
        this("\"");
    }

    @Override
    public Function<List<Block>, String> getNamingScheme() {
        return Block.label("quoted string");
    }

    /**
     * Consumes the opening delimiter, then collects input until an unescaped
     * closing delimiter is read.
     *
     * @param block the block to parse in
     * @return the text between the delimiters, with escaping backslashes removed
     * @throws ParseException if the opening delimiter does not parse, or an
     *         empty chunk is read before the closing delimiter is seen
     */
    @Override
    public String read(Block block) throws ParseException {
        Literal.literally(quoteChar).parse(block);

        StringBuilder content = new StringBuilder();
        boolean afterBackslash = false;
        for (;;) {
            // Requests one unit of input at a time (presumably one character — confirm
            // against the input's read contract).
            String next = block.getInput().read(1);
            if (next.isEmpty()) {
                // Nothing was read before the string was closed.
                throw new ParseException(block);
            }
            if (afterBackslash) {
                // Escaped chunk: copy verbatim, even if it is the delimiter or a backslash.
                content.append(next);
                afterBackslash = false;
                continue;
            }
            if (next.equals(quoteChar)) {
                return content.toString();
            }
            if (next.equals("\\")) {
                afterBackslash = true;
                continue;
            }
            content.append(next);
        }
    }
}

View file

@ -0,0 +1,49 @@
package de.plugh.compositeparse.xml;
import de.plugh.compositeparse.Block;
import de.plugh.compositeparse.ParseException;
import de.plugh.compositeparse.Parser;
import de.plugh.compositeparse.parsers.Expression;
import de.plugh.compositeparse.parsers.Literal;
import de.plugh.compositeparse.parsers.QuotedString;
import de.plugh.compositeparse.parsers.Repeat;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Parses zero or more {@code name="value"} attributes, each preceded by at
 * least one space, into a map from attribute name to attribute value.
 */
public class AttributesParser implements Parser<Map<String, String>> {

    /** Regex for an attribute name: a letter, {@code _} or {@code :}, then letters, digits, {@code _ . - :}. */
    private static final String REGEX_NAME = "[:a-zA-Z_][:a-zA-Z0-9_.-]*";

    /**
     * Reads as many attributes as the repetition yields and collects them.
     *
     * @param block the block to parse in
     * @return a map of attribute names to values; if a name occurs more than
     *         once, the value parsed last is the one kept
     * @throws ParseException if the underlying parsers propagate a failure
     */
    @Override
    public Map<String, String> read(Block block) throws ParseException {
        List<Attribute> attributes = new Repeat<>(block1 -> {
            // Each attribute must be separated from what came before by one or more spaces.
            Repeat.atLeast(1, Literal.literally(" ")).parse(block1);
            String name = new Expression(REGEX_NAME).parse(block1);
            Literal.literally("=").parse(block1);
            String value = new QuotedString().parse(block1);
            return new Attribute(name, value);
        }).parse(block);

        Map<String, String> attributeMap = new HashMap<>();
        for (Attribute attribute : attributes) {
            attributeMap.put(attribute.name, attribute.value);
        }
        return attributeMap;
    }

    /**
     * Plain name/value holder for one parsed attribute.
     *
     * <p>Declared {@code static} because it never touches the enclosing
     * parser; a non-static inner class would carry a hidden reference to it.
     */
    private static final class Attribute {

        final String name;
        final String value;

        Attribute(String name, String value) {
            this.name = name;
            this.value = value;
        }
    }
}

View file

@ -0,0 +1,27 @@
package de.plugh.compositeparse.xml;
/**
 * A node holding the text of an XML comment (the part between
 * {@code <!--} and {@code -->}).
 */
public class CommentNode extends Node {

    /** Comment text without the surrounding markers. */
    private final String content;

    /**
     * @param content the comment text, without the {@code <!--} and {@code -->} markers
     */
    public CommentNode(String content) {
        this.content = content;
    }

    /**
     * Renders the comment with its markers restored.
     *
     * @param indent  text placed in front of the comment
     * @param newline whether to terminate the output with a line feed
     * @return the rendered comment
     */
    @Override
    public String prettyPrint(String indent, boolean newline) {
        String rendered = indent + "<!--" + content + "-->";
        return newline ? rendered + "\n" : rendered;
    }

    @Override
    public String toString() {
        return "CommentNode{content='" + content + "'}";
    }
}

View file

@ -0,0 +1,20 @@
package de.plugh.compositeparse.xml;
import de.plugh.compositeparse.Block;
import de.plugh.compositeparse.ParseException;
import de.plugh.compositeparse.Parser;
import de.plugh.compositeparse.parsers.Expression;
import de.plugh.compositeparse.parsers.Literal;
/**
 * Parses a comment of the form {@code <!-- ... -->} into a {@link CommentNode}.
 */
public class CommentNodeParser implements Parser<CommentNode> {

    /**
     * Reads the opening marker, the comment text and the closing marker.
     *
     * @param block the block to parse in
     * @return a node containing the text between the markers
     * @throws ParseException if the underlying parsers propagate a failure
     */
    @Override
    public CommentNode read(Block block) throws ParseException {
        Literal.literally("<!--").parse(block);
        // Reluctant match up to the FIRST "-->": a greedy ".*" would backtrack to the
        // last "--" and swallow everything between two comments that follow each other.
        // (?s) lets "." match line terminators so comments may span several lines.
        // NOTE(review): assumes Expression hands the pattern to java.util.regex — confirm.
        String content = new Expression("(?s)^.*?(?=-->)").parse(block);
        Literal.literally("-->").parse(block);
        return new CommentNode(content);
    }
}

View file

@ -0,0 +1,76 @@
package de.plugh.compositeparse.xml;
import java.util.List;
import java.util.Map;
/**
 * An XML element: a tag name, its attributes and its child nodes.
 */
public class ElementNode extends Node {

    private final String name;
    private final Map<String, String> attributes;
    private final List<Node> subnodes;

    /**
     * @param name       the tag name
     * @param attributes attribute names mapped to their values
     * @param subnodes   the child nodes; an empty list renders as a self-closing tag
     */
    public ElementNode(String name, Map<String, String> attributes,
            List<Node> subnodes) {
        this.name = name;
        this.attributes = attributes;
        this.subnodes = subnodes;
    }

    /**
     * Renders this element and its children.
     *
     * <p>An element without children is written as {@code <name/>}. An element
     * whose only child is a {@link TextNode} is written on a single line.
     * Otherwise every child goes on its own line, indented one step further
     * than this element.
     *
     * @param indent  text placed in front of the opening tag (and of the
     *                closing tag in the multi-line form)
     * @param newline whether to terminate the output with a line feed
     * @return the rendered element
     */
    @Override
    public String prettyPrint(String indent, boolean newline) {
        StringBuilder out = new StringBuilder();
        out.append(indent).append("<").append(name);

        for (Map.Entry<String, String> attribute : attributes.entrySet()) {
            out.append(" ");
            out.append(attribute.getKey());
            out.append("=\"");
            out.append(attribute.getValue()); // TODO escape
            out.append("\"");
        }

        if (subnodes.isEmpty()) {
            out.append("/>");
        } else if (subnodes.size() == 1 && subnodes.get(0) instanceof TextNode) {
            // Single text child: keep opening tag, text and closing tag on one line.
            out.append(">");
            out.append(subnodes.get(0).prettyPrint("", false));
            out.append("</").append(name).append(">");
        } else {
            out.append(">");
            out.append("\n");
            String childIndent = indent + " ";
            for (Node child : subnodes) {
                out.append(child.prettyPrint(childIndent, true));
            }
            out.append(indent).append("</").append(name).append(">");
        }

        if (newline) {
            out.append("\n");
        }
        return out.toString();
    }

    @Override
    public String toString() {
        return "ElementNode{"
                + "name='" + name + '\''
                + ", attributes=" + attributes
                + ", subnodes=" + subnodes
                + '}';
    }
}

View file

@ -0,0 +1,51 @@
package de.plugh.compositeparse.xml;
import de.plugh.compositeparse.Block;
import de.plugh.compositeparse.Pair;
import de.plugh.compositeparse.ParseException;
import de.plugh.compositeparse.Parser;
import de.plugh.compositeparse.parsers.Decision;
import de.plugh.compositeparse.parsers.Expression;
import de.plugh.compositeparse.parsers.Literal;
import de.plugh.compositeparse.parsers.Repeat;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Parses an element: either a self-closing tag ({@code <name attr="v"/>}) or an
 * opening tag, nested nodes and the matching closing tag
 * ({@code <name attr="v">...</name>}).
 */
public class ElementNodeParser implements Parser<ElementNode> {

    /** Regex for a tag name: a letter, {@code _} or {@code :}, then letters, digits, {@code _ . - :}. */
    private static final String REGEX_NAME = "[:a-zA-Z_][:a-zA-Z0-9_.-]*";

    /**
     * Reads the tag name and attributes, then decides between the self-closing
     * form and the form with content.
     *
     * @param block the block to parse in
     * @return the parsed element; a self-closing element has an empty child list
     * @throws ParseException if the underlying parsers propagate a failure
     */
    @Override
    public ElementNode read(Block block) throws ParseException {
        Literal.literally("<").parse(block);
        String name = new Expression(REGEX_NAME).parse(block);
        Map<String, String> attributes = new AttributesParser().parse(block);

        List<Node> subnodes = new Decision<List<Node>>(
                // First, try to find a close tag
                new Pair<>(
                        block1 -> {
                            new Repeat<>(Literal.literally(" ")).parse(block1);
                            Literal.literally("/>").parse(block1);
                            return null;
                        },
                        block1 -> new ArrayList<>()
                ),
                // If that fails, actually parse the contents and the rest
                new Pair<>(
                        Literal.literally(">"),
                        block1 -> {
                            // Parse inside the block handed to this branch, not the captured
                            // outer block, so children and closing tag share one block.
                            List<Node> foundSubnodes = new Repeat<>(new NodeParser()).parse(block1);
                            Literal.literally("</" + name + ">").parse(block1);
                            return foundSubnodes;
                        }
                )
        ).parse(block);

        return new ElementNode(name, attributes, subnodes);
    }
}

View file

@ -0,0 +1,11 @@
package de.plugh.compositeparse.xml;
/**
 * Base class of all nodes in the parsed document tree.
 */
public abstract class Node {

    /**
     * Renders this node with no indentation and no trailing line feed.
     *
     * @return the same text as {@code prettyPrint("", false)}
     */
    public String prettyPrint() {
        final String noIndent = "";
        final boolean trailingNewline = false;
        return prettyPrint(noIndent, trailingNewline);
    }

    /**
     * Renders this node as text.
     *
     * @param indent  text to place in front of the node
     * @param newline whether the output should end with a line feed
     * @return the rendered node
     */
    public abstract String prettyPrint(String indent, boolean newline);
}

View file

@ -0,0 +1,19 @@
package de.plugh.compositeparse.xml;
import de.plugh.compositeparse.Block;
import de.plugh.compositeparse.ParseException;
import de.plugh.compositeparse.Parser;
import de.plugh.compositeparse.parsers.Options;
/**
 * Parses any single node: a comment, an element or a run of text.
 */
public class NodeParser implements Parser<Node> {

    /**
     * Hands the three node parsers to {@link Options} in the order comment,
     * element, text.
     *
     * @param block the block to parse in
     * @return the node produced by {@link Options}
     * @throws ParseException if the underlying parsers propagate a failure
     */
    @Override
    public Node read(Block block) throws ParseException {
        // A fresh sub-parser is created each time an alternative is invoked.
        return new Options<>(
                commentBlock -> new CommentNodeParser().parse(commentBlock),
                elementBlock -> new ElementNodeParser().parse(elementBlock),
                textBlock -> new TextNodeParser().parse(textBlock)
        ).parse(block);
    }
}

View file

@ -0,0 +1,27 @@
package de.plugh.compositeparse.xml;
/**
 * A node holding a run of character data.
 */
public class TextNode extends Node {

    /** The text exactly as it was handed to the constructor. */
    private final String text;

    /**
     * @param text the raw text of this node
     */
    public TextNode(String text) {
        this.text = text;
    }

    /**
     * Renders the text with leading and trailing whitespace trimmed off.
     *
     * @param indent  text placed in front of the trimmed text
     * @param newline whether to terminate the output with a line feed
     * @return the rendered text
     */
    @Override
    public String prettyPrint(String indent, boolean newline) {
        String rendered = indent + text.trim();
        return newline ? rendered + "\n" : rendered;
    }

    @Override
    public String toString() {
        return "TextNode{text='" + text + "'}";
    }
}

View file

@ -0,0 +1,17 @@
package de.plugh.compositeparse.xml;
import de.plugh.compositeparse.Block;
import de.plugh.compositeparse.ParseException;
import de.plugh.compositeparse.Parser;
import de.plugh.compositeparse.parsers.Expression;
/**
 * Parses a non-empty run of characters that contains no {@code <} into a
 * {@link TextNode}.
 */
public class TextNodeParser implements Parser<TextNode> {

    /**
     * @param block the block to parse in
     * @return a node wrapping the matched text, untrimmed
     * @throws ParseException if the underlying expression parser propagates a failure
     */
    @Override
    public TextNode read(Block block) throws ParseException {
        // One or more characters, none of which is '<'.
        Expression untilNextTag = new Expression("^[^<]+");
        return new TextNode(untilNextTag.parse(block));
    }
}