Add incomplete xml parser
It doesn't adhere to any standard; instead, it haphazardly parses what I assumed to be valid XML. It might still be interesting to look at as an example of how to use the parser library.
This commit is contained in:
parent
b864824308
commit
d1d31abb04
10 changed files with 350 additions and 0 deletions
|
|
@ -0,0 +1,53 @@
|
|||
package de.plugh.compositeparse.parsers;
|
||||
|
||||
import de.plugh.compositeparse.Block;
|
||||
import de.plugh.compositeparse.ParseException;
|
||||
import de.plugh.compositeparse.Parser;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
|
||||
public class QuotedString implements Parser<String> {
|
||||
|
||||
private final String quoteChar;
|
||||
|
||||
public QuotedString(String quoteChar) {
|
||||
this.quoteChar = quoteChar;
|
||||
}
|
||||
|
||||
public QuotedString() {
|
||||
this("\"");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Function<List<Block>, String> getNamingScheme() {
|
||||
return Block.label("quoted string");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String read(Block block) throws ParseException {
|
||||
Literal.literally(quoteChar).parse(block);
|
||||
|
||||
StringBuilder result = new StringBuilder();
|
||||
boolean escaped = false;
|
||||
while (true) {
|
||||
String s = block.getInput().read(1);
|
||||
|
||||
if (s.isEmpty()) {
|
||||
throw new ParseException(block);
|
||||
} else if (escaped) {
|
||||
result.append(s);
|
||||
escaped = false;
|
||||
} else if (s.equals(quoteChar)) {
|
||||
break;
|
||||
} else if (s.equals("\\")) {
|
||||
escaped = true;
|
||||
} else {
|
||||
result.append(s);
|
||||
}
|
||||
}
|
||||
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
package de.plugh.compositeparse.xml;
|
||||
|
||||
import de.plugh.compositeparse.Block;
|
||||
import de.plugh.compositeparse.ParseException;
|
||||
import de.plugh.compositeparse.Parser;
|
||||
import de.plugh.compositeparse.parsers.Expression;
|
||||
import de.plugh.compositeparse.parsers.Literal;
|
||||
import de.plugh.compositeparse.parsers.QuotedString;
|
||||
import de.plugh.compositeparse.parsers.Repeat;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class AttributesParser implements Parser<Map<String, String>> {
|
||||
|
||||
private static final String REGEX_NAME = "[:a-zA-Z_][:a-zA-Z0-9_.-]*";
|
||||
|
||||
@Override
|
||||
public Map<String, String> read(Block block) throws ParseException {
|
||||
List<Attribute> attributes = new Repeat<>(block1 -> {
|
||||
Repeat.atLeast(1, Literal.literally(" ")).parse(block1);
|
||||
|
||||
String name = new Expression(REGEX_NAME).parse(block1);
|
||||
Literal.literally("=").parse(block1);
|
||||
String value = new QuotedString().parse(block1);
|
||||
|
||||
return new Attribute(name, value);
|
||||
}).parse(block);
|
||||
|
||||
Map<String, String> attributeMap = new HashMap<>();
|
||||
attributes.forEach(attribute -> attributeMap.put(attribute.name, attribute.value));
|
||||
|
||||
return attributeMap;
|
||||
}
|
||||
|
||||
private class Attribute {
|
||||
|
||||
final String name;
|
||||
final String value;
|
||||
|
||||
Attribute(String name, String value) {
|
||||
this.name = name;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
27
src/main/java/de/plugh/compositeparse/xml/CommentNode.java
Normal file
27
src/main/java/de/plugh/compositeparse/xml/CommentNode.java
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
package de.plugh.compositeparse.xml;
|
||||
|
||||
public class CommentNode extends Node {
|
||||
|
||||
private final String content;
|
||||
|
||||
public CommentNode(String content) {
|
||||
this.content = content;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String prettyPrint(String indent, boolean newline) {
|
||||
if (newline) {
|
||||
return indent + "<!--" + content + "-->" + "\n";
|
||||
} else {
|
||||
return indent + "<!--" + content + "-->";
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "CommentNode{" +
|
||||
"content='" + content + '\'' +
|
||||
'}';
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
package de.plugh.compositeparse.xml;
|
||||
|
||||
import de.plugh.compositeparse.Block;
|
||||
import de.plugh.compositeparse.ParseException;
|
||||
import de.plugh.compositeparse.Parser;
|
||||
import de.plugh.compositeparse.parsers.Expression;
|
||||
import de.plugh.compositeparse.parsers.Literal;
|
||||
|
||||
public class CommentNodeParser implements Parser<CommentNode> {
|
||||
|
||||
@Override
|
||||
public CommentNode read(Block block) throws ParseException {
|
||||
Literal.literally("<!--").parse(block);
|
||||
String content = new Expression("^.*(?=--)").parse(block);
|
||||
Literal.literally("-->").parse(block);
|
||||
|
||||
return new CommentNode(content);
|
||||
}
|
||||
|
||||
}
|
||||
76
src/main/java/de/plugh/compositeparse/xml/ElementNode.java
Normal file
76
src/main/java/de/plugh/compositeparse/xml/ElementNode.java
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
package de.plugh.compositeparse.xml;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class ElementNode extends Node {
|
||||
|
||||
private final String name;
|
||||
private final Map<String, String> attributes;
|
||||
private final List<Node> subnodes;
|
||||
|
||||
public ElementNode(String name, Map<String, String> attributes,
|
||||
List<Node> subnodes) {
|
||||
this.name = name;
|
||||
this.attributes = attributes;
|
||||
this.subnodes = subnodes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String prettyPrint(String indent, boolean newline) {
|
||||
StringBuilder result = new StringBuilder()
|
||||
.append(indent)
|
||||
.append("<")
|
||||
.append(name);
|
||||
|
||||
attributes.forEach((s, s2) -> {
|
||||
result
|
||||
.append(" ")
|
||||
.append(s)
|
||||
.append("=\"")
|
||||
.append(s2) // TODO escape
|
||||
.append("\"");
|
||||
});
|
||||
|
||||
if (subnodes.isEmpty()) {
|
||||
result.append("/>");
|
||||
} else {
|
||||
result.append(">");
|
||||
|
||||
if (subnodes.size() == 1 && subnodes.get(0) instanceof TextNode) {
|
||||
result.append(subnodes.get(0).prettyPrint("", false));
|
||||
|
||||
result
|
||||
.append("</")
|
||||
.append(name)
|
||||
.append(">");
|
||||
} else {
|
||||
result.append("\n");
|
||||
|
||||
subnodes.forEach(node -> result.append(node.prettyPrint(indent + " ", true)));
|
||||
|
||||
result
|
||||
.append(indent)
|
||||
.append("</")
|
||||
.append(name)
|
||||
.append(">");
|
||||
}
|
||||
}
|
||||
|
||||
if (newline) {
|
||||
result.append("\n");
|
||||
}
|
||||
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "ElementNode{" +
|
||||
"name='" + name + '\'' +
|
||||
", attributes=" + attributes +
|
||||
", subnodes=" + subnodes +
|
||||
'}';
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
package de.plugh.compositeparse.xml;
|
||||
|
||||
import de.plugh.compositeparse.Block;
|
||||
import de.plugh.compositeparse.Pair;
|
||||
import de.plugh.compositeparse.ParseException;
|
||||
import de.plugh.compositeparse.Parser;
|
||||
import de.plugh.compositeparse.parsers.Decision;
|
||||
import de.plugh.compositeparse.parsers.Expression;
|
||||
import de.plugh.compositeparse.parsers.Literal;
|
||||
import de.plugh.compositeparse.parsers.Repeat;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class ElementNodeParser implements Parser<ElementNode> {
|
||||
|
||||
private static final String REGEX_NAME = "[:a-zA-Z_][:a-zA-Z0-9_.-]*";
|
||||
|
||||
@Override
|
||||
public ElementNode read(Block block) throws ParseException {
|
||||
Literal.literally("<").parse(block);
|
||||
String name = new Expression(REGEX_NAME).parse(block);
|
||||
|
||||
Map<String, String> attributes = new AttributesParser().parse(block);
|
||||
|
||||
List<Node> subnodes = new Decision<List<Node>>(
|
||||
// First, try to find a close tag
|
||||
new Pair<>(
|
||||
block1 -> {
|
||||
new Repeat<>(Literal.literally(" ")).parse(block1);
|
||||
Literal.literally("/>").parse(block1);
|
||||
return null;
|
||||
},
|
||||
block1 -> new ArrayList<>()
|
||||
),
|
||||
// If that fails, actually parse the contents and the rest
|
||||
new Pair<>(
|
||||
Literal.literally(">"),
|
||||
block1 -> {
|
||||
List<Node> foundSubnodes = new Repeat<>(new NodeParser()).parse(block);
|
||||
Literal.literally("</" + name + ">").parse(block1);
|
||||
return foundSubnodes;
|
||||
}
|
||||
)
|
||||
).parse(block);
|
||||
|
||||
return new ElementNode(name, attributes, subnodes);
|
||||
}
|
||||
|
||||
}
|
||||
11
src/main/java/de/plugh/compositeparse/xml/Node.java
Normal file
11
src/main/java/de/plugh/compositeparse/xml/Node.java
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
package de.plugh.compositeparse.xml;
|
||||
|
||||
/**
 * Base class of all nodes of the parsed document tree.
 */
public abstract class Node {

    /**
     * Renders this node without indentation and without a trailing line break.
     *
     * @return the result of {@link #prettyPrint(String, boolean)} for an empty indent and
     *         {@code newline == false}
     */
    public String prettyPrint() {
        final String noIndent = "";
        final boolean trailingNewline = false;
        return prettyPrint(noIndent, trailingNewline);
    }

    /**
     * Renders this node as text.
     *
     * @param indent the string to put in front of the node
     * @param newline whether to end the output with a line break
     * @return the rendered node
     */
    public abstract String prettyPrint(String indent, boolean newline);

}
|
||||
19
src/main/java/de/plugh/compositeparse/xml/NodeParser.java
Normal file
19
src/main/java/de/plugh/compositeparse/xml/NodeParser.java
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
package de.plugh.compositeparse.xml;
|
||||
|
||||
import de.plugh.compositeparse.Block;
|
||||
import de.plugh.compositeparse.ParseException;
|
||||
import de.plugh.compositeparse.Parser;
|
||||
import de.plugh.compositeparse.parsers.Options;
|
||||
|
||||
public class NodeParser implements Parser<Node> {
|
||||
|
||||
@Override
|
||||
public Node read(Block block) throws ParseException {
|
||||
return new Options<>(
|
||||
block1 -> new CommentNodeParser().parse(block1),
|
||||
block1 -> new ElementNodeParser().parse(block1),
|
||||
block1 -> new TextNodeParser().parse(block1)
|
||||
).parse(block);
|
||||
}
|
||||
|
||||
}
|
||||
27
src/main/java/de/plugh/compositeparse/xml/TextNode.java
Normal file
27
src/main/java/de/plugh/compositeparse/xml/TextNode.java
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
package de.plugh.compositeparse.xml;
|
||||
|
||||
public class TextNode extends Node {
|
||||
|
||||
private final String text;
|
||||
|
||||
public TextNode(String text) {
|
||||
this.text = text;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String prettyPrint(String indent, boolean newline) {
|
||||
if (newline) {
|
||||
return indent + text.trim() + "\n";
|
||||
} else {
|
||||
return indent + text.trim();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "TextNode{" +
|
||||
"text='" + text + '\'' +
|
||||
'}';
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
package de.plugh.compositeparse.xml;
|
||||
|
||||
import de.plugh.compositeparse.Block;
|
||||
import de.plugh.compositeparse.ParseException;
|
||||
import de.plugh.compositeparse.Parser;
|
||||
import de.plugh.compositeparse.parsers.Expression;
|
||||
|
||||
public class TextNodeParser implements Parser<TextNode> {
|
||||
|
||||
@Override
|
||||
public TextNode read(Block block) throws ParseException {
|
||||
String text = new Expression("^[^<]+").parse(block);
|
||||
|
||||
return new TextNode(text);
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue