Model and render elements
This commit is contained in:
parent
c8d9cf16f5
commit
cc3f85e6e1
5 changed files with 694 additions and 0 deletions
78
src/check.rs
Normal file
78
src/check.rs
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
/// <https://infra.spec.whatwg.org/#ascii-alpha>
|
||||
pub fn is_ascii_alpha(c: char) -> bool {
|
||||
c.is_ascii_alphabetic()
|
||||
}
|
||||
|
||||
/// <https://infra.spec.whatwg.org/#ascii-alphanumeric>
|
||||
pub fn is_ascii_alphanumeric(c: char) -> bool {
|
||||
c.is_ascii_alphanumeric()
|
||||
}
|
||||
|
||||
/// <https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-name>
|
||||
///
|
||||
/// The rules around what is a valid tag name are complicated. The standard
|
||||
/// doesn't give an easy answer. Because of this, we're conservative in what we
|
||||
/// allow. This way, the output we produce should parse correctly in a wide
|
||||
/// range of circumstances while following the standard.
|
||||
pub fn is_valid_tag_name(name: &str) -> bool {
|
||||
!name.is_empty()
|
||||
&& name.chars().take(1).all(is_ascii_alpha)
|
||||
&& name.chars().all(is_ascii_alphanumeric)
|
||||
}
|
||||
|
||||
/// <https://html.spec.whatwg.org/multipage/syntax.html#syntax-attribute-name>
|
||||
///
|
||||
/// The rules around what is a valid attribute name are complicated. The
|
||||
/// standard doesn't give an easy answer. Because of this, we're conservative in
|
||||
/// what we allow. This way, the output we produce should parse correctly in a
|
||||
/// wide range of circumstances while following the standard.
|
||||
pub fn is_valid_attribute_name(name: &str) -> bool {
|
||||
!name.is_empty()
|
||||
&& name.chars().take(1).all(is_ascii_alpha)
|
||||
&& name
|
||||
.chars()
|
||||
.all(|c| is_ascii_alphanumeric(c) || c == '-' || c == '_')
|
||||
}
|
||||
|
||||
/// https://html.spec.whatwg.org/multipage/syntax.html#cdata-rcdata-restrictions
|
||||
///
|
||||
/// The tag name must be ascii-only.
|
||||
pub fn is_valid_raw_text(tag_name: &str, text: &str) -> bool {
|
||||
// In case we ever decide to relax tag name ascii requirements.
|
||||
assert!(tag_name.is_ascii());
|
||||
|
||||
// "The text in raw text and escapable raw text elements must not contain
|
||||
// any occurrences of the string "</" (U+003C LESS-THAN SIGN, U+002F
|
||||
// SOLIDUS) [...]"
|
||||
for (i, _) in text.match_indices("</") {
|
||||
let start = i + "</".len();
|
||||
|
||||
let potential_tag_name = text[start..]
|
||||
.chars()
|
||||
.take(tag_name.chars().count())
|
||||
.collect::<String>();
|
||||
|
||||
// "[...] followed by characters that case-insensitively match the tag
|
||||
// name of the element [...]"
|
||||
//
|
||||
// Note: Since we know that tag names are ascii-only, we can convert
|
||||
// both to lowercase for a case-insensitive comparison without weird
|
||||
// unicode shenanigans.
|
||||
if potential_tag_name.to_ascii_lowercase() != tag_name.to_ascii_lowercase() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// "[...] followed by [...]"
|
||||
let Some(trailing) = text[start + potential_tag_name.len()..].chars().next() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
// "[...] one of U+0009 CHARACTER TABULATION (tab), U+000A LINE FEED
|
||||
// (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), U+0020
|
||||
// SPACE, U+003E GREATER-THAN SIGN (>), or U+002F SOLIDUS (/)."
|
||||
if matches!(trailing, '\t' | '\n' | '\x0C' | '\r' | ' ' | '>' | '/') {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue