diff --git a/CHANGELOG.md b/CHANGELOG.md index 99e9cc6..5334f21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ Procedure when bumping the version number: - `euphoxide::Emoji` for finding, replacing and removing colon-delimited emoji in text - `euphoxide::api::Time::new` - `euphoxide::nick::hue` +- `euphoxide::nick::mention` +- `euphoxide::nick::normalize` - Debug logging using the `log` crate - `testbot_instance` example using the new `euphoxide::bot::instance::Instance` - VSCode project settings diff --git a/Cargo.toml b/Cargo.toml index 31389f8..7e02c73 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ bot = ["dep:async-trait", "dep:clap", "dep:cookie"] [dependencies] async-trait = { version = "0.1.63", optional = true } +caseless = "0.2.1" cookie = { version = "0.16.2", optional = true } futures-util = { version = "0.3.25", default-features = false, features = ["sink"] } log = "0.4.17" @@ -17,6 +18,7 @@ time = { version = "0.3.17", features = ["serde"] } tokio = { version = "1.23.0", features = ["time", "sync", "macros", "rt"] } tokio-stream = "0.1.11" tokio-tungstenite = { version = "0.18.0", features = ["rustls-tls-native-roots"] } +unicode-normalization = "0.1.22" [dependencies.clap] version = "4.1.3" diff --git a/src/nick.rs b/src/nick.rs index d4a176f..a6cadc2 100644 --- a/src/nick.rs +++ b/src/nick.rs @@ -1,5 +1,8 @@ //! Nick-related utility functions. +use caseless::Caseless; +use unicode_normalization::UnicodeNormalization; + use crate::emoji::Emoji; /// Does not remove emoji. @@ -45,7 +48,60 @@ pub fn hue_without_removing_emoji(nick: &str) -> u8 { /// This is a reimplementation of [euphoria's nick hue hashing algorithm][0]. It /// should always return the same value as the official client's implementation. 
/// -/// [0]: https://github.com/euphoria-io/heim/blob/master/client/lib/hueHash.js +/// [0]: https://github.com/euphoria-io/heim/blob/978c921063e6b06012fc8d16d9fbf1b3a0be1191/client/lib/hueHash.js pub fn hue(emoji: &Emoji, nick: &str) -> u8 { hue_without_removing_emoji(&emoji.remove(nick)) } + +fn delimits_mention(c: char) -> bool { + match c { + ',' | '.' | '!' | '?' | ';' | '&' | '<' | '>' | '\'' | '"' => true, + _ => c.is_whitespace(), + } +} + +/// Normalize a nick to a form that can be compared against other nicks. +/// +/// This normalization is less aggressive than the nick hue normalization. It is +/// also less aggressive than the normalization used by the euphoria client to +/// determine who is pinged by a mention. This means that it will not compute +/// the same normal form for all pairs of nicks that ping each other in the +/// euphoria client. +/// +/// A nick and its mention form calculated via [`mention`] will always evaluate +/// to the same normal form. +/// +/// The steps performed are as follows: +/// +/// 1. Remove all whitespace characters as well as the characters +/// `,`, `.`, `!`, `?`, `;`, `&`, `<`, `>`, `'`, `"`. +/// All of these are used by the [frontend to delimit mentions][0]. +/// The `>` character was confirmed to delimit mentions via experimentation. +/// +/// 2. Convert to NFKC +/// 3. Case fold +/// +/// Steps 2 and 3 are meant to be an alternative to the NFKC_Casefold derived +/// property that's easier to implement, even though it may be incorrect in some +/// edge cases. +/// +/// [0]: https://github.com/euphoria-io/heim/blob/978c921063e6b06012fc8d16d9fbf1b3a0be1191/client/lib/stores/chat.js#L14 +pub fn normalize(nick: &str) -> String { + nick.chars() + .filter(|c| !delimits_mention(*c)) // Step 1 + .nfkc() // Step 2 + .default_case_fold() // Step 3 + .collect() +} + +/// Compute a mentionable version of a nick while remaining as close to the +/// original as possible. 
+/// +/// The return value of this function appended to an `@` character will +/// highlight as a mention in the official euphoria client. It should ping any +/// people using the original nick. It might also ping other people. +/// +/// This function performs step 1 of [`normalize`]. +pub fn mention(nick: &str) -> String { + nick.replace(delimits_mention, "") +}