Implement nick normalization and mentioning

This commit is contained in:
Joscha 2023-01-30 18:25:32 +01:00
parent e6898cc9f7
commit 09a9620f8e
3 changed files with 61 additions and 1 deletions

View file

@ -19,6 +19,8 @@ Procedure when bumping the version number:
- `euphoxide::Emoji` for finding, replacing and removing colon-delimited emoji in text - `euphoxide::Emoji` for finding, replacing and removing colon-delimited emoji in text
- `euphoxide::api::Time::new` - `euphoxide::api::Time::new`
- `euphoxide::nick::hue` - `euphoxide::nick::hue`
- `euphoxide::nick::mention`
- `euphoxide::nick::normalize`
- Debug logging using the `log` crate - Debug logging using the `log` crate
- `testbot_instance` example using the new `euphoxide::bot::instance::Instance` - `testbot_instance` example using the new `euphoxide::bot::instance::Instance`
- VSCode project settings - VSCode project settings

View file

@ -8,6 +8,7 @@ bot = ["dep:async-trait", "dep:clap", "dep:cookie"]
[dependencies] [dependencies]
async-trait = { version = "0.1.63", optional = true } async-trait = { version = "0.1.63", optional = true }
caseless = "0.2.1"
cookie = { version = "0.16.2", optional = true } cookie = { version = "0.16.2", optional = true }
futures-util = { version = "0.3.25", default-features = false, features = ["sink"] } futures-util = { version = "0.3.25", default-features = false, features = ["sink"] }
log = "0.4.17" log = "0.4.17"
@ -17,6 +18,7 @@ time = { version = "0.3.17", features = ["serde"] }
tokio = { version = "1.23.0", features = ["time", "sync", "macros", "rt"] } tokio = { version = "1.23.0", features = ["time", "sync", "macros", "rt"] }
tokio-stream = "0.1.11" tokio-stream = "0.1.11"
tokio-tungstenite = { version = "0.18.0", features = ["rustls-tls-native-roots"] } tokio-tungstenite = { version = "0.18.0", features = ["rustls-tls-native-roots"] }
unicode-normalization = "0.1.22"
[dependencies.clap] [dependencies.clap]
version = "4.1.3" version = "4.1.3"

View file

@ -1,5 +1,8 @@
//! Nick-related utility functions. //! Nick-related utility functions.
use caseless::Caseless;
use unicode_normalization::UnicodeNormalization;
use crate::emoji::Emoji; use crate::emoji::Emoji;
/// Does not remove emoji. /// Does not remove emoji.
@ -45,7 +48,60 @@ pub fn hue_without_removing_emoji(nick: &str) -> u8 {
/// This is a reimplementation of [euphoria's nick hue hashing algorithm][0]. It /// This is a reimplementation of [euphoria's nick hue hashing algorithm][0]. It
/// should always return the same value as the official client's implementation. /// should always return the same value as the official client's implementation.
/// ///
/// [0]: https://github.com/euphoria-io/heim/blob/master/client/lib/hueHash.js /// [0]: https://github.com/euphoria-io/heim/blob/978c921063e6b06012fc8d16d9fbf1b3a0be1191/client/lib/hueHash.js
pub fn hue(emoji: &Emoji, nick: &str) -> u8 { pub fn hue(emoji: &Emoji, nick: &str) -> u8 {
hue_without_removing_emoji(&emoji.remove(nick)) hue_without_removing_emoji(&emoji.remove(nick))
} }
/// Returns `true` if `c` is a character that ends a mention.
///
/// That is the case for any Unicode whitespace character and for each of the
/// punctuation characters `,`, `.`, `!`, `?`, `;`, `&`, `<`, `>`, `'`, `"`.
fn delimits_mention(c: char) -> bool {
    /// Non-whitespace characters that terminate a mention.
    const DELIMITERS: [char; 10] = [',', '.', '!', '?', ';', '&', '<', '>', '\'', '"'];

    DELIMITERS.contains(&c) || c.is_whitespace()
}
/// Bring a nick into a canonical form suitable for comparing it against other
/// nicks.
///
/// The normalization applied here is less aggressive than the one used for
/// computing nick hues. It is also less aggressive than the normalization the
/// euphoria client uses to decide who gets pinged by a mention. Consequently,
/// two nicks that ping each other in the euphoria client do not necessarily
/// share the same normal form.
///
/// A nick and the mention form derived from it via [`mention`] always share
/// the same normal form.
///
/// The normalization consists of three steps:
///
/// 1. Strip all whitespace characters and the characters
///    `,`, `.`, `!`, `?`, `;`, `&`, `<`, `>`, `'`, `"`.
///    The [frontend uses all of these to delimit mentions][0].
///    That `>` delimits mentions was confirmed via experimentation.
///
/// 2. Convert to NFKC
/// 3. Case fold
///
/// Steps 2 and 3 stand in for the NFKC_Casefold derived property. They are
/// easier to implement, at the cost of possibly being incorrect in some edge
/// cases.
///
/// [0]: https://github.com/euphoria-io/heim/blob/978c921063e6b06012fc8d16d9fbf1b3a0be1191/client/lib/stores/chat.js#L14
pub fn normalize(nick: &str) -> String {
    // Step 1: drop every character that would end a mention.
    let without_delimiters = nick.chars().filter(|&c| !delimits_mention(c));

    // Step 2: NFKC.
    let composed = without_delimiters.nfkc();

    // Step 3: case folding.
    let folded = composed.default_case_fold();

    folded.collect()
}
/// Compute a mentionable version of a nick while remaining as close to the
/// original as possible.
///
/// The return value of this function appended to an `@` character will
/// highlight as a mention in the official euphoria client. It should ping any
/// people using the original nick. It might also ping other people.
///
/// This function performs step 1 of [`normalize`].
pub fn mention(nick: &str) -> String {
nick.replace(delimits_mention, "")
}