diff --git a/Cargo.toml b/Cargo.toml index 1526a5a..95c030b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ version = "0.6.0" edition = "2021" [dependencies] +serde_json = "1.0.133" [lints] rust.unsafe_code = { level = "forbid", priority = 1 } diff --git a/src/emoji.rs b/src/emoji.rs index f7a9e75..c867d0d 100644 --- a/src/emoji.rs +++ b/src/emoji.rs @@ -1,10 +1,6 @@ -//! All emoji the euphoria.leet.nu client knows. +use std::{borrow::Cow, collections::HashMap, ops::Range}; -use std::borrow::Cow; -use std::collections::HashMap; -use std::ops::RangeInclusive; - -/// Euphoria.leet.nu emoji list, obtainable via shell command: +/// Emoji list from euphoria.leet.nu, obtainable via shell command: /// /// ```bash /// curl 'https://euphoria.leet.nu/static/emoji.json' \ @@ -13,9 +9,12 @@ use std::ops::RangeInclusive; /// ``` const EMOJI_JSON: &str = include_str!("emoji.json"); -/// A map from emoji names to their unicode representation. Not all emojis have -/// such a representation. -pub struct Emoji(pub HashMap>); +/// A database of emoji names and their unicode representation. +/// +/// Some emoji are rendered with custom icons in the web client and don't +/// correspond to an emoji in the unicode standard. These emoji don't have an +/// unicode representation. +pub struct Emoji(HashMap>); fn parse_hex_to_char(hex: &str) -> Option { u32::from_str_radix(hex, 16).ok()?.try_into().ok() @@ -29,7 +28,16 @@ fn parse_code_points(code_points: &str) -> Option { } impl Emoji { - /// Load a list of emoji compiled into the library. + /// Load the list of emoji compiled into the library. + /// + /// # Example + /// + /// ``` + /// use euphoxide::Emoji; + /// let emoji = Emoji::load(); + /// + /// assert_eq!(emoji.get("robot"), Some(Some("🤖"))); + /// ``` pub fn load() -> Self { Self::load_from_json(EMOJI_JSON).unwrap() } @@ -38,9 +46,26 @@ impl Emoji { /// /// The object keys are the emoji names (without colons `:`). The object /// values are the emoji code points encoded as hexadecimal numbers and - /// separated by a dash `-` (e.g. `"34-fe0f-20e3"`). Emojis whose values - /// don't match this schema are interpreted as emojis without unicode + /// separated by a dash `-` (e.g. `"34-fe0f-20e3"`). Emoji whose values + /// don't match this schema are interpreted as emoji without unicode /// representation. + /// + /// This is the format used by the [euphoria.leet.nu emoji listing][0]. + /// + /// [0]: https://euphoria.leet.nu/static/emoji.json + /// + /// # Example + /// + /// ``` + /// use euphoxide::Emoji; + /// + /// const EMOJI: &str = r#" {"Roboter": "1f916", "foo": "~bar"} "#; + /// let emoji = Emoji::load_from_json(EMOJI).unwrap(); + /// + /// assert_eq!(emoji.get("Roboter"), Some(Some("🤖"))); + /// assert_eq!(emoji.get("foo"), Some(None)); + /// assert_eq!(emoji.get("robot"), None); + /// ``` pub fn load_from_json(json: &str) -> Option { let map = serde_json::from_str::>(json) .ok()? @@ -51,6 +76,25 @@ impl Emoji { Some(Self(map)) } + /// Retrieve an emoji's unicode representation by name. + /// + /// Returns `None` if the emoji could not be found. Returns `Some(None)` if + /// the emoji could be found but does not have a unicode representation. + /// + /// The name is not colon-delimited. + /// + /// # Example + /// + /// ``` + /// use euphoxide::Emoji; + /// let emoji = Emoji::load(); + /// + /// assert_eq!(emoji.get("robot"), Some(Some("🤖"))); + /// assert_eq!(emoji.get("+1"), Some(None)); + /// assert_eq!(emoji.get("foobar"), None); + /// + /// assert_eq!(emoji.get(":robot:"), None); + /// ``` pub fn get(&self, name: &str) -> Option> { match self.0.get(name) { Some(Some(replace)) => Some(Some(replace)), @@ -59,7 +103,50 @@ impl Emoji { } } - pub fn find(&self, text: &str) -> Vec<(RangeInclusive, Option<&str>)> { + /// All known emoji and their unicode representation. + /// + /// The emoji are not in any particular order. + /// + /// # Example + /// + /// ``` + /// use euphoxide::Emoji; + /// let emoji = Emoji::load(); + /// + /// // List all emoji that don't have a unicode representation + /// let custom_emoji = emoji + /// .all() + /// .filter(|(_, unicode)| unicode.is_none()) + /// .map(|(name, _)| name) + /// .collect::>(); + /// + /// assert!(!custom_emoji.is_empty()); + /// ``` + pub fn all(&self) -> impl Iterator)> { + self.0 + .iter() + .map(|(k, v)| (k as &str, v.as_ref().map(|v| v as &str))) + } + + /// Find all colon-delimited emoji in a string. + /// + /// Returns a list of emoji locations (colons are included in the range) and + /// corresponding unicode representations. + /// + /// # Example + /// + /// ``` + /// use euphoxide::Emoji; + /// let emoji = Emoji::load(); + /// + /// let found = emoji.find("Hello :globe_with_meridians:!"); + /// assert_eq!(found, vec![(6..28, Some("🌐"))]); + /// + /// // Ignores nonexistent emoji + /// let found = emoji.find("Hello :sparkly_wizard:!"); + /// assert!(found.is_empty()); + /// ``` + pub fn find(&self, text: &str) -> Vec<(Range, Option<&str>)> { let mut result = vec![]; let mut prev_colon_idx = None; @@ -67,7 +154,7 @@ impl Emoji { if let Some(prev_idx) = prev_colon_idx { let name = &text[prev_idx + 1..colon_idx]; if let Some(replace) = self.get(name) { - let range = prev_idx..=colon_idx; + let range = prev_idx..colon_idx + 1; result.push((range, replace)); prev_colon_idx = None; continue; @@ -79,6 +166,21 @@ impl Emoji { result } + /// Replace all colon-delimited emoji in a string. + /// + /// # Example + /// + /// ``` + /// use euphoxide::Emoji; + /// let emoji = Emoji::load(); + /// + /// let replaced = emoji.replace("Hello :globe_with_meridians:!"); + /// assert_eq!(replaced, "Hello 🌐!"); + /// + /// // Ignores nonexistent emoji + /// let replaced = emoji.replace("Hello :sparkly_wizard:!"); + /// assert_eq!(replaced, "Hello :sparkly_wizard:!"); + /// ``` pub fn replace<'a>(&self, text: &'a str) -> Cow<'a, str> { let emoji = self.find(text); if emoji.is_empty() { @@ -91,13 +193,13 @@ impl Emoji { for (range, replace) in emoji { // Only replace emoji with a replacement if let Some(replace) = replace { - if *range.start() > after_last_emoji { + if range.start > after_last_emoji { // There were non-emoji characters between the last and the // current emoji. - result.push_str(&text[after_last_emoji..*range.start()]); + result.push_str(&text[after_last_emoji..range.start]); } result.push_str(replace); - after_last_emoji = range.end() + 1; + after_last_emoji = range.end; } } @@ -108,6 +210,21 @@ impl Emoji { Cow::Owned(result) } + /// Remove all colon-delimited emoji in a string. + /// + /// # Example + /// + /// ``` + /// use euphoxide::Emoji; + /// let emoji = Emoji::load(); + /// + /// let removed = emoji.remove("Hello :globe_with_meridians:!"); + /// assert_eq!(removed, "Hello !"); + /// + /// // Ignores nonexistent emoji + /// let removed = emoji.replace("Hello :sparkly_wizard:!"); + /// assert_eq!(removed, "Hello :sparkly_wizard:!"); + /// ``` pub fn remove<'a>(&self, text: &'a str) -> Cow<'a, str> { let emoji = self.find(text); if emoji.is_empty() { @@ -118,12 +235,12 @@ impl Emoji { let mut after_last_emoji = 0; for (range, _) in emoji { - if *range.start() > after_last_emoji { + if range.start > after_last_emoji { // There were non-emoji characters between the last and the // current emoji. - result.push_str(&text[after_last_emoji..*range.start()]); + result.push_str(&text[after_last_emoji..range.start]); } - after_last_emoji = range.end() + 1; + after_last_emoji = range.end; } if after_last_emoji < text.len() { @@ -149,15 +266,15 @@ mod test { // :bad: does not exist, while :x: and :o: do. - assert_eq!(emoji.find(":bad:x:o:"), vec![(4..=6, Some("❌"))]); + assert_eq!(emoji.find(":bad:x:o:"), vec![(4..7, Some("❌"))]); assert_eq!( emoji.find(":x:bad:o:"), - vec![(0..=2, Some("❌")), (6..=8, Some("⭕"))] + vec![(0..3, Some("❌")), (6..9, Some("⭕"))] ); - assert_eq!(emoji.find("ab:bad:x:o:cd"), vec![(6..=8, Some("❌"))]); + assert_eq!(emoji.find("ab:bad:x:o:cd"), vec![(6..9, Some("❌"))]); assert_eq!( emoji.find("ab:x:bad:o:cd"), - vec![(2..=4, Some("❌")), (8..=10, Some("⭕"))] + vec![(2..5, Some("❌")), (8..11, Some("⭕"))] ); } diff --git a/src/lib.rs b/src/lib.rs index 8b13789..adb9a54 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1 +1,3 @@ +mod emoji; +pub use crate::emoji::Emoji;