Refactor and document emoji module

This commit is contained in:
Joscha 2024-12-05 11:32:43 +01:00
parent 904dda1af0
commit a661449b6f
3 changed files with 144 additions and 24 deletions

View file

@ -4,6 +4,7 @@ version = "0.5.1"
edition = "2021" edition = "2021"
[dependencies] [dependencies]
serde_json = "1.0.133"
[lints] [lints]
rust.unsafe_code = { level = "forbid", priority = 1 } rust.unsafe_code = { level = "forbid", priority = 1 }

View file

@ -1,10 +1,6 @@
//! All emoji the euphoria.leet.nu client knows. use std::{borrow::Cow, collections::HashMap, ops::Range};
use std::borrow::Cow; /// Emoji list from euphoria.leet.nu, obtainable via shell command:
use std::collections::HashMap;
use std::ops::RangeInclusive;
/// Euphoria.leet.nu emoji list, obtainable via shell command:
/// ///
/// ```bash /// ```bash
/// curl 'https://euphoria.leet.nu/static/emoji.json' \ /// curl 'https://euphoria.leet.nu/static/emoji.json' \
@ -13,9 +9,12 @@ use std::ops::RangeInclusive;
/// ``` /// ```
const EMOJI_JSON: &str = include_str!("emoji.json"); const EMOJI_JSON: &str = include_str!("emoji.json");
/// A map from emoji names to their unicode representation. Not all emojis have /// A database of emoji names and their unicode representation.
/// such a representation. ///
pub struct Emoji(pub HashMap<String, Option<String>>); /// Some emoji are rendered with custom icons in the web client and don't
/// correspond to an emoji in the unicode standard. These emoji don't have a
/// unicode representation.
pub struct Emoji(HashMap<String, Option<String>>);
fn parse_hex_to_char(hex: &str) -> Option<char> { fn parse_hex_to_char(hex: &str) -> Option<char> {
u32::from_str_radix(hex, 16).ok()?.try_into().ok() u32::from_str_radix(hex, 16).ok()?.try_into().ok()
@ -29,7 +28,16 @@ fn parse_code_points(code_points: &str) -> Option<String> {
} }
impl Emoji { impl Emoji {
/// Load a list of emoji compiled into the library. /// Load the list of emoji compiled into the library.
///
/// # Example
///
/// ```
/// use euphoxide::Emoji;
/// let emoji = Emoji::load();
///
/// assert_eq!(emoji.get("robot"), Some(Some("🤖")));
/// ```
pub fn load() -> Self { pub fn load() -> Self {
Self::load_from_json(EMOJI_JSON).unwrap() Self::load_from_json(EMOJI_JSON).unwrap()
} }
@ -38,9 +46,26 @@ impl Emoji {
/// ///
/// The object keys are the emoji names (without colons `:`). The object /// The object keys are the emoji names (without colons `:`). The object
/// values are the emoji code points encoded as hexadecimal numbers and /// values are the emoji code points encoded as hexadecimal numbers and
/// separated by a dash `-` (e.g. `"34-fe0f-20e3"`). Emojis whose values /// separated by a dash `-` (e.g. `"34-fe0f-20e3"`). Emoji whose values
/// don't match this schema are interpreted as emojis without unicode /// don't match this schema are interpreted as emoji without unicode
/// representation. /// representation.
///
/// This is the format used by the [euphoria.leet.nu emoji listing][0].
///
/// [0]: https://euphoria.leet.nu/static/emoji.json
///
/// # Example
///
/// ```
/// use euphoxide::Emoji;
///
/// const EMOJI: &str = r#" {"Roboter": "1f916", "foo": "~bar"} "#;
/// let emoji = Emoji::load_from_json(EMOJI).unwrap();
///
/// assert_eq!(emoji.get("Roboter"), Some(Some("🤖")));
/// assert_eq!(emoji.get("foo"), Some(None));
/// assert_eq!(emoji.get("robot"), None);
/// ```
pub fn load_from_json(json: &str) -> Option<Self> { pub fn load_from_json(json: &str) -> Option<Self> {
let map = serde_json::from_str::<HashMap<String, String>>(json) let map = serde_json::from_str::<HashMap<String, String>>(json)
.ok()? .ok()?
@ -51,6 +76,25 @@ impl Emoji {
Some(Self(map)) Some(Self(map))
} }
/// Retrieve an emoji's unicode representation by name.
///
/// Returns `None` if the emoji could not be found. Returns `Some(None)` if
/// the emoji could be found but does not have a unicode representation.
///
/// The name is not colon-delimited.
///
/// # Example
///
/// ```
/// use euphoxide::Emoji;
/// let emoji = Emoji::load();
///
/// assert_eq!(emoji.get("robot"), Some(Some("🤖")));
/// assert_eq!(emoji.get("+1"), Some(None));
/// assert_eq!(emoji.get("foobar"), None);
///
/// assert_eq!(emoji.get(":robot:"), None);
/// ```
pub fn get(&self, name: &str) -> Option<Option<&str>> { pub fn get(&self, name: &str) -> Option<Option<&str>> {
match self.0.get(name) { match self.0.get(name) {
Some(Some(replace)) => Some(Some(replace)), Some(Some(replace)) => Some(Some(replace)),
@ -59,7 +103,50 @@ impl Emoji {
} }
} }
pub fn find(&self, text: &str) -> Vec<(RangeInclusive<usize>, Option<&str>)> { /// All known emoji and their unicode representation.
///
/// The emoji are not in any particular order.
///
/// # Example
///
/// ```
/// use euphoxide::Emoji;
/// let emoji = Emoji::load();
///
/// // List all emoji that don't have a unicode representation
/// let custom_emoji = emoji
/// .all()
/// .filter(|(_, unicode)| unicode.is_none())
/// .map(|(name, _)| name)
/// .collect::<Vec<_>>();
///
/// assert!(!custom_emoji.is_empty());
/// ```
pub fn all(&self) -> impl Iterator<Item = (&str, Option<&str>)> {
    // Hand out borrowed string slices for both the name and the optional
    // unicode representation instead of the owned `String`s held in the
    // inner map.
    self.0
        .iter()
        .map(|(name, unicode)| (name.as_str(), unicode.as_deref()))
}
/// Find all colon-delimited emoji in a string.
///
/// Returns a list of emoji locations (colons are included in the range) and
/// corresponding unicode representations.
///
/// # Example
///
/// ```
/// use euphoxide::Emoji;
/// let emoji = Emoji::load();
///
/// let found = emoji.find("Hello :globe_with_meridians:!");
/// assert_eq!(found, vec![(6..28, Some("🌐"))]);
///
/// // Ignores nonexistent emoji
/// let found = emoji.find("Hello :sparkly_wizard:!");
/// assert!(found.is_empty());
/// ```
pub fn find(&self, text: &str) -> Vec<(Range<usize>, Option<&str>)> {
let mut result = vec![]; let mut result = vec![];
let mut prev_colon_idx = None; let mut prev_colon_idx = None;
@ -67,7 +154,7 @@ impl Emoji {
if let Some(prev_idx) = prev_colon_idx { if let Some(prev_idx) = prev_colon_idx {
let name = &text[prev_idx + 1..colon_idx]; let name = &text[prev_idx + 1..colon_idx];
if let Some(replace) = self.get(name) { if let Some(replace) = self.get(name) {
let range = prev_idx..=colon_idx; let range = prev_idx..colon_idx + 1;
result.push((range, replace)); result.push((range, replace));
prev_colon_idx = None; prev_colon_idx = None;
continue; continue;
@ -79,6 +166,21 @@ impl Emoji {
result result
} }
/// Replace all colon-delimited emoji in a string.
///
/// # Example
///
/// ```
/// use euphoxide::Emoji;
/// let emoji = Emoji::load();
///
/// let replaced = emoji.replace("Hello :globe_with_meridians:!");
/// assert_eq!(replaced, "Hello 🌐!");
///
/// // Ignores nonexistent emoji
/// let replaced = emoji.replace("Hello :sparkly_wizard:!");
/// assert_eq!(replaced, "Hello :sparkly_wizard:!");
/// ```
pub fn replace<'a>(&self, text: &'a str) -> Cow<'a, str> { pub fn replace<'a>(&self, text: &'a str) -> Cow<'a, str> {
let emoji = self.find(text); let emoji = self.find(text);
if emoji.is_empty() { if emoji.is_empty() {
@ -91,13 +193,13 @@ impl Emoji {
for (range, replace) in emoji { for (range, replace) in emoji {
// Only replace emoji with a replacement // Only replace emoji with a replacement
if let Some(replace) = replace { if let Some(replace) = replace {
if *range.start() > after_last_emoji { if range.start > after_last_emoji {
// There were non-emoji characters between the last and the // There were non-emoji characters between the last and the
// current emoji. // current emoji.
result.push_str(&text[after_last_emoji..*range.start()]); result.push_str(&text[after_last_emoji..range.start]);
} }
result.push_str(replace); result.push_str(replace);
after_last_emoji = range.end() + 1; after_last_emoji = range.end;
} }
} }
@ -108,6 +210,21 @@ impl Emoji {
Cow::Owned(result) Cow::Owned(result)
} }
/// Remove all colon-delimited emoji in a string.
///
/// # Example
///
/// ```
/// use euphoxide::Emoji;
/// let emoji = Emoji::load();
///
/// let removed = emoji.remove("Hello :globe_with_meridians:!");
/// assert_eq!(removed, "Hello !");
///
/// // Ignores nonexistent emoji
/// let removed = emoji.remove("Hello :sparkly_wizard:!");
/// assert_eq!(removed, "Hello :sparkly_wizard:!");
/// ```
pub fn remove<'a>(&self, text: &'a str) -> Cow<'a, str> { pub fn remove<'a>(&self, text: &'a str) -> Cow<'a, str> {
let emoji = self.find(text); let emoji = self.find(text);
if emoji.is_empty() { if emoji.is_empty() {
@ -118,12 +235,12 @@ impl Emoji {
let mut after_last_emoji = 0; let mut after_last_emoji = 0;
for (range, _) in emoji { for (range, _) in emoji {
if *range.start() > after_last_emoji { if range.start > after_last_emoji {
// There were non-emoji characters between the last and the // There were non-emoji characters between the last and the
// current emoji. // current emoji.
result.push_str(&text[after_last_emoji..*range.start()]); result.push_str(&text[after_last_emoji..range.start]);
} }
after_last_emoji = range.end() + 1; after_last_emoji = range.end;
} }
if after_last_emoji < text.len() { if after_last_emoji < text.len() {
@ -149,15 +266,15 @@ mod test {
// :bad: does not exist, while :x: and :o: do. // :bad: does not exist, while :x: and :o: do.
assert_eq!(emoji.find(":bad:x:o:"), vec![(4..=6, Some("❌"))]); assert_eq!(emoji.find(":bad:x:o:"), vec![(4..7, Some("❌"))]);
assert_eq!( assert_eq!(
emoji.find(":x:bad:o:"), emoji.find(":x:bad:o:"),
vec![(0..=2, Some("❌")), (6..=8, Some("⭕"))] vec![(0..3, Some("❌")), (6..9, Some("⭕"))]
); );
assert_eq!(emoji.find("ab:bad:x:o:cd"), vec![(6..=8, Some("❌"))]); assert_eq!(emoji.find("ab:bad:x:o:cd"), vec![(6..9, Some("❌"))]);
assert_eq!( assert_eq!(
emoji.find("ab:x:bad:o:cd"), emoji.find("ab:x:bad:o:cd"),
vec![(2..=4, Some("❌")), (8..=10, Some("⭕"))] vec![(2..5, Some("❌")), (8..11, Some("⭕"))]
); );
} }

View file

@ -1 +1,3 @@
mod emoji;
pub use crate::emoji::Emoji;