Update emoji

The new set was organically sourced from free-range JSON:

https://euphoria.leet.nu/static/emoji.json

Since the emoji are now loaded from JSON anyway, the function that does the
loading (`Emoji::load_from_json`) is exposed as well. Clients may want to load
the emoji list dynamically at runtime instead of using the built-in emoji.
This commit is contained in:
Joscha 2024-05-20 18:46:09 +02:00
parent 276ff68512
commit 69a4a2c07f
4 changed files with 3881 additions and 1559 deletions

View file

@@ -14,6 +14,14 @@ Procedure when bumping the version number:
## Unreleased ## Unreleased
### Added
- `Emoji::load_from_json`
### Changed
- Updated set of emoji names
## v0.5.0 - 2023-12-27 ## v0.5.0 - 2023-12-27
### Changed ### Changed

3839
src/emoji.json Normal file

File diff suppressed because it is too large Load diff

View file

@@ -4,33 +4,51 @@ use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
use std::ops::RangeInclusive; use std::ops::RangeInclusive;
const EMOJI_RAW: &str = include_str!("emoji.txt"); /// Euphoria.leet.nu emoji list, obtainable via shell command:
///
/// ```bash
/// curl 'https://euphoria.leet.nu/static/emoji.json' \
/// | jq 'to_entries | sort_by(.key) | from_entries' \
/// > emoji.json
/// ```
const EMOJI_JSON: &str = include_str!("emoji.json");
/// A map from emoji names to their unicode representation. Not all emojis have /// A map from emoji names to their unicode representation. Not all emojis have
/// such a representation. /// such a representation.
pub struct Emoji(pub HashMap<String, Option<String>>); pub struct Emoji(pub HashMap<String, Option<String>>);
fn parse_hex_to_char(hex: &str) -> char { fn parse_hex_to_char(hex: &str) -> Option<char> {
u32::from_str_radix(hex, 16).unwrap().try_into().unwrap() u32::from_str_radix(hex, 16).ok()?.try_into().ok()
} }
fn parse_line(line: &str) -> (String, Option<String>) { fn parse_code_points(code_points: &str) -> Option<String> {
let mut line = line.split_ascii_whitespace(); code_points
let name = line.next().unwrap().to_string(); .split('-')
let unicode = line.map(parse_hex_to_char).collect::<String>(); .map(parse_hex_to_char)
let unicode = Some(unicode).filter(|u| !u.is_empty()); .collect::<Option<String>>()
(name, unicode)
} }
impl Emoji { impl Emoji {
/// Load a list of emoji compiled into the library.
pub fn load() -> Self { pub fn load() -> Self {
let map = EMOJI_RAW Self::load_from_json(EMOJI_JSON).unwrap()
.lines() }
.map(|l| l.trim())
.filter(|l| !l.is_empty() && !l.starts_with('#')) /// Load a list of emoji from a string containing a JSON object.
.map(parse_line) ///
.collect(); /// The object keys are the emoji names (without colons `:`). The object
Self(map) /// values are the emoji code points encoded as hexadecimal numbers and
/// separated by a dash `-` (e.g. `"34-fe0f-20e3"`). Emojis whose values
/// don't match this schema are interpreted as emojis without unicode
/// representation.
pub fn load_from_json(json: &str) -> Option<Self> {
let map = serde_json::from_str::<HashMap<String, String>>(json)
.ok()?
.into_iter()
.map(|(k, v)| (k, parse_code_points(&v)))
.collect::<HashMap<_, _>>();
Some(Self(map))
} }
pub fn get(&self, name: &str) -> Option<Option<&str>> { pub fn get(&self, name: &str) -> Option<Option<&str>> {

File diff suppressed because it is too large Load diff