From bcf6193cd92accd6a3a5d365bfeebc995f4cd597 Mon Sep 17 00:00:00 2001
From: Joscha
Date: Tue, 31 May 2022 15:05:30 +0200
Subject: [PATCH] Measure widths of various unicode characters

---
 Cargo.toml                 |   3 +
 examples/measure_widths.rs | 437 +++++++++++++++++++++++++++++++++++++
 2 files changed, 440 insertions(+)
 create mode 100644 examples/measure_widths.rs

diff --git a/Cargo.toml b/Cargo.toml
index ecf6494..5b3afd6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,3 +8,6 @@ crossterm = "0.23.2"
 unicode-linebreak = "0.1.2"
 unicode-segmentation = "1.9.0"
 unicode-width = "0.1.9"
+
+[dev-dependencies]
+unicode-blocks = "0.1.4"
diff --git a/examples/measure_widths.rs b/examples/measure_widths.rs
new file mode 100644
index 0000000..1a05d26
--- /dev/null
+++ b/examples/measure_widths.rs
@@ -0,0 +1,437 @@
+//! Compares the column widths predicted by `unicode-width` with the widths the
+//! terminal actually uses when printing each character.
+//!
+//! Every character of every listed Unicode block is printed at the start of a
+//! line on the alternate screen; the cursor column afterwards is taken as the
+//! width the terminal really used. Mismatches are written to stderr.
+//!
+//! NOTE(review): stderr shares the terminal with the alternate screen unless it
+//! is redirected, so presumably this is meant to be run with stderr sent to a
+//! file - confirm.
+
+use std::io;
+
+use crossterm::cursor::MoveTo;
+use crossterm::execute;
+use crossterm::style::Print;
+use crossterm::terminal::{Clear, ClearType, EnterAlternateScreen, LeaveAlternateScreen};
+use unicode_blocks::UnicodeBlock;
+use unicode_width::UnicodeWidthChar;
+
+use unicode_blocks as ub;
+
+/// Prints `c` at the start of row 1 and compares the resulting cursor column
+/// with the width predicted by `unicode-width`.
+///
+/// Characters for which [`UnicodeWidthChar::width`] returns `None` are skipped.
+/// A mismatch is reported on stderr as the decimal code point followed by the
+/// actual and the expected width.
+///
+/// # Errors
+///
+/// Returns any I/O error raised while writing to the terminal or while querying
+/// the cursor position.
+fn measure_width(c: char) -> io::Result<()> {
+    let predicted_width = match c.width() {
+        Some(width) => width,
+        None => return Ok(()),
+    };
+
+    execute!(
+        io::stdout(),
+        Clear(ClearType::CurrentLine),
+        MoveTo(0, 1),
+        Print(c),
+    )?;
+
+    // The character was printed starting at column 0, so the column the cursor
+    // ends up in is the number of cells the terminal advanced for it.
+    let (column, _row) = crossterm::cursor::position()?;
+    let actual_width = usize::from(column);
+    if predicted_width != actual_width {
+        eprintln!(
+            "{}: actual {actual_width}, expected {predicted_width}",
+            c as u32
+        );
+    }
+    Ok(())
+}
+
+/// Measures every character of `block`, showing the block name on row 0 while
+/// it is being processed.
+///
+/// Code points in the block's range that are not valid `char`s are reported on
+/// stderr and skipped.
+///
+/// # Errors
+///
+/// Returns the first I/O error raised while talking to the terminal.
+fn measure_widths(block: UnicodeBlock) -> io::Result<()> {
+    execute!(
+        io::stdout(),
+        Clear(ClearType::All),
+        MoveTo(0, 0),
+        Print(block.name()),
+        MoveTo(0, 1),
+    )?;
+
+    for c in block.start()..=block.end() {
+        match char::from_u32(c) {
+            Some(c) => measure_width(c)?,
+            None => eprintln!("{c}: error"),
+        }
+    }
+    Ok(())
+}
+
+/// Measures every Unicode block in the list below, in the order written.
+///
+/// NOTE(review): the surrogate blocks are commented out, presumably because
+/// none of their code points is a valid `char` - confirm.
+///
+/// # Errors
+///
+/// Stops at, and returns, the first I/O error from [`measure_widths`].
+fn measure_all_blocks() -> io::Result<()> {
+    let blocks = [
+        ub::ADLAM,
+        ub::AEGEAN_NUMBERS,
+        ub::AHOM,
+        ub::ALCHEMICAL_SYMBOLS,
+        ub::ALPHABETIC_PRESENTATION_FORMS,
+        ub::ANATOLIAN_HIEROGLYPHS,
+        ub::ANCIENT_GREEK_MUSICAL_NOTATION,
+        ub::ANCIENT_GREEK_NUMBERS,
+        ub::ANCIENT_SYMBOLS,
+        ub::ARABIC,
+        ub::ARABIC_EXTENDED_A,
+        ub::ARABIC_EXTENDED_B,
+        ub::ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
+        ub::ARABIC_PRESENTATION_FORMS_A,
+        ub::ARABIC_PRESENTATION_FORMS_B,
+        ub::ARABIC_SUPPLEMENT,
+        ub::ARMENIAN,
+        ub::ARROWS,
+        ub::AVESTAN,
+        ub::BALINESE,
+        ub::BAMUM,
+        ub::BAMUM_SUPPLEMENT,
+        ub::BASIC_LATIN,
+        ub::BASSA_VAH,
+        ub::BATAK,
+        ub::BENGALI,
+        ub::BHAIKSUKI,
+        ub::BLOCK_ELEMENTS,
+        ub::BOPOMOFO,
+        ub::BOPOMOFO_EXTENDED,
+        ub::BOX_DRAWING,
+        ub::BRAHMI,
+        ub::BRAILLE_PATTERNS,
+        ub::BUGINESE,
+        ub::BUHID,
+        ub::BYZANTINE_MUSICAL_SYMBOLS,
+        ub::CARIAN,
+        ub::CAUCASIAN_ALBANIAN,
+        ub::CHAKMA,
+        ub::CHAM,
+        ub::CHEROKEE,
+        ub::CHEROKEE_SUPPLEMENT,
+        ub::CHESS_SYMBOLS,
+        ub::CHORASMIAN,
+        ub::CJK_COMPATIBILITY,
+        ub::CJK_COMPATIBILITY_FORMS,
+        ub::CJK_COMPATIBILITY_IDEOGRAPHS,
+        ub::CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
+        ub::CJK_RADICALS_SUPPLEMENT,
+        ub::CJK_STROKES,
+        ub::CJK_SYMBOLS_AND_PUNCTUATION,
+        ub::CJK_UNIFIED_IDEOGRAPHS,
+        ub::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
+        ub::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
+        ub::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
+        ub::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
+        ub::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
+        ub::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
+        ub::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G,
+        ub::COMBINING_DIACRITICAL_MARKS,
+        ub::COMBINING_DIACRITICAL_MARKS_EXTENDED,
+        ub::COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS,
+        ub::COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
+        ub::COMBINING_HALF_MARKS,
+        ub::COMMON_INDIC_NUMBER_FORMS,
+        ub::CONTROL_PICTURES,
+        ub::COPTIC,
+        ub::COPTIC_EPACT_NUMBERS,
+        ub::COUNTING_ROD_NUMERALS,
+        ub::CUNEIFORM,
+        ub::CUNEIFORM_NUMBERS_AND_PUNCTUATION,
+        ub::CURRENCY_SYMBOLS,
+        ub::CYPRIOT_SYLLABARY,
+        ub::CYPRO_MINOAN,
+        ub::CYRILLIC,
+        ub::CYRILLIC_EXTENDED_A,
+        ub::CYRILLIC_EXTENDED_B,
+        ub::CYRILLIC_EXTENDED_C,
+        ub::CYRILLIC_SUPPLEMENT,
+        ub::DESERET,
+        ub::DEVANAGARI,
+        ub::DEVANAGARI_EXTENDED,
+        ub::DINGBATS,
+        ub::DIVES_AKURU,
+        ub::DOGRA,
+        ub::DOMINO_TILES,
+        ub::DUPLOYAN,
+        ub::EARLY_DYNASTIC_CUNEIFORM,
+        ub::EGYPTIAN_HIEROGLYPHS,
+        ub::EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS,
+        ub::ELBASAN,
+        ub::ELYMAIC,
+        ub::EMOTICONS,
+        ub::ENCLOSED_ALPHANUMERICS,
+        ub::ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
+        ub::ENCLOSED_CJK_LETTERS_AND_MONTHS,
+        ub::ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
+        ub::ETHIOPIC,
+        ub::ETHIOPIC_EXTENDED,
+        ub::ETHIOPIC_EXTENDED_A,
+        ub::ETHIOPIC_EXTENDED_B,
+        ub::ETHIOPIC_SUPPLEMENT,
+        ub::GENERAL_PUNCTUATION,
+        ub::GEOMETRIC_SHAPES,
+        ub::GEOMETRIC_SHAPES_EXTENDED,
+        ub::GEORGIAN,
+        ub::GEORGIAN_EXTENDED,
+        ub::GEORGIAN_SUPPLEMENT,
+        ub::GLAGOLITIC,
+        ub::GLAGOLITIC_SUPPLEMENT,
+        ub::GOTHIC,
+        ub::GRANTHA,
+        ub::GREEK_AND_COPTIC,
+        ub::GREEK_EXTENDED,
+        ub::GUJARATI,
+        ub::GUNJALA_GONDI,
+        ub::GURMUKHI,
+        ub::HALFWIDTH_AND_FULLWIDTH_FORMS,
+        ub::HANGUL_COMPATIBILITY_JAMO,
+        ub::HANGUL_JAMO,
+        ub::HANGUL_JAMO_EXTENDED_A,
+        ub::HANGUL_JAMO_EXTENDED_B,
+        ub::HANGUL_SYLLABLES,
+        ub::HANIFI_ROHINGYA,
+        ub::HANUNOO,
+        ub::HATRAN,
+        ub::HEBREW,
+        // ub::HIGH_PRIVATE_USE_SURROGATES,
+        // ub::HIGH_SURROGATES,
+        ub::HIRAGANA,
+        ub::IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
+        ub::IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
+        ub::IMPERIAL_ARAMAIC,
+        ub::INDIC_SIYAQ_NUMBERS,
+        ub::INSCRIPTIONAL_PAHLAVI,
+        ub::INSCRIPTIONAL_PARTHIAN,
+        ub::IPA_EXTENSIONS,
+        ub::JAVANESE,
+        ub::KAITHI,
+        ub::KANA_EXTENDED_A,
+        ub::KANA_EXTENDED_B,
+        ub::KANA_SUPPLEMENT,
+        ub::KANBUN,
+        ub::KANGXI_RADICALS,
+        ub::KANNADA,
+        ub::KATAKANA,
+        ub::KATAKANA_PHONETIC_EXTENSIONS,
+        ub::KAYAH_LI,
+        ub::KHAROSHTHI,
+        ub::KHITAN_SMALL_SCRIPT,
+        ub::KHMER,
+        ub::KHMER_SYMBOLS,
+        ub::KHOJKI,
+        ub::KHUDAWADI,
+        ub::LAO,
+        ub::LATIN_1_SUPPLEMENT,
+        ub::LATIN_EXTENDED_A,
+        ub::LATIN_EXTENDED_ADDITIONAL,
+        ub::LATIN_EXTENDED_B,
+        ub::LATIN_EXTENDED_C,
+        ub::LATIN_EXTENDED_D,
+        ub::LATIN_EXTENDED_E,
+        ub::LATIN_EXTENDED_F,
+        ub::LATIN_EXTENDED_G,
+        ub::LEPCHA,
+        ub::LETTERLIKE_SYMBOLS,
+        ub::LIMBU,
+        ub::LINEAR_A,
+        ub::LINEAR_B_IDEOGRAMS,
+        ub::LINEAR_B_SYLLABARY,
+        ub::LISU,
+        ub::LISU_SUPPLEMENT,
+        // ub::LOW_SURROGATES,
+        ub::LYCIAN,
+        ub::LYDIAN,
+        ub::MAHAJANI,
+        ub::MAHJONG_TILES,
+        ub::MAKASAR,
+        ub::MALAYALAM,
+        ub::MANDAIC,
+        ub::MANICHAEAN,
+        ub::MARCHEN,
+        ub::MASARAM_GONDI,
+        ub::MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
+        ub::MATHEMATICAL_OPERATORS,
+        ub::MAYAN_NUMERALS,
+        ub::MEDEFAIDRIN,
+        ub::MEETEI_MAYEK,
+        ub::MEETEI_MAYEK_EXTENSIONS,
+        ub::MENDE_KIKAKUI,
+        ub::MEROITIC_CURSIVE,
+        ub::MEROITIC_HIEROGLYPHS,
+        ub::MIAO,
+        ub::MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
+        ub::MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
+        ub::MISCELLANEOUS_SYMBOLS,
+        ub::MISCELLANEOUS_SYMBOLS_AND_ARROWS,
+        ub::MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
+        ub::MISCELLANEOUS_TECHNICAL,
+        ub::MODI,
+        ub::MODIFIER_TONE_LETTERS,
+        ub::MONGOLIAN,
+        ub::MONGOLIAN_SUPPLEMENT,
+        ub::MRO,
+        ub::MULTANI,
+        ub::MUSICAL_SYMBOLS,
+        ub::MYANMAR,
+        ub::MYANMAR_EXTENDED_A,
+        ub::MYANMAR_EXTENDED_B,
+        ub::NABATAEAN,
+        ub::NANDINAGARI,
+        ub::NEWA,
+        ub::NEW_TAI_LUE,
+        ub::NKO,
+        ub::NUMBER_FORMS,
+        ub::NUSHU,
+        ub::NYIAKENG_PUACHUE_HMONG,
+        ub::OGHAM,
+        ub::OLD_HUNGARIAN,
+        ub::OLD_ITALIC,
+        ub::OLD_NORTH_ARABIAN,
+        ub::OLD_PERMIC,
+        ub::OLD_PERSIAN,
+        ub::OLD_SOGDIAN,
+        ub::OLD_SOUTH_ARABIAN,
+        ub::OLD_TURKIC,
+        ub::OLD_UYGHUR,
+        ub::OL_CHIKI,
+        ub::OPTICAL_CHARACTER_RECOGNITION,
+        ub::ORIYA,
+        ub::ORNAMENTAL_DINGBATS,
+        ub::OSAGE,
+        ub::OSMANYA,
+        ub::OTTOMAN_SIYAQ_NUMBERS,
+        ub::PAHAWH_HMONG,
+        ub::PALMYRENE,
+        ub::PAU_CIN_HAU,
+        ub::PHAGS_PA,
+        ub::PHAISTOS_DISC,
+        ub::PHOENICIAN,
+        ub::PHONETIC_EXTENSIONS,
+        ub::PHONETIC_EXTENSIONS_SUPPLEMENT,
+        ub::PLAYING_CARDS,
+        ub::PRIVATE_USE_AREA,
+        ub::PSALTER_PAHLAVI,
+        ub::REJANG,
+        ub::RUMI_NUMERAL_SYMBOLS,
+        ub::RUNIC,
+        ub::SAMARITAN,
+        ub::SAURASHTRA,
+        ub::SHARADA,
+        ub::SHAVIAN,
+        ub::SHORTHAND_FORMAT_CONTROLS,
+        ub::SIDDHAM,
+        ub::SINHALA,
+        ub::SINHALA_ARCHAIC_NUMBERS,
+        ub::SMALL_FORM_VARIANTS,
+        ub::SMALL_KANA_EXTENSION,
+        ub::SOGDIAN,
+        ub::SORA_SOMPENG,
+        ub::SOYOMBO,
+        ub::SPACING_MODIFIER_LETTERS,
+        ub::SPECIALS,
+        ub::SUNDANESE,
+        ub::SUNDANESE_SUPPLEMENT,
+        ub::SUPERSCRIPTS_AND_SUBSCRIPTS,
+        ub::SUPPLEMENTAL_ARROWS_A,
+        ub::SUPPLEMENTAL_ARROWS_B,
+        ub::SUPPLEMENTAL_ARROWS_C,
+        ub::SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
+        ub::SUPPLEMENTAL_PUNCTUATION,
+        ub::SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
+        ub::SUPPLEMENTARY_PRIVATE_USE_AREA_A,
+        ub::SUPPLEMENTARY_PRIVATE_USE_AREA_B,
+        ub::SUTTON_SIGNWRITING,
+        ub::SYLOTI_NAGRI,
+        ub::SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A,
+        ub::SYMBOLS_FOR_LEGACY_COMPUTING,
+        ub::SYRIAC,
+        ub::SYRIAC_SUPPLEMENT,
+        ub::TAGALOG,
+        ub::TAGBANWA,
+        ub::TAGS,
+        ub::TAI_LE,
+        ub::TAI_THAM,
+        ub::TAI_VIET,
+        ub::TAI_XUAN_JING_SYMBOLS,
+        ub::TAKRI,
+        ub::TAMIL,
+        ub::TAMIL_SUPPLEMENT,
+        ub::TANGSA,
+        ub::TANGUT,
+        ub::TANGUT_COMPONENTS,
+        ub::TANGUT_SUPPLEMENT,
+        ub::TELUGU,
+        ub::THAANA,
+        ub::THAI,
+        ub::TIBETAN,
+        ub::TIFINAGH,
+        ub::TIRHUTA,
+        ub::TOTO,
+        ub::TRANSPORT_AND_MAP_SYMBOLS,
+        ub::UGARITIC,
+        ub::UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
+        ub::UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
+        ub::UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A,
+        ub::VAI,
+        ub::VARIATION_SELECTORS,
+        ub::VARIATION_SELECTORS_SUPPLEMENT,
+        ub::VEDIC_EXTENSIONS,
+        ub::VERTICAL_FORMS,
+        ub::VITHKUQI,
+        ub::WANCHO,
+        ub::WARANG_CITI,
+        ub::YEZIDI,
+        ub::YIJING_HEXAGRAM_SYMBOLS,
+        ub::YI_RADICALS,
+        ub::YI_SYLLABLES,
+        ub::ZANABAZAR_SQUARE,
+        ub::ZNAMENNY_MUSICAL_NOTATION,
+    ];
+
+    for block in blocks {
+        measure_widths(block)?;
+    }
+    Ok(())
+}
+
+/// Runs the measurement on the alternate screen and restores the primary screen
+/// afterwards, even when measuring failed part-way.
+fn main() -> io::Result<()> {
+    let mut stdout = io::stdout();
+    execute!(stdout, EnterAlternateScreen)?;
+
+    let measured = measure_all_blocks();
+
+    // Leave the alternate screen before surfacing any error so the message is
+    // not lost together with the alternate screen's contents.
+    let restored = execute!(stdout, LeaveAlternateScreen);
+    measured.and(restored)
+}