From 77a02116a64d300264277bfade5553a1a8d9f01d Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 20 Feb 2025 22:08:10 +0100 Subject: [PATCH] Fix grapheme width estimation I'm pretty sure it still breaks in lots of terminal emulators, but it works far better than what recent versions of the unicode_width crate were doing. --- CHANGELOG.md | 3 +++ src/widthdb.rs | 30 +++++++++++++++++++++++------- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13edb2a..e18e88a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,9 @@ Procedure when bumping the version number: ## Unreleased +### Fixed +- Rendering glitches, mainly related to emoji + ## v0.3.0 - 2024-11-06 ### Added diff --git a/src/widthdb.rs b/src/widthdb.rs index 9815190..53f20ec 100644 --- a/src/widthdb.rs +++ b/src/widthdb.rs @@ -6,7 +6,7 @@ use crossterm::style::Print; use crossterm::terminal::{Clear, ClearType}; use crossterm::QueueableCommand; use unicode_segmentation::UnicodeSegmentation; -use unicode_width::UnicodeWidthStr; +use unicode_width::UnicodeWidthChar; use crate::wrap; @@ -36,6 +36,26 @@ impl WidthDb { self.tab_width - (col % self.tab_width as usize) as u8 } + /// Estimate what our terminal emulator thinks the width of a grapheme is. + /// + /// Different terminal emulators are all broken in different ways, so this + /// method will never be able to give a correct solution. For that, the only + /// possible method is actually measuring. + /// + /// Instead, it implements a character-wise width calculation. The hope is + /// that dumb terminal emulators do something roughly like this, and smart + /// terminal emulators try to emulate dumb ones for compatibility. In + /// practice, this counting approach seems to be fairly robust. 
+ fn grapheme_width_estimate(grapheme: &str) -> u8 { + grapheme + .chars() + .filter(|c| !c.is_ascii_control()) + .flat_map(|c| c.width()) + .sum::<usize>() + .try_into() + .unwrap_or(u8::MAX) + } + /// Determine the width of a grapheme. /// /// If the grapheme is a tab, the column is used to determine its width. @@ -47,18 +67,14 @@ impl WidthDb { if grapheme == "\t" { return self.tab_width_at_column(col); } - if grapheme.chars().any(|c| c.is_ascii_control()) { - return 0; // See measure_widths function - } if !self.active { - return grapheme.width() as u8; + return Self::grapheme_width_estimate(grapheme); } - if let Some(width) = self.known.get(grapheme) { *width } else { self.requested.insert(grapheme.to_string()); - grapheme.width() as u8 + Self::grapheme_width_estimate(grapheme) } }