Add unicode-based grapheme width estimation method
This commit is contained in:
parent
be7eff0979
commit
423dd100c1
3 changed files with 73 additions and 35 deletions
|
|
@ -13,6 +13,9 @@ Procedure when bumping the version number:
|
||||||
|
|
||||||
## Unreleased
|
## Unreleased
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- Unicode-based grapheme width estimation method
|
||||||
|
|
||||||
## v0.3.1 - 2025-02-21
|
## v0.3.1 - 2025-02-21
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ use crossterm::terminal::{
|
||||||
use crossterm::{ExecutableCommand, QueueableCommand};
|
use crossterm::{ExecutableCommand, QueueableCommand};
|
||||||
|
|
||||||
use crate::buffer::Buffer;
|
use crate::buffer::Buffer;
|
||||||
use crate::{AsyncWidget, Frame, Size, Widget, WidthDb};
|
use crate::{AsyncWidget, Frame, Size, Widget, WidthDb, WidthEstimationMethod};
|
||||||
|
|
||||||
/// Wrapper that manages terminal output.
|
/// Wrapper that manages terminal output.
|
||||||
///
|
///
|
||||||
|
|
@ -112,11 +112,25 @@ impl Terminal {
|
||||||
self.frame.widthdb.tab_width
|
self.frame.widthdb.tab_width
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the grapheme width estimation method.
|
||||||
|
///
|
||||||
|
/// For more details, see [`WidthEstimationMethod`].
|
||||||
|
pub fn set_width_estimation_method(&mut self, method: WidthEstimationMethod) {
|
||||||
|
self.frame.widthdb.estimate = method;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The grapheme width estimation method.
|
||||||
|
///
|
||||||
|
/// For more details, see [`WidthEstimationMethod`].
|
||||||
|
pub fn width_estimation_method(&mut self) -> WidthEstimationMethod {
|
||||||
|
self.frame.widthdb.estimate
|
||||||
|
}
|
||||||
|
|
||||||
/// Enable or disable grapheme width measurements.
|
/// Enable or disable grapheme width measurements.
|
||||||
///
|
///
|
||||||
/// For more details, see [`Self::measuring`].
|
/// For more details, see [`Self::measuring`].
|
||||||
pub fn set_measuring(&mut self, active: bool) {
|
pub fn set_measuring(&mut self, active: bool) {
|
||||||
self.frame.widthdb.active = active;
|
self.frame.widthdb.measure = active;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Whether grapheme widths should be measured or estimated.
|
/// Whether grapheme widths should be measured or estimated.
|
||||||
|
|
@ -135,7 +149,7 @@ impl Terminal {
|
||||||
/// Standard Annex #11. This usually works fine, but may break on some emoji
|
/// Standard Annex #11. This usually works fine, but may break on some emoji
|
||||||
/// or other less commonly used character sequences.
|
/// or other less commonly used character sequences.
|
||||||
pub fn measuring(&self) -> bool {
|
pub fn measuring(&self) -> bool {
|
||||||
self.frame.widthdb.active
|
self.frame.widthdb.measure
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Whether any unmeasured graphemes were seen since the last call to
|
/// Whether any unmeasured graphemes were seen since the last call to
|
||||||
|
|
|
||||||
|
|
@ -6,14 +6,31 @@ use crossterm::style::Print;
|
||||||
use crossterm::terminal::{Clear, ClearType};
|
use crossterm::terminal::{Clear, ClearType};
|
||||||
use crossterm::QueueableCommand;
|
use crossterm::QueueableCommand;
|
||||||
use unicode_segmentation::UnicodeSegmentation;
|
use unicode_segmentation::UnicodeSegmentation;
|
||||||
use unicode_width::UnicodeWidthChar;
|
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
|
||||||
|
|
||||||
use crate::wrap;
|
use crate::wrap;
|
||||||
|
|
||||||
|
/// Strategy used to estimate the width of a grapheme when no measured width
/// is available for it.
///
/// The default is [`WidthEstimationMethod::Legacy`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum WidthEstimationMethod {
    /// Estimate the width of a grapheme using legacy methods.
    ///
    /// The widths of the grapheme's individual characters are summed, with
    /// ASCII control characters ignored.
    ///
    /// Different terminal emulators all use different approaches to determine
    /// grapheme widths, so this method will never be able to give a fully
    /// correct solution. For that, the only possible approach is measuring the
    /// actual grapheme width.
    #[default]
    Legacy,

    /// Estimate the width of a grapheme using the unicode standard in a
    /// best-effort manner.
    ///
    /// The width is computed on the grapheme as a string rather than
    /// character by character. Newlines contribute no width.
    Unicode,
}
|
||||||
|
|
||||||
/// Measures and stores the width (in terminal coordinates) of graphemes.
|
/// Measures and stores the width (in terminal coordinates) of graphemes.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct WidthDb {
|
pub struct WidthDb {
|
||||||
pub(crate) active: bool,
|
pub(crate) estimate: WidthEstimationMethod,
|
||||||
|
pub(crate) measure: bool,
|
||||||
pub(crate) tab_width: u8,
|
pub(crate) tab_width: u8,
|
||||||
known: HashMap<String, u8>,
|
known: HashMap<String, u8>,
|
||||||
requested: HashSet<String>,
|
requested: HashSet<String>,
|
||||||
|
|
@ -22,7 +39,8 @@ pub struct WidthDb {
|
||||||
impl Default for WidthDb {
|
impl Default for WidthDb {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self {
|
Self {
|
||||||
active: false,
|
estimate: WidthEstimationMethod::default(),
|
||||||
|
measure: false,
|
||||||
tab_width: 8,
|
tab_width: 8,
|
||||||
known: Default::default(),
|
known: Default::default(),
|
||||||
requested: Default::default(),
|
requested: Default::default(),
|
||||||
|
|
@ -36,26 +54,6 @@ impl WidthDb {
|
||||||
self.tab_width - (col % self.tab_width as usize) as u8
|
self.tab_width - (col % self.tab_width as usize) as u8
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Estimate what our terminal emulator thinks the width of a grapheme is.
|
|
||||||
///
|
|
||||||
/// Different terminal emulators are all broken in different ways, so this
|
|
||||||
/// method will never be able to give a correct solution. For that, the only
|
|
||||||
/// possible method is actually measuring.
|
|
||||||
///
|
|
||||||
/// Instead, it implements a character-wise width calculation. The hope is
|
|
||||||
/// that dumb terminal emulators do something roughly like this, and smart
|
|
||||||
/// terminal emulators try to emulate dumb ones for compatibility. In
|
|
||||||
/// practice, this counting approach seems to be fairly robust.
|
|
||||||
fn grapheme_width_estimate(grapheme: &str) -> u8 {
|
|
||||||
grapheme
|
|
||||||
.chars()
|
|
||||||
.filter(|c| !c.is_ascii_control())
|
|
||||||
.flat_map(|c| c.width())
|
|
||||||
.sum::<usize>()
|
|
||||||
.try_into()
|
|
||||||
.unwrap_or(u8::MAX)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Determine the width of a grapheme.
|
/// Determine the width of a grapheme.
|
||||||
///
|
///
|
||||||
/// If the grapheme is a tab, the column is used to determine its width.
|
/// If the grapheme is a tab, the column is used to determine its width.
|
||||||
|
|
@ -67,14 +65,37 @@ impl WidthDb {
|
||||||
if grapheme == "\t" {
|
if grapheme == "\t" {
|
||||||
return self.tab_width_at_column(col);
|
return self.tab_width_at_column(col);
|
||||||
}
|
}
|
||||||
if !self.active {
|
|
||||||
return Self::grapheme_width_estimate(grapheme);
|
if self.measure {
|
||||||
}
|
|
||||||
if let Some(width) = self.known.get(grapheme) {
|
if let Some(width) = self.known.get(grapheme) {
|
||||||
*width
|
return *width;
|
||||||
} else {
|
}
|
||||||
self.requested.insert(grapheme.to_string());
|
self.requested.insert(grapheme.to_string());
|
||||||
Self::grapheme_width_estimate(grapheme)
|
}
|
||||||
|
|
||||||
|
match self.estimate {
|
||||||
|
// A character-wise width calculation is a simple and obvious
|
||||||
|
// approach to compute character widths. The idea is that dumb
|
||||||
|
// terminal emulators tend to do something roughly like this, and
|
||||||
|
// smart terminal emulators try to emulate dumb ones for
|
||||||
|
// compatibility. In practice, this approach seems to be fairly
|
||||||
|
// robust.
|
||||||
|
WidthEstimationMethod::Legacy => grapheme
|
||||||
|
.chars()
|
||||||
|
.filter(|c| !c.is_ascii_control())
|
||||||
|
.flat_map(|c| c.width())
|
||||||
|
.sum::<usize>()
|
||||||
|
.try_into()
|
||||||
|
.unwrap_or(u8::MAX),
|
||||||
|
|
||||||
|
// The unicode width crate considers newlines to have a width of 1
|
||||||
|
// while the rendering code expects it to have a width of 0.
|
||||||
|
WidthEstimationMethod::Unicode => grapheme
|
||||||
|
.split('\n')
|
||||||
|
.map(|s| s.width())
|
||||||
|
.sum::<usize>()
|
||||||
|
.try_into()
|
||||||
|
.unwrap_or(u8::MAX),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -107,7 +128,7 @@ impl WidthDb {
|
||||||
/// Whether any new graphemes have been seen since the last time
|
/// Whether any new graphemes have been seen since the last time
|
||||||
/// [`Self::measure_widths`] was called.
|
/// [`Self::measure_widths`] was called.
|
||||||
pub(crate) fn measuring_required(&self) -> bool {
|
pub(crate) fn measuring_required(&self) -> bool {
|
||||||
self.active && !self.requested.is_empty()
|
self.measure && !self.requested.is_empty()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Measure the width of all new graphemes that have been seen since the
|
/// Measure the width of all new graphemes that have been seen since the
|
||||||
|
|
@ -117,7 +138,7 @@ impl WidthDb {
|
||||||
/// the terminal. After it finishes, the terminal's contents should be
|
/// the terminal. After it finishes, the terminal's contents should be
|
||||||
/// assumed to be garbage and a full redraw should be performed.
|
/// assumed to be garbage and a full redraw should be performed.
|
||||||
pub(crate) fn measure_widths(&mut self, out: &mut impl Write) -> io::Result<()> {
|
pub(crate) fn measure_widths(&mut self, out: &mut impl Write) -> io::Result<()> {
|
||||||
if !self.active {
|
if !self.measure {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
for grapheme in self.requested.drain() {
|
for grapheme in self.requested.drain() {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue