Improve word wrapping

Now supports long trailing whitespace as well as tabs.
This commit is contained in:
Joscha 2022-07-04 18:56:52 +02:00
parent 9b0d80873f
commit 11b2211fad
5 changed files with 106 additions and 41 deletions

View file

@ -14,7 +14,20 @@ fn draw(f: &mut Frame) {
"This\u{00a0}sentence\u{00a0}is\u{00a0}separated\u{00a0}by\u{00a0}non-\u{2060}breaking\u{00a0}spaces.\n", "This\u{00a0}sentence\u{00a0}is\u{00a0}separated\u{00a0}by\u{00a0}non-\u{2060}breaking\u{00a0}spaces.\n",
"\n", "\n",
"It can also properly handle wide graphemes (like emoji 🤔), ", "It can also properly handle wide graphemes (like emoji 🤔), ",
"including ones usually displayed incorrectly by terminal emulators, like 👩‍🔬 (a female scientist emoji).", "including ones usually displayed incorrectly by terminal emulators, like 👩‍🔬 (a female scientist emoji).\n",
"\n",
"Finally, tabs are supported as well. ",
"The following text is rendered with a tab width of 4:\n",
"\tx\n",
"1\tx\n",
"12\tx\n",
"123\tx\n",
"1234\tx\n",
"12345\tx\n",
"123456\tx\n",
"1234567\tx\n",
"12345678\tx\n",
"123456789\tx\n",
); );
let breaks = f.wrap(text, f.size().width.into()); let breaks = f.wrap(text, f.size().width.into());
@ -46,6 +59,7 @@ fn main() {
// Automatically enters alternate screen and enables raw mode // Automatically enters alternate screen and enables raw mode
let mut term = Terminal::new().unwrap(); let mut term = Terminal::new().unwrap();
term.set_measuring(true); term.set_measuring(true);
term.set_tab_width(4);
loop { loop {
// Render and display a frame. A full frame is displayed on the terminal // Render and display a frame. A full frame is displayed on the terminal

View file

@ -1,8 +1,8 @@
use crossterm::style::ContentStyle; use crossterm::style::ContentStyle;
use unicode_segmentation::UnicodeSegmentation;
use crate::styled::Styled; use crate::styled::Styled;
use crate::widthdb::WidthDB; use crate::widthdb::WidthDB;
use crate::wrap;
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct Size { pub struct Size {
@ -128,25 +128,31 @@ impl Buffer {
} }
} }
pub fn write(&mut self, widthdb: &mut WidthDB, mut pos: Pos, styled: &Styled) { pub fn write(&mut self, widthdb: &mut WidthDB, tab_width: u8, pos: Pos, styled: &Styled) {
// If we're not even visible, there's nothing to do // If we're not even visible, there's nothing to do
if pos.y < 0 || pos.y >= self.size.height as i32 { if pos.y < 0 || pos.y >= self.size.height as i32 {
return; return;
} }
let y = pos.y as u16; let y = pos.y as u16;
let mut col: usize = 0;
for styled_grapheme in styled.styled_graphemes() { for styled_grapheme in styled.styled_graphemes() {
let width = widthdb.grapheme_width(styled_grapheme.content()); let x = pos.x + col as i32;
if width > 0 { let g = *styled_grapheme.content();
self.write_grapheme( let style = *styled_grapheme.style();
pos.x, if g == "\t" {
y, let width = wrap::tab_width_at_column(tab_width, col);
width, col += width as usize;
styled_grapheme.content(), for dx in 0..width {
*styled_grapheme.style(), self.write_grapheme(x + dx as i32, y, width, " ", style);
); }
} else {
let width = widthdb.grapheme_width(g);
col += width as usize;
if width > 0 {
self.write_grapheme(x, y, width, g, style);
}
} }
pos.x += width as i32;
} }
} }

View file

@ -1,18 +1,28 @@
//! Rendering the next frame. //! Rendering the next frame.
use crossterm::style::ContentStyle;
use crate::buffer::Buffer; use crate::buffer::Buffer;
pub use crate::buffer::{Pos, Size}; pub use crate::buffer::{Pos, Size};
use crate::styled::Styled; use crate::styled::Styled;
use crate::widthdb::WidthDB; use crate::widthdb::WidthDB;
use crate::wrap; use crate::wrap;
#[derive(Debug, Default)] #[derive(Debug)]
pub struct Frame { pub struct Frame {
pub(crate) widthdb: WidthDB, pub(crate) widthdb: WidthDB,
pub(crate) buffer: Buffer, pub(crate) buffer: Buffer,
cursor: Option<Pos>, cursor: Option<Pos>,
pub(crate) tab_width: u8,
}
impl Default for Frame {
fn default() -> Self {
Self {
widthdb: Default::default(),
buffer: Default::default(),
cursor: None,
tab_width: 8,
}
}
} }
impl Frame { impl Frame {
@ -58,10 +68,11 @@ impl Frame {
} }
pub fn wrap(&mut self, text: &str, width: usize) -> Vec<usize> { pub fn wrap(&mut self, text: &str, width: usize) -> Vec<usize> {
wrap::wrap(text, width, &mut self.widthdb) wrap::wrap(&mut self.widthdb, self.tab_width, text, width)
} }
/// Converts `styled` into a `Styled` and writes it to the frame's buffer at
/// `pos`, using this frame's width database and tab width.
pub fn write<S: Into<Styled>>(&mut self, pos: Pos, styled: S) {
    let styled: Styled = styled.into();
    let tab_width = self.tab_width;
    self.buffer.write(&mut self.widthdb, tab_width, pos, &styled);
}
} }

View file

@ -59,6 +59,14 @@ impl Terminal {
Ok(()) Ok(())
} }
/// Sets the tab width stored on this terminal's frame.
///
/// The frame forwards this value when wrapping and writing text.
///
/// NOTE(review): tab stops are computed as `col % tab_width`, so callers
/// should avoid passing 0 unless `wrap::tab_width_at_column` is known to
/// handle a zero tab width.
pub fn set_tab_width(&mut self, tab_width: u8) {
self.frame.tab_width = tab_width;
}
/// Returns the tab width currently stored on this terminal's frame.
pub fn tab_width(&self) -> u8 {
self.frame.tab_width
}
pub fn set_measuring(&mut self, active: bool) { pub fn set_measuring(&mut self, active: bool) {
self.frame.widthdb.active = active; self.frame.widthdb.active = active;
} }

View file

@ -5,9 +5,11 @@ use unicode_segmentation::UnicodeSegmentation;
use crate::widthdb::WidthDB; use crate::widthdb::WidthDB;
/// Returns how many columns a tab occupies when it starts at column `col`.
///
/// Tab stops sit at every multiple of `tab_width`; the result is the distance
/// from `col` to the next stop, so it is always in `1..=tab_width` for a
/// non-zero `tab_width` (a tab starting exactly on a stop spans a full
/// `tab_width`).
///
/// A `tab_width` of 0 yields 0 (the tab takes up no columns) instead of
/// panicking on a remainder-by-zero.
pub fn tab_width_at_column(tab_width: u8, col: usize) -> u8 {
    if tab_width == 0 {
        return 0;
    }
    // The remainder is strictly less than `tab_width`, so it fits in a `u8`
    // and the subtraction cannot underflow.
    let offset = (col % usize::from(tab_width)) as u8;
    tab_width - offset
}
pub fn wrap(widthdb: &mut WidthDB, tab_width: u8, text: &str, width: usize) -> Vec<usize> {
let mut breaks = vec![]; let mut breaks = vec![];
let mut break_options = unicode_linebreak::linebreaks(text).peekable(); let mut break_options = unicode_linebreak::linebreaks(text).peekable();
@ -16,8 +18,10 @@ pub fn wrap(text: &str, width: usize, widthdb: &mut WidthDB) -> Vec<usize> {
let mut valid_break = None; let mut valid_break = None;
let mut valid_break_width = 0; let mut valid_break_width = 0;
// Width of the line at the current grapheme // Width of the line at the current grapheme (with and without trailing
// whitespace)
let mut current_width = 0; let mut current_width = 0;
let mut current_width_trimmed = 0;
for (gi, g) in text.grapheme_indices(true) { for (gi, g) in text.grapheme_indices(true) {
// Advance break options // Advance break options
@ -38,6 +42,7 @@ pub fn wrap(text: &str, width: usize, widthdb: &mut WidthDB) -> Vec<usize> {
valid_break = None; valid_break = None;
valid_break_width = 0; valid_break_width = 0;
current_width = 0; current_width = 0;
current_width_trimmed = 0;
} }
BreakOpportunity::Allowed => { BreakOpportunity::Allowed => {
valid_break = Some(bi); valid_break = Some(bi);
@ -46,31 +51,52 @@ pub fn wrap(text: &str, width: usize, widthdb: &mut WidthDB) -> Vec<usize> {
} }
} }
let grapheme_width: usize = widthdb.grapheme_width(g).into(); // Calculate widths after current grapheme
if current_width + grapheme_width > width { let g_width = if g == "\t" {
if current_width == 0 { tab_width_at_column(tab_width, current_width) as usize
// The grapheme is wider than the maximum width, so we'll allow } else {
// it, thereby forcing the following grapheme to break no matter widthdb.grapheme_width(g) as usize
// what (either because of a mandatory or allowed break, or via };
// a forced break). let mut new_width = current_width + g_width;
} else if let Some(bi) = valid_break { let mut new_width_trimmed = if g.chars().all(|c| c.is_whitespace()) {
// We can't fit the grapheme onto the current line, so we'll current_width_trimmed
// just break at the last valid break point. } else {
new_width
};
// Wrap at last break point if necessary
if new_width_trimmed > width {
if let Some(bi) = valid_break {
breaks.push(bi); breaks.push(bi);
current_width -= valid_break_width; new_width -= valid_break_width;
new_width_trimmed = new_width_trimmed.saturating_sub(valid_break_width);
valid_break = None; valid_break = None;
valid_break_width = 0; valid_break_width = 0;
} else {
// Forced break in the middle of a normally non-breakable chunk
// because there have been no valid break points yet.
breaks.push(gi);
valid_break = None;
valid_break_width = 0;
current_width = 0;
} }
} }
current_width += grapheme_width; // Perform a forced break if still necessary
if new_width_trimmed > width {
if new_width == g_width {
// The grapheme is the only thing on the current line and it is
// wider than the maximum width, so we'll allow it, thereby
// forcing the following grapheme to break no matter what
// (either because of a mandatory or allowed break, or via a
// forced break).
} else {
// Forced break in the middle of a normally non-breakable chunk
// because there are no valid break points.
breaks.push(gi);
new_width = 0;
new_width_trimmed = 0;
valid_break = None;
valid_break_width = 0;
}
}
// Update current width
current_width = new_width;
current_width_trimmed = new_width_trimmed;
} }
breaks breaks