Improve word wrapping

Now supports long trailing whitespace as well as tabs.
This commit is contained in:
Joscha 2022-07-04 18:56:52 +02:00
parent 9b0d80873f
commit 11b2211fad
5 changed files with 106 additions and 41 deletions

View file

@ -14,7 +14,20 @@ fn draw(f: &mut Frame) {
"This\u{00a0}sentence\u{00a0}is\u{00a0}separated\u{00a0}by\u{00a0}non-\u{2060}breaking\u{00a0}spaces.\n",
"\n",
"It can also properly handle wide graphemes (like emoji 🤔), ",
"including ones usually displayed incorrectly by terminal emulators, like 👩‍🔬 (a female scientist emoji).",
"including ones usually displayed incorrectly by terminal emulators, like 👩‍🔬 (a female scientist emoji).\n",
"\n",
"Finally, tabs are supported as well. ",
"The following text is rendered with a tab width of 4:\n",
"\tx\n",
"1\tx\n",
"12\tx\n",
"123\tx\n",
"1234\tx\n",
"12345\tx\n",
"123456\tx\n",
"1234567\tx\n",
"12345678\tx\n",
"123456789\tx\n",
);
let breaks = f.wrap(text, f.size().width.into());
@ -46,6 +59,7 @@ fn main() {
// Automatically enters alternate screen and enables raw mode
let mut term = Terminal::new().unwrap();
term.set_measuring(true);
term.set_tab_width(4);
loop {
// Render and display a frame. A full frame is displayed on the terminal

View file

@ -1,8 +1,8 @@
use crossterm::style::ContentStyle;
use unicode_segmentation::UnicodeSegmentation;
use crate::styled::Styled;
use crate::widthdb::WidthDB;
use crate::wrap;
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct Size {
@ -128,25 +128,31 @@ impl Buffer {
}
}
pub fn write(&mut self, widthdb: &mut WidthDB, mut pos: Pos, styled: &Styled) {
pub fn write(&mut self, widthdb: &mut WidthDB, tab_width: u8, pos: Pos, styled: &Styled) {
// If we're not even visible, there's nothing to do
if pos.y < 0 || pos.y >= self.size.height as i32 {
return;
}
let y = pos.y as u16;
let mut col: usize = 0;
for styled_grapheme in styled.styled_graphemes() {
let width = widthdb.grapheme_width(styled_grapheme.content());
if width > 0 {
self.write_grapheme(
pos.x,
y,
width,
styled_grapheme.content(),
*styled_grapheme.style(),
);
let x = pos.x + col as i32;
let g = *styled_grapheme.content();
let style = *styled_grapheme.style();
if g == "\t" {
let width = wrap::tab_width_at_column(tab_width, col);
col += width as usize;
for dx in 0..width {
self.write_grapheme(x + dx as i32, y, width, " ", style);
}
} else {
let width = widthdb.grapheme_width(g);
col += width as usize;
if width > 0 {
self.write_grapheme(x, y, width, g, style);
}
}
pos.x += width as i32;
}
}

View file

@ -1,18 +1,28 @@
//! Rendering the next frame.
use crossterm::style::ContentStyle;
use crate::buffer::Buffer;
pub use crate::buffer::{Pos, Size};
use crate::styled::Styled;
use crate::widthdb::WidthDB;
use crate::wrap;
#[derive(Debug, Default)]
#[derive(Debug)]
/// State used for rendering the next frame.
pub struct Frame {
    /// Width database handed to wrapping and to buffer writes for measuring
    /// graphemes.
    pub(crate) widthdb: WidthDB,
    /// Buffer that styled text is written into.
    pub(crate) buffer: Buffer,
    /// Cursor position, if one is set.
    cursor: Option<Pos>,
    /// Tab width handed to wrapping and to buffer writes.
    pub(crate) tab_width: u8,
}
impl Default for Frame {
    /// Builds a frame from the default width database and the default
    /// buffer, without a cursor and with a tab width of 8.
    fn default() -> Self {
        // Field types are inferred from the struct literal below.
        let (widthdb, buffer) = (Default::default(), Default::default());
        Self {
            widthdb,
            buffer,
            cursor: None,
            tab_width: 8,
        }
    }
}
impl Frame {
@ -58,10 +68,11 @@ impl Frame {
}
pub fn wrap(&mut self, text: &str, width: usize) -> Vec<usize> {
wrap::wrap(text, width, &mut self.widthdb)
wrap::wrap(&mut self.widthdb, self.tab_width, text, width)
}
pub fn write<S: Into<Styled>>(&mut self, pos: Pos, styled: S) {
self.buffer.write(&mut self.widthdb, pos, &styled.into());
self.buffer
.write(&mut self.widthdb, self.tab_width, pos, &styled.into());
}
}

View file

@ -59,6 +59,14 @@ impl Terminal {
Ok(())
}
/// Stores `tab_width` as the tab width of this terminal's frame.
pub fn set_tab_width(&mut self, tab_width: u8) {
    let frame = &mut self.frame;
    frame.tab_width = tab_width;
}
/// Returns the tab width currently stored on this terminal's frame.
pub fn tab_width(&self) -> u8 {
    let frame = &self.frame;
    frame.tab_width
}
/// Sets the `active` flag of the width database owned by this terminal's
/// frame.
pub fn set_measuring(&mut self, active: bool) {
    let widthdb = &mut self.frame.widthdb;
    widthdb.active = active;
}

View file

@ -5,9 +5,11 @@ use unicode_segmentation::UnicodeSegmentation;
use crate::widthdb::WidthDB;
// TODO Handle tabs separately?
// TODO Convert into an iterator?
pub fn wrap(text: &str, width: usize, widthdb: &mut WidthDB) -> Vec<usize> {
/// Returns how many columns a tab occupies when it starts at column `col`.
///
/// A tab advances to the next multiple of `tab_width`, so for a non-zero
/// `tab_width` the result lies in `1..=tab_width`.
///
/// A `tab_width` of zero means there are no tab stops; the tab then has a
/// width of zero instead of triggering a remainder-by-zero panic.
pub fn tab_width_at_column(tab_width: u8, col: usize) -> u8 {
    if tab_width == 0 {
        return 0;
    }

    // The remainder is strictly smaller than `tab_width`, so it fits into a
    // `u8` and the subtraction below cannot underflow.
    let offset = (col % usize::from(tab_width)) as u8;
    tab_width - offset
}
pub fn wrap(widthdb: &mut WidthDB, tab_width: u8, text: &str, width: usize) -> Vec<usize> {
let mut breaks = vec![];
let mut break_options = unicode_linebreak::linebreaks(text).peekable();
@ -16,8 +18,10 @@ pub fn wrap(text: &str, width: usize, widthdb: &mut WidthDB) -> Vec<usize> {
let mut valid_break = None;
let mut valid_break_width = 0;
// Width of the line at the current grapheme
// Width of the line at the current grapheme (with and without trailing
// whitespace)
let mut current_width = 0;
let mut current_width_trimmed = 0;
for (gi, g) in text.grapheme_indices(true) {
// Advance break options
@ -38,6 +42,7 @@ pub fn wrap(text: &str, width: usize, widthdb: &mut WidthDB) -> Vec<usize> {
valid_break = None;
valid_break_width = 0;
current_width = 0;
current_width_trimmed = 0;
}
BreakOpportunity::Allowed => {
valid_break = Some(bi);
@ -46,31 +51,52 @@ pub fn wrap(text: &str, width: usize, widthdb: &mut WidthDB) -> Vec<usize> {
}
}
let grapheme_width: usize = widthdb.grapheme_width(g).into();
if current_width + grapheme_width > width {
if current_width == 0 {
// The grapheme is wider than the maximum width, so we'll allow
// it, thereby forcing the following grapheme to break no matter
// what (either because of a mandatory or allowed break, or via
// a forced break).
} else if let Some(bi) = valid_break {
// We can't fit the grapheme onto the current line, so we'll
// just break at the last valid break point.
breaks.push(bi);
current_width -= valid_break_width;
valid_break = None;
valid_break_width = 0;
// Calculate widths after current grapheme
let g_width = if g == "\t" {
tab_width_at_column(tab_width, current_width) as usize
} else {
// Forced break in the middle of a normally non-breakable chunk
// because there have been no valid break points yet.
breaks.push(gi);
widthdb.grapheme_width(g) as usize
};
let mut new_width = current_width + g_width;
let mut new_width_trimmed = if g.chars().all(|c| c.is_whitespace()) {
current_width_trimmed
} else {
new_width
};
// Wrap at last break point if necessary
if new_width_trimmed > width {
if let Some(bi) = valid_break {
breaks.push(bi);
new_width -= valid_break_width;
new_width_trimmed = new_width_trimmed.saturating_sub(valid_break_width);
valid_break = None;
valid_break_width = 0;
current_width = 0;
}
}
current_width += grapheme_width;
// Perform a forced break if still necessary
if new_width_trimmed > width {
if new_width == g_width {
// The grapheme is the only thing on the current line and it is
// wider than the maximum width, so we'll allow it, thereby
// forcing the following grapheme to break no matter what
// (either because of a mandatory or allowed break, or via a
// forced break).
} else {
// Forced break in the middle of a normally non-breakable chunk
// because there are no valid break points.
breaks.push(gi);
new_width = 0;
new_width_trimmed = 0;
valid_break = None;
valid_break_width = 0;
}
}
// Update current width
current_width = new_width;
current_width_trimmed = new_width_trimmed;
}
breaks