Wrap text in a unicode-aware way

This commit is contained in:
Joscha 2022-05-28 20:55:22 +02:00
parent 8fae7d2bf1
commit 37634139b0
5 changed files with 164 additions and 0 deletions

View file

@ -5,5 +5,6 @@ edition = "2021"
[dependencies] [dependencies]
crossterm = "0.23.2" crossterm = "0.23.2"
unicode-linebreak = "0.1.2"
unicode-segmentation = "1.9.0" unicode-segmentation = "1.9.0"
unicode-width = "0.1.9" unicode-width = "0.1.9"

60
examples/text_wrapping.rs Normal file
View file

@ -0,0 +1,60 @@
use crossterm::event::Event;
use crossterm::style::ContentStyle;
use toss::frame::{Frame, Pos};
use toss::terminal::{Redraw, Terminal};
/// Draws the demo paragraph onto the frame, wrapped to the frame's current
/// width, one wrapped line per row starting at the top left corner.
fn draw(f: &mut Frame) {
    let text = concat!(
        "This is a short paragraph in order to demonstrate unicode-aware word wrapping. ",
        "Resize your terminal to different widths to try it out. ",
        "After this sentence come two newlines, so it should always break here.\n",
        "\n",
        "Since the wrapping algorithm is aware of the Unicode Standard Annex #14, ",
        "it understands things like nonbreaking spaces: ",
        "This\u{00a0}sentence\u{00a0}is\u{00a0}separated\u{00a0}by\u{00a0}nonbreaking\u{00a0}spaces.\n",
        "\n",
        "It can also properly handle wide graphemes (like emoji 🤔), ",
        "including ones usually displayed incorrectly by terminal emulators, like 👩‍🔬 (a female scientist emoji).",
    );

    // TODO Actually use nbsp
    let max_width: usize = f.size().width.into();
    let breaks = f.wrap(text, max_width);

    // Each slice still carries the whitespace (including newlines) it was
    // broken after, so that is trimmed away before writing the row.
    for (row, line) in toss::split_at_indices(text, &breaks)
        .into_iter()
        .enumerate()
    {
        let pos = Pos::new(0, row as i32);
        f.write(pos, line.trim_end(), ContentStyle::default());
    }
}
/// Draws and presents frames until the terminal reports that no further
/// redraw is required.
fn render_frame(term: &mut Terminal) {
    loop {
        // Must be called before rendering, otherwise the terminal has
        // out-of-date size information and will present garbage.
        term.autoresize().unwrap();

        draw(term.frame());

        let redraw = term.present().unwrap();
        if redraw == Redraw::NotRequired {
            return;
        }
    }
}
/// Shows the wrapped demo text, re-rendering it whenever the terminal is
/// resized and exiting on any other event.
fn main() {
    // Automatically enters alternate screen and enables raw mode
    let mut term = Terminal::new().unwrap();

    loop {
        // Render and display a frame. A full frame is displayed on the
        // terminal once this function exits.
        render_frame(&mut term);

        // Resize events trigger another render, anything else (e.g. the user
        // pressing a button) exits.
        match crossterm::event::read().unwrap() {
            Event::Resize(_, _) => continue,
            _ => break,
        }
    }
}

View file

@ -5,6 +5,7 @@ use crossterm::style::ContentStyle;
use crate::buffer::Buffer; use crate::buffer::Buffer;
pub use crate::buffer::{Pos, Size}; pub use crate::buffer::{Pos, Size};
use crate::widthdb::WidthDB; use crate::widthdb::WidthDB;
use crate::wrap;
#[derive(Debug, Default)] #[derive(Debug, Default)]
pub struct Frame { pub struct Frame {
@ -55,6 +56,10 @@ impl Frame {
self.widthdb.width(s) self.widthdb.width(s)
} }
/// Word-wrap `text` to `width` columns.
///
/// Delegates to [`wrap::wrap`], passing along this frame's width database.
/// Returns the byte indices into `text` at which the text should be broken
/// into lines.
pub fn wrap(&mut self, text: &str, width: usize) -> Vec<usize> {
wrap::wrap(text, width, &mut self.widthdb)
}
pub fn write(&mut self, pos: Pos, content: &str, style: ContentStyle) { pub fn write(&mut self, pos: Pos, content: &str, style: ContentStyle) {
self.buffer.write(&mut self.widthdb, pos, content, style); self.buffer.write(&mut self.widthdb, pos, content, style);
} }

View file

@ -2,3 +2,6 @@ mod buffer;
pub mod frame; pub mod frame;
pub mod terminal; pub mod terminal;
mod widthdb; mod widthdb;
mod wrap;
pub use wrap::split_at_indices;

95
src/wrap.rs Normal file
View file

@ -0,0 +1,95 @@
//! Word wrapping for text.
use unicode_linebreak::BreakOpportunity;
use unicode_segmentation::UnicodeSegmentation;
use crate::widthdb::WidthDB;
// TODO Handle tabs separately?
// TODO Convert into an iterator?
/// Computes the byte indices at which `text` should be broken into lines so
/// that it fits into `width` columns.
///
/// Break opportunities are taken from [`unicode_linebreak::linebreaks`] and
/// widths are measured per grapheme cluster via `widthdb`. The rules are:
///
/// - Mandatory breaks (e.g. after a newline) are always included.
/// - If a grapheme does not fit onto the current line, the line is broken at
///   the most recent allowed break opportunity on that line.
/// - If the line has no break opportunity yet, a break is forced directly in
///   front of the grapheme that does not fit.
/// - A grapheme that is wider than `width` all by itself is placed at the
///   start of a line and allowed to overflow.
/// - Whitespace never triggers a break. It stays at the end of the line it
///   follows and may extend past `width`, so callers are expected to trim
///   trailing whitespace from each line before displaying it. Without this
///   rule, a word filling a line exactly would push the following space onto
///   a line of its own.
///
/// The returned indices are in ascending order, lie on grapheme boundaries
/// and can be passed directly to [`split_at_indices`]. The end of the text is
/// never included as a break.
pub fn wrap(text: &str, width: usize, widthdb: &mut WidthDB) -> Vec<usize> {
    let mut breaks = vec![];

    let mut break_options = unicode_linebreak::linebreaks(text).peekable();

    // The last valid break point encountered on the current line and the
    // width of the line in front of that break point
    let mut valid_break = None;
    let mut valid_break_width = 0;

    // Width of the current line in front of the current grapheme
    let mut current_width = 0;

    for (gi, g) in text.grapheme_indices(true) {
        // Advance break options until the next one at or after this grapheme.
        // There always is one because the end of the text is reported as a
        // break option.
        let (bi, b) = loop {
            let (bi, b) = break_options.peek().expect("not at end of string yet");
            if *bi < gi {
                break_options.next();
            } else {
                break (*bi, b);
            }
        };

        // Evaluate break options at the current position
        if bi == gi {
            match b {
                BreakOpportunity::Mandatory => {
                    breaks.push(bi);
                    valid_break = None;
                    valid_break_width = 0;
                    current_width = 0;
                }
                BreakOpportunity::Allowed => {
                    valid_break = Some(bi);
                    valid_break_width = current_width;
                }
            }
        }

        let grapheme_width: usize = widthdb.grapheme_width(g).into();

        // Whitespace is expected to be trimmed from the end of a line, so it
        // must not cause a break by itself. If it turns out not to be at the
        // end of the line, the next non-whitespace grapheme triggers the
        // break instead.
        let is_whitespace = g.chars().all(char::is_whitespace);

        if !is_whitespace && current_width + grapheme_width > width {
            if current_width == 0 {
                // The grapheme is wider than the maximum width, so we'll allow
                // it, thereby forcing the following grapheme to break no matter
                // what (either because of a mandatory or allowed break, or via
                // a forced break).
            } else if let Some(bi) = valid_break {
                // We can't fit the grapheme onto the current line, so we'll
                // just break at the last valid break point. Everything after
                // that break point moves onto the new line.
                breaks.push(bi);
                current_width -= valid_break_width;
                valid_break = None;
                valid_break_width = 0;
            } else {
                // Forced break in the middle of a normally non-breakable chunk
                // because there have been no valid break points yet.
                breaks.push(gi);
                valid_break = None;
                valid_break_width = 0;
                current_width = 0;
            }
        }

        current_width += grapheme_width;
    }

    breaks
}
/// Splits `s` into consecutive slices at the given byte indices.
///
/// `indices` must be in ascending order and every index must lie on a char
/// boundary no greater than `s.len()`, otherwise this function panics. The
/// result always contains exactly `indices.len() + 1` slices which, when
/// concatenated, yield `s` again. Slices may be empty, for example if an index
/// is `0`, equal to `s.len()`, or repeated.
pub fn split_at_indices<'a>(s: &'a str, indices: &[usize]) -> Vec<&'a str> {
    let mut pieces = Vec::with_capacity(indices.len() + 1);

    // Start of the piece that is currently being cut out of `s`
    let mut start = 0;
    for &end in indices {
        pieces.push(&s[start..end]);
        start = end;
    }

    // Whatever remains after the last index forms the final piece
    pieces.push(&s[start..]);
    pieces
}