Wrap text in a unicode-aware way
This commit is contained in:
parent
8fae7d2bf1
commit
37634139b0
5 changed files with 164 additions and 0 deletions
|
|
@ -5,5 +5,6 @@ edition = "2021"
|
|||
|
||||
[dependencies]
|
||||
crossterm = "0.23.2"
|
||||
unicode-linebreak = "0.1.2"
|
||||
unicode-segmentation = "1.9.0"
|
||||
unicode-width = "0.1.9"
|
||||
|
|
|
|||
60
examples/text_wrapping.rs
Normal file
60
examples/text_wrapping.rs
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
use crossterm::event::Event;
|
||||
use crossterm::style::ContentStyle;
|
||||
use toss::frame::{Frame, Pos};
|
||||
use toss::terminal::{Redraw, Terminal};
|
||||
|
||||
fn draw(f: &mut Frame) {
|
||||
let text = concat!(
|
||||
"This is a short paragraph in order to demonstrate unicode-aware word wrapping. ",
|
||||
"Resize your terminal to different widths to try it out. ",
|
||||
"After this sentence come two newlines, so it should always break here.\n",
|
||||
"\n",
|
||||
"Since the wrapping algorithm is aware of the Unicode Standard Annex #14, ",
|
||||
"it understands things like nonbreaking spaces: ",
|
||||
"This\u{00a0}sentence\u{00a0}is\u{00a0}separated\u{00a0}by\u{00a0}nonbreaking\u{00a0}spaces.\n",
|
||||
"\n",
|
||||
"It can also properly handle wide graphemes (like emoji 🤔), ",
|
||||
"including ones usually displayed incorrectly by terminal emulators, like 👩🔬 (a female scientist emoji).",
|
||||
);
|
||||
// TODO Actually use nbsp
|
||||
|
||||
let breaks = f.wrap(text, f.size().width.into());
|
||||
let lines = toss::split_at_indices(text, &breaks);
|
||||
for (i, line) in lines.iter().enumerate() {
|
||||
f.write(
|
||||
Pos::new(0, i as i32),
|
||||
line.trim_end(),
|
||||
ContentStyle::default(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn render_frame(term: &mut Terminal) {
|
||||
loop {
|
||||
// Must be called before rendering, otherwise the terminal has out-of-date
|
||||
// size information and will present garbage.
|
||||
term.autoresize().unwrap();
|
||||
|
||||
draw(term.frame());
|
||||
|
||||
if term.present().unwrap() == Redraw::NotRequired {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// Automatically enters alternate screen and enables raw mode
|
||||
let mut term = Terminal::new().unwrap();
|
||||
|
||||
loop {
|
||||
// Render and display a frame. A full frame is displayed on the terminal
|
||||
// once this function exits.
|
||||
render_frame(&mut term);
|
||||
|
||||
// Exit if the user presses any buttons
|
||||
if !matches!(crossterm::event::read().unwrap(), Event::Resize(_, _)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -5,6 +5,7 @@ use crossterm::style::ContentStyle;
|
|||
use crate::buffer::Buffer;
|
||||
pub use crate::buffer::{Pos, Size};
|
||||
use crate::widthdb::WidthDB;
|
||||
use crate::wrap;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Frame {
|
||||
|
|
@ -55,6 +56,10 @@ impl Frame {
|
|||
self.widthdb.width(s)
|
||||
}
|
||||
|
||||
pub fn wrap(&mut self, text: &str, width: usize) -> Vec<usize> {
|
||||
wrap::wrap(text, width, &mut self.widthdb)
|
||||
}
|
||||
|
||||
pub fn write(&mut self, pos: Pos, content: &str, style: ContentStyle) {
|
||||
self.buffer.write(&mut self.widthdb, pos, content, style);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,3 +2,6 @@ mod buffer;
|
|||
pub mod frame;
|
||||
pub mod terminal;
|
||||
mod widthdb;
|
||||
mod wrap;
|
||||
|
||||
pub use wrap::split_at_indices;
|
||||
|
|
|
|||
95
src/wrap.rs
Normal file
95
src/wrap.rs
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
//! Word wrapping for text.
|
||||
|
||||
use unicode_linebreak::BreakOpportunity;
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
use crate::widthdb::WidthDB;
|
||||
|
||||
// TODO Handle tabs separately?
|
||||
// TODO Convert into an iterator?
|
||||
pub fn wrap(text: &str, width: usize, widthdb: &mut WidthDB) -> Vec<usize> {
|
||||
let mut breaks = vec![];
|
||||
|
||||
let mut break_options = unicode_linebreak::linebreaks(text).peekable();
|
||||
|
||||
// The last valid break point encountered and its width
|
||||
let mut valid_break = None;
|
||||
let mut valid_break_width = 0;
|
||||
|
||||
// Width of the line at the current grapheme
|
||||
let mut current_width = 0;
|
||||
|
||||
for (gi, g) in text.grapheme_indices(true) {
|
||||
// Advance break options
|
||||
let (bi, b) = loop {
|
||||
let (bi, b) = break_options.peek().expect("not at end of string yet");
|
||||
if *bi < gi {
|
||||
break_options.next();
|
||||
} else {
|
||||
break (*bi, b);
|
||||
}
|
||||
};
|
||||
|
||||
// Evaluate break options at the current position
|
||||
if bi == gi {
|
||||
match b {
|
||||
BreakOpportunity::Mandatory => {
|
||||
breaks.push(bi);
|
||||
valid_break = None;
|
||||
valid_break_width = 0;
|
||||
current_width = 0;
|
||||
}
|
||||
BreakOpportunity::Allowed => {
|
||||
valid_break = Some(bi);
|
||||
valid_break_width = current_width;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let grapheme_width: usize = widthdb.grapheme_width(g).into();
|
||||
if current_width + grapheme_width > width {
|
||||
if current_width == 0 {
|
||||
// The grapheme is wider than the maximum width, so we'll allow
|
||||
// it, thereby forcing the following grapheme to break no matter
|
||||
// what (either because of a mandatory or allowed break, or via
|
||||
// a forced break).
|
||||
} else if let Some(bi) = valid_break {
|
||||
// We can't fit the grapheme onto the current line, so we'll
|
||||
// just break at the last valid break point.
|
||||
breaks.push(bi);
|
||||
current_width -= valid_break_width;
|
||||
valid_break = None;
|
||||
valid_break_width = 0;
|
||||
} else {
|
||||
// Forced break in the midde of a normally non-breakable chunk
|
||||
// because there have been no valid break points yet.
|
||||
breaks.push(gi);
|
||||
valid_break = None;
|
||||
valid_break_width = 0;
|
||||
current_width = 0;
|
||||
}
|
||||
}
|
||||
|
||||
current_width += grapheme_width;
|
||||
}
|
||||
|
||||
breaks
|
||||
}
|
||||
|
||||
/// Cuts `s` into consecutive slices at the given byte `indices`.
///
/// The indices are byte offsets into `s` and are expected in ascending order.
/// The result always contains exactly one more slice than there are indices:
/// the text before the first index, the text between each pair of
/// neighbouring indices, and the text after the last index.
///
/// # Panics
///
/// Panics if an index lies beyond the end of `s` or does not fall on a
/// `char` boundary.
pub fn split_at_indices<'a>(s: &'a str, indices: &[usize]) -> Vec<&'a str> {
    // `tail` is the part of `s` that has not been handed out yet and
    // `consumed` is the byte offset in `s` at which `tail` starts.
    let mut tail = s;
    let mut consumed = 0;

    let mut pieces: Vec<&'a str> = indices
        .iter()
        .map(|&index| {
            let (head, remainder) = tail.split_at(index - consumed);
            tail = remainder;
            consumed = index;
            head
        })
        .collect();

    pieces.push(tail);
    pieces
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue