Show more info and optionally links

This commit is contained in:
Joscha 2024-12-31 15:13:11 +01:00
parent ceb987bbbc
commit 693ae9eb81
4 changed files with 94 additions and 103 deletions

View file

@ -1,87 +0,0 @@
use std::{
collections::HashSet,
fs::File,
io::{self, BufReader},
path::Path,
};
use crate::{
data::{
adjacency_list::AdjacencyList,
info::{LinkInfo, PageInfo},
store,
},
util,
};
/// Collects the target index of every link leaving page `idx`.
///
/// The result is a set, so several links to the same target collapse into
/// a single entry.
fn links_from(data: &AdjacencyList<PageInfo, LinkInfo>, idx: u32) -> HashSet<u32> {
    let mut targets = HashSet::new();
    for (_, link) in data.links(idx) {
        targets.insert(link.to);
    }
    targets
}
/// Collects the index of every page that has at least one link to page `idx`.
///
/// The adjacency list only stores outgoing links, so every page's link list
/// is scanned to find the pages pointing at `idx`.
fn links_to(data: &AdjacencyList<PageInfo, LinkInfo>, idx: u32) -> HashSet<u32> {
    let mut sources = HashSet::new();
    for (pi, _) in data.pages() {
        // `any` short-circuits on the first matching link, so the remaining
        // links of a page that is already known to be a source are skipped.
        if data.links(pi).any(|(_, ld)| ld.to == idx) {
            sources.insert(pi);
        }
    }
    sources
}
/// Prints a headed listing of the pages in `links`, sorted by title.
///
/// The header shows `name` and the number of entries. Each following line
/// holds one page title, prefixed with `v` when the page is a redirect and
/// with `-` otherwise.
fn print_links(data: &AdjacencyList<PageInfo, LinkInfo>, name: &str, links: &HashSet<u32>) {
    let mut entries: Vec<(&str, bool)> = Vec::with_capacity(links.len());
    for &idx in links {
        let info = &data.page(idx).data;
        entries.push((&info.title as &str, info.redirect));
    }
    // Tuples order by title first, which gives the listing a stable order
    // regardless of the set's iteration order.
    entries.sort();

    println!(">> {name} ({}):", entries.len());
    for (title, is_redirect) in entries {
        let marker = if is_redirect { "v" } else { "-" };
        println!("{marker} {title}");
    }
}
/// Loads the adjacency list stored in `datafile`, looks up `page` (following
/// redirects) and prints its outgoing links, incoming links, and the pages
/// linked in both directions ("twins").
///
/// # Errors
///
/// Returns an error if the data file cannot be opened or read.
pub fn run(datafile: &Path, page: &str) -> io::Result<()> {
    println!(">> Import");
    let file = File::open(datafile)?;
    let mut reader = BufReader::new(file);
    let data = store::read_adjacency_list(&mut reader)?;

    println!(">> Locate page");
    let located = util::find_index_of_title(&data.pages, page);
    let idx = util::resolve_redirects(&data, located);
    println!("Page: {:?}", data.page(idx).data.title);

    println!(">> Find links");
    let from = links_from(&data, idx);
    let to = links_to(&data, idx);
    // Twins are pages that are both linked from and linking to the page.
    let twins: HashSet<u32> = from.intersection(&to).copied().collect();
    let twinless_from: HashSet<u32> = from.difference(&twins).copied().collect();
    let twinless_to: HashSet<u32> = to.difference(&twins).copied().collect();

    // Each section is preceded by a blank line.
    let sections = [
        ("From", &from),
        ("To", &to),
        ("Twins", &twins),
        ("From without twins", &twinless_from),
        ("To without twins", &twinless_to),
    ];
    for (name, links) in sections {
        println!();
        print_links(&data, name, links);
    }

    Ok(())
}

View file

@ -48,12 +48,7 @@ impl Cmd {
println!();
println!("Path found (cost {cost}, length {}):", path.len());
for page in path {
let info = &data.pages[page.usize()];
if info.redirect {
println!("v {:?}", info.title);
} else {
println!("- {:?}", info.title);
}
println!("{}", util::fmt_page(&data.pages[page.usize()]));
}
Ok(())

View file

@ -1,4 +1,4 @@
use std::{io, path::Path};
use std::{collections::HashSet, io, path::Path};
use thousands::Separable;
@ -11,6 +11,10 @@ use crate::{
// Command-line arguments, parsed through clap's derive API.
// Plain `//` comments are used here on purpose: clap turns `///` doc
// comments into help text, so adding those would change the program's output.
#[derive(Debug, clap::Parser)]
pub struct Cmd {
// NOTE(review): presumably the title of the page to inspect; the code that
// reads this field is outside the visible hunk, so confirm against the caller.
title: String,
/// Print links in more detail.
// When set, the twin, outlink-only and inlink-only pages are listed
// individually in addition to the summary counts.
#[arg(long, short)]
links: bool,
}
impl Cmd {
@ -49,25 +53,96 @@ impl Cmd {
page.length.separate_with_underscores()
);
let outlinks = data.graph.edge_slice(node).to_vec();
let inlinks = data
.graph
.edges()
.filter(|(_, target)| *target == node)
.map(|(source, _)| source)
.collect::<Vec<_>>();
let outlinks_set = outlinks.iter().copied().collect::<HashSet<_>>();
let inlinks_set = inlinks.iter().copied().collect::<HashSet<_>>();
let twins_set = outlinks_set
.intersection(&inlinks_set)
.copied()
.collect::<HashSet<_>>();
println!(
"{:>W_LABEL$}: {:>W_NUM$}",
"Links (out)",
data.graph
.edge_range(node)
.len()
.separate_with_underscores()
outlinks.len().separate_with_underscores()
);
println!(
"{:>W_LABEL$}: {:>W_NUM$}",
"unique",
outlinks_set.len().separate_with_underscores()
);
println!(
"{:>W_LABEL$}: {:>W_NUM$}",
"Links (in)",
data.graph
.edges()
.filter(|(_, target)| *target == node)
.count()
.separate_with_underscores()
inlinks.len().separate_with_underscores()
);
println!(
"{:>W_LABEL$}: {:>W_NUM$}",
"unique",
inlinks_set.len().separate_with_underscores()
);
println!(
"{:>W_LABEL$}: {:>W_NUM$}",
"Twins",
twins_set.len().separate_with_underscores()
);
if self.links {
let mut twin_pages = twins_set
.iter()
.map(|n| &data.pages[n.usize()])
.collect::<Vec<_>>();
let mut outlink_only_pages = outlinks_set
.difference(&twins_set)
.map(|n| &data.pages[n.usize()])
.collect::<Vec<_>>();
let mut inlink_only_pages = inlinks_set
.difference(&twins_set)
.map(|n| &data.pages[n.usize()])
.collect::<Vec<_>>();
twin_pages.sort_by_key(|p| &p.title);
outlink_only_pages.sort_by_key(|p| &p.title);
inlink_only_pages.sort_by_key(|p| &p.title);
println!();
println!("Twins ({}):", twin_pages.len().separate_with_underscores());
for page in twin_pages {
println!("{}", util::fmt_page(page));
}
println!();
println!(
"Only outlinks ({}):",
outlink_only_pages.len().separate_with_underscores()
);
for page in outlink_only_pages {
println!("{}", util::fmt_page(page));
}
println!();
println!(
"Only inlinks ({}):",
inlink_only_pages.len().separate_with_underscores()
);
for page in inlink_only_pages {
println!("{}", util::fmt_page(page));
}
}
node = match data.redirect_target(node) {
Some(target) => target,
None => break,

View file

@ -170,3 +170,11 @@ pub fn resolve_redirects(data: &Data, mut page: NodeIdx) -> NodeIdx {
/// Looks up `title` with [`locate_title`] and then follows redirects from the
/// located page with [`resolve_redirects`], returning the resulting node.
pub fn resolve_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> NodeIdx {
    let located = locate_title(normalizer, data, title);
    resolve_redirects(data, located)
}
/// Formats a page as a one-line list entry.
///
/// Redirect pages are rendered as `v <title>`, all other pages as
/// `- <title>`.
pub fn fmt_page(page: &Page) -> String {
    let marker = if page.redirect { "v" } else { "-" };
    format!("{marker} {}", page.title)
}