Show more info and optionally links
This commit is contained in:
parent
ceb987bbbc
commit
693ae9eb81
4 changed files with 94 additions and 103 deletions
|
|
@ -1,87 +0,0 @@
|
|||
use std::{
|
||||
collections::HashSet,
|
||||
fs::File,
|
||||
io::{self, BufReader},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
data::{
|
||||
adjacency_list::AdjacencyList,
|
||||
info::{LinkInfo, PageInfo},
|
||||
store,
|
||||
},
|
||||
util,
|
||||
};
|
||||
|
||||
/// Collects the set of page indices that page `idx` links to.
///
/// Duplicate links collapse into one entry because the result is a set.
fn links_from(data: &AdjacencyList<PageInfo, LinkInfo>, idx: u32) -> HashSet<u32> {
    let mut targets = HashSet::new();
    for (_, link) in data.links(idx) {
        targets.insert(link.to);
    }
    targets
}
|
||||
|
||||
/// Collects the set of page indices that contain at least one link to page `idx`.
///
/// Every page returned by `data.pages()` is visited and its links are scanned,
/// so this walks the whole graph in the worst case.
fn links_to(data: &AdjacencyList<PageInfo, LinkInfo>, idx: u32) -> HashSet<u32> {
    let mut sources = HashSet::new();
    for (pi, _) in data.pages() {
        // `any` stops at the first matching link. The previous version used a
        // trailing `continue` in the inner loop, which was a no-op and kept
        // scanning the remaining links of a page that had already matched.
        if data.links(pi).any(|(_, ld)| ld.to == idx) {
            sources.insert(pi);
        }
    }
    sources
}
|
||||
|
||||
/// Prints a titled, sorted listing of the pages in `links`.
///
/// The header line shows `name` and the number of entries. Each following line
/// is a page title prefixed with `v` when the page is a redirect and `-`
/// otherwise. Entries are ordered by title, then by the redirect flag.
fn print_links(data: &AdjacencyList<PageInfo, LinkInfo>, name: &str, links: &HashSet<u32>) {
    let mut entries: Vec<(&str, bool)> = Vec::new();
    for &pi in links {
        let page = data.page(pi);
        let title: &str = &page.data.title;
        entries.push((title, page.data.redirect));
    }
    entries.sort();

    println!(">> {name} ({}):", entries.len());
    for (title, redirect) in entries {
        if !redirect {
            println!("- {title}");
            continue;
        }
        println!("v {title}");
    }
}
|
||||
|
||||
pub fn run(datafile: &Path, page: &str) -> io::Result<()> {
|
||||
println!(">> Import");
|
||||
let mut databuf = BufReader::new(File::open(datafile)?);
|
||||
let data = store::read_adjacency_list(&mut databuf)?;
|
||||
|
||||
println!(">> Locate page");
|
||||
let idx = util::resolve_redirects(&data, util::find_index_of_title(&data.pages, page));
|
||||
println!("Page: {:?}", data.page(idx).data.title);
|
||||
|
||||
println!(">> Find links");
|
||||
let from = links_from(&data, idx);
|
||||
let to = links_to(&data, idx);
|
||||
let twins = from.intersection(&to).copied().collect::<HashSet<_>>();
|
||||
let twinless_from = from.difference(&twins).copied().collect::<HashSet<_>>();
|
||||
let twinless_to = to.difference(&twins).copied().collect::<HashSet<_>>();
|
||||
|
||||
println!();
|
||||
print_links(&data, "From", &from);
|
||||
|
||||
println!();
|
||||
print_links(&data, "To", &to);
|
||||
|
||||
println!();
|
||||
print_links(&data, "Twins", &twins);
|
||||
|
||||
println!();
|
||||
print_links(&data, "From without twins", &twinless_from);
|
||||
|
||||
println!();
|
||||
print_links(&data, "To without twins", &twinless_to);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -48,12 +48,7 @@ impl Cmd {
|
|||
println!();
|
||||
println!("Path found (cost {cost}, length {}):", path.len());
|
||||
for page in path {
|
||||
let info = &data.pages[page.usize()];
|
||||
if info.redirect {
|
||||
println!("v {:?}", info.title);
|
||||
} else {
|
||||
println!("- {:?}", info.title);
|
||||
}
|
||||
println!("{}", util::fmt_page(&data.pages[page.usize()]));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use std::{io, path::Path};
|
||||
use std::{collections::HashSet, io, path::Path};
|
||||
|
||||
use thousands::Separable;
|
||||
|
||||
|
|
@ -11,6 +11,10 @@ use crate::{
|
|||
// Command-line arguments for this subcommand, parsed through the
// `clap::Parser` derive.
// NOTE(review): plain `//` comments are used on purpose — clap's derive turns
// `///` doc comments into help text, so adding more of them would change the
// CLI output.
#[derive(Debug, clap::Parser)]
|
||||
pub struct Cmd {
|
||||
// Page title given by the user.
// NOTE(review): presumably resolved to a node before printing — confirm in `run`.
title: String,
|
||||
|
||||
/// Print links in more detail.
|
||||
#[arg(long, short)]
|
||||
// When set, the command additionally lists the "Twins", "Only outlinks" and
// "Only inlinks" pages instead of printing only their counts.
links: bool,
|
||||
}
|
||||
|
||||
impl Cmd {
|
||||
|
|
@ -49,25 +53,96 @@ impl Cmd {
|
|||
page.length.separate_with_underscores()
|
||||
);
|
||||
|
||||
let outlinks = data.graph.edge_slice(node).to_vec();
|
||||
let inlinks = data
|
||||
.graph
|
||||
.edges()
|
||||
.filter(|(_, target)| *target == node)
|
||||
.map(|(source, _)| source)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let outlinks_set = outlinks.iter().copied().collect::<HashSet<_>>();
|
||||
let inlinks_set = inlinks.iter().copied().collect::<HashSet<_>>();
|
||||
let twins_set = outlinks_set
|
||||
.intersection(&inlinks_set)
|
||||
.copied()
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
println!(
|
||||
"{:>W_LABEL$}: {:>W_NUM$}",
|
||||
"Links (out)",
|
||||
data.graph
|
||||
.edge_range(node)
|
||||
.len()
|
||||
.separate_with_underscores()
|
||||
outlinks.len().separate_with_underscores()
|
||||
);
|
||||
|
||||
println!(
|
||||
"{:>W_LABEL$}: {:>W_NUM$}",
|
||||
"unique",
|
||||
outlinks_set.len().separate_with_underscores()
|
||||
);
|
||||
|
||||
println!(
|
||||
"{:>W_LABEL$}: {:>W_NUM$}",
|
||||
"Links (in)",
|
||||
data.graph
|
||||
.edges()
|
||||
.filter(|(_, target)| *target == node)
|
||||
.count()
|
||||
.separate_with_underscores()
|
||||
inlinks.len().separate_with_underscores()
|
||||
);
|
||||
|
||||
println!(
|
||||
"{:>W_LABEL$}: {:>W_NUM$}",
|
||||
"unique",
|
||||
inlinks_set.len().separate_with_underscores()
|
||||
);
|
||||
|
||||
println!(
|
||||
"{:>W_LABEL$}: {:>W_NUM$}",
|
||||
"Twins",
|
||||
twins_set.len().separate_with_underscores()
|
||||
);
|
||||
|
||||
if self.links {
|
||||
let mut twin_pages = twins_set
|
||||
.iter()
|
||||
.map(|n| &data.pages[n.usize()])
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut outlink_only_pages = outlinks_set
|
||||
.difference(&twins_set)
|
||||
.map(|n| &data.pages[n.usize()])
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut inlink_only_pages = inlinks_set
|
||||
.difference(&twins_set)
|
||||
.map(|n| &data.pages[n.usize()])
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
twin_pages.sort_by_key(|p| &p.title);
|
||||
outlink_only_pages.sort_by_key(|p| &p.title);
|
||||
inlink_only_pages.sort_by_key(|p| &p.title);
|
||||
|
||||
println!();
|
||||
println!("Twins ({}):", twin_pages.len().separate_with_underscores());
|
||||
for page in twin_pages {
|
||||
println!("{}", util::fmt_page(page));
|
||||
}
|
||||
|
||||
println!();
|
||||
println!(
|
||||
"Only outlinks ({}):",
|
||||
outlink_only_pages.len().separate_with_underscores()
|
||||
);
|
||||
for page in outlink_only_pages {
|
||||
println!("{}", util::fmt_page(page));
|
||||
}
|
||||
|
||||
println!();
|
||||
println!(
|
||||
"Only inlinks ({}):",
|
||||
inlink_only_pages.len().separate_with_underscores()
|
||||
);
|
||||
for page in inlink_only_pages {
|
||||
println!("{}", util::fmt_page(page));
|
||||
}
|
||||
}
|
||||
|
||||
node = match data.redirect_target(node) {
|
||||
Some(target) => target,
|
||||
None => break,
|
||||
|
|
|
|||
|
|
@ -170,3 +170,11 @@ pub fn resolve_redirects(data: &Data, mut page: NodeIdx) -> NodeIdx {
|
|||
pub fn resolve_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> NodeIdx {
|
||||
resolve_redirects(data, locate_title(normalizer, data, title))
|
||||
}
|
||||
|
||||
pub fn fmt_page(page: &Page) -> String {
|
||||
if page.redirect {
|
||||
format!("v {}", page.title)
|
||||
} else {
|
||||
format!("- {}", page.title)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue