Show more info and optionally links
This commit is contained in:
parent
ceb987bbbc
commit
693ae9eb81
4 changed files with 94 additions and 103 deletions
|
|
@ -1,87 +0,0 @@
|
||||||
use std::{
|
|
||||||
collections::HashSet,
|
|
||||||
fs::File,
|
|
||||||
io::{self, BufReader},
|
|
||||||
path::Path,
|
|
||||||
};
|
|
||||||
|
|
||||||
use crate::{
|
|
||||||
data::{
|
|
||||||
adjacency_list::AdjacencyList,
|
|
||||||
info::{LinkInfo, PageInfo},
|
|
||||||
store,
|
|
||||||
},
|
|
||||||
util,
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Collects the `to` index of every link yielded by `data.links(idx)`.
///
/// Duplicate targets collapse because the result is a set.
fn links_from(data: &AdjacencyList<PageInfo, LinkInfo>, idx: u32) -> HashSet<u32> {
    let mut targets = HashSet::new();
    for (_, link) in data.links(idx) {
        targets.insert(link.to);
    }
    targets
}
|
|
||||||
|
|
||||||
/// Collects the index of every page that has at least one link whose `to`
/// field equals `idx`.
///
/// This walks all pages returned by `data.pages()` and, for each one, its
/// links, so the cost grows with the total number of links in `data`.
fn links_to(data: &AdjacencyList<PageInfo, LinkInfo>, idx: u32) -> HashSet<u32> {
    let mut links = HashSet::<u32>::new();
    for (pi, _) in data.pages() {
        // `any` stops at the first matching link; a page only needs to be
        // recorded once, so scanning its remaining links would be wasted work.
        if data.links(pi).any(|(_, ld)| ld.to == idx) {
            links.insert(pi);
        }
    }
    links
}
|
|
||||||
|
|
||||||
/// Prints a headed, sorted listing of the pages whose indices are in `links`.
///
/// The header line is `>> {name} ({count}):`. Each following line shows one
/// page title, prefixed with `v` when the page's `redirect` flag is set and
/// with `-` otherwise. Entries are ordered by title, then by the redirect flag.
fn print_links(data: &AdjacencyList<PageInfo, LinkInfo>, name: &str, links: &HashSet<u32>) {
    let mut entries: Vec<(&str, bool)> = Vec::with_capacity(links.len());
    for &pi in links {
        let page = data.page(pi);
        let title: &str = &page.data.title;
        entries.push((title, page.data.redirect));
    }

    // Tuple ordering: title first, redirect flag as the tie-breaker.
    entries.sort();

    println!(">> {name} ({}):", entries.len());
    for (title, redirect) in entries {
        let marker = if redirect { 'v' } else { '-' };
        println!("{marker} {title}");
    }
}
|
|
||||||
|
|
||||||
pub fn run(datafile: &Path, page: &str) -> io::Result<()> {
|
|
||||||
println!(">> Import");
|
|
||||||
let mut databuf = BufReader::new(File::open(datafile)?);
|
|
||||||
let data = store::read_adjacency_list(&mut databuf)?;
|
|
||||||
|
|
||||||
println!(">> Locate page");
|
|
||||||
let idx = util::resolve_redirects(&data, util::find_index_of_title(&data.pages, page));
|
|
||||||
println!("Page: {:?}", data.page(idx).data.title);
|
|
||||||
|
|
||||||
println!(">> Find links");
|
|
||||||
let from = links_from(&data, idx);
|
|
||||||
let to = links_to(&data, idx);
|
|
||||||
let twins = from.intersection(&to).copied().collect::<HashSet<_>>();
|
|
||||||
let twinless_from = from.difference(&twins).copied().collect::<HashSet<_>>();
|
|
||||||
let twinless_to = to.difference(&twins).copied().collect::<HashSet<_>>();
|
|
||||||
|
|
||||||
println!();
|
|
||||||
print_links(&data, "From", &from);
|
|
||||||
|
|
||||||
println!();
|
|
||||||
print_links(&data, "To", &to);
|
|
||||||
|
|
||||||
println!();
|
|
||||||
print_links(&data, "Twins", &twins);
|
|
||||||
|
|
||||||
println!();
|
|
||||||
print_links(&data, "From without twins", &twinless_from);
|
|
||||||
|
|
||||||
println!();
|
|
||||||
print_links(&data, "To without twins", &twinless_to);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
@ -48,12 +48,7 @@ impl Cmd {
|
||||||
println!();
|
println!();
|
||||||
println!("Path found (cost {cost}, length {}):", path.len());
|
println!("Path found (cost {cost}, length {}):", path.len());
|
||||||
for page in path {
|
for page in path {
|
||||||
let info = &data.pages[page.usize()];
|
println!("{}", util::fmt_page(&data.pages[page.usize()]));
|
||||||
if info.redirect {
|
|
||||||
println!("v {:?}", info.title);
|
|
||||||
} else {
|
|
||||||
println!("- {:?}", info.title);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
use std::{io, path::Path};
|
use std::{collections::HashSet, io, path::Path};
|
||||||
|
|
||||||
use thousands::Separable;
|
use thousands::Separable;
|
||||||
|
|
||||||
|
|
@ -11,6 +11,10 @@ use crate::{
|
||||||
#[derive(Debug, clap::Parser)]
|
#[derive(Debug, clap::Parser)]
|
||||||
pub struct Cmd {
|
pub struct Cmd {
|
||||||
title: String,
|
title: String,
|
||||||
|
|
||||||
|
/// Print links in more detail.
|
||||||
|
#[arg(long, short)]
|
||||||
|
links: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Cmd {
|
impl Cmd {
|
||||||
|
|
@ -49,25 +53,96 @@ impl Cmd {
|
||||||
page.length.separate_with_underscores()
|
page.length.separate_with_underscores()
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let outlinks = data.graph.edge_slice(node).to_vec();
|
||||||
|
let inlinks = data
|
||||||
|
.graph
|
||||||
|
.edges()
|
||||||
|
.filter(|(_, target)| *target == node)
|
||||||
|
.map(|(source, _)| source)
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let outlinks_set = outlinks.iter().copied().collect::<HashSet<_>>();
|
||||||
|
let inlinks_set = inlinks.iter().copied().collect::<HashSet<_>>();
|
||||||
|
let twins_set = outlinks_set
|
||||||
|
.intersection(&inlinks_set)
|
||||||
|
.copied()
|
||||||
|
.collect::<HashSet<_>>();
|
||||||
|
|
||||||
println!(
|
println!(
|
||||||
"{:>W_LABEL$}: {:>W_NUM$}",
|
"{:>W_LABEL$}: {:>W_NUM$}",
|
||||||
"Links (out)",
|
"Links (out)",
|
||||||
data.graph
|
outlinks.len().separate_with_underscores()
|
||||||
.edge_range(node)
|
);
|
||||||
.len()
|
|
||||||
.separate_with_underscores()
|
println!(
|
||||||
|
"{:>W_LABEL$}: {:>W_NUM$}",
|
||||||
|
"unique",
|
||||||
|
outlinks_set.len().separate_with_underscores()
|
||||||
);
|
);
|
||||||
|
|
||||||
println!(
|
println!(
|
||||||
"{:>W_LABEL$}: {:>W_NUM$}",
|
"{:>W_LABEL$}: {:>W_NUM$}",
|
||||||
"Links (in)",
|
"Links (in)",
|
||||||
data.graph
|
inlinks.len().separate_with_underscores()
|
||||||
.edges()
|
|
||||||
.filter(|(_, target)| *target == node)
|
|
||||||
.count()
|
|
||||||
.separate_with_underscores()
|
|
||||||
);
|
);
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"{:>W_LABEL$}: {:>W_NUM$}",
|
||||||
|
"unique",
|
||||||
|
inlinks_set.len().separate_with_underscores()
|
||||||
|
);
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"{:>W_LABEL$}: {:>W_NUM$}",
|
||||||
|
"Twins",
|
||||||
|
twins_set.len().separate_with_underscores()
|
||||||
|
);
|
||||||
|
|
||||||
|
if self.links {
|
||||||
|
let mut twin_pages = twins_set
|
||||||
|
.iter()
|
||||||
|
.map(|n| &data.pages[n.usize()])
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let mut outlink_only_pages = outlinks_set
|
||||||
|
.difference(&twins_set)
|
||||||
|
.map(|n| &data.pages[n.usize()])
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let mut inlink_only_pages = inlinks_set
|
||||||
|
.difference(&twins_set)
|
||||||
|
.map(|n| &data.pages[n.usize()])
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
twin_pages.sort_by_key(|p| &p.title);
|
||||||
|
outlink_only_pages.sort_by_key(|p| &p.title);
|
||||||
|
inlink_only_pages.sort_by_key(|p| &p.title);
|
||||||
|
|
||||||
|
println!();
|
||||||
|
println!("Twins ({}):", twin_pages.len().separate_with_underscores());
|
||||||
|
for page in twin_pages {
|
||||||
|
println!("{}", util::fmt_page(page));
|
||||||
|
}
|
||||||
|
|
||||||
|
println!();
|
||||||
|
println!(
|
||||||
|
"Only outlinks ({}):",
|
||||||
|
outlink_only_pages.len().separate_with_underscores()
|
||||||
|
);
|
||||||
|
for page in outlink_only_pages {
|
||||||
|
println!("{}", util::fmt_page(page));
|
||||||
|
}
|
||||||
|
|
||||||
|
println!();
|
||||||
|
println!(
|
||||||
|
"Only inlinks ({}):",
|
||||||
|
inlink_only_pages.len().separate_with_underscores()
|
||||||
|
);
|
||||||
|
for page in inlink_only_pages {
|
||||||
|
println!("{}", util::fmt_page(page));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
node = match data.redirect_target(node) {
|
node = match data.redirect_target(node) {
|
||||||
Some(target) => target,
|
Some(target) => target,
|
||||||
None => break,
|
None => break,
|
||||||
|
|
|
||||||
|
|
@ -170,3 +170,11 @@ pub fn resolve_redirects(data: &Data, mut page: NodeIdx) -> NodeIdx {
|
||||||
pub fn resolve_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> NodeIdx {
|
pub fn resolve_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> NodeIdx {
|
||||||
resolve_redirects(data, locate_title(normalizer, data, title))
|
resolve_redirects(data, locate_title(normalizer, data, title))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn fmt_page(page: &Page) -> String {
|
||||||
|
if page.redirect {
|
||||||
|
format!("v {}", page.title)
|
||||||
|
} else {
|
||||||
|
format!("- {}", page.title)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue