Compute article rankings by link degrees
This commit is contained in:
parent
5b8feb6368
commit
cdf9a7d7ae
2 changed files with 87 additions and 0 deletions
|
|
@ -1,3 +1,4 @@
|
||||||
|
mod degrees;
|
||||||
mod redirects;
|
mod redirects;
|
||||||
|
|
||||||
use std::io;
|
use std::io;
|
||||||
|
|
@ -8,6 +9,7 @@ use crate::data::Data;
|
||||||
|
|
||||||
#[derive(Debug, clap::Parser)]
|
#[derive(Debug, clap::Parser)]
|
||||||
enum Command {
|
enum Command {
|
||||||
|
Degrees(degrees::Cmd),
|
||||||
Redirects(redirects::Cmd),
|
Redirects(redirects::Cmd),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -22,6 +24,7 @@ impl Cmd {
|
||||||
pub fn run(self, data: Data) -> io::Result<()> {
|
pub fn run(self, data: Data) -> io::Result<()> {
|
||||||
if let Some(cmd) = self.command {
|
if let Some(cmd) = self.command {
|
||||||
return match cmd {
|
return match cmd {
|
||||||
|
Command::Degrees(cmd) => cmd.run(data),
|
||||||
Command::Redirects(cmd) => cmd.run(data),
|
Command::Redirects(cmd) => cmd.run(data),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
||||||
84
brood/src/commands/stats/degrees.rs
Normal file
84
brood/src/commands/stats/degrees.rs
Normal file
|
|
@ -0,0 +1,84 @@
|
||||||
|
use std::{cmp::Reverse, io};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
algo,
|
||||||
|
data::{Data, Page},
|
||||||
|
util,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Show stats on article in- and out-degrees.
|
||||||
|
#[derive(Debug, clap::Parser)]
|
||||||
|
pub struct Cmd {
|
||||||
|
#[arg(long, short, default_value_t = 5)]
|
||||||
|
top: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Cmd {
|
||||||
|
pub fn run(self, mut data: Data) -> io::Result<()> {
|
||||||
|
println!(">> Outdegree");
|
||||||
|
println!("> Counting links");
|
||||||
|
let mut outdegree = vec![usize::MAX; data.pages.len()];
|
||||||
|
for node in data.graph.nodes() {
|
||||||
|
outdegree[node.usize()] = data.graph.edge_range(node).len();
|
||||||
|
}
|
||||||
|
|
||||||
|
println!(">> Indegree");
|
||||||
|
println!("> Inverting edges");
|
||||||
|
algo::invert(&mut data);
|
||||||
|
let mut indegree = vec![usize::MAX; data.pages.len()];
|
||||||
|
println!("> Counting links");
|
||||||
|
for node in data.graph.nodes() {
|
||||||
|
indegree[node.usize()] = data.graph.edge_range(node).len();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut by_degrees = data
|
||||||
|
.pages
|
||||||
|
.iter()
|
||||||
|
.zip(outdegree)
|
||||||
|
.zip(indegree)
|
||||||
|
.map(|((p, od), id)| (p, od, id))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
println!();
|
||||||
|
println!("Most outlinks");
|
||||||
|
println!("¯¯¯¯¯¯¯¯¯¯¯¯¯");
|
||||||
|
|
||||||
|
by_degrees.sort_by_key(|(_, od, _)| Reverse(*od));
|
||||||
|
self.print_links(&by_degrees);
|
||||||
|
|
||||||
|
println!();
|
||||||
|
println!("Most inlinks");
|
||||||
|
println!("¯¯¯¯¯¯¯¯¯¯¯¯");
|
||||||
|
|
||||||
|
by_degrees.sort_by_key(|(_, _, id)| Reverse(*id));
|
||||||
|
self.print_links(&by_degrees);
|
||||||
|
|
||||||
|
by_degrees.retain(|(_, od, id)| *od > 0 && *id > 0);
|
||||||
|
|
||||||
|
println!();
|
||||||
|
println!("Most outlinks per non-zero inlink");
|
||||||
|
println!("¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯");
|
||||||
|
|
||||||
|
by_degrees.sort_by(|(_, od1, id1), (_, od2, id2)| {
|
||||||
|
let r1 = *od1 as f32 / *id1 as f32;
|
||||||
|
let r2 = *od2 as f32 / *id2 as f32;
|
||||||
|
r2.total_cmp(&r1) // Reverse order so max values are at beginnibg
|
||||||
|
});
|
||||||
|
self.print_links(&by_degrees);
|
||||||
|
|
||||||
|
println!();
|
||||||
|
println!("Most inlinks per non-zero outlink");
|
||||||
|
println!("¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯");
|
||||||
|
|
||||||
|
by_degrees.reverse();
|
||||||
|
self.print_links(&by_degrees);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn print_links(&self, by_degrees: &Vec<(&Page, usize, usize)>) {
|
||||||
|
for (page, od, id) in by_degrees.iter().take(self.top) {
|
||||||
|
println!("{} ({od} out, {id} in)", util::fmt_page(page));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue