Add philosophy-game command
This commit is contained in:
parent
8642778402
commit
ed611d0841
3 changed files with 128 additions and 0 deletions
|
|
@ -2,4 +2,5 @@ pub mod ingest;
|
||||||
pub mod list_pages;
|
pub mod list_pages;
|
||||||
pub mod longest_shortest_path;
|
pub mod longest_shortest_path;
|
||||||
pub mod path;
|
pub mod path;
|
||||||
|
pub mod philosophy_game;
|
||||||
pub mod reexport;
|
pub mod reexport;
|
||||||
|
|
|
||||||
124
brood/src/commands/philosophy_game.rs
Normal file
124
brood/src/commands/philosophy_game.rs
Normal file
|
|
@ -0,0 +1,124 @@
|
||||||
|
use std::{
|
||||||
|
collections::{BTreeSet, HashMap, HashSet},
|
||||||
|
fs::File,
|
||||||
|
io::{self, BufReader},
|
||||||
|
path::Path,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::data::{
|
||||||
|
adjacency_list::AdjacencyList,
|
||||||
|
info::{LinkInfo, PageInfo},
|
||||||
|
store,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Dense map from a page index to a `u32` value, backed by a `Vec<u32>`.
///
/// Slots that have never been written hold `u32::MAX`; code in this file
/// compares against that value to detect "nothing stored here yet".
struct PageMap(Vec<u32>);

impl PageMap {
    /// Creates a map with `len` slots, each initialised to `u32::MAX`.
    fn new(len: usize) -> Self {
        Self(vec![u32::MAX; len])
    }

    /// Returns the value stored for `page_idx`.
    ///
    /// # Panics
    ///
    /// Panics if `page_idx` is out of bounds for the length given to
    /// [`PageMap::new`].
    fn get(&self, page_idx: u32) -> u32 {
        self.0[page_idx as usize]
    }

    /// Stores `to` as the value for `page_idx`, overwriting any earlier value.
    ///
    /// # Panics
    ///
    /// Panics if `page_idx` is out of bounds for the length given to
    /// [`PageMap::new`].
    fn set(&mut self, page_idx: u32, to: u32) {
        self.0[page_idx as usize] = to;
    }
}
|
||||||
|
|
||||||
|
/// Returns the target of the first "viable" link of the page `page_idx`.
///
/// A link counts as viable when both `in_parens()` and `in_structure()` on its
/// link data return `false`. Links are examined in the order produced by
/// `data.link_range(page_idx)`; the `to` field of the first match is returned.
///
/// Returns `None` if the page has no viable link.
fn first_viable_link(data: &AdjacencyList<PageInfo, LinkInfo>, page_idx: u32) -> Option<u32> {
    data.link_range(page_idx)
        .into_iter()
        .map(|link_idx| data.link(link_idx))
        .find(|link| !link.data.in_parens() && !link.data.in_structure())
        .map(|link| link.to)
}
|
||||||
|
|
||||||
|
pub fn run(datafile: &Path) -> io::Result<()> {
|
||||||
|
println!(">> Import");
|
||||||
|
let mut databuf = BufReader::new(File::open(datafile)?);
|
||||||
|
let data = store::read_adjacency_list(&mut databuf)?;
|
||||||
|
|
||||||
|
// Compute forward and backward edges
|
||||||
|
let mut forward = PageMap::new(data.pages.len());
|
||||||
|
for (page_idx, _) in data.pages() {
|
||||||
|
if let Some(first_link) = first_viable_link(&data, page_idx) {
|
||||||
|
forward.set(page_idx, first_link);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine cluster for each page, represented via canonical page. The
|
||||||
|
// canonical page of a cluster is either a dead-end or the loop member with
|
||||||
|
// the smallest index.
|
||||||
|
println!(">> Cluster");
|
||||||
|
let mut cluster = PageMap::new(data.pages.len());
|
||||||
|
for (page_idx, _) in data.pages() {
|
||||||
|
let mut current = page_idx;
|
||||||
|
let mut visited = HashSet::new();
|
||||||
|
let canonical = loop {
|
||||||
|
// We've already determined the canonical element for this page.
|
||||||
|
if cluster.get(current) != u32::MAX {
|
||||||
|
break cluster.get(current);
|
||||||
|
}
|
||||||
|
|
||||||
|
// We've hit a loop
|
||||||
|
if visited.contains(¤t) {
|
||||||
|
let mut loop_members = BTreeSet::new();
|
||||||
|
while !loop_members.contains(¤t) {
|
||||||
|
loop_members.insert(current);
|
||||||
|
current = forward.get(current);
|
||||||
|
}
|
||||||
|
break loop_members.pop_first().unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
visited.insert(current);
|
||||||
|
|
||||||
|
let next = forward.get(current);
|
||||||
|
if next == u32::MAX {
|
||||||
|
// We've hit a dead-end
|
||||||
|
break current;
|
||||||
|
}
|
||||||
|
|
||||||
|
current = next;
|
||||||
|
};
|
||||||
|
|
||||||
|
for i in visited {
|
||||||
|
cluster.set(i, canonical);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Measure cluster size
|
||||||
|
let mut cluster_size = HashMap::<u32, u32>::new();
|
||||||
|
for (i, canonical) in cluster.0.iter().enumerate() {
|
||||||
|
assert!(*canonical != u32::MAX, "{}", data.page(i as u32).data.title);
|
||||||
|
*cluster_size.entry(*canonical).or_default() += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut cluster_by_size = cluster_size.into_iter().collect::<Vec<_>>();
|
||||||
|
cluster_by_size.sort_by_key(|(c, s)| (*s, *c));
|
||||||
|
|
||||||
|
// Print clusters
|
||||||
|
for (canonical, size) in cluster_by_size {
|
||||||
|
if forward.get(canonical) == u32::MAX {
|
||||||
|
let title = &data.page(canonical).data.title;
|
||||||
|
println!("Cluster (dead-end, {size}): {title}");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("Cluster (loop, {size}):");
|
||||||
|
let mut current = canonical;
|
||||||
|
loop {
|
||||||
|
let title = &data.page(current).data.title;
|
||||||
|
println!(" - {title}");
|
||||||
|
current = forward.get(current);
|
||||||
|
if current == canonical {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
@ -29,6 +29,8 @@ enum Command {
|
||||||
},
|
},
|
||||||
/// Find the longest shortest path starting at an article.
|
/// Find the longest shortest path starting at an article.
|
||||||
LongestShortestPath { from: String },
|
LongestShortestPath { from: String },
|
||||||
|
/// Analyze articles using "Philosophy Game" rules.
|
||||||
|
PhilosophyGame,
|
||||||
/// Print all page titles.
|
/// Print all page titles.
|
||||||
ListPages,
|
ListPages,
|
||||||
}
|
}
|
||||||
|
|
@ -59,6 +61,7 @@ fn main() -> io::Result<()> {
|
||||||
Command::LongestShortestPath { from } => {
|
Command::LongestShortestPath { from } => {
|
||||||
commands::longest_shortest_path::run(&args.datafile, &from)
|
commands::longest_shortest_path::run(&args.datafile, &from)
|
||||||
}
|
}
|
||||||
|
Command::PhilosophyGame => commands::philosophy_game::run(&args.datafile),
|
||||||
Command::ListPages => commands::list_pages::run(&args.datafile),
|
Command::ListPages => commands::list_pages::run(&args.datafile),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue