Add more philosophy game subcommands
This commit is contained in:
parent
ed611d0841
commit
9c73e691a8
2 changed files with 112 additions and 23 deletions
|
|
@ -1,14 +1,17 @@
|
||||||
use std::{
|
use std::{
|
||||||
collections::{BTreeSet, HashMap, HashSet},
|
collections::{BTreeSet, HashMap, HashSet},
|
||||||
fs::File,
|
fs::File,
|
||||||
io::{self, BufReader},
|
io::{self, BufReader, BufWriter},
|
||||||
path::Path,
|
path::Path,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::data::{
|
use crate::{
|
||||||
adjacency_list::AdjacencyList,
|
data::{
|
||||||
info::{LinkInfo, PageInfo},
|
adjacency_list::AdjacencyList,
|
||||||
store,
|
info::{LinkInfo, PageInfo},
|
||||||
|
store,
|
||||||
|
},
|
||||||
|
PhilosophyGameCmd,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PageMap(Vec<u32>);
|
struct PageMap(Vec<u32>);
|
||||||
|
|
@ -37,23 +40,17 @@ fn first_viable_link(data: &AdjacencyList<PageInfo, LinkInfo>, page_idx: u32) ->
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn run(datafile: &Path) -> io::Result<()> {
|
fn find_forward_edges(data: &AdjacencyList<PageInfo, LinkInfo>) -> PageMap {
|
||||||
println!(">> Import");
|
let mut result = PageMap::new(data.pages.len());
|
||||||
let mut databuf = BufReader::new(File::open(datafile)?);
|
|
||||||
let data = store::read_adjacency_list(&mut databuf)?;
|
|
||||||
|
|
||||||
// Compute forward and backward edges
|
|
||||||
let mut forward = PageMap::new(data.pages.len());
|
|
||||||
for (page_idx, _) in data.pages() {
|
for (page_idx, _) in data.pages() {
|
||||||
if let Some(first_link) = first_viable_link(&data, page_idx) {
|
if let Some(first_link) = first_viable_link(data, page_idx) {
|
||||||
forward.set(page_idx, first_link);
|
result.set(page_idx, first_link);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
// Determine cluster for each page, represented via canonical page. The
|
fn find_clusters(data: &AdjacencyList<PageInfo, LinkInfo>, forward: &PageMap) -> PageMap {
|
||||||
// canonical page of a cluster is either a dead-end or the loop member with
|
|
||||||
// the smallest index.
|
|
||||||
println!(">> Cluster");
|
|
||||||
let mut cluster = PageMap::new(data.pages.len());
|
let mut cluster = PageMap::new(data.pages.len());
|
||||||
for (page_idx, _) in data.pages() {
|
for (page_idx, _) in data.pages() {
|
||||||
let mut current = page_idx;
|
let mut current = page_idx;
|
||||||
|
|
@ -90,17 +87,92 @@ pub fn run(datafile: &Path) -> io::Result<()> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cluster
|
||||||
|
}
|
||||||
|
|
||||||
|
fn print_forward_edges_as_json(
|
||||||
|
data: &AdjacencyList<PageInfo, LinkInfo>,
|
||||||
|
forward: &PageMap,
|
||||||
|
) -> io::Result<()> {
|
||||||
|
let map = forward
|
||||||
|
.0
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(page, first_link)| {
|
||||||
|
let page_title = &data.page(page as u32).data.title;
|
||||||
|
let first_link_title = if *first_link == u32::MAX {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(&data.page(*first_link).data.title)
|
||||||
|
};
|
||||||
|
(page_title, first_link_title)
|
||||||
|
})
|
||||||
|
.collect::<HashMap<_, _>>();
|
||||||
|
|
||||||
|
let writer = BufWriter::new(io::stdout());
|
||||||
|
serde_json::to_writer_pretty(writer, &map)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn print_canonical_pages_as_json(
|
||||||
|
data: &AdjacencyList<PageInfo, LinkInfo>,
|
||||||
|
cluster: &PageMap,
|
||||||
|
) -> io::Result<()> {
|
||||||
|
let map = cluster
|
||||||
|
.0
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(page, canonical)| {
|
||||||
|
(
|
||||||
|
&data.page(page as u32).data.title,
|
||||||
|
&data.page(*canonical).data.title,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect::<HashMap<_, _>>();
|
||||||
|
|
||||||
|
let writer = BufWriter::new(io::stdout());
|
||||||
|
serde_json::to_writer_pretty(writer, &map)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn run(datafile: &Path, subcmd: PhilosophyGameCmd) -> io::Result<()> {
|
||||||
|
eprintln!(">> Import");
|
||||||
|
let mut databuf = BufReader::new(File::open(datafile)?);
|
||||||
|
let data = store::read_adjacency_list(&mut databuf)?;
|
||||||
|
|
||||||
|
eprintln!(">> Forward");
|
||||||
|
let forward = find_forward_edges(&data);
|
||||||
|
|
||||||
|
if subcmd == PhilosophyGameCmd::First {
|
||||||
|
eprintln!(">> First links");
|
||||||
|
print_forward_edges_as_json(&data, &forward)?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine cluster for each page, represented via canonical page. The
|
||||||
|
// canonical page of a cluster is either a dead-end or the loop member with
|
||||||
|
// the smallest index.
|
||||||
|
eprintln!(">> Find clusters");
|
||||||
|
let cluster = find_clusters(&data, &forward);
|
||||||
|
|
||||||
|
if subcmd == PhilosophyGameCmd::Canonical {
|
||||||
|
print_canonical_pages_as_json(&data, &cluster)?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
// Measure cluster size
|
// Measure cluster size
|
||||||
|
eprintln!(">> Measure clusters");
|
||||||
let mut cluster_size = HashMap::<u32, u32>::new();
|
let mut cluster_size = HashMap::<u32, u32>::new();
|
||||||
for (i, canonical) in cluster.0.iter().enumerate() {
|
for (i, canonical) in cluster.0.iter().enumerate() {
|
||||||
assert!(*canonical != u32::MAX, "{}", data.page(i as u32).data.title);
|
assert!(*canonical != u32::MAX, "{}", data.page(i as u32).data.title);
|
||||||
*cluster_size.entry(*canonical).or_default() += 1;
|
*cluster_size.entry(*canonical).or_default() += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut cluster_by_size = cluster_size.into_iter().collect::<Vec<_>>();
|
let mut cluster_by_size = cluster_size.into_iter().collect::<Vec<_>>();
|
||||||
cluster_by_size.sort_by_key(|(c, s)| (*s, *c));
|
cluster_by_size.sort_by_key(|(c, s)| (*s, *c));
|
||||||
|
cluster_by_size.reverse();
|
||||||
|
|
||||||
// Print clusters
|
// Print clusters
|
||||||
|
assert!(subcmd == PhilosophyGameCmd::Cluster);
|
||||||
for (canonical, size) in cluster_by_size {
|
for (canonical, size) in cluster_by_size {
|
||||||
if forward.get(canonical) == u32::MAX {
|
if forward.get(canonical) == u32::MAX {
|
||||||
let title = &data.page(canonical).data.title;
|
let title = &data.page(canonical).data.title;
|
||||||
|
|
@ -111,8 +183,13 @@ pub fn run(datafile: &Path) -> io::Result<()> {
|
||||||
println!("Cluster (loop, {size}):");
|
println!("Cluster (loop, {size}):");
|
||||||
let mut current = canonical;
|
let mut current = canonical;
|
||||||
loop {
|
loop {
|
||||||
let title = &data.page(current).data.title;
|
let page = data.page(current);
|
||||||
println!(" - {title}");
|
let title = &page.data.title;
|
||||||
|
if page.data.redirect {
|
||||||
|
println!(" v {title}");
|
||||||
|
} else {
|
||||||
|
println!(" - {title}");
|
||||||
|
}
|
||||||
current = forward.get(current);
|
current = forward.get(current);
|
||||||
if current == canonical {
|
if current == canonical {
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,13 @@ use std::path::PathBuf;
|
||||||
|
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, Parser)]
|
||||||
|
pub enum PhilosophyGameCmd {
|
||||||
|
First,
|
||||||
|
Canonical,
|
||||||
|
Cluster,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Parser)]
|
#[derive(Debug, Parser)]
|
||||||
enum Command {
|
enum Command {
|
||||||
/// Read sift data on stdin and output brood data.
|
/// Read sift data on stdin and output brood data.
|
||||||
|
|
@ -30,7 +37,10 @@ enum Command {
|
||||||
/// Find the longest shortest path starting at an article.
|
/// Find the longest shortest path starting at an article.
|
||||||
LongestShortestPath { from: String },
|
LongestShortestPath { from: String },
|
||||||
/// Analyze articles using "Philosophy Game" rules.
|
/// Analyze articles using "Philosophy Game" rules.
|
||||||
PhilosophyGame,
|
PhilosophyGame {
|
||||||
|
#[command(subcommand)]
|
||||||
|
subcmd: PhilosophyGameCmd,
|
||||||
|
},
|
||||||
/// Print all page titles.
|
/// Print all page titles.
|
||||||
ListPages,
|
ListPages,
|
||||||
}
|
}
|
||||||
|
|
@ -61,7 +71,9 @@ fn main() -> io::Result<()> {
|
||||||
Command::LongestShortestPath { from } => {
|
Command::LongestShortestPath { from } => {
|
||||||
commands::longest_shortest_path::run(&args.datafile, &from)
|
commands::longest_shortest_path::run(&args.datafile, &from)
|
||||||
}
|
}
|
||||||
Command::PhilosophyGame => commands::philosophy_game::run(&args.datafile),
|
Command::PhilosophyGame { subcmd } => {
|
||||||
|
commands::philosophy_game::run(&args.datafile, subcmd)
|
||||||
|
}
|
||||||
Command::ListPages => commands::list_pages::run(&args.datafile),
|
Command::ListPages => commands::list_pages::run(&args.datafile),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue