diff --git a/.sqlx/query-3620648a1003211c184423a3c13b6eae3500b888490430fbaf20517830db508e.json b/.sqlx/query-3620648a1003211c184423a3c13b6eae3500b888490430fbaf20517830db508e.json new file mode 100644 index 0000000..ccf5f18 --- /dev/null +++ b/.sqlx/query-3620648a1003211c184423a3c13b6eae3500b888490430fbaf20517830db508e.json @@ -0,0 +1,26 @@ +{ + "db_name": "SQLite", + "query": "SELECT parent, child FROM commit_links JOIN commits AS p ON p.hash = parent JOIN commits AS c ON c.hash = child ORDER BY unixepoch(p.committer_date) ASC, p.hash ASC, unixepoch(c.committer_date) ASC, c.hash ASC ", + "describe": { + "columns": [ + { + "name": "parent", + "ordinal": 0, + "type_info": "Text" + }, + { + "name": "child", + "ordinal": 1, + "type_info": "Text" + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + false, + false + ] + }, + "hash": "3620648a1003211c184423a3c13b6eae3500b888490430fbaf20517830db508e" +} diff --git a/.sqlx/query-639b559608466d60bc9cbfb9f543db10b596edee0e9bcb590c6139a7e8927945.json b/.sqlx/query-639b559608466d60bc9cbfb9f543db10b596edee0e9bcb590c6139a7e8927945.json new file mode 100644 index 0000000..fe7a3ef --- /dev/null +++ b/.sqlx/query-639b559608466d60bc9cbfb9f543db10b596edee0e9bcb590c6139a7e8927945.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "SELECT committer_date AS \"time: OffsetDateTime\" FROM commits ORDER BY unixepoch(committer_date) ASC, hash ASC ", + "describe": { + "columns": [ + { + "name": "time: OffsetDateTime", + "ordinal": 0, + "type_info": "Text" + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + false + ] + }, + "hash": "639b559608466d60bc9cbfb9f543db10b596edee0e9bcb590c6139a7e8927945" +} diff --git a/.sqlx/query-a718c407e9620bf99ed97247b89dd0afb7ef657ee0a302433641f651e72b8cbe.json b/.sqlx/query-a718c407e9620bf99ed97247b89dd0afb7ef657ee0a302433641f651e72b8cbe.json new file mode 100644 index 0000000..80d01c2 --- /dev/null +++ 
b/.sqlx/query-a718c407e9620bf99ed97247b89dd0afb7ef657ee0a302433641f651e72b8cbe.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "SELECT hash FROM commits ORDER BY unixepoch(committer_date) ASC, hash ASC ", + "describe": { + "columns": [ + { + "name": "hash", + "ordinal": 0, + "type_info": "Text" + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + false + ] + }, + "hash": "a718c407e9620bf99ed97247b89dd0afb7ef657ee0a302433641f651e72b8cbe" +} diff --git a/.sqlx/query-ac7a2e727b2ff96ae6d6be2c05e9991a7a27708a69fbf89b43231927fc85519f.json b/.sqlx/query-ac7a2e727b2ff96ae6d6be2c05e9991a7a27708a69fbf89b43231927fc85519f.json deleted file mode 100644 index 194280e..0000000 --- a/.sqlx/query-ac7a2e727b2ff96ae6d6be2c05e9991a7a27708a69fbf89b43231927fc85519f.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "db_name": "SQLite", - "query": "SELECT hash, committer_date AS \"committer_date: time::OffsetDateTime\" FROM commits ORDER BY unixepoch(committer_date) ASC, hash ASC ", - "describe": { - "columns": [ - { - "name": "hash", - "ordinal": 0, - "type_info": "Text" - }, - { - "name": "committer_date: time::OffsetDateTime", - "ordinal": 1, - "type_info": "Text" - } - ], - "parameters": { - "Right": 0 - }, - "nullable": [ - false, - false - ] - }, - "hash": "ac7a2e727b2ff96ae6d6be2c05e9991a7a27708a69fbf89b43231927fc85519f" -} diff --git a/scripts/graph.ts b/scripts/graph.ts index e6a6006..f5b44f6 100644 --- a/scripts/graph.ts +++ b/scripts/graph.ts @@ -3,7 +3,7 @@ import uPlot from "./uPlot.js"; interface GraphData { hashes: string[]; times: number[]; - metrics: { [key: string]: (number | null)[]; }; + measurements: { [key: string]: (number | null)[]; }; } let opts = { @@ -29,7 +29,7 @@ fetch("data?metric=wall-clock/build") console.log(data); plot.setData([ data.times, - data.metrics["wall-clock/build"]!, + data.measurements["wall-clock/build"]!, ]); }); diff --git a/src/server/web/pages/graph.rs b/src/server/web/pages/graph.rs index 4d58603..ddc2fa7 100644 --- 
a/src/server/web/pages/graph.rs +++ b/src/server/web/pages/graph.rs @@ -1,10 +1,14 @@ +mod util; + use std::collections::HashMap; use askama::Template; use axum::{extract::State, response::IntoResponse, Json}; use axum_extra::extract::Query; +use futures::{StreamExt, TryStreamExt}; use serde::{Deserialize, Serialize}; use sqlx::{Acquire, SqlitePool}; +use time::OffsetDateTime; use crate::{ config::Config, @@ -45,9 +49,11 @@ pub struct QueryGraphData { #[derive(Serialize)] struct GraphData { hashes: Vec, + parents: HashMap>, times: Vec, + // TODO f32 for smaller transmission size? - metrics: HashMap>>, + measurements: HashMap>>, } pub async fn get_graph_data( @@ -58,30 +64,76 @@ pub async fn get_graph_data( let mut tx = db.begin().await?; let conn = tx.acquire().await?; - let rows = sqlx::query!( + // The SQL queries that return one result per commit *must* return the same + // amount of rows in the same order! + + let unsorted_hashes = sqlx::query_scalar!( "\ - SELECT \ - hash, \ - committer_date AS \"committer_date: time::OffsetDateTime\" \ - FROM commits \ + SELECT hash FROM commits \ ORDER BY unixepoch(committer_date) ASC, hash ASC \ " ) .fetch_all(&mut *conn) .await?; - let mut hashes = Vec::with_capacity(rows.len()); - let mut times = Vec::with_capacity(rows.len()); - for row in rows { - hashes.push(row.hash); - times.push(row.committer_date.unix_timestamp()); + let parent_child_pairs = sqlx::query!( + "\ + SELECT parent, child \ + FROM commit_links \ + JOIN commits AS p ON p.hash = parent \ + JOIN commits AS c ON c.hash = child \ + ORDER BY \ + unixepoch(p.committer_date) ASC, p.hash ASC, \ + unixepoch(c.committer_date) ASC, c.hash ASC \ + " + ) + .fetch(&mut *conn) + .map_ok(|r| (r.parent, r.child)) + .try_collect::>() + .await?; + + let sorted_hashes = util::sort_topologically(&unsorted_hashes, &parent_child_pairs); + + let sorted_hash_indices = sorted_hashes + .iter() + .cloned() + .enumerate() + .map(|(i, hash)| (hash, i)) + .collect::>(); + + let mut 
parents = HashMap::>::new(); + for (parent, child) in &parent_child_pairs { + let parent_idx = sorted_hash_indices[parent]; + let child_idx = sorted_hash_indices[child]; + parents.entry(parent_idx).or_default().push(child_idx); } - // TODO Topological sort (s. velcom) - // TODO Redo indices once queries are finalized - let mut metrics = HashMap::new(); + // permutation[unsorted_index] = sorted_index + let permutation = unsorted_hashes + .iter() + .map(|h| sorted_hash_indices[h]) + .collect::>(); + + // Collect and permutate commit times + let mut times = vec![0; sorted_hashes.len()]; + let mut rows = sqlx::query_scalar!( + "\ + SELECT committer_date AS \"time: OffsetDateTime\" FROM commits \ + ORDER BY unixepoch(committer_date) ASC, hash ASC \ + " + ) + .fetch(&mut *conn) + .enumerate(); + while let Some((i, time)) = rows.next().await { + times[permutation[i]] = time?.unix_timestamp(); + } + drop(rows); + + // Collect and permutate measurements + let mut measurements = HashMap::new(); for metric in form.metric { - let values = sqlx::query_scalar!( + let mut values = vec![None; sorted_hashes.len()]; + let mut rows = sqlx::query_scalar!( "\ WITH \ measurements AS ( \ @@ -99,15 +151,20 @@ pub async fn get_graph_data( ", metric, ) - .fetch_all(&mut *conn) - .await?; + .fetch(&mut *conn) + .enumerate(); + while let Some((i, value)) = rows.next().await { + values[permutation[i]] = value?; + } + drop(rows); - metrics.insert(metric, values); + measurements.insert(metric, values); } Ok(Json(GraphData { - hashes, + hashes: sorted_hashes, + parents, times, - metrics, + measurements, })) } diff --git a/src/server/web/pages/graph/util.rs b/src/server/web/pages/graph/util.rs new file mode 100644 index 0000000..23ea854 --- /dev/null +++ b/src/server/web/pages/graph/util.rs @@ -0,0 +1,87 @@ +use std::collections::{HashMap, HashSet}; + +/// Sort commits topologically such that parents come before their children. 
///
/// Duplicate entries in `commits` or `parent_child_pairs` are tolerated: only
/// the first occurrence of a commit or of a pair is taken into account. The
/// order of both slices matters (see below) and should be deterministic.
///
/// The algorithm used is a version of [Kahn's algorithm][0] that starts at the
/// nodes with no parents. It uses a stack for the set of parentless nodes,
/// meaning the resulting commit order is depth-first-y, not breadth-first-y.
/// For example, this commit graph (where children are ordered top to bottom)
/// results in the order `A, B, C, D, E, F` and not an interleaved order like
/// `A, B, D, C, E, F` (which a queue would produce):
///
/// ```text
/// A - B - C
///  \   \
///   D - E - F
/// ```
///
/// When a node is visited and added to the list of sorted nodes, it is removed
/// as parent from all its children. Those who had no other parents are added to
/// the stack in reverse order. In the final list, the children appear in the
/// order they appeared in the parent child pairs, if possible.
///
/// # Panics
///
/// Panics if a pair mentions a hash that is not contained in `commits`, or if
/// the pairs form a cycle (so that not every commit can be placed after all of
/// its parents).
///
/// [0]: https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm
pub fn sort_topologically(
    commits: &[String],
    parent_child_pairs: &[(String, String)],
) -> Vec<String> {
    // Give every distinct commit a dense index in order of first appearance.
    // All bookkeeping below works on these indices, so hashes are cloned only
    // once, when they are written to the result.
    let mut index_of = HashMap::<&str, usize>::with_capacity(commits.len());
    let mut hashes = Vec::<&str>::with_capacity(commits.len());
    for hash in commits {
        index_of.entry(hash.as_str()).or_insert_with(|| {
            hashes.push(hash.as_str());
            hashes.len() - 1
        });
    }

    let lookup = |hash: &str, role: &str| -> usize {
        *index_of
            .get(hash)
            .unwrap_or_else(|| panic!("{role} commit {hash} is missing from the commit list"))
    };

    // children[i]: children of commit i, in the order of the pairs.
    // pending_parents[i]: number of parents of commit i not yet emitted.
    let mut children = vec![Vec::<usize>::new(); hashes.len()];
    let mut pending_parents = vec![0_usize; hashes.len()];
    let mut seen_pairs = HashSet::<(usize, usize)>::with_capacity(parent_child_pairs.len());
    for (parent, child) in parent_child_pairs {
        let parent_idx = lookup(parent, "parent");
        let child_idx = lookup(child, "child");
        // A repeated pair must not be counted twice, otherwise the child would
        // be scheduled (and emitted) more than once.
        if seen_pairs.insert((parent_idx, child_idx)) {
            children[parent_idx].push(child_idx);
            pending_parents[child_idx] += 1;
        }
    }

    // Initialize the parentless stack in reverse order so that the order is
    // right when popping.
    let mut parentless = (0..hashes.len())
        .rev()
        .filter(|&idx| pending_parents[idx] == 0)
        .collect::<Vec<_>>();

    let mut sorted = Vec::<String>::with_capacity(hashes.len());
    while let Some(idx) = parentless.pop() {
        // Inspect children in reverse order so that the order is right when
        // popping off the parentless stack.
        for &child_idx in children[idx].iter().rev() {
            pending_parents[child_idx] -= 1;
            if pending_parents[child_idx] == 0 {
                parentless.push(child_idx);
            }
        }

        sorted.push(hashes[idx].to_owned());
    }

    // Commits that are part of (or only reachable through) a cycle never lose
    // all their parents and would silently go missing from the result.
    assert_eq!(
        hashes.len(),
        sorted.len(),
        "commit graph contains a cycle, not all commits could be sorted"
    );
    sorted
}