From 603e7166381a4ad87a1522150450476ebb42f00d Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 23 Oct 2023 20:04:35 +0200 Subject: [PATCH] Fetch and process commits --- ...17784fd2f8597021bc22657b9211825916dc2.json | 26 ++++ ...b6782312deef803ed6231658c6721a26a3639.json | 38 +++++ scripts/graph/commits.ts | 130 ++++++++++++++++++ scripts/graph/requests.ts | 4 +- scripts/graph/state.ts | 34 ++++- src/server/web/pages/graph.rs | 91 ++++++++++-- 6 files changed, 301 insertions(+), 22 deletions(-) create mode 100644 .sqlx/query-f9eefc496eb423eb9a73767d3ff17784fd2f8597021bc22657b9211825916dc2.json create mode 100644 .sqlx/query-fa9d5c2db5228e513a4c50f1b85b6782312deef803ed6231658c6721a26a3639.json create mode 100644 scripts/graph/commits.ts diff --git a/.sqlx/query-f9eefc496eb423eb9a73767d3ff17784fd2f8597021bc22657b9211825916dc2.json b/.sqlx/query-f9eefc496eb423eb9a73767d3ff17784fd2f8597021bc22657b9211825916dc2.json new file mode 100644 index 0000000..7a0e55f --- /dev/null +++ b/.sqlx/query-f9eefc496eb423eb9a73767d3ff17784fd2f8597021bc22657b9211825916dc2.json @@ -0,0 +1,26 @@ +{ + "db_name": "SQLite", + "query": "SELECT child, parent FROM commit_links JOIN commits ON hash = child WHERE reachable = 2 ORDER BY hash ASC ", + "describe": { + "columns": [ + { + "name": "child", + "ordinal": 0, + "type_info": "Text" + }, + { + "name": "parent", + "ordinal": 1, + "type_info": "Text" + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + false, + false + ] + }, + "hash": "f9eefc496eb423eb9a73767d3ff17784fd2f8597021bc22657b9211825916dc2" +} diff --git a/.sqlx/query-fa9d5c2db5228e513a4c50f1b85b6782312deef803ed6231658c6721a26a3639.json b/.sqlx/query-fa9d5c2db5228e513a4c50f1b85b6782312deef803ed6231658c6721a26a3639.json new file mode 100644 index 0000000..605e501 --- /dev/null +++ b/.sqlx/query-fa9d5c2db5228e513a4c50f1b85b6782312deef803ed6231658c6721a26a3639.json @@ -0,0 +1,38 @@ +{ + "db_name": "SQLite", + "query": "SELECT hash, author, committer_date AS \"committer_date: time::OffsetDateTime\", message FROM commits WHERE reachable = 2 ORDER BY hash ASC ", + "describe": { + "columns": [ + { + "name": "hash", + "ordinal": 0, + "type_info": "Text" + }, + { + "name": "author", + "ordinal": 1, + "type_info": "Text" + }, + { + "name": "committer_date: time::OffsetDateTime", + "ordinal": 2, + "type_info": "Text" + }, + { + "name": "message", + "ordinal": 3, + "type_info": "Text" + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + false, + false, + false, + false + ] + }, + "hash": "fa9d5c2db5228e513a4c50f1b85b6782312deef803ed6231658c6721a26a3639" +} diff --git a/scripts/graph/commits.ts b/scripts/graph/commits.ts new file mode 100644 index 0000000..9e6e081 --- /dev/null +++ b/scripts/graph/commits.ts @@ -0,0 +1,130 @@ +import { CommitsResponse } from "./requests"; + +type Commit = { + indexByHash: number; + indexByGraph: number; + hash: string; + parents: Commit[]; + children: Commit[]; + author: string; + committerDate: number; + summary: string; +}; + +export class Commits { + #graphId: number | null = null; + #commitsByGraph: Commit[] = []; + + requiresUpdate(graphId: number): boolean { + return this.#graphId === null || this.#graphId < graphId; + } + + update(response: CommitsResponse) { + console.assert(response.hashByHash.length == response.authorByHash.length); + console.assert(response.hashByHash.length == response.committerDateByHash.length); + console.assert(response.hashByHash.length == response.summaryByHash.length); + + let commits = this.#loadCommits(response); + commits = this.#sortCommitsTopologically(commits); + this.#sortCommitsByCommitterDate(commits); + + // Fill in indexes - "later" is now + for (const [idx, commit] of commits.entries()) { + commit.indexByGraph = idx; + } + + this.#graphId = response.graphId; + this.#commitsByGraph = commits; + } + + #loadCommits(response: CommitsResponse): Commit[] { + const commits = new Map(); + const commitsByHash = []; + + for (const [idx, hash] of response.hashByHash.entries()) { + const commit = { + indexByHash: idx, + indexByGraph: NaN, // Filled in later + hash, + parents: [], + children: [], + author: response.authorByHash[idx]!, + committerDate: response.committerDateByHash[idx]!, + summary: response.summaryByHash[idx]!, + }; + commits.set(hash, commit); + commitsByHash.push(commit); + } + + // Fill in parents and children + for (const [childIdx, parentIdx] of response.childParentIndexPairs) { + const childHash = response.hashByHash[childIdx]!; + const parentHash = response.hashByHash[parentIdx]!; + + const child = commits.get(childHash)!; + const parent = commits.get(parentHash)!; + + child.parents.push(parent); + parent.children.push(child); + } + + return commitsByHash; + } + + #sortCommitsByCommitterDate(commits: Commit[]) { + commits.sort((a, b) => a.committerDate - b.committerDate); + } + + /** + * Sort commits topologically such that parents come before their children. + * + * Assumes that there are no duplicated commits anywhere. + * + * The algorithm used is a version of [Kahn's algorithm][0] that starts at the + * nodes with no parents. It uses a stack for the set of parentless nodes, + * meaning the resulting commit order is depth-first-y, not breadth-first-y. + * For example, this commit graph (where children are ordered top to bottom) + * results in the order `A, B, C, D, E, F` and not an interleaved order like + * `A, B, D, C, E, F` (which a queue would produce): + * + * ```text + * A - B - C + * \ \ + * D - E - F + * ``` + * + * [0]: https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm + */ + #sortCommitsTopologically(commits: Commit[]): Commit[] { + // Track which unvisited parents are left for each commit + const childParentMap: Map> = new Map(); + for (const commit of commits) { + childParentMap.set(commit.hash, new Set(commit.parents.map(p => p.hash))); + } + + // Stack of parentless commits + const parentless = commits.filter(c => c.parents.length == 0); + + const sorted: Commit[] = []; + while (parentless.length > 0) { + // Visit commit + const commit = parentless.pop()!; + sorted.push(commit); + + for (const child of commit.children) { + const parents = childParentMap.get(child.hash)!; + parents.delete(commit.hash); + if (parents.size == 0) { + parentless.push(child); + } + } + } + + for (const [child, parents] of childParentMap.entries()) { + console.assert(parents.size == 0, child, "still has parents"); + } + console.assert(parentless.length == 0); + console.assert(commits.length == sorted.length, "topo sort changed commit amount"); + return sorted; + } +} diff --git a/scripts/graph/requests.ts b/scripts/graph/requests.ts index e08e9e3..9dea802 100644 --- a/scripts/graph/requests.ts +++ b/scripts/graph/requests.ts @@ -14,8 +14,8 @@ export type CommitsResponse = { hashByHash: string[]; authorByHash: string[]; committerDateByHash: number[]; - messageByHash: string[]; - parentsByHash: number[][]; + summaryByHash: string[]; + childParentIndexPairs: [number, number][]; }; /** diff --git a/scripts/graph/state.ts b/scripts/graph/state.ts index ee6cdd0..ec7c5ce 100644 --- a/scripts/graph/state.ts +++ b/scripts/graph/state.ts @@ -1,15 +1,17 @@ +import { Commits } from "./commits.js"; import { Metrics } from "./metrics.js"; -import { getMetrics } from "./requests.js"; +import { getCommits, getMetrics } from "./requests.js"; export class State { #latestGraphId: number = -Infinity; #latestDataId: number = -Infinity; #metrics: Metrics; + #commits: Commits = new Commits(); - #requestingNewMetrics: boolean = false; + #requestingMetrics: boolean = false; + #requestingCommits: boolean = false; - // commits (with graph id and data id) // raw measurements (with graph id and data id) // processed measurements (with graph id and data id) @@ -49,19 +51,37 @@ export class State { if (this.#metrics.requiresUpdate(this.#latestDataId)) { this.#requestMetrics(); } + + if (this.#commits.requiresUpdate(this.#latestGraphId)) { + this.#requestCommits(); + } } async #requestMetrics() { - if (this.#requestingNewMetrics) return; - console.log("Requesting new metrics"); + if (this.#requestingMetrics) return; + console.log("Requesting metrics"); try { - this.#requestingNewMetrics = true; + this.#requestingMetrics = true; const response = await getMetrics(); this.#updateDataId(response.dataId); this.#metrics.update(response); this.update(); } finally { - this.#requestingNewMetrics = false; + this.#requestingMetrics = false; + } + } + + async #requestCommits() { + if (this.#requestingCommits) return; + console.log("Requesting commits"); + try { + this.#requestingCommits = true; + const response = await getCommits(); + this.#updateGraphId(response.graphId); + this.#commits.update(response); + this.update(); + } finally { + this.#requestingCommits = false; } } } diff --git a/src/server/web/pages/graph.rs b/src/server/web/pages/graph.rs index 7cc6604..9841e1e 100644 --- a/src/server/web/pages/graph.rs +++ b/src/server/web/pages/graph.rs @@ -3,15 +3,19 @@ use std::collections::HashMap; use askama::Template; use axum::{extract::State, response::IntoResponse, Json}; use axum_extra::extract::Query; +use futures::TryStreamExt; use serde::{Deserialize, Serialize}; -use sqlx::SqlitePool; +use sqlx::{Acquire, SqlitePool}; use crate::{ config::ServerConfig, - server::web::{ - base::{Base, Link, Tab}, - paths::{PathGraph, PathGraphCommits, PathGraphMeasurements, PathGraphMetrics}, - r#static::{GRAPH_JS, UPLOT_CSS}, + server::{ + util, + web::{ + base::{Base, Link, Tab}, + paths::{PathGraph, PathGraphCommits, PathGraphMeasurements, PathGraphMetrics}, + r#static::{GRAPH_JS, UPLOT_CSS}, + }, }, somehow, }; @@ -65,21 +69,82 @@ struct CommitsResponse { hash_by_hash: Vec, author_by_hash: Vec, committer_date_by_hash: Vec, - message_by_hash: Vec, - parents_by_hash: Vec>, + summary_by_hash: Vec, + child_parent_index_pairs: Vec<(usize, usize)>, } pub async fn get_graph_commits( _path: PathGraphCommits, State(db): State, ) -> somehow::Result { + let mut tx = db.begin().await?; + let conn = tx.acquire().await?; + + let mut hash_by_hash = vec![]; + let mut author_by_hash = vec![]; + let mut committer_date_by_hash = vec![]; + let mut summary_by_hash = vec![]; + let mut child_parent_index_pairs = vec![]; + + // Fetch main commit info + let mut rows = sqlx::query!( + "\ + SELECT \ + hash, \ + author, \ + committer_date AS \"committer_date: time::OffsetDateTime\", \ + message \ + FROM commits \ + WHERE reachable = 2 \ + ORDER BY hash ASC \ + " + ) + .fetch(&mut *conn); + while let Some(row) = rows.try_next().await? { + hash_by_hash.push(row.hash); + author_by_hash.push(row.author); + committer_date_by_hash.push(row.committer_date.unix_timestamp()); + summary_by_hash.push(util::format_commit_summary(&row.message)); + } + drop(rows); + + // Map from hash to index in "by hash" order + let index_of_hash = hash_by_hash + .iter() + .cloned() + .enumerate() + .map(|(idx, hash)| (hash, idx)) + .collect::>(); + + // Fetch parent info + let mut rows = sqlx::query!( + "\ + SELECT child, parent \ + FROM commit_links \ + JOIN commits ON hash = child \ + WHERE reachable = 2 \ + ORDER BY hash ASC \ + " + ) + .fetch(&mut *conn); + while let Some(row) = rows.try_next().await? { + // The child is tracked and must thus be in our map. + let child_index = *index_of_hash.get(&row.child).unwrap(); + + // The parent of a tracked commit must also be tracked. + let parent_index = *index_of_hash.get(&row.parent).unwrap(); + + child_parent_index_pairs.push((child_index, parent_index)); + } + drop(rows); + Ok(Json(CommitsResponse { - graph_id: 0, // TODO Implement - hash_by_hash: vec![], // TODO Implement - author_by_hash: vec![], // TODO Implement - committer_date_by_hash: vec![], // TODO Implement - message_by_hash: vec![], // TODO Implement - parents_by_hash: vec![], // TODO Implement + graph_id: 0, // TODO Implement + hash_by_hash, + author_by_hash, + committer_date_by_hash, + summary_by_hash, + child_parent_index_pairs, })) }