From 0d3cd15b03a2ae57249169658cc009d13d923c72 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 6 Aug 2023 10:45:27 +0200 Subject: [PATCH] Store more commit info in db --- ...39cf5110d11798d749f523d0e0c98a77194f5.json | 12 ++++ ...65b3221314c59b1865c50b9bff51529c93d7.json} | 4 +- ...e0f0e205e82b8c17406b4f613066a7f3979da.json | 12 ---- ...e204ca93fd93778b360730976a76e299ffef.json} | 6 +- migrations/20230805101911_commits.sql | 11 ++- src/recurring/repo.rs | 68 +++++++++++++------ src/repo.rs | 2 +- src/somehow.rs | 6 ++ src/web/commit_hash.rs | 4 +- 9 files changed, 82 insertions(+), 43 deletions(-) create mode 100644 .sqlx/query-35e5c16c2952f550783b234f27839cf5110d11798d749f523d0e0c98a77194f5.json rename .sqlx/{query-6398e5b1dce1142d3460f9d067588bcb42b0be5278ce7524280a4786c8b41786.json => query-afe943820305632281456f0a629065b3221314c59b1865c50b9bff51529c93d7.json} (73%) delete mode 100644 .sqlx/query-d06de282a27e84bae58b7ea73fce0f0e205e82b8c17406b4f613066a7f3979da.json rename .sqlx/{query-2846970f979d84f4ba4ba2cd659fc37347ea280ebdf584bc535f135f63610a66.json => query-e58a4211444bfe1c965c021085f0e204ca93fd93778b360730976a76e299ffef.json} (59%) diff --git a/.sqlx/query-35e5c16c2952f550783b234f27839cf5110d11798d749f523d0e0c98a77194f5.json b/.sqlx/query-35e5c16c2952f550783b234f27839cf5110d11798d749f523d0e0c98a77194f5.json new file mode 100644 index 0000000..cf4eeea --- /dev/null +++ b/.sqlx/query-35e5c16c2952f550783b234f27839cf5110d11798d749f523d0e0c98a77194f5.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "\nINSERT OR IGNORE INTO commits (hash, author, author_date, committer, committer_date, message)\nVALUES (?, ?, ?, ?, ?, ?)\n ", + "describe": { + "columns": [], + "parameters": { + "Right": 6 + }, + "nullable": [] + }, + "hash": "35e5c16c2952f550783b234f27839cf5110d11798d749f523d0e0c98a77194f5" +} diff --git a/.sqlx/query-6398e5b1dce1142d3460f9d067588bcb42b0be5278ce7524280a4786c8b41786.json b/.sqlx/query-afe943820305632281456f0a629065b3221314c59b1865c50b9bff51529c93d7.json similarity index 73% rename from .sqlx/query-6398e5b1dce1142d3460f9d067588bcb42b0be5278ce7524280a4786c8b41786.json rename to .sqlx/query-afe943820305632281456f0a629065b3221314c59b1865c50b9bff51529c93d7.json index b59c1be..2ac9e05 100644 --- a/.sqlx/query-6398e5b1dce1142d3460f9d067588bcb42b0be5278ce7524280a4786c8b41786.json +++ b/.sqlx/query-afe943820305632281456f0a629065b3221314c59b1865c50b9bff51529c93d7.json @@ -1,6 +1,6 @@ { "db_name": "SQLite", - "query": "\nWITH RECURSIVE reachable(hash) AS (\n SELECT hash FROM tracked_refs\n UNION\n SELECT parent FROM commit_links\n JOIN reachable ON hash = child\n)\n\nUPDATE commits\nSET tracked = (hash IN reachable)\n", + "query": "\nWITH RECURSIVE reachable(hash) AS (\n SELECT hash FROM tracked_refs\n UNION\n SELECT parent FROM commit_links\n JOIN reachable ON hash = child\n)\n\nUPDATE commits\nSET reachable = (hash IN reachable)\n", "describe": { "columns": [], "parameters": { @@ -8,5 +8,5 @@ }, "nullable": [] }, - "hash": "6398e5b1dce1142d3460f9d067588bcb42b0be5278ce7524280a4786c8b41786" + "hash": "afe943820305632281456f0a629065b3221314c59b1865c50b9bff51529c93d7" } diff --git a/.sqlx/query-d06de282a27e84bae58b7ea73fce0f0e205e82b8c17406b4f613066a7f3979da.json b/.sqlx/query-d06de282a27e84bae58b7ea73fce0f0e205e82b8c17406b4f613066a7f3979da.json deleted file mode 100644 index 76fd2f7..0000000 --- a/.sqlx/query-d06de282a27e84bae58b7ea73fce0f0e205e82b8c17406b4f613066a7f3979da.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "db_name": "SQLite", - "query": "INSERT OR IGNORE INTO commits (hash) VALUES (?)", - "describe": { - "columns": [], - "parameters": { - "Right": 1 - }, - "nullable": [] - }, - "hash": "d06de282a27e84bae58b7ea73fce0f0e205e82b8c17406b4f613066a7f3979da" -} diff --git a/.sqlx/query-2846970f979d84f4ba4ba2cd659fc37347ea280ebdf584bc535f135f63610a66.json b/.sqlx/query-e58a4211444bfe1c965c021085f0e204ca93fd93778b360730976a76e299ffef.json similarity index 59% rename from .sqlx/query-2846970f979d84f4ba4ba2cd659fc37347ea280ebdf584bc535f135f63610a66.json rename to .sqlx/query-e58a4211444bfe1c965c021085f0e204ca93fd93778b360730976a76e299ffef.json index 78ede63..aa3b2cb 100644 --- a/.sqlx/query-2846970f979d84f4ba4ba2cd659fc37347ea280ebdf584bc535f135f63610a66.json +++ b/.sqlx/query-e58a4211444bfe1c965c021085f0e204ca93fd93778b360730976a76e299ffef.json @@ -1,6 +1,6 @@ { "db_name": "SQLite", - "query": "\nSELECT child, tracked FROM commit_links\nJOIN commits ON hash = child\nWHERE parent = ?\n ", + "query": "\nSELECT child, reachable FROM commit_links\nJOIN commits ON hash = child\nWHERE parent = ?\n ", "describe": { "columns": [ { @@ -9,7 +9,7 @@ "type_info": "Text" }, { - "name": "tracked", + "name": "reachable", "ordinal": 1, "type_info": "Int64" } @@ -22,5 +22,5 @@ false ] }, - "hash": "2846970f979d84f4ba4ba2cd659fc37347ea280ebdf584bc535f135f63610a66" + "hash": "e58a4211444bfe1c965c021085f0e204ca93fd93778b360730976a76e299ffef" } diff --git a/migrations/20230805101911_commits.sql b/migrations/20230805101911_commits.sql index 2f76a5f..44c22c8 100644 --- a/migrations/20230805101911_commits.sql +++ b/migrations/20230805101911_commits.sql @@ -1,7 +1,12 @@ CREATE TABLE commits ( - hash TEXT NOT NULL PRIMARY KEY, - new INT NOT NULL DEFAULT 1, - tracked INT NOT NULL DEFAULT 0 + hash TEXT NOT NULL PRIMARY KEY, + author TEXT NOT NULL, + author_date TEXT NOT NULL, + committer TEXT NOT NULL, + committer_date TEXT NOT NULL, + message TEXT NOT NULL, + reachable INT NOT NULL DEFAULT 0, + new INT NOT NULL DEFAULT 1 ) STRICT; CREATE TABLE commit_links ( diff --git a/src/recurring/repo.rs b/src/recurring/repo.rs index c18c460..bea1e21 100644 --- a/src/recurring/repo.rs +++ b/src/recurring/repo.rs @@ -6,13 +6,13 @@ use std::collections::HashSet; use futures::TryStreamExt; -use gix::{objs::Kind, traverse::commit::Info, ObjectId, Repository}; +use gix::{date::time::format::ISO8601_STRICT, objs::Kind, Commit, ObjectId, Repository}; use sqlx::{Acquire, SqliteConnection, SqlitePool}; use tracing::{debug, info}; -use crate::somehow; +use crate::{repo, somehow}; -async fn get_all_commits_from_db( +async fn get_all_commit_hashes_from_db( conn: &mut SqliteConnection, ) -> somehow::Result> { let hashes = sqlx::query!("SELECT hash FROM commits") @@ -25,14 +25,14 @@ async fn get_all_commits_from_db( Ok(hashes) } -fn get_new_commits_from_repo( - repo: &Repository, - old: &HashSet, -) -> somehow::Result> { +fn get_new_commits_from_repo<'a, 'b: 'a>( + repo: &'a Repository, + old: &'b HashSet, +) -> somehow::Result>> { // Collect all references starting with "refs" let mut all_references: Vec = vec![]; for reference in repo.references()?.prefixed("refs")? { - let reference = reference.map_err(|e| somehow::Error(anyhow::anyhow!(e)))?; + let reference = reference.map_err(somehow::Error::from_box)?; let id = reference.into_fully_peeled_id()?; // Some repos *cough*linuxkernel*cough* have refs that don't point to @@ -45,29 +45,57 @@ fn get_new_commits_from_repo( } // Walk from those until hitting old references - let new_commits = repo + let mut new = vec![]; + for commit in repo .rev_walk(all_references) .selected(|c| !old.contains(c))? - .map(|r| r.map(|i| i.detach())) - .collect::, _>>()?; + { + let commit = commit?.id().object()?.try_into_commit()?; + new.push(commit); + } - Ok(new_commits) + Ok(new) } -async fn insert_new_commits(conn: &mut SqliteConnection, new: &[Info]) -> somehow::Result<()> { +async fn insert_new_commits( + conn: &mut SqliteConnection, + new: &[Commit<'_>], +) -> somehow::Result<()> { for commit in new { let hash = commit.id.to_string(); - sqlx::query!("INSERT OR IGNORE INTO commits (hash) VALUES (?)", hash) - .execute(&mut *conn) - .await?; + let author_info = commit.author()?; + let author = repo::format_actor(author_info.actor())?; + let author_date = author_info.time.format(ISO8601_STRICT); + let committer_info = commit.committer()?; + let committer = repo::format_actor(committer_info.actor())?; + let committer_date = committer_info.time.format(ISO8601_STRICT); + let message = commit.message_raw()?.to_string(); + + sqlx::query!( + " +INSERT OR IGNORE INTO commits (hash, author, author_date, committer, committer_date, message) +VALUES (?, ?, ?, ?, ?, ?) +", + hash, + author, + author_date, + committer, + committer_date, + message + ) + .execute(&mut *conn) + .await?; } Ok(()) } -async fn insert_new_commit_links(conn: &mut SqliteConnection, new: &[Info]) -> somehow::Result<()> { +async fn insert_new_commit_links( + conn: &mut SqliteConnection, + new: &[Commit<'_>], +) -> somehow::Result<()> { for commit in new { let child = commit.id.to_string(); - for parent in &commit.parent_ids { + for parent in commit.parent_ids() { let parent = parent.to_string(); // Commits *cough*linuxkernel*cough* may list the same parent // multiple times, so we just ignore duplicates during insert. @@ -148,7 +176,7 @@ WITH RECURSIVE reachable(hash) AS ( ) UPDATE commits -SET tracked = (hash IN reachable) +SET reachable = (hash IN reachable) " ) .execute(conn) @@ -161,7 +189,7 @@ pub async fn update(db: &SqlitePool, repo: &Repository) -> somehow::Result<()> { let mut tx = db.begin().await?; let conn = tx.acquire().await?; - let old = get_all_commits_from_db(&mut *conn).await?; + let old = get_all_commit_hashes_from_db(&mut *conn).await?; debug!("Loaded {} commits from the db", old.len()); let repo_is_new = old.is_empty(); diff --git a/src/repo.rs b/src/repo.rs index 8255b42..6e8dd24 100644 --- a/src/repo.rs +++ b/src/repo.rs @@ -7,7 +7,7 @@ use crate::somehow; pub fn format_actor(author: IdentityRef<'_>) -> somehow::Result { let mut buffer = vec![]; - author.write_to(&mut buffer)?; + author.trim().write_to(&mut buffer)?; Ok(String::from_utf8_lossy(&buffer).to_string()) } diff --git a/src/somehow.rs b/src/somehow.rs index 44aa00e..9cfe456 100644 --- a/src/somehow.rs +++ b/src/somehow.rs @@ -8,6 +8,12 @@ use axum::{ /// Wrapper around [`anyhow::Error`] that implements additional type classes. pub struct Error(pub anyhow::Error); +impl Error { + pub fn from_box(err: Box) -> Self { + Self(anyhow::anyhow!(err)) + } +} + impl From for Error where E: error::Error + Send + Sync + 'static, diff --git a/src/web/commit_hash.rs b/src/web/commit_hash.rs index 65a0f54..e59623b 100644 --- a/src/web/commit_hash.rs +++ b/src/web/commit_hash.rs @@ -53,7 +53,7 @@ pub async fn get( // Do this first because a &Repository can't be kept across awaits. let child_rows = sqlx::query!( " -SELECT child, tracked FROM commit_links +SELECT child, reachable FROM commit_links JOIN commits ON hash = child WHERE parent = ? ", @@ -79,7 +79,7 @@ WHERE parent = ? let mut children = vec![]; for row in child_rows { let id = row.child.parse::()?.attach(&repo); - children.push(Commit::new(id, row.tracked != 0)?); + children.push(Commit::new(id, row.reachable != 0)?); } Ok(CommitIdTemplate {