diff --git a/.sqlx/query-5cb0b1fbc1e9d5e651d77d0c4389b84b54d120400174ca12383fd8d2e277b073.json b/.sqlx/query-5cb0b1fbc1e9d5e651d77d0c4389b84b54d120400174ca12383fd8d2e277b073.json new file mode 100644 index 0000000..5babc2b --- /dev/null +++ b/.sqlx/query-5cb0b1fbc1e9d5e651d77d0c4389b84b54d120400174ca12383fd8d2e277b073.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "PRAGMA defer_foreign_keys=1", + "describe": { + "columns": [], + "parameters": { + "Right": 0 + }, + "nullable": [] + }, + "hash": "5cb0b1fbc1e9d5e651d77d0c4389b84b54d120400174ca12383fd8d2e277b073" +} diff --git a/.sqlx/query-99a988f87d9e1d9ca6b818d815bc343b1e65e9aa00751f1a530dfc696a94c6dd.json b/.sqlx/query-99a988f87d9e1d9ca6b818d815bc343b1e65e9aa00751f1a530dfc696a94c6dd.json new file mode 100644 index 0000000..2a3c25b --- /dev/null +++ b/.sqlx/query-99a988f87d9e1d9ca6b818d815bc343b1e65e9aa00751f1a530dfc696a94c6dd.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "INSERT INTO commit_links (parent, child) VALUES (?, ?)", + "describe": { + "columns": [], + "parameters": { + "Right": 2 + }, + "nullable": [] + }, + "hash": "99a988f87d9e1d9ca6b818d815bc343b1e65e9aa00751f1a530dfc696a94c6dd" +} diff --git a/.sqlx/query-d06de282a27e84bae58b7ea73fce0f0e205e82b8c17406b4f613066a7f3979da.json b/.sqlx/query-d06de282a27e84bae58b7ea73fce0f0e205e82b8c17406b4f613066a7f3979da.json new file mode 100644 index 0000000..76fd2f7 --- /dev/null +++ b/.sqlx/query-d06de282a27e84bae58b7ea73fce0f0e205e82b8c17406b4f613066a7f3979da.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "INSERT OR IGNORE INTO commits (hash) VALUES (?)", + "describe": { + "columns": [], + "parameters": { + "Right": 1 + }, + "nullable": [] + }, + "hash": "d06de282a27e84bae58b7ea73fce0f0e205e82b8c17406b4f613066a7f3979da" +} diff --git a/.sqlx/query-d34f2712f1db625ba6a7d1cfcd0ac6270913f153fc743ee8bd2335c2f5acf6c2.json b/.sqlx/query-d34f2712f1db625ba6a7d1cfcd0ac6270913f153fc743ee8bd2335c2f5acf6c2.json new file mode 100644 index 0000000..c5d17e3 --- /dev/null +++ b/.sqlx/query-d34f2712f1db625ba6a7d1cfcd0ac6270913f153fc743ee8bd2335c2f5acf6c2.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "SELECT hash FROM commits", + "describe": { + "columns": [ + { + "name": "hash", + "ordinal": 0, + "type_info": "Text" + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + false + ] + }, + "hash": "d34f2712f1db625ba6a7d1cfcd0ac6270913f153fc743ee8bd2335c2f5acf6c2" +} diff --git a/Cargo.lock b/Cargo.lock index 6a4ee71..4819641 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -691,6 +691,21 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.28" @@ -735,6 +750,17 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" +[[package]] +name = "futures-macro" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", +] + [[package]] name = "futures-sink" version = "0.3.28" @@ -753,8 +779,10 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ + "futures-channel", "futures-core", "futures-io", + "futures-macro", "futures-sink", "futures-task", "memchr", @@ -2731,6 +2759,7 @@ dependencies = [ "axum", "clap", "directories", + "futures", "gix", "humantime-serde", "mime_guess", diff --git a/Cargo.toml b/Cargo.toml index 5f1efdc..ea8c41a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ askama_axum = "0.3.0" axum = { version = "0.6.19", features = ["macros"] } clap = { version = "4.3.19", features = ["derive", "deprecated"] } directories = "5.0.1" +futures = "0.3.28" humantime-serde = "1.1.1" mime_guess = "2.0.4" rust-embed = "6.8.1" diff --git a/migrations/1_commits.sql b/migrations/1_commits.sql new file mode 100644 index 0000000..2e8ada0 --- /dev/null +++ b/migrations/1_commits.sql @@ -0,0 +1,18 @@ +CREATE TABLE commits ( + hash TEXT NOT NULL PRIMARY KEY, + new INT NOT NULL DEFAULT 1 +) STRICT; + +CREATE TABLE commit_links ( + parent TEXT NOT NULL, + child TEXT NOT NULL, + PRIMARY KEY (parent, child), + FOREIGN KEY (parent) REFERENCES commits (hash) ON DELETE CASCADE, + FOREIGN KEY (child) REFERENCES commits (hash) ON DELETE CASCADE +) STRICT; + +CREATE TABLE branches ( + name TEXT NOT NULL PRIMARY KEY, + hash TEXT NOT NULL, + FOREIGN KEY (hash) REFERENCES commits (hash) ON DELETE CASCADE +) STRICT; diff --git a/src/update.rs b/src/update.rs index 63777dc..08616a7 100644 --- a/src/update.rs +++ b/src/update.rs @@ -1,19 +1,108 @@ //! Repeatedly update the db from the repo. -use tracing::{warn, debug}; +use std::collections::HashSet; + +use anyhow::anyhow; +use futures::TryStreamExt; +use gix::{ObjectId, Repository}; +use sqlx::{prelude::*, SqliteConnection, SqlitePool}; +use tracing::{debug, debug_span, error, Instrument}; use crate::state::AppState; +/// Add new commits from the repo to the database, marked as new. +/// +/// Starts at the known refs and advances depth-first until it hits a commit +/// that is already in the db. +/// +/// Uses a transaction because batch inserts in sqlite are a lot faster in +/// transactions. +// TODO Initialize tracked refs? +// TODO Update tracked refs? +async fn add_new_commits_to_db(db: &SqlitePool, repo: &Repository) -> anyhow::Result<()> { + debug!("Adding new commits to the db"); + let mut tx = db.begin().await?; + let conn = tx.acquire().await?; + + // Defer foreign key checks until the end of the transaction to improve + // insert performance. + sqlx::query!("PRAGMA defer_foreign_keys=1") + .execute(&mut *conn) + .await?; + + let commits = get_all_commits_from_db(&mut *conn).await?; + debug!("Loaded {} commits from the db", commits.len()); + + let mut references = vec![]; + for reference in repo.references()?.prefixed("refs")? { + let id: ObjectId = reference + .map_err(|e| anyhow!(e))? + .into_fully_peeled_id()? + .into(); + references.push(id); + } + debug!("Found {} refs in repo", references.len()); + + let new_commits = repo + .rev_walk(references) + .selected(|c| !commits.contains(c))? + .collect::, _>>()?; + debug!("Found {} new commits in repo", new_commits.len()); + + for commit in new_commits { + let hash = commit.id.to_string(); + sqlx::query!("INSERT OR IGNORE INTO commits (hash) VALUES (?)", hash) + .execute(&mut *conn) + .await?; + + for parent in commit.parent_ids() { + let parent_hash = parent.to_string(); + sqlx::query!( + "INSERT INTO commit_links (parent, child) VALUES (?, ?)", + parent_hash, + hash + ) + .execute(&mut *conn) + .await?; + } + } + + debug!("Finished adding new commits to the db"); + tx.commit().await?; + Ok(()) +} + +async fn get_all_commits_from_db(conn: &mut SqliteConnection) -> anyhow::Result> { + let hashes = sqlx::query!("SELECT hash FROM commits") + .fetch(conn) + .err_into::() + .and_then(|r| async move { r.hash.parse::().map_err(|e| e.into()) }) + .try_collect::>() + .await?; + + Ok(hashes) +} + async fn update_repo(state: &AppState) -> anyhow::Result<()> { - debug!("Updating repo"); + let repo = state.repo.to_thread_local(); + + add_new_commits_to_db(&state.db, &repo) + .instrument(debug_span!("add new commits")) + .await?; + Ok(()) } pub async fn repeatedly(state: AppState) { loop { - if let Err(e) = update_repo(&state).await { - warn!("Error while updating repo: {e:?}"); + async { + if let Err(e) = update_repo(&state).await { + error!("{e:?}"); + } } + .instrument(debug_span!("update repo")) + .await; + tokio::time::sleep(state.config.repo_update_delay).await; } }