Insert new commits from repo

This commit is contained in:
Joscha 2023-08-05 00:07:52 +02:00
parent 56dd74b65f
commit 6651c72ea3
8 changed files with 197 additions and 4 deletions

View file

@ -0,0 +1,12 @@
{
"db_name": "SQLite",
"query": "PRAGMA defer_foreign_keys=1",
"describe": {
"columns": [],
"parameters": {
"Right": 0
},
"nullable": []
},
"hash": "5cb0b1fbc1e9d5e651d77d0c4389b84b54d120400174ca12383fd8d2e277b073"
}

View file

@ -0,0 +1,12 @@
{
"db_name": "SQLite",
"query": "INSERT INTO commit_links (parent, child) VALUES (?, ?)",
"describe": {
"columns": [],
"parameters": {
"Right": 2
},
"nullable": []
},
"hash": "99a988f87d9e1d9ca6b818d815bc343b1e65e9aa00751f1a530dfc696a94c6dd"
}

View file

@ -0,0 +1,12 @@
{
"db_name": "SQLite",
"query": "INSERT OR IGNORE INTO commits (hash) VALUES (?)",
"describe": {
"columns": [],
"parameters": {
"Right": 1
},
"nullable": []
},
"hash": "d06de282a27e84bae58b7ea73fce0f0e205e82b8c17406b4f613066a7f3979da"
}

View file

@ -0,0 +1,20 @@
{
"db_name": "SQLite",
"query": "SELECT hash FROM commits",
"describe": {
"columns": [
{
"name": "hash",
"ordinal": 0,
"type_info": "Text"
}
],
"parameters": {
"Right": 0
},
"nullable": [
false
]
},
"hash": "d34f2712f1db625ba6a7d1cfcd0ac6270913f153fc743ee8bd2335c2f5acf6c2"
}

29
Cargo.lock generated
View file

@ -691,6 +691,21 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "futures"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40"
dependencies = [
"futures-channel",
"futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-channel"
version = "0.3.28"
@ -735,6 +750,17 @@ version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964"
[[package]]
name = "futures-macro"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.28",
]
[[package]]
name = "futures-sink"
version = "0.3.28"
@ -753,8 +779,10 @@ version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr",
@ -2731,6 +2759,7 @@ dependencies = [
"axum",
"clap",
"directories",
"futures",
"gix",
"humantime-serde",
"mime_guess",

View file

@ -10,6 +10,7 @@ askama_axum = "0.3.0"
axum = { version = "0.6.19", features = ["macros"] }
clap = { version = "4.3.19", features = ["derive", "deprecated"] }
directories = "5.0.1"
futures = "0.3.28"
humantime-serde = "1.1.1"
mime_guess = "2.0.4"
rust-embed = "6.8.1"

18
migrations/1_commits.sql Normal file
View file

@ -0,0 +1,18 @@
-- One row per commit known to the db, keyed by the commit hash stored as text.
CREATE TABLE commits (
hash TEXT NOT NULL PRIMARY KEY,
-- Flag column; a row inserted without an explicit value gets 1, i.e. it is
-- marked as new.
new INT NOT NULL DEFAULT 1
) STRICT;
-- Parent/child edges between commits. Each (parent, child) pair may appear at
-- most once because the pair is the primary key.
CREATE TABLE commit_links (
parent TEXT NOT NULL,
child TEXT NOT NULL,
PRIMARY KEY (parent, child),
-- Both ends must exist in `commits`; deleting a commit also deletes every
-- link that mentions it.
FOREIGN KEY (parent) REFERENCES commits (hash) ON DELETE CASCADE,
FOREIGN KEY (child) REFERENCES commits (hash) ON DELETE CASCADE
) STRICT;
-- Named branches, one row per branch name.
CREATE TABLE branches (
name TEXT NOT NULL PRIMARY KEY,
-- Presumably the commit the branch currently points at (TODO confirm against
-- the code that fills this table). Must exist in `commits`; deleting that
-- commit deletes the branch row as well.
hash TEXT NOT NULL,
FOREIGN KEY (hash) REFERENCES commits (hash) ON DELETE CASCADE
) STRICT;

View file

@ -1,19 +1,108 @@
//! Repeatedly update the db from the repo.
use tracing::{warn, debug};
use std::collections::HashSet;
use anyhow::anyhow;
use futures::TryStreamExt;
use gix::{ObjectId, Repository};
use sqlx::{prelude::*, SqliteConnection, SqlitePool};
use tracing::{debug, debug_span, error, Instrument};
use crate::state::AppState;
/// Insert every commit that is reachable from the repo's refs but missing from
/// the db. Freshly inserted rows only set `hash`, so they pick up the column
/// default that marks them as new.
///
/// The walk starts at the fully peeled targets of all refs below `refs` and
/// only selects commits that are not already stored in the db.
///
/// All inserts happen inside a single transaction because batch inserts in
/// sqlite are a lot faster that way. The transaction is committed only after
/// every commit and every parent link has been written; any error along the
/// way returns early and leaves the transaction uncommitted.
// TODO Initialize tracked refs?
// TODO Update tracked refs?
async fn add_new_commits_to_db(db: &SqlitePool, repo: &Repository) -> anyhow::Result<()> {
    debug!("Adding new commits to the db");

    let mut tx = db.begin().await?;
    let conn = tx.acquire().await?;

    // Foreign key checks are postponed to the end of the transaction, which
    // makes the individual inserts cheaper.
    sqlx::query!("PRAGMA defer_foreign_keys=1")
        .execute(&mut *conn)
        .await?;

    let known = get_all_commits_from_db(&mut *conn).await?;
    debug!("Loaded {} commits from the db", known.len());

    // Resolve every ref to the object id it ultimately points at.
    let tips = repo
        .references()?
        .prefixed("refs")?
        .map(|reference| -> anyhow::Result<ObjectId> {
            let peeled = reference
                .map_err(|e| anyhow!(e))?
                .into_fully_peeled_id()?;
            Ok(peeled.into())
        })
        .collect::<anyhow::Result<Vec<_>>>()?;
    debug!("Found {} refs in repo", tips.len());

    let unseen = repo
        .rev_walk(tips)
        .selected(|id| !known.contains(id))?
        .collect::<Result<Vec<_>, _>>()?;
    debug!("Found {} new commits in repo", unseen.len());

    for info in unseen {
        let child_hash = info.id.to_string();
        sqlx::query!(
            "INSERT OR IGNORE INTO commits (hash) VALUES (?)",
            child_hash
        )
        .execute(&mut *conn)
        .await?;

        // One link row per parent of the commit that was just inserted.
        for parent_id in info.parent_ids() {
            let parent_hash = parent_id.to_string();
            sqlx::query!(
                "INSERT INTO commit_links (parent, child) VALUES (?, ?)",
                parent_hash,
                child_hash
            )
            .execute(&mut *conn)
            .await?;
        }
    }

    debug!("Finished adding new commits to the db");
    tx.commit().await?;
    Ok(())
}
/// Load the hash of every row in the `commits` table and parse each one into
/// an [`ObjectId`].
///
/// # Errors
///
/// Fails on the first database error or on the first stored hash that does
/// not parse as an [`ObjectId`].
async fn get_all_commits_from_db(conn: &mut SqliteConnection) -> anyhow::Result<HashSet<ObjectId>> {
    let mut rows = sqlx::query!("SELECT hash FROM commits").fetch(conn);

    let mut known = HashSet::new();
    // Pull rows one at a time; `?` stops at the first failing row or hash.
    while let Some(row) = rows.try_next().await? {
        known.insert(row.hash.parse::<ObjectId>()?);
    }
    Ok(known)
}
/// Run one update round: bring the db up to date with the repo.
///
/// Currently this only adds commits that are missing from the db, inside an
/// "add new commits" debug span.
async fn update_repo(state: &AppState) -> anyhow::Result<()> {
    debug!("Updating repo");

    let local_repo = state.repo.to_thread_local();
    let add_commits = add_new_commits_to_db(&state.db, &local_repo);
    add_commits
        .instrument(debug_span!("add new commits"))
        .await
}
pub async fn repeatedly(state: AppState) {
loop {
async {
if let Err(e) = update_repo(&state).await {
warn!("Error while updating repo: {e:?}");
error!("{e:?}");
}
}
.instrument(debug_span!("update repo"))
.await;
tokio::time::sleep(state.config.repo_update_delay).await;
}
}