Refactor repo update code again

This commit is contained in:
Joscha 2023-08-05 11:37:43 +02:00
parent 488ae76c72
commit b56d0df142
3 changed files with 77 additions and 74 deletions

View file

@ -1,7 +1,7 @@
mod config; mod config;
mod recurring;
mod state; mod state;
mod r#static; mod r#static;
mod update;
use std::{io, path::PathBuf}; use std::{io, path::PathBuf};
@ -132,7 +132,7 @@ async fn run() -> anyhow::Result<()> {
select! { select! {
_ = wait_for_signal() => {}, _ = wait_for_signal() => {},
_ = server.serve(app) => {}, _ = server.serve(app) => {},
_ = update::repeatedly(state.clone()) => {}, _ = recurring::run(state.clone()) => {},
} }
state.shut_down().await; state.shut_down().await;

29
src/recurring.rs Normal file
View file

@ -0,0 +1,29 @@
//! Recurring actions and updates.
// TODO `fetch` submodule for fetching new commits
// TODO `queue` submodule for updating the queue
mod repo;
use tracing::{debug_span, error, Instrument};
use crate::state::AppState;
async fn recurring_task(state: &AppState) {
let repo = state.repo.to_thread_local();
async {
if let Err(e) = repo::update(&state.db, &repo).await {
error!("Error updating repo:\n{e:?}");
};
}
.instrument(debug_span!("update repo"))
.await;
}
pub async fn run(state: AppState) {
loop {
recurring_task(&state).await;
tokio::time::sleep(state.config.repo.update_delay).await;
}
}

View file

@ -1,60 +1,13 @@
//! Repeatedly update the db from the repo. //! Add new commits to the database and update the tracked refs.
// TODO Think about whether ref hashes should be tracked in the db
use std::collections::HashSet; use std::collections::HashSet;
use anyhow::anyhow;
use futures::TryStreamExt; use futures::TryStreamExt;
use gix::{objs::Kind, traverse::commit::Info, ObjectId, Repository}; use gix::{objs::Kind, traverse::commit::Info, ObjectId, Repository};
use sqlx::{prelude::*, SqliteConnection, SqlitePool}; use sqlx::{Acquire, SqliteConnection, SqlitePool};
use tracing::{debug, debug_span, error, info, Instrument}; use tracing::{debug, info};
use crate::state::AppState;
/// Add new commits from the repo to the database, marked as new.
// TODO Rename so the name fits better (maybe rename update module to recurring as well?)
// upadte module -> recurring
// Fetching new commits -> fetch submodule
// Updating repo (current file) -> repo submodule
// Updating queue -> queue submodule
async fn add_new_commits_to_db(db: &SqlitePool, repo: &Repository) -> anyhow::Result<()> {
debug!("Adding new commits to db");
let mut tx = db.begin().await?;
let conn = tx.acquire().await?;
let old = get_all_commits_from_db(&mut *conn).await?;
debug!("Loaded {} commits from the db", old.len());
let new = get_new_commits_from_repo(repo, &old)?;
debug!("Found {} new commits in repo", new.len());
// Defer foreign key checks until the end of the transaction to improve
// insert performance.
sqlx::query!("PRAGMA defer_foreign_keys=1")
.execute(&mut *conn)
.await?;
// Inserts are grouped by table so sqlite can process them *a lot* faster
// than if they were grouped by commit (insert commit and parents, then next
// commit and so on).
insert_new_commits(conn, &new).await?;
insert_new_commit_links(conn, &new).await?;
debug!("Inserted {} new commits into db", new.len());
if old.is_empty() {
// We've never seen any repo before, so we need to do additional
// initialization.
info!("Detected new repo, initializing");
mark_all_commits_as_old(conn).await?;
track_main_branch(conn, repo).await?;
info!("Initialized new repo");
}
update_tracked_refs(conn, repo).await?;
tx.commit().await?;
debug!("Finished adding new commits to db");
Ok(())
}
async fn get_all_commits_from_db(conn: &mut SqliteConnection) -> anyhow::Result<HashSet<ObjectId>> { async fn get_all_commits_from_db(conn: &mut SqliteConnection) -> anyhow::Result<HashSet<ObjectId>> {
let hashes = sqlx::query!("SELECT hash FROM commits") let hashes = sqlx::query!("SELECT hash FROM commits")
@ -74,7 +27,7 @@ fn get_new_commits_from_repo(
// Collect all references starting with "refs" // Collect all references starting with "refs"
let mut all_references: Vec<ObjectId> = vec![]; let mut all_references: Vec<ObjectId> = vec![];
for reference in repo.references()?.prefixed("refs")? { for reference in repo.references()?.prefixed("refs")? {
let reference = reference.map_err(|e| anyhow!(e))?; let reference = reference.map_err(|e| anyhow::anyhow!(e))?;
let id = reference.into_fully_peeled_id()?; let id = reference.into_fully_peeled_id()?;
// Some repos *cough*linuxkernel*cough* have refs that don't point to // Some repos *cough*linuxkernel*cough* have refs that don't point to
@ -174,27 +127,48 @@ async fn update_tracked_refs(conn: &mut SqliteConnection, repo: &Repository) ->
Ok(()) Ok(())
} }
async fn update_repo(state: &AppState) -> anyhow::Result<()> { pub async fn update(db: &SqlitePool, repo: &Repository) -> anyhow::Result<()> {
info!("Updating repo"); debug!("Updating repo");
let repo = state.repo.to_thread_local(); let mut tx = db.begin().await?;
let conn = tx.acquire().await?;
add_new_commits_to_db(&state.db, &repo) let old = get_all_commits_from_db(&mut *conn).await?;
.instrument(debug_span!("add new commits")) debug!("Loaded {} commits from the db", old.len());
let repo_is_new = old.is_empty();
if repo_is_new {
info!("Initializing new repo");
}
let new = get_new_commits_from_repo(repo, &old)?;
debug!("Found {} new commits in repo", new.len());
// Defer foreign key checks until the end of the transaction to improve
// insert performance.
sqlx::query!("PRAGMA defer_foreign_keys=1")
.execute(&mut *conn)
.await?; .await?;
// Inserts are grouped by table so sqlite can process them *a lot* faster
// than if they were grouped by commit (insert commit and parents, then next
// commit and so on).
insert_new_commits(conn, &new).await?;
insert_new_commit_links(conn, &new).await?;
debug!("Inserted {} new commits into db", new.len());
if repo_is_new {
mark_all_commits_as_old(conn).await?;
track_main_branch(conn, repo).await?;
debug!("Prepared new repo");
}
update_tracked_refs(conn, repo).await?;
tx.commit().await?;
if repo_is_new {
info!("Initialized new repo");
}
debug!("Updated repo");
Ok(()) Ok(())
} }
pub async fn repeatedly(state: AppState) {
loop {
async {
if let Err(e) = update_repo(&state).await {
error!("Error updating repo:\n{e:?}");
}
}
.instrument(debug_span!("update repo"))
.await;
tokio::time::sleep(state.config.repo.update_delay).await;
}
}