Restructure db schema

The "commit_links" table is now called "commit_edges".

There is now a "metrics" table, which the "run_measurements" table
references through a foreign key. This provides canonical metric metadata
and will speed up the initial graph page (before any real data arrives).
For now, its contents will be overwritten with each new run, but more
nuanced config options may be added later.
This commit is contained in:
Joscha 2023-09-01 11:47:16 +02:00
parent 1bae83d116
commit a525e58211
18 changed files with 77 additions and 71 deletions

View file

@ -1,10 +1,10 @@
{ {
"db_name": "SQLite", "db_name": "SQLite",
"query": "SELECT DISTINCT metric FROM run_measurements ORDER BY metric ASC", "query": "SELECT name FROM metrics ORDER BY name ASC",
"describe": { "describe": {
"columns": [ "columns": [
{ {
"name": "metric", "name": "name",
"ordinal": 0, "ordinal": 0,
"type_info": "Text" "type_info": "Text"
} }
@ -16,5 +16,5 @@
false false
] ]
}, },
"hash": "35324f9148e3e7ce2d2aa62d69378ce28a2398b408bbc99ea1299315904d6673" "hash": "249aa7a29ae6f1746b9c56ff4fbe9038a48ee5f32116a163af60617e0394de9c"
} }

View file

@ -1,6 +1,6 @@
{ {
"db_name": "SQLite", "db_name": "SQLite",
"query": "WITH RECURSIVE tracked (hash) AS ( SELECT hash FROM refs WHERE tracked UNION SELECT parent FROM commit_links JOIN tracked ON hash = child ), reachable (hash) AS ( SELECT hash FROM refs UNION SELECT hash FROM tracked UNION SELECT parent FROM commit_links JOIN reachable ON hash = child ) UPDATE commits SET reachable = CASE WHEN hash IN tracked THEN 2 WHEN hash IN reachable THEN 1 ELSE 0 END ", "query": "WITH RECURSIVE tracked (hash) AS ( SELECT hash FROM refs WHERE tracked UNION SELECT parent FROM commit_edges JOIN tracked ON hash = child ), reachable (hash) AS ( SELECT hash FROM refs UNION SELECT hash FROM tracked UNION SELECT parent FROM commit_edges JOIN reachable ON hash = child ) UPDATE commits SET reachable = CASE WHEN hash IN tracked THEN 2 WHEN hash IN reachable THEN 1 ELSE 0 END ",
"describe": { "describe": {
"columns": [], "columns": [],
"parameters": { "parameters": {
@ -8,5 +8,5 @@
}, },
"nullable": [] "nullable": []
}, },
"hash": "2afda30451ececd424b4af3d8b106bdc99d72c8e3e0579cb6d3df8e66429bad1" "hash": "32f0ac59687e5455bb38060dc1d6cd181a86cd6ca7ac2f583a33212c8b7bef1a"
} }

View file

@ -1,6 +1,6 @@
{ {
"db_name": "SQLite", "db_name": "SQLite",
"query": "SELECT hash, message, reachable FROM commits JOIN commit_links ON hash = child WHERE parent = ? ORDER BY reachable DESC, unixepoch(committer_date) ASC ", "query": "SELECT hash, message, reachable FROM commits JOIN commit_edges ON hash = parent WHERE child = ? ORDER BY reachable DESC, unixepoch(committer_date) ASC ",
"describe": { "describe": {
"columns": [ "columns": [
{ {
@ -28,5 +28,5 @@
false false
] ]
}, },
"hash": "b8557950e066a215dd53ad69a031d487ab55f01c9c3a2a3a1bd5ffe1889d0e44" "hash": "3514cb91d76683ccdf40ef732ae6062327d6b4aa1acd96632cb934b2d8495b9d"
} }

View file

@ -1,6 +1,6 @@
{ {
"db_name": "SQLite", "db_name": "SQLite",
"query": "SELECT child, parent FROM commit_links JOIN commits ON hash = child WHERE reachable = 2 ORDER BY hash ASC ", "query": "SELECT child, parent FROM commit_edges JOIN commits ON hash = child WHERE reachable = 2 ORDER BY hash ASC ",
"describe": { "describe": {
"columns": [ "columns": [
{ {
@ -22,5 +22,5 @@
false false
] ]
}, },
"hash": "f9eefc496eb423eb9a73767d3ff17784fd2f8597021bc22657b9211825916dc2" "hash": "5092d460f9bd489e2a5905c1ae3e3d3b3c946c427e1dd7d2d723f6aab5acd636"
} }

View file

@ -1,12 +0,0 @@
{
"db_name": "SQLite",
"query": "INSERT INTO run_output ( id, idx, source, text ) VALUES (?, ?, ?, ?) ",
"describe": {
"columns": [],
"parameters": {
"Right": 4
},
"nullable": []
},
"hash": "6b74f746c36091274ad5477aad2d64a24e0f4a33d5b792394d39c82aca2487dd"
}

View file

@ -1,6 +1,6 @@
{ {
"db_name": "SQLite", "db_name": "SQLite",
"query": "SELECT hash, author, committer_date AS \"committer_date: time::OffsetDateTime\", message FROM commits WHERE reachable = 2 ORDER BY hash ASC ", "query": "SELECT hash, author, message, committer_date AS \"committer_date: OffsetDateTime\" FROM commits WHERE reachable = 2 ORDER BY hash ASC ",
"describe": { "describe": {
"columns": [ "columns": [
{ {
@ -14,12 +14,12 @@
"type_info": "Text" "type_info": "Text"
}, },
{ {
"name": "committer_date: time::OffsetDateTime", "name": "message",
"ordinal": 2, "ordinal": 2,
"type_info": "Text" "type_info": "Text"
}, },
{ {
"name": "message", "name": "committer_date: OffsetDateTime",
"ordinal": 3, "ordinal": 3,
"type_info": "Text" "type_info": "Text"
} }
@ -34,5 +34,5 @@
false false
] ]
}, },
"hash": "fa9d5c2db5228e513a4c50f1b85b6782312deef803ed6231658c6721a26a3639" "hash": "6c766f5d504a19061a551b0b11b1558d08702ff3656331cb2595c9dc311cd870"
} }

View file

@ -1,6 +1,6 @@
{ {
"db_name": "SQLite", "db_name": "SQLite",
"query": "SELECT hash, message, reachable FROM commits JOIN commit_links ON hash = parent WHERE child = ? ORDER BY reachable DESC, unixepoch(committer_date) ASC ", "query": "SELECT hash, message, reachable FROM commits JOIN commit_edges ON hash = child WHERE parent = ? ORDER BY reachable DESC, unixepoch(committer_date) ASC ",
"describe": { "describe": {
"columns": [ "columns": [
{ {
@ -28,5 +28,5 @@
false false
] ]
}, },
"hash": "3fae2fec2ed76f5ce7a8a3127280030e0adb25c62a1f30c7c97e435ee3d148f6" "hash": "a6d9bfe6d0cd4677074341cebe6a3cc9c1ed2273ab238bd18a435abff4564c67"
} }

View file

@ -1,6 +1,6 @@
{ {
"db_name": "SQLite", "db_name": "SQLite",
"query": "SELECT source, text FROM run_output WHERE id = ? ORDER BY idx ASC ", "query": "SELECT source, text FROM run_output WHERE id = ? ORDER BY line ASC ",
"describe": { "describe": {
"columns": [ "columns": [
{ {
@ -22,5 +22,5 @@
false false
] ]
}, },
"hash": "6ee70f3a692ecb2a4fadfdd28778d74f63be6af7977f73ddd647a2b86e78b49f" "hash": "bf7e8565a6ddea9d894fbeef3c9e7ed5e71893b920f5f36e80a5e2ed65cc9bb9"
} }

View file

@ -0,0 +1,12 @@
{
"db_name": "SQLite",
"query": "INSERT INTO run_output ( id, line, source, text ) VALUES (?, ?, ?, ?) ",
"describe": {
"columns": [],
"parameters": {
"Right": 4
},
"nullable": []
},
"hash": "c12f5a469c6ad64bcc01f6a095f27f703dba6e519e8e218bffac81c9b8b45b95"
}

View file

@ -1,6 +1,6 @@
{ {
"db_name": "SQLite", "db_name": "SQLite",
"query": "INSERT OR IGNORE INTO commit_links (parent, child) VALUES (?, ?)", "query": "INSERT OR IGNORE INTO commit_edges (parent, child) VALUES (?, ?)",
"describe": { "describe": {
"columns": [], "columns": [],
"parameters": { "parameters": {
@ -8,5 +8,5 @@
}, },
"nullable": [] "nullable": []
}, },
"hash": "2af3e5b2458b2674034853428f264190c2c1614d8df8dd831fd668c121786e88" "hash": "ef93e50e17e9f73b07ce548fc92e8480a715819d25c3f880465f9b5c89a8780b"
} }

View file

@ -1,2 +0,0 @@
CREATE INDEX idx_run_measurements_metric
ON run_measurements (metric);

View file

@ -1,5 +0,0 @@
CREATE INDEX idx_commits_hash_reachable
ON commits (hash, reachable);
CREATE INDEX idx_commit_links_child_parent
ON commit_links (child, parent);

View file

@ -9,23 +9,38 @@ CREATE TABLE commits (
new INT NOT NULL DEFAULT 1 new INT NOT NULL DEFAULT 1
) STRICT; ) STRICT;
CREATE TABLE commit_links ( CREATE INDEX idx_commits_hash_reachable
ON commits (hash, reachable);
CREATE TABLE commit_edges (
child TEXT NOT NULL, child TEXT NOT NULL,
parent TEXT NOT NULL, parent TEXT NOT NULL,
PRIMARY KEY (parent, child), PRIMARY KEY (parent, child),
FOREIGN KEY (parent) REFERENCES commits (hash) ON DELETE CASCADE, FOREIGN KEY (parent) REFERENCES commits (hash) ON DELETE CASCADE,
FOREIGN KEY (child) REFERENCES commits (hash) ON DELETE CASCADE FOREIGN KEY (child) REFERENCES commits (hash) ON DELETE CASCADE
) STRICT; ) STRICT;
CREATE INDEX idx_commit_edges_parent_child
ON commit_edges (parent, child);
CREATE INDEX idx_commit_edges_child_parent
ON commit_edges (child, parent);
CREATE TABLE refs ( CREATE TABLE refs (
name TEXT NOT NULL PRIMARY KEY, name TEXT NOT NULL PRIMARY KEY,
hash TEXT NOT NULL, hash TEXT NOT NULL,
tracked INT NOT NULL DEFAULT 0, tracked INT NOT NULL DEFAULT 0,
FOREIGN KEY (hash) REFERENCES commits (hash) ON DELETE CASCADE FOREIGN KEY (hash) REFERENCES commits (hash) ON DELETE CASCADE
) STRICT; ) STRICT;
CREATE TABLE metrics (
name TEXT NOT NULL PRIMARY KEY,
unit TEXT,
direction INT
) STRICT;
CREATE TABLE runs ( CREATE TABLE runs (
id TEXT NOT NULL PRIMARY KEY, id TEXT NOT NULL PRIMARY KEY,
hash TEXT NOT NULL, hash TEXT NOT NULL,
@ -46,16 +61,20 @@ CREATE TABLE run_measurements (
unit TEXT, unit TEXT,
PRIMARY KEY (id, metric), PRIMARY KEY (id, metric),
FOREIGN KEY (id) REFERENCES runs (id) ON DELETE CASCADE FOREIGN KEY (id) REFERENCES runs (id) ON DELETE CASCADE,
FOREIGN KEY (metric) REFERENCES metrics (name) ON UPDATE CASCADE ON DELETE CASCADE
) STRICT; ) STRICT;
CREATE INDEX idx_run_measurements_metric_id_value
ON run_measurements (metric, id, value);
CREATE TABLE run_output ( CREATE TABLE run_output (
id TEXT NOT NULL, id TEXT NOT NULL,
idx INT NOT NULL, line INT NOT NULL,
source INT NOT NULL, source INT NOT NULL,
text TEXT NOT NULL, text TEXT NOT NULL,
PRIMARY KEY (id, idx), PRIMARY KEY (id, line),
FOREIGN KEY (id) REFERENCES runs (id) ON DELETE CASCADE FOREIGN KEY (id) REFERENCES runs (id) ON DELETE CASCADE
) STRICT; ) STRICT;
@ -67,11 +86,5 @@ CREATE TABLE queue (
FOREIGN KEY (hash) REFERENCES commits (hash) ON DELETE CASCADE FOREIGN KEY (hash) REFERENCES commits (hash) ON DELETE CASCADE
) STRICT; ) STRICT;
CREATE INDEX idx_commit_links_parent_child
ON commit_links (parent, child);
CREATE INDEX idx_queue_priority_date_hash CREATE INDEX idx_queue_priority_date_hash
ON queue (priority DESC, unixepoch(date) DESC, hash ASC); ON queue (priority DESC, unixepoch(date) DESC, hash ASC);
CREATE INDEX idx_run_measurements_metric_id_value
ON run_measurements (metric, id, value);

View file

@ -119,7 +119,7 @@ async fn insert_new_commits(
Ok(()) Ok(())
} }
async fn insert_new_commit_links( async fn insert_new_commit_edges(
conn: &mut SqliteConnection, conn: &mut SqliteConnection,
repo: &Repository, repo: &Repository,
new: &[ObjectId], new: &[ObjectId],
@ -132,7 +132,7 @@ async fn insert_new_commit_links(
// Commits *cough*linuxkernel*cough* may list the same parent // Commits *cough*linuxkernel*cough* may list the same parent
// multiple times, so we just ignore duplicates during insert. // multiple times, so we just ignore duplicates during insert.
sqlx::query!( sqlx::query!(
"INSERT OR IGNORE INTO commit_links (parent, child) VALUES (?, ?)", "INSERT OR IGNORE INTO commit_edges (parent, child) VALUES (?, ?)",
parent, parent,
child, child,
) )
@ -142,7 +142,7 @@ async fn insert_new_commit_links(
// So the user has something to look at while importing big repos // So the user has something to look at while importing big repos
if (i + 1) % 100000 == 0 { if (i + 1) % 100000 == 0 {
info!("(2/2) Inserting links: {}/{}", i + 1, new.len()); info!("(2/2) Inserting edges: {}/{}", i + 1, new.len());
} }
} }
Ok(()) Ok(())
@ -214,7 +214,7 @@ async fn update_commit_tracked_status(conn: &mut SqliteConnection) -> somehow::R
tracked (hash) AS ( \ tracked (hash) AS ( \
SELECT hash FROM refs WHERE tracked \ SELECT hash FROM refs WHERE tracked \
UNION \ UNION \
SELECT parent FROM commit_links \ SELECT parent FROM commit_edges \
JOIN tracked ON hash = child \ JOIN tracked ON hash = child \
), \ ), \
reachable (hash) AS ( \ reachable (hash) AS ( \
@ -222,7 +222,7 @@ async fn update_commit_tracked_status(conn: &mut SqliteConnection) -> somehow::R
UNION \ UNION \
SELECT hash FROM tracked \ SELECT hash FROM tracked \
UNION \ UNION \
SELECT parent FROM commit_links \ SELECT parent FROM commit_edges \
JOIN reachable ON hash = child \ JOIN reachable ON hash = child \
) \ ) \
UPDATE commits \ UPDATE commits \
@ -273,7 +273,7 @@ pub async fn inner(db: &SqlitePool, repo: Repo) -> somehow::Result<()> {
// than if they were grouped by commit (insert commit and parents, then next // than if they were grouped by commit (insert commit and parents, then next
// commit and so on). // commit and so on).
insert_new_commits(conn, &thread_local_repo, &new).await?; insert_new_commits(conn, &thread_local_repo, &new).await?;
insert_new_commit_links(conn, &thread_local_repo, &new).await?; insert_new_commit_edges(conn, &thread_local_repo, &new).await?;
if repo_is_new { if repo_is_new {
mark_all_commits_as_old(conn).await?; mark_all_commits_as_old(conn).await?;
} }

View file

@ -96,21 +96,21 @@ async fn save_work(
.await?; .await?;
} }
for (idx, (source, text)) in run.output.into_iter().enumerate() { for (line, (source, text)) in run.output.into_iter().enumerate() {
// Hopefully we won't need more than 4294967296 output chunks per run :P // Hopefully we won't need more than 4294967296 lines per run :P
let idx = idx as u32; let line = line as u32;
sqlx::query!( sqlx::query!(
"\ "\
INSERT INTO run_output ( \ INSERT INTO run_output ( \
id, \ id, \
idx, \ line, \
source, \ source, \
text \ text \
) \ ) \
VALUES (?, ?, ?, ?) \ VALUES (?, ?, ?, ?) \
", ",
run.id, run.id,
idx, line,
source, source,
text, text,
) )

View file

@ -50,7 +50,7 @@ pub async fn get_commit_by_hash(
let parents = sqlx::query!( let parents = sqlx::query!(
"\ "\
SELECT hash, message, reachable FROM commits \ SELECT hash, message, reachable FROM commits \
JOIN commit_links ON hash = parent \ JOIN commit_edges ON hash = parent \
WHERE child = ? \ WHERE child = ? \
ORDER BY reachable DESC, unixepoch(committer_date) ASC \ ORDER BY reachable DESC, unixepoch(committer_date) ASC \
", ",
@ -64,7 +64,7 @@ pub async fn get_commit_by_hash(
let children = sqlx::query!( let children = sqlx::query!(
"\ "\
SELECT hash, message, reachable FROM commits \ SELECT hash, message, reachable FROM commits \
JOIN commit_links ON hash = child \ JOIN commit_edges ON hash = child \
WHERE parent = ? \ WHERE parent = ? \
ORDER BY reachable DESC, unixepoch(committer_date) ASC \ ORDER BY reachable DESC, unixepoch(committer_date) ASC \
", ",

View file

@ -6,6 +6,7 @@ use futures::TryStreamExt;
use maud::html; use maud::html;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use sqlx::{Acquire, SqlitePool}; use sqlx::{Acquire, SqlitePool};
use time::OffsetDateTime;
use crate::{ use crate::{
config::ServerConfig, config::ServerConfig,
@ -55,10 +56,9 @@ pub async fn get_graph_metrics(
_path: PathGraphMetrics, _path: PathGraphMetrics,
State(db): State<SqlitePool>, State(db): State<SqlitePool>,
) -> somehow::Result<impl IntoResponse> { ) -> somehow::Result<impl IntoResponse> {
let metrics = let metrics = sqlx::query_scalar!("SELECT name FROM metrics ORDER BY name ASC")
sqlx::query_scalar!("SELECT DISTINCT metric FROM run_measurements ORDER BY metric ASC") .fetch_all(&db)
.fetch_all(&db) .await?;
.await?;
Ok(Json(MetricsResponse { Ok(Json(MetricsResponse {
data_id: 0, // TODO Implement data_id: 0, // TODO Implement
@ -96,8 +96,8 @@ pub async fn get_graph_commits(
SELECT \ SELECT \
hash, \ hash, \
author, \ author, \
committer_date AS \"committer_date: time::OffsetDateTime\", \ message, \
message \ committer_date AS \"committer_date: OffsetDateTime\" \
FROM commits \ FROM commits \
WHERE reachable = 2 \ WHERE reachable = 2 \
ORDER BY hash ASC \ ORDER BY hash ASC \
@ -124,7 +124,7 @@ pub async fn get_graph_commits(
let mut rows = sqlx::query!( let mut rows = sqlx::query!(
"\ "\
SELECT child, parent \ SELECT child, parent \
FROM commit_links \ FROM commit_edges \
JOIN commits ON hash = child \ JOIN commits ON hash = child \
WHERE reachable = 2 \ WHERE reachable = 2 \
ORDER BY hash ASC \ ORDER BY hash ASC \

View file

@ -80,7 +80,7 @@ async fn from_finished_run(
"\ "\
SELECT source, text FROM run_output \ SELECT source, text FROM run_output \
WHERE id = ? \ WHERE id = ? \
ORDER BY idx ASC \ ORDER BY line ASC \
", ",
id, id,
) )