Restructure db schema

The "commit_links" table is now called "commit_edges".

There is now a "metrics" table to which run measurements have a foreign
key. It provides canonical metric metadata and will speed up the
initial graph page (before any real data arrives). For now, the table
is overwritten with each new run, but more nuanced config options may
be added later.
This commit is contained in:
Joscha 2023-09-01 11:47:16 +02:00
parent 1bae83d116
commit a525e58211
18 changed files with 77 additions and 71 deletions

View file

@ -1,10 +1,10 @@
{
"db_name": "SQLite",
"query": "SELECT DISTINCT metric FROM run_measurements ORDER BY metric ASC",
"query": "SELECT name FROM metrics ORDER BY name ASC",
"describe": {
"columns": [
{
"name": "metric",
"name": "name",
"ordinal": 0,
"type_info": "Text"
}
@ -16,5 +16,5 @@
false
]
},
"hash": "35324f9148e3e7ce2d2aa62d69378ce28a2398b408bbc99ea1299315904d6673"
"hash": "249aa7a29ae6f1746b9c56ff4fbe9038a48ee5f32116a163af60617e0394de9c"
}

View file

@ -1,6 +1,6 @@
{
"db_name": "SQLite",
"query": "WITH RECURSIVE tracked (hash) AS ( SELECT hash FROM refs WHERE tracked UNION SELECT parent FROM commit_links JOIN tracked ON hash = child ), reachable (hash) AS ( SELECT hash FROM refs UNION SELECT hash FROM tracked UNION SELECT parent FROM commit_links JOIN reachable ON hash = child ) UPDATE commits SET reachable = CASE WHEN hash IN tracked THEN 2 WHEN hash IN reachable THEN 1 ELSE 0 END ",
"query": "WITH RECURSIVE tracked (hash) AS ( SELECT hash FROM refs WHERE tracked UNION SELECT parent FROM commit_edges JOIN tracked ON hash = child ), reachable (hash) AS ( SELECT hash FROM refs UNION SELECT hash FROM tracked UNION SELECT parent FROM commit_edges JOIN reachable ON hash = child ) UPDATE commits SET reachable = CASE WHEN hash IN tracked THEN 2 WHEN hash IN reachable THEN 1 ELSE 0 END ",
"describe": {
"columns": [],
"parameters": {
@ -8,5 +8,5 @@
},
"nullable": []
},
"hash": "2afda30451ececd424b4af3d8b106bdc99d72c8e3e0579cb6d3df8e66429bad1"
"hash": "32f0ac59687e5455bb38060dc1d6cd181a86cd6ca7ac2f583a33212c8b7bef1a"
}

View file

@ -1,6 +1,6 @@
{
"db_name": "SQLite",
"query": "SELECT hash, message, reachable FROM commits JOIN commit_links ON hash = child WHERE parent = ? ORDER BY reachable DESC, unixepoch(committer_date) ASC ",
"query": "SELECT hash, message, reachable FROM commits JOIN commit_edges ON hash = parent WHERE child = ? ORDER BY reachable DESC, unixepoch(committer_date) ASC ",
"describe": {
"columns": [
{
@ -28,5 +28,5 @@
false
]
},
"hash": "b8557950e066a215dd53ad69a031d487ab55f01c9c3a2a3a1bd5ffe1889d0e44"
"hash": "3514cb91d76683ccdf40ef732ae6062327d6b4aa1acd96632cb934b2d8495b9d"
}

View file

@ -1,6 +1,6 @@
{
"db_name": "SQLite",
"query": "SELECT child, parent FROM commit_links JOIN commits ON hash = child WHERE reachable = 2 ORDER BY hash ASC ",
"query": "SELECT child, parent FROM commit_edges JOIN commits ON hash = child WHERE reachable = 2 ORDER BY hash ASC ",
"describe": {
"columns": [
{
@ -22,5 +22,5 @@
false
]
},
"hash": "f9eefc496eb423eb9a73767d3ff17784fd2f8597021bc22657b9211825916dc2"
"hash": "5092d460f9bd489e2a5905c1ae3e3d3b3c946c427e1dd7d2d723f6aab5acd636"
}

View file

@ -1,12 +0,0 @@
{
"db_name": "SQLite",
"query": "INSERT INTO run_output ( id, idx, source, text ) VALUES (?, ?, ?, ?) ",
"describe": {
"columns": [],
"parameters": {
"Right": 4
},
"nullable": []
},
"hash": "6b74f746c36091274ad5477aad2d64a24e0f4a33d5b792394d39c82aca2487dd"
}

View file

@ -1,6 +1,6 @@
{
"db_name": "SQLite",
"query": "SELECT hash, author, committer_date AS \"committer_date: time::OffsetDateTime\", message FROM commits WHERE reachable = 2 ORDER BY hash ASC ",
"query": "SELECT hash, author, message, committer_date AS \"committer_date: OffsetDateTime\" FROM commits WHERE reachable = 2 ORDER BY hash ASC ",
"describe": {
"columns": [
{
@ -14,12 +14,12 @@
"type_info": "Text"
},
{
"name": "committer_date: time::OffsetDateTime",
"name": "message",
"ordinal": 2,
"type_info": "Text"
},
{
"name": "message",
"name": "committer_date: OffsetDateTime",
"ordinal": 3,
"type_info": "Text"
}
@ -34,5 +34,5 @@
false
]
},
"hash": "fa9d5c2db5228e513a4c50f1b85b6782312deef803ed6231658c6721a26a3639"
"hash": "6c766f5d504a19061a551b0b11b1558d08702ff3656331cb2595c9dc311cd870"
}

View file

@ -1,6 +1,6 @@
{
"db_name": "SQLite",
"query": "SELECT hash, message, reachable FROM commits JOIN commit_links ON hash = parent WHERE child = ? ORDER BY reachable DESC, unixepoch(committer_date) ASC ",
"query": "SELECT hash, message, reachable FROM commits JOIN commit_edges ON hash = child WHERE parent = ? ORDER BY reachable DESC, unixepoch(committer_date) ASC ",
"describe": {
"columns": [
{
@ -28,5 +28,5 @@
false
]
},
"hash": "3fae2fec2ed76f5ce7a8a3127280030e0adb25c62a1f30c7c97e435ee3d148f6"
"hash": "a6d9bfe6d0cd4677074341cebe6a3cc9c1ed2273ab238bd18a435abff4564c67"
}

View file

@ -1,6 +1,6 @@
{
"db_name": "SQLite",
"query": "SELECT source, text FROM run_output WHERE id = ? ORDER BY idx ASC ",
"query": "SELECT source, text FROM run_output WHERE id = ? ORDER BY line ASC ",
"describe": {
"columns": [
{
@ -22,5 +22,5 @@
false
]
},
"hash": "6ee70f3a692ecb2a4fadfdd28778d74f63be6af7977f73ddd647a2b86e78b49f"
"hash": "bf7e8565a6ddea9d894fbeef3c9e7ed5e71893b920f5f36e80a5e2ed65cc9bb9"
}

View file

@ -0,0 +1,12 @@
{
"db_name": "SQLite",
"query": "INSERT INTO run_output ( id, line, source, text ) VALUES (?, ?, ?, ?) ",
"describe": {
"columns": [],
"parameters": {
"Right": 4
},
"nullable": []
},
"hash": "c12f5a469c6ad64bcc01f6a095f27f703dba6e519e8e218bffac81c9b8b45b95"
}

View file

@ -1,6 +1,6 @@
{
"db_name": "SQLite",
"query": "INSERT OR IGNORE INTO commit_links (parent, child) VALUES (?, ?)",
"query": "INSERT OR IGNORE INTO commit_edges (parent, child) VALUES (?, ?)",
"describe": {
"columns": [],
"parameters": {
@ -8,5 +8,5 @@
},
"nullable": []
},
"hash": "2af3e5b2458b2674034853428f264190c2c1614d8df8dd831fd668c121786e88"
"hash": "ef93e50e17e9f73b07ce548fc92e8480a715819d25c3f880465f9b5c89a8780b"
}

View file

@ -1,2 +0,0 @@
CREATE INDEX idx_run_measurements_metric
ON run_measurements (metric);

View file

@ -1,5 +0,0 @@
CREATE INDEX idx_commits_hash_reachable
ON commits (hash, reachable);
CREATE INDEX idx_commit_links_child_parent
ON commit_links (child, parent);

View file

@ -9,23 +9,38 @@ CREATE TABLE commits (
new INT NOT NULL DEFAULT 1
) STRICT;
CREATE TABLE commit_links (
CREATE INDEX idx_commits_hash_reachable
ON commits (hash, reachable);
CREATE TABLE commit_edges (
child TEXT NOT NULL,
parent TEXT NOT NULL,
PRIMARY KEY (parent, child),
FOREIGN KEY (parent) REFERENCES commits (hash) ON DELETE CASCADE,
FOREIGN KEY (child) REFERENCES commits (hash) ON DELETE CASCADE
FOREIGN KEY (child) REFERENCES commits (hash) ON DELETE CASCADE
) STRICT;
CREATE INDEX idx_commit_edges_parent_child
ON commit_edges (parent, child);
CREATE INDEX idx_commit_edges_child_parent
ON commit_edges (child, parent);
CREATE TABLE refs (
name TEXT NOT NULL PRIMARY KEY,
hash TEXT NOT NULL,
tracked INT NOT NULL DEFAULT 0,
tracked INT NOT NULL DEFAULT 0,
FOREIGN KEY (hash) REFERENCES commits (hash) ON DELETE CASCADE
) STRICT;
CREATE TABLE metrics (
name TEXT NOT NULL PRIMARY KEY,
unit TEXT,
direction INT
) STRICT;
CREATE TABLE runs (
id TEXT NOT NULL PRIMARY KEY,
hash TEXT NOT NULL,
@ -46,16 +61,20 @@ CREATE TABLE run_measurements (
unit TEXT,
PRIMARY KEY (id, metric),
FOREIGN KEY (id) REFERENCES runs (id) ON DELETE CASCADE
FOREIGN KEY (id) REFERENCES runs (id) ON DELETE CASCADE,
FOREIGN KEY (metric) REFERENCES metrics (name) ON UPDATE CASCADE ON DELETE CASCADE
) STRICT;
CREATE INDEX idx_run_measurements_metric_id_value
ON run_measurements (metric, id, value);
CREATE TABLE run_output (
id TEXT NOT NULL,
idx INT NOT NULL,
line INT NOT NULL,
source INT NOT NULL,
text TEXT NOT NULL,
PRIMARY KEY (id, idx),
PRIMARY KEY (id, line),
FOREIGN KEY (id) REFERENCES runs (id) ON DELETE CASCADE
) STRICT;
@ -67,11 +86,5 @@ CREATE TABLE queue (
FOREIGN KEY (hash) REFERENCES commits (hash) ON DELETE CASCADE
) STRICT;
CREATE INDEX idx_commit_links_parent_child
ON commit_links (parent, child);
CREATE INDEX idx_queue_priority_date_hash
ON queue (priority DESC, unixepoch(date) DESC, hash ASC);
CREATE INDEX idx_run_measurements_metric_id_value
ON run_measurements (metric, id, value);

View file

@ -119,7 +119,7 @@ async fn insert_new_commits(
Ok(())
}
async fn insert_new_commit_links(
async fn insert_new_commit_edges(
conn: &mut SqliteConnection,
repo: &Repository,
new: &[ObjectId],
@ -132,7 +132,7 @@ async fn insert_new_commit_links(
// Commits *cough*linuxkernel*cough* may list the same parent
// multiple times, so we just ignore duplicates during insert.
sqlx::query!(
"INSERT OR IGNORE INTO commit_links (parent, child) VALUES (?, ?)",
"INSERT OR IGNORE INTO commit_edges (parent, child) VALUES (?, ?)",
parent,
child,
)
@ -142,7 +142,7 @@ async fn insert_new_commit_links(
// So the user has something to look at while importing big repos
if (i + 1) % 100000 == 0 {
info!("(2/2) Inserting links: {}/{}", i + 1, new.len());
info!("(2/2) Inserting edges: {}/{}", i + 1, new.len());
}
}
Ok(())
@ -214,7 +214,7 @@ async fn update_commit_tracked_status(conn: &mut SqliteConnection) -> somehow::R
tracked (hash) AS ( \
SELECT hash FROM refs WHERE tracked \
UNION \
SELECT parent FROM commit_links \
SELECT parent FROM commit_edges \
JOIN tracked ON hash = child \
), \
reachable (hash) AS ( \
@ -222,7 +222,7 @@ async fn update_commit_tracked_status(conn: &mut SqliteConnection) -> somehow::R
UNION \
SELECT hash FROM tracked \
UNION \
SELECT parent FROM commit_links \
SELECT parent FROM commit_edges \
JOIN reachable ON hash = child \
) \
UPDATE commits \
@ -273,7 +273,7 @@ pub async fn inner(db: &SqlitePool, repo: Repo) -> somehow::Result<()> {
// than if they were grouped by commit (insert commit and parents, then next
// commit and so on).
insert_new_commits(conn, &thread_local_repo, &new).await?;
insert_new_commit_links(conn, &thread_local_repo, &new).await?;
insert_new_commit_edges(conn, &thread_local_repo, &new).await?;
if repo_is_new {
mark_all_commits_as_old(conn).await?;
}

View file

@ -96,21 +96,21 @@ async fn save_work(
.await?;
}
for (idx, (source, text)) in run.output.into_iter().enumerate() {
// Hopefully we won't need more than 4294967296 output chunks per run :P
let idx = idx as u32;
for (line, (source, text)) in run.output.into_iter().enumerate() {
// Hopefully we won't need more than 4294967296 lines per run :P
let line = line as u32;
sqlx::query!(
"\
INSERT INTO run_output ( \
id, \
idx, \
line, \
source, \
text \
) \
VALUES (?, ?, ?, ?) \
",
run.id,
idx,
line,
source,
text,
)

View file

@ -50,7 +50,7 @@ pub async fn get_commit_by_hash(
let parents = sqlx::query!(
"\
SELECT hash, message, reachable FROM commits \
JOIN commit_links ON hash = parent \
JOIN commit_edges ON hash = parent \
WHERE child = ? \
ORDER BY reachable DESC, unixepoch(committer_date) ASC \
",
@ -64,7 +64,7 @@ pub async fn get_commit_by_hash(
let children = sqlx::query!(
"\
SELECT hash, message, reachable FROM commits \
JOIN commit_links ON hash = child \
JOIN commit_edges ON hash = child \
WHERE parent = ? \
ORDER BY reachable DESC, unixepoch(committer_date) ASC \
",

View file

@ -6,6 +6,7 @@ use futures::TryStreamExt;
use maud::html;
use serde::{Deserialize, Serialize};
use sqlx::{Acquire, SqlitePool};
use time::OffsetDateTime;
use crate::{
config::ServerConfig,
@ -55,10 +56,9 @@ pub async fn get_graph_metrics(
_path: PathGraphMetrics,
State(db): State<SqlitePool>,
) -> somehow::Result<impl IntoResponse> {
let metrics =
sqlx::query_scalar!("SELECT DISTINCT metric FROM run_measurements ORDER BY metric ASC")
.fetch_all(&db)
.await?;
let metrics = sqlx::query_scalar!("SELECT name FROM metrics ORDER BY name ASC")
.fetch_all(&db)
.await?;
Ok(Json(MetricsResponse {
data_id: 0, // TODO Implement
@ -96,8 +96,8 @@ pub async fn get_graph_commits(
SELECT \
hash, \
author, \
committer_date AS \"committer_date: time::OffsetDateTime\", \
message \
message, \
committer_date AS \"committer_date: OffsetDateTime\" \
FROM commits \
WHERE reachable = 2 \
ORDER BY hash ASC \
@ -124,7 +124,7 @@ pub async fn get_graph_commits(
let mut rows = sqlx::query!(
"\
SELECT child, parent \
FROM commit_links \
FROM commit_edges \
JOIN commits ON hash = child \
WHERE reachable = 2 \
ORDER BY hash ASC \

View file

@ -80,7 +80,7 @@ async fn from_finished_run(
"\
SELECT source, text FROM run_output \
WHERE id = ? \
ORDER BY idx ASC \
ORDER BY line ASC \
",
id,
)