Poke server tasks in coordinator

2023-08-11 01:11:53 +02:00 · 2023-08-11 01:11:53 +02:00 · 33607779b0
commit 33607779b0
parent 7911a67906
3 changed files with 82 additions and 41 deletions
--- a/src/runner.rs
+++ b/src/runner.rs
@ -25,16 +25,15 @@ impl Runner {
            return;
        }

-        let names = self.config.runner_servers.keys().cloned().collect();
-        let coordinator = Arc::new(Mutex::new(Coordinator::new(names)));
+        let coordinator = Arc::new(Mutex::new(Coordinator::new()));

        let mut tasks = JoinSet::new();
-        for (name, config) in self.config.runner_servers.iter() {
+        for (name, server_config) in self.config.runner_servers.iter() {
            debug!("Launching task for server {name}");
            let mut server = Server::new(
                name.clone(),
-                config,
-                self.config.runner_ping_delay,
+                self.config,
+                server_config,
                coordinator.clone(),
            );
            tasks.spawn(async move { server.run().await });
--- a/src/runner/coordinator.rs
+++ b/src/runner/coordinator.rs
@ -1,26 +1,68 @@
 //! Coordinate performing runs across servers.

+use tokio::sync::mpsc;
+
+struct Server {
+    name: String,
+    poke: mpsc::UnboundedSender<()>,
+}
+
 pub struct Coordinator {
-    names: Vec<String>,
+    servers: Vec<Server>,
    current: usize,
 }

 impl Coordinator {
-    pub fn new(mut names: Vec<String>) -> Self {
-        assert!(!names.is_empty());
-        names.sort_unstable();
-        Self { names, current: 0 }
+    pub fn new() -> Self {
+        Self {
+            servers: vec![],
+            current: 0,
+        }
+    }
+
+    pub fn register(&mut self, name: String, poke: mpsc::UnboundedSender<()>) {
+        self.servers.push(Server { name, poke });
    }

    pub fn active(&self, name: &str) -> bool {
-        self.names[self.current] == name
+        if let Some(current) = self.servers.get(self.current) {
+            name == current.name
+        } else {
+            false
+        }
    }

    pub fn next(&mut self, name: &str) {
        // Check just to prevent weird shenanigans
-        if self.active(name) {
+        if !self.active(name) {
+            return;
+        }
+
+        // At least one server (the current one) must be registered according to
+        // the previous check
+        assert!(!self.servers.is_empty());
+
        self.current += 1;
-            self.current %= self.names.len();
+        self.current %= self.servers.len();
+
+        // When the runner seeks work and a queue is idle, the next server
+        // should be queried immediately. Otherwise, we'd introduce lots of
+        // delay in the multi-server case where most queues are empty.
+        //
+        // However, if all servers' queues were empty, this would generate a
+        // slippery cycle of requests that the runner sends as quickly as
+        // possible, only limited by the roundtrip time. Because we don't want
+        // this, we let the first task wait its full timeout. Effectively, this
+        // results in iterations starting at least the ping delay apart, which
+        // is pretty much what we want.
+        //
+        // The way this is implemented currently is sub-optimal however: If the
+        // chain takes even a fraction longer than the previous iteration, tasks
+        // will send two requests back-to-back: The first because their ping
+        // timeout ran out, and the second because they were poked. So far, I
+        // haven't been able to think of an elegant solution for this.
+        if self.current > 0 {
+            let _ = self.servers[self.current].poke.send(());
        }
    }
 }
--- a/src/runner/server.rs
+++ b/src/runner/server.rs
@ -1,53 +1,44 @@
-use std::{
-    sync::{Arc, Mutex},
-    time::Duration,
-};
+use std::sync::{Arc, Mutex};

-use time::OffsetDateTime;
+use tokio::sync::mpsc;
 use tracing::{debug, info_span, warn, Instrument};

-use crate::{config::RunnerServerConfig, somehow};
+use crate::{
+    config::{Config, RunnerServerConfig},
+    somehow,
+};

 use super::coordinator::Coordinator;

-enum RunState {
-    Preparing,
-    Running,
-    Finished, // TODO Include run results here
-}
-
-struct Run {
-    id: String,
-    hash: String,
-    start: OffsetDateTime,
-    state: RunState,
-}
-
 pub struct Server {
    name: String,
-    config: &'static RunnerServerConfig,
-    ping_delay: Duration,
+    config: &'static Config,
+    server_config: &'static RunnerServerConfig,
    coordinator: Arc<Mutex<Coordinator>>,
-    run: Option<Arc<Mutex<Run>>>,
 }

 impl Server {
    pub fn new(
        name: String,
-        config: &'static RunnerServerConfig,
-        ping_delay: Duration,
+        config: &'static Config,
+        server_config: &'static RunnerServerConfig,
        coordinator: Arc<Mutex<Coordinator>>,
    ) -> Self {
        Self {
            name,
            config,
-            ping_delay,
+            server_config,
            coordinator,
-            run: None,
        }
    }

    pub async fn run(&mut self) {
+        let (poke_tx, mut poke_rx) = mpsc::unbounded_channel();
+        self.coordinator
+            .lock()
+            .unwrap()
+            .register(self.name.clone(), poke_tx.clone());
+
        let name = self.name.clone();
        async {
            loop {
@ -55,7 +46,16 @@ impl Server {
                    Ok(()) => {}
                    Err(e) => warn!("Error talking to server:\n{e:?}"),
                }
-                tokio::time::sleep(self.ping_delay).await;
+
+                // Wait for poke or until the ping delay elapses. If we get
+                // poked while pinging the server, this will not wait and we'll
+                // immediately do another ping.
+                let _ = tokio::time::timeout(self.config.runner_ping_delay, poke_rx.recv()).await;
+
+                // Empty queue in case we were poked more than once. This can
+                // happen for example if we get poked multiple times while
+                // pinging the server.
+                while poke_rx.try_recv().is_ok() {}
            }
        }
        .instrument(info_span!("runner", name))