Rename runners to workers
This commit is contained in:
parent
78f945647c
commit
6f4793bcf2
20 changed files with 233 additions and 237 deletions
68
src/worker/coordinator.rs
Normal file
68
src/worker/coordinator.rs
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
//! Coordinate performing runs across servers.
|
||||
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
/// A registered server, paired with a channel to wake up the worker task
/// that talks to it.
struct Server {
    /// Name under which the server was registered via `Coordinator::register`.
    name: String,
    /// Sender used to "poke" the server's worker task so it pings immediately
    /// instead of waiting out its full ping delay.
    poke: mpsc::UnboundedSender<()>,
}
|
||||
|
||||
/// Round-robin coordinator deciding which registered server is currently
/// active, i.e. allowed to perform runs.
pub struct Coordinator {
    /// All registered servers, in registration order.
    servers: Vec<Server>,
    /// Index into `servers` of the currently active server.
    current: usize,
}
|
||||
|
||||
impl Coordinator {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
servers: vec![],
|
||||
current: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn register(&mut self, name: String, poke: mpsc::UnboundedSender<()>) {
|
||||
self.servers.push(Server { name, poke });
|
||||
}
|
||||
|
||||
pub fn active(&self, name: &str) -> bool {
|
||||
if let Some(current) = self.servers.get(self.current) {
|
||||
name == current.name
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next(&mut self, name: &str) {
|
||||
// Check just to prevent weird shenanigans
|
||||
if !self.active(name) {
|
||||
return;
|
||||
}
|
||||
|
||||
// At least one server (the current one) must be registered according to
|
||||
// the previous check
|
||||
assert!(!self.servers.is_empty());
|
||||
|
||||
self.current += 1;
|
||||
self.current %= self.servers.len();
|
||||
|
||||
// When the worker seeks work and a queue is idle, the next server
|
||||
// should be queried immediately. Otherwise, we'd introduce lots of
|
||||
// delay in the multi-server case were most queues are empty.
|
||||
//
|
||||
// However, if all server's queues were empty, this would generate a
|
||||
// slippery cycle of requests that the worker sends as quickly as
|
||||
// possible, only limited by the roundtrip time. Because we don't want
|
||||
// this, we let the first task wait its full timeout. Effectively, this
|
||||
// results in iterations starting at least the ping delay apart, which
|
||||
// is pretty much what we want.
|
||||
//
|
||||
// The way this is implemented currently is sub-optimal however: If the
|
||||
// chain takes even a fraction longer than the previous iteration, tasks
|
||||
// will send two requests back-to-back: The first because their ping
|
||||
// timeout ran out, and the second because they were poked. So far, I
|
||||
// haven't been able to think of an elegant solution for this.
|
||||
if self.current > 0 {
|
||||
let _ = self.servers[self.current].poke.send(());
|
||||
}
|
||||
}
|
||||
}
|
||||
90
src/worker/server.rs
Normal file
90
src/worker/server.rs
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use reqwest::Client;
|
||||
use tokio::sync::mpsc;
|
||||
use tracing::{debug, info_span, warn, Instrument};
|
||||
|
||||
use crate::{
|
||||
config::{Config, WorkerServerConfig},
|
||||
id,
|
||||
shared::{WorkerRequest, WorkerStatus},
|
||||
somehow,
|
||||
};
|
||||
|
||||
use super::coordinator::Coordinator;
|
||||
|
||||
/// Worker-side handle for a single server: registers itself with the shared
/// [`Coordinator`] and periodically pings the server's status endpoint.
pub struct Server {
    /// Name of this server, used for coordinator registration and the log span.
    name: String,
    /// Global worker configuration (provides `worker_ping_delay` and
    /// `worker_name`).
    config: &'static Config,
    /// Per-server configuration (provides the base `url` and auth `token`).
    server_config: &'static WorkerServerConfig,
    /// Coordinator shared between all server tasks of this worker.
    coordinator: Arc<Mutex<Coordinator>>,
    /// HTTP client reused across pings.
    client: Client,
    /// Worker secret generated at construction via `id::random_worker_secret`.
    secret: String,
}
|
||||
|
||||
impl Server {
|
||||
pub fn new(
|
||||
name: String,
|
||||
config: &'static Config,
|
||||
server_config: &'static WorkerServerConfig,
|
||||
coordinator: Arc<Mutex<Coordinator>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
name,
|
||||
config,
|
||||
server_config,
|
||||
coordinator,
|
||||
client: Client::new(),
|
||||
secret: id::random_worker_secret(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run(&mut self) {
|
||||
let (poke_tx, mut poke_rx) = mpsc::unbounded_channel();
|
||||
self.coordinator
|
||||
.lock()
|
||||
.unwrap()
|
||||
.register(self.name.clone(), poke_tx.clone());
|
||||
|
||||
let name = self.name.clone();
|
||||
async {
|
||||
loop {
|
||||
match self.ping().await {
|
||||
Ok(()) => {}
|
||||
Err(e) => warn!("Error talking to server:\n{e:?}"),
|
||||
}
|
||||
|
||||
// Wait for poke or until the ping delay elapses. If we get
|
||||
// poked while pinging the server, this will not wait and we'll
|
||||
// immediately do another ping.
|
||||
let _ = tokio::time::timeout(self.config.worker_ping_delay, poke_rx.recv()).await;
|
||||
|
||||
// Empty queue in case we were poked more than once. This can
|
||||
// happen for example if we get poked multiple times while
|
||||
// pinging the server.
|
||||
while poke_rx.try_recv().is_ok() {}
|
||||
}
|
||||
}
|
||||
.instrument(info_span!("worker", name))
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn ping(&self) -> somehow::Result<()> {
|
||||
debug!("Pinging");
|
||||
let request = WorkerRequest {
|
||||
info: None,
|
||||
secret: self.secret.clone(),
|
||||
status: WorkerStatus::Idle,
|
||||
request_work: false,
|
||||
submit_work: None,
|
||||
};
|
||||
let url = format!("{}api/worker/status", self.server_config.url);
|
||||
self.client
|
||||
.post(url)
|
||||
.basic_auth(&self.config.worker_name, Some(&self.server_config.token))
|
||||
.json(&request)
|
||||
.send()
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue