diff --git a/src/runner.rs b/src/runner.rs index 3cce79b..d00ea54 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -25,16 +25,15 @@ impl Runner { return; } - let names = self.config.runner_servers.keys().cloned().collect(); - let coordinator = Arc::new(Mutex::new(Coordinator::new(names))); + let coordinator = Arc::new(Mutex::new(Coordinator::new())); let mut tasks = JoinSet::new(); - for (name, config) in self.config.runner_servers.iter() { + for (name, server_config) in self.config.runner_servers.iter() { debug!("Launching task for server {name}"); let mut server = Server::new( name.clone(), - config, - self.config.runner_ping_delay, + self.config, + server_config, coordinator.clone(), ); tasks.spawn(async move { server.run().await }); diff --git a/src/runner/coordinator.rs b/src/runner/coordinator.rs index 316ce12..943e570 100644 --- a/src/runner/coordinator.rs +++ b/src/runner/coordinator.rs @@ -1,26 +1,68 @@ //! Coordinate performing runs across servers. +use tokio::sync::mpsc; + +struct Server { + name: String, + poke: mpsc::UnboundedSender<()>, +} + pub struct Coordinator { - names: Vec<String>, + servers: Vec<Server>, current: usize, } impl Coordinator { - pub fn new(mut names: Vec<String>) -> Self { - assert!(!names.is_empty()); - names.sort_unstable(); - Self { names, current: 0 } + pub fn new() -> Self { + Self { + servers: vec![], + current: 0, + } + } + + pub fn register(&mut self, name: String, poke: mpsc::UnboundedSender<()>) { + self.servers.push(Server { name, poke }); } pub fn active(&self, name: &str) -> bool { - self.names[self.current] == name + if let Some(current) = self.servers.get(self.current) { + name == current.name + } else { + false + } } pub fn next(&mut self, name: &str) { // Check just to prevent weird shenanigans - if self.active(name) { - self.current += 1; - self.current %= self.names.len(); + if !self.active(name) { + return; + } + + // At least one server (the current one) must be registered according to + // the previous check + 
assert!(!self.servers.is_empty()); + + self.current += 1; + self.current %= self.servers.len(); + + // When the runner seeks work and a queue is idle, the next server + // should be queried immediately. Otherwise, we'd introduce lots of + // delay in the multi-server case where most queues are empty. + // + // However, if all servers' queues were empty, this would generate a + // slippery cycle of requests that the runner sends as quickly as + // possible, only limited by the roundtrip time. Because we don't want + // this, we let the first task wait its full timeout. Effectively, this + // results in iterations starting at least the ping delay apart, which + // is pretty much what we want. + // + // The way this is implemented currently is sub-optimal however: If the + // chain takes even a fraction longer than the previous iteration, tasks + // will send two requests back-to-back: The first because their ping + // timeout ran out, and the second because they were poked. So far, I + // haven't been able to think of an elegant solution for this. 
+ if self.current > 0 { let _ = self.servers[self.current].poke.send(()); } } } diff --git a/src/runner/server.rs b/src/runner/server.rs index 7fe45c1..a0860c4 100644 --- a/src/runner/server.rs +++ b/src/runner/server.rs @@ -1,53 +1,44 @@ -use std::{ - sync::{Arc, Mutex}, - time::Duration, -}; +use std::sync::{Arc, Mutex}; -use time::OffsetDateTime; +use tokio::sync::mpsc; use tracing::{debug, info_span, warn, Instrument}; -use crate::{config::RunnerServerConfig, somehow}; +use crate::{ + config::{Config, RunnerServerConfig}, + somehow, +}; use super::coordinator::Coordinator; -enum RunState { - Preparing, - Running, - Finished, // TODO Include run results here -} - -struct Run { - id: String, - hash: String, - start: OffsetDateTime, - state: RunState, -} - pub struct Server { name: String, - config: &'static RunnerServerConfig, - ping_delay: Duration, + config: &'static Config, + server_config: &'static RunnerServerConfig, coordinator: Arc<Mutex<Coordinator>>, - run: Option<Arc<Mutex<Run>>>, } impl Server { pub fn new( name: String, - config: &'static RunnerServerConfig, - ping_delay: Duration, + config: &'static Config, + server_config: &'static RunnerServerConfig, coordinator: Arc<Mutex<Coordinator>>, ) -> Self { Self { name, config, - ping_delay, + server_config, coordinator, - run: None, } } pub async fn run(&mut self) { + let (poke_tx, mut poke_rx) = mpsc::unbounded_channel(); + self.coordinator + .lock() + .unwrap() + .register(self.name.clone(), poke_tx.clone()); + let name = self.name.clone(); async { loop { @@ -55,7 +46,16 @@ impl Server { Ok(()) => {} Err(e) => warn!("Error talking to server:\n{e:?}"), } - tokio::time::sleep(self.ping_delay).await; + + // Wait for poke or until the ping delay elapses. If we get + // poked while pinging the server, this will not wait and we'll + // immediately do another ping. + let _ = tokio::time::timeout(self.config.runner_ping_delay, poke_rx.recv()).await; + + // Empty queue in case we were poked more than once. 
This can + // happen for example if we get poked multiple times while + // pinging the server. + while poke_rx.try_recv().is_ok() {} } } .instrument(info_span!("runner", name))