Rename runners to workers
This commit is contained in:
parent
78f945647c
commit
6f4793bcf2
20 changed files with 233 additions and 237 deletions
68
src/worker/coordinator.rs
Normal file
68
src/worker/coordinator.rs
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
//! Coordinate performing runs across servers.
|
||||
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
/// A registered server, paired with a channel to wake up the worker task
/// that talks to it.
struct Server {
    /// Name under which the server was registered via `Coordinator::register`.
    name: String,
    /// Sender used to "poke" the server's worker task so it pings immediately
    /// instead of waiting out its full ping delay.
    poke: mpsc::UnboundedSender<()>,
}
|
||||
|
||||
/// Round-robin coordinator deciding which registered server is currently
/// active, i.e. allowed to perform runs.
pub struct Coordinator {
    /// All registered servers, in registration order.
    servers: Vec<Server>,
    /// Index into `servers` of the currently active server.
    current: usize,
}
|
||||
|
||||
impl Coordinator {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
servers: vec![],
|
||||
current: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn register(&mut self, name: String, poke: mpsc::UnboundedSender<()>) {
|
||||
self.servers.push(Server { name, poke });
|
||||
}
|
||||
|
||||
pub fn active(&self, name: &str) -> bool {
|
||||
if let Some(current) = self.servers.get(self.current) {
|
||||
name == current.name
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next(&mut self, name: &str) {
|
||||
// Check just to prevent weird shenanigans
|
||||
if !self.active(name) {
|
||||
return;
|
||||
}
|
||||
|
||||
// At least one server (the current one) must be registered according to
|
||||
// the previous check
|
||||
assert!(!self.servers.is_empty());
|
||||
|
||||
self.current += 1;
|
||||
self.current %= self.servers.len();
|
||||
|
||||
// When the worker seeks work and a queue is idle, the next server
|
||||
// should be queried immediately. Otherwise, we'd introduce lots of
|
||||
// delay in the multi-server case were most queues are empty.
|
||||
//
|
||||
// However, if all server's queues were empty, this would generate a
|
||||
// slippery cycle of requests that the worker sends as quickly as
|
||||
// possible, only limited by the roundtrip time. Because we don't want
|
||||
// this, we let the first task wait its full timeout. Effectively, this
|
||||
// results in iterations starting at least the ping delay apart, which
|
||||
// is pretty much what we want.
|
||||
//
|
||||
// The way this is implemented currently is sub-optimal however: If the
|
||||
// chain takes even a fraction longer than the previous iteration, tasks
|
||||
// will send two requests back-to-back: The first because their ping
|
||||
// timeout ran out, and the second because they were poked. So far, I
|
||||
// haven't been able to think of an elegant solution for this.
|
||||
if self.current > 0 {
|
||||
let _ = self.servers[self.current].poke.send(());
|
||||
}
|
||||
}
|
||||
}
|
||||
90
src/worker/server.rs
Normal file
90
src/worker/server.rs
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use reqwest::Client;
|
||||
use tokio::sync::mpsc;
|
||||
use tracing::{debug, info_span, warn, Instrument};
|
||||
|
||||
use crate::{
|
||||
config::{Config, WorkerServerConfig},
|
||||
id,
|
||||
shared::{WorkerRequest, WorkerStatus},
|
||||
somehow,
|
||||
};
|
||||
|
||||
use super::coordinator::Coordinator;
|
||||
|
||||
/// Worker-side handle for a single server: registers itself with the shared
/// [`Coordinator`] and periodically pings the server's status endpoint.
pub struct Server {
    /// Name of this server, used for coordinator registration and the log span.
    name: String,
    /// Global worker configuration (provides `worker_ping_delay` and
    /// `worker_name`).
    config: &'static Config,
    /// Per-server configuration (provides the base `url` and auth `token`).
    server_config: &'static WorkerServerConfig,
    /// Coordinator shared between all server tasks of this worker.
    coordinator: Arc<Mutex<Coordinator>>,
    /// HTTP client reused across pings.
    client: Client,
    /// Worker secret generated at construction via `id::random_worker_secret`.
    secret: String,
}
|
||||
|
||||
impl Server {
|
||||
pub fn new(
|
||||
name: String,
|
||||
config: &'static Config,
|
||||
server_config: &'static WorkerServerConfig,
|
||||
coordinator: Arc<Mutex<Coordinator>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
name,
|
||||
config,
|
||||
server_config,
|
||||
coordinator,
|
||||
client: Client::new(),
|
||||
secret: id::random_worker_secret(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run(&mut self) {
|
||||
let (poke_tx, mut poke_rx) = mpsc::unbounded_channel();
|
||||
self.coordinator
|
||||
.lock()
|
||||
.unwrap()
|
||||
.register(self.name.clone(), poke_tx.clone());
|
||||
|
||||
let name = self.name.clone();
|
||||
async {
|
||||
loop {
|
||||
match self.ping().await {
|
||||
Ok(()) => {}
|
||||
Err(e) => warn!("Error talking to server:\n{e:?}"),
|
||||
}
|
||||
|
||||
// Wait for poke or until the ping delay elapses. If we get
|
||||
// poked while pinging the server, this will not wait and we'll
|
||||
// immediately do another ping.
|
||||
let _ = tokio::time::timeout(self.config.worker_ping_delay, poke_rx.recv()).await;
|
||||
|
||||
// Empty queue in case we were poked more than once. This can
|
||||
// happen for example if we get poked multiple times while
|
||||
// pinging the server.
|
||||
while poke_rx.try_recv().is_ok() {}
|
||||
}
|
||||
}
|
||||
.instrument(info_span!("worker", name))
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn ping(&self) -> somehow::Result<()> {
|
||||
debug!("Pinging");
|
||||
let request = WorkerRequest {
|
||||
info: None,
|
||||
secret: self.secret.clone(),
|
||||
status: WorkerStatus::Idle,
|
||||
request_work: false,
|
||||
submit_work: None,
|
||||
};
|
||||
let url = format!("{}api/worker/status", self.server_config.url);
|
||||
self.client
|
||||
.post(url)
|
||||
.basic_auth(&self.config.worker_name, Some(&self.server_config.token))
|
||||
.json(&request)
|
||||
.send()
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue