job: retry on failure, general refactor
This commit is contained in:
parent
055a97a8e1
commit
fb8bf0f316
2 changed files with 70 additions and 30 deletions
|
@ -1,47 +1,88 @@
|
|||
use actix_rt::Arbiter;
|
||||
use actix_web::web;
|
||||
use async_trait::async_trait;
|
||||
use sqlx::Execute;
|
||||
use std::{cell::RefCell, fmt, sync::Mutex};
|
||||
use tokio::sync::mpsc::{channel, Receiver, Sender};
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Weak;
|
||||
use std::time::Duration;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::time::sleep;
|
||||
|
||||
use crate::core::*;
|
||||
use crate::state::AppState;
|
||||
use crate::state::{AppState, State};
|
||||
|
||||
const MAX_RETRIES: u32 = 3;
|
||||
const ERROR_DELAY: Duration = Duration::from_secs(5 * 60);
|
||||
|
||||
/// A background job to be executed asynchronously.
|
||||
///
|
||||
/// Jobs run in a separate execution context, detached from the one that
|
||||
/// enqueued it (which is usually a request handler). This mechanism allows
|
||||
/// for fast response times to requests and automatic retries on failures
|
||||
/// such as network errors when a remote instance is currently offline.
|
||||
#[async_trait]
|
||||
pub trait Job: Send + Sync + fmt::Debug {
|
||||
async fn perform(&self) -> Result<()>;
|
||||
pub trait Job: Send + Sync + Serialize + Deserialize<'static> {
|
||||
/// Execute the errand.
|
||||
///
|
||||
/// This will be called by the scheduler in a background thread.
|
||||
/// If the result is an error, the scheduler may retry it again later.
|
||||
async fn perform(&self, state: &AppState) -> Result<()>;
|
||||
|
||||
/// Return this job's name.
|
||||
///
|
||||
/// Currently only used for logging.
|
||||
fn name(&self) -> &'static str;
|
||||
}
|
||||
|
||||
pub struct Scheduler {
|
||||
arbiter: Arbiter,
|
||||
sender: Sender<Box<dyn Job>>,
|
||||
counter: AtomicUsize,
|
||||
state: Weak<State>,
|
||||
}
|
||||
|
||||
impl Scheduler {
|
||||
pub fn new() -> Scheduler {
|
||||
let (tx, rx) = channel(32);
|
||||
let sched = Scheduler {
|
||||
pub fn new(state: Weak<State>) -> Scheduler {
|
||||
Scheduler {
|
||||
arbiter: Arbiter::new(),
|
||||
sender: tx,
|
||||
};
|
||||
|
||||
if !sched.arbiter.spawn(worker(rx)) {
|
||||
panic!("Job scheduler initialization failed");
|
||||
counter: AtomicUsize::new(1),
|
||||
state,
|
||||
}
|
||||
sched
|
||||
}
|
||||
|
||||
pub async fn schedule(&self, job: Box<dyn Job>) {
|
||||
self.sender.send(job).await.expect("Job send failed");
|
||||
}
|
||||
}
|
||||
|
||||
async fn worker(mut receiver: Receiver<Box<dyn Job>>) {
|
||||
while let Some(job) = receiver.recv().await {
|
||||
debug!(target: "job", "Starting job {}", job.name());
|
||||
if let Err(e) = job.perform().await {
|
||||
error!(target: "job", "Service {} failed: {}", job.name(), e);
|
||||
pub fn schedule<T>(&self, job: T)
|
||||
where
|
||||
T: 'static + Job,
|
||||
{
|
||||
let id = self.counter.fetch_add(1, Ordering::Relaxed);
|
||||
let state: AppState = web::Data::from(
|
||||
self.state
|
||||
.upgrade()
|
||||
.expect("Zombie scheduler (this is supposed to be impossible)"),
|
||||
);
|
||||
if !self.arbiter.spawn(worker(job, id, state)) {
|
||||
panic!("Arbiter has died");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn worker<T>(job: T, id: usize, state: AppState)
|
||||
where
|
||||
T: Job,
|
||||
{
|
||||
let name = job.name();
|
||||
for n in 1..=MAX_RETRIES {
|
||||
debug!(target: "job", "Starting job {name}#{id}");
|
||||
|
||||
if let Err(e) = job.perform(&state).await {
|
||||
warn!(target: "job", "Job {name}#{id} failed (try {n}/{MAX_RETRIES}): {e}");
|
||||
|
||||
if n == MAX_RETRIES {
|
||||
error!(target: "job", "Giving up on job {name}#{id} (retry limit reached)");
|
||||
} else {
|
||||
sleep(ERROR_DELAY).await;
|
||||
}
|
||||
} else {
|
||||
debug!(target: "job", "Job {name}#{id} succeeded");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,13 +12,12 @@ pub struct State {
|
|||
pub sched: Scheduler,
|
||||
}
|
||||
|
||||
pub type AppStateRaw = Arc<State>;
|
||||
pub type AppState = web::Data<AppStateRaw>;
|
||||
pub type AppState = web::Data<State>;
|
||||
|
||||
pub fn new(config: Config, db_pool: PgPool) -> AppState {
|
||||
web::Data::new(Arc::new(State {
|
||||
web::Data::from(Arc::new_cyclic(|ptr| State {
|
||||
config,
|
||||
repo: AppRepo::new(db_pool),
|
||||
sched: Scheduler::new(),
|
||||
sched: Scheduler::new(ptr.clone()),
|
||||
}))
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue