Add exponential backoff when rate limited
dimfeld committed Dec 4, 2023
1 parent f334fdc commit d4a8369
Showing 6 changed files with 50 additions and 7 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,7 @@
## 0.1.2

- Add exponential backoff when rate limited

## 0.1.1

- Add context length management features
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -17,6 +17,7 @@ clap = { version = "4.4.7", features = ["derive", "env", "string"] }
dirs = "5.0.1"
dotenvy = "0.15.7"
error-stack = "0.4.1"
fastrand = "2.0.1"
flume = "0.11.0"
itertools = "0.11.0"
liquid = "0.26.4"
1 change: 1 addition & 0 deletions src/main.rs
@@ -20,6 +20,7 @@ mod model;
mod ollama;
mod openai;
mod option;
mod requests;
mod template;
#[cfg(test)]
mod tests;
18 changes: 11 additions & 7 deletions src/openai.rs
@@ -4,7 +4,10 @@ use error_stack::{Report, ResultExt};
use serde::Deserialize;
use serde_json::json;

-use crate::model::{map_model_response_err, ModelComms, ModelError, ModelOptions};
+use crate::{
+    model::{map_model_response_err, ModelComms, ModelError, ModelOptions},
+    requests::request_with_retry,
+};

pub const OPENAI_HOST: &str = "https://api.openai.com";

@@ -98,12 +101,13 @@ pub fn send_chat_request(
        body["max_tokens"] = json!(max_tokens);
    }

-    let mut response: ChatCompletion = create_base_request(&options, "v1/chat/completions")
-        .timeout(Duration::from_secs(30))
-        .send_json(body)
-        .map_err(map_model_response_err)?
-        .into_json()
-        .change_context(ModelError::Deserialize)?;
+    let mut response: ChatCompletion = request_with_retry(
+        create_base_request(&options, "v1/chat/completions").timeout(Duration::from_secs(30)),
+        body,
+    )
+    .map_err(map_model_response_err)?
+    .into_json()
+    .change_context(ModelError::Deserialize)?;

    // TODO streaming
    let result = response
32 changes: 32 additions & 0 deletions src/requests.rs
@@ -0,0 +1,32 @@
use serde::Serialize;

pub fn request_with_retry(
    req: ureq::Request,
    body: impl Serialize,
) -> Result<ureq::Response, ureq::Error> {
    const MAX_TRIES: u32 = 4;
    let mut try_num = 0;
    let delay = 1000;
    loop {
        let response = req.clone().send_json(&body);
        match response {
            Ok(res) => return Ok(res),
            Err(ureq::Error::Status(code, response)) => {
                if code != 429 || try_num > MAX_TRIES {
                    return Err(ureq::Error::Status(code, response));
                }

                // This is potentially retryable. We don't do anything smart right now, just a
                // random exponential backoff.

                let perturb = fastrand::i32(-100..100);
                let this_delay = 2i32.pow(try_num) * delay + perturb;

                eprintln!("Rate limited... waiting {this_delay}ms to retry");
                std::thread::sleep(std::time::Duration::from_millis(this_delay as u64));
                try_num += 1;
            }
            e @ Err(_) => return e,
        }
    }
}
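
With MAX_TRIES = 4 and a 1000ms base delay, a request that keeps hitting 429 is retried with waits of roughly 1s, 2s, 4s, 8s, and 16s (each jittered by ±100ms) before the final error is returned to the caller. A minimal sketch of that schedule, assuming the same constants as the new function above (not part of this commit):

// Illustrative only: prints the backoff schedule produced by the constants
// used in request_with_retry.
fn main() {
    let delay = 1000;
    for try_num in 0u32..=4 {
        // Same doubling and jitter as the retry loop.
        let perturb = fastrand::i32(-100..100);
        let this_delay = 2i32.pow(try_num) * delay + perturb;
        println!("retry {} after ~{this_delay}ms", try_num + 1);
    }
}

The jitter keeps several concurrent requests from retrying in lockstep once a shared rate-limit window resets.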
