Obey crates.io crawler policy: Only make one request per sec (#908)

Fixed https://github.com/taiki-e/install-action/issues/89

Signed-off-by: Jiahao XU <Jiahao_XU@outlook.com>
This commit is contained in:
Jiahao XU 2023-03-14 17:11:22 +11:00 committed by GitHub
parent f37796a55f
commit d118fa8fdb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 47 additions and 1 deletions

View file

@ -117,6 +117,7 @@ pub fn install_crates(
client, client,
gh_api_client, gh_api_client,
jobserver_client, jobserver_client,
crates_io_rate_limit: Default::default(),
}); });
// Destruct args before any async function to reduce size of the future // Destruct args before any async function to reduce size of the future

View file

@ -13,6 +13,7 @@ use crate::{
remote::{Client, Url}, remote::{Client, Url},
}, },
manifests::cargo_toml_binstall::{Meta, TarBasedFmt}, manifests::cargo_toml_binstall::{Meta, TarBasedFmt},
ops::CratesIoRateLimit,
}; };
mod vfs; mod vfs;
@ -48,7 +49,11 @@ pub async fn fetch_crate_cratesio(
client: Client, client: Client,
name: &str, name: &str,
version_req: &VersionReq, version_req: &VersionReq,
crates_io_rate_limit: &CratesIoRateLimit,
) -> Result<Manifest<Meta>, BinstallError> { ) -> Result<Manifest<Meta>, BinstallError> {
// Wait until we can make another request to crates.io
crates_io_rate_limit.tick().await;
// Fetch / update index // Fetch / update index
debug!("Looking up crate information"); debug!("Looking up crate information");

View file

@ -3,6 +3,10 @@
use std::{path::PathBuf, sync::Arc}; use std::{path::PathBuf, sync::Arc};
use semver::VersionReq; use semver::VersionReq;
use tokio::{
sync::Mutex,
time::{interval, Duration, Interval, MissedTickBehavior},
};
use crate::{ use crate::{
fetchers::{Data, Fetcher, TargetData}, fetchers::{Data, Fetcher, TargetData},
@ -37,4 +41,32 @@ pub struct Options {
pub client: Client, pub client: Client,
pub gh_api_client: GhApiClient, pub gh_api_client: GhApiClient,
pub jobserver_client: LazyJobserverClient, pub jobserver_client: LazyJobserverClient,
pub crates_io_rate_limit: CratesIoRateLimit,
}
/// Rate limiter for requests made to crates.io.
///
/// Wraps a tokio `Interval` in an async `Mutex` so that it can be ticked
/// through a shared reference.
pub struct CratesIoRateLimit(Mutex<Interval>);
impl Default for CratesIoRateLimit {
    /// Builds a rate limiter whose interval has a period of one second.
    fn default() -> Self {
        let mut ticker = interval(Duration::from_secs(1));

        // Use `Delay` for missed ticks: when a tick is late, the following
        // tick is scheduled at least one full period after it.
        //
        // The other `MissedTickBehavior` variants do not suit a rate limiter:
        //
        //  - `Burst` (the default) ticks as fast as possible to catch up,
        //    which would violate the crates.io crawler policy.
        //  - `Skip` skips the current tick in favour of the next one, which
        //    would slow down the resolution process.
        ticker.set_missed_tick_behavior(MissedTickBehavior::Delay);

        Self(Mutex::new(ticker))
    }
}
impl CratesIoRateLimit {
/// Waits for the next tick of the wrapped interval.
///
/// Acquires the mutex and then awaits `Interval::tick` on the guarded
/// interval; `fetch_crate_cratesio` awaits this before it starts looking
/// up crate information.
pub(super) async fn tick(&self) {
// The guard returned by `lock()` is a temporary that lives until the end
// of this statement, so the lock stays held while the tick is awaited.
self.0.lock().await.tick().await;
}
} }

View file

@ -354,7 +354,15 @@ impl PackageInfo {
// Fetch crate via crates.io, git, or use a local manifest path // Fetch crate via crates.io, git, or use a local manifest path
let manifest = match opts.manifest_path.as_ref() { let manifest = match opts.manifest_path.as_ref() {
Some(manifest_path) => load_manifest_path(manifest_path)?, Some(manifest_path) => load_manifest_path(manifest_path)?,
None => Box::pin(fetch_crate_cratesio(client, &name, version_req)).await?, None => {
Box::pin(fetch_crate_cratesio(
client,
&name,
version_req,
&opts.crates_io_rate_limit,
))
.await?
}
}; };
let Some(mut package) = manifest.package else { let Some(mut package) = manifest.package else {