diff --git a/Cargo.lock b/Cargo.lock index 5965313a..63ce28c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -263,11 +263,13 @@ dependencies = [ "once_cell", "semver", "serde", + "serde_json", "strum", "target-lexicon", "tempfile", "thiserror", "tokio", + "toml_edit", "tracing", "url", "windows 0.48.0", diff --git a/crates/bin/src/args.rs b/crates/bin/src/args.rs index fa3a1958..1f9f40b8 100644 --- a/crates/bin/src/args.rs +++ b/crates/bin/src/args.rs @@ -8,6 +8,7 @@ use std::{ }; use binstalk::{ + drivers::Registry, helpers::remote, manifests::cargo_toml_binstall::PkgFmt, ops::resolve::{CrateName, VersionReqExt}, @@ -222,6 +223,10 @@ pub struct Args { #[clap(help_heading = "Options", long, alias = "roots")] pub root: Option, + /// The URL of the registry index to use + #[clap(help_heading = "Options", long)] + pub index: Option, + /// This option will be passed through to all `cargo-install` invocations. /// /// It will require `Cargo.lock` to be up to date. diff --git a/crates/bin/src/entry.rs b/crates/bin/src/entry.rs index 3b7b2736..3ffa7026 100644 --- a/crates/bin/src/entry.rs +++ b/crates/bin/src/entry.rs @@ -127,7 +127,7 @@ pub fn install_crates( client, gh_api_client, jobserver_client, - crates_io_rate_limit: Default::default(), + registry: args.index.unwrap_or_default(), }); // Destruct args before any async function to reduce size of the future diff --git a/crates/binstalk-downloader/src/remote.rs b/crates/binstalk-downloader/src/remote.rs index 2de06d75..abda9d66 100644 --- a/crates/binstalk-downloader/src/remote.rs +++ b/crates/binstalk-downloader/src/remote.rs @@ -63,6 +63,13 @@ pub struct HttpError { err: reqwest::Error, } +impl HttpError { + /// Returns true if the error is from [`Response::error_for_status`]. + pub fn is_status(&self) -> bool { + self.err.is_status() + } +} + #[derive(Debug)] struct Inner { client: reqwest::Client, diff --git a/crates/binstalk/Cargo.toml b/crates/binstalk/Cargo.toml index cac37220..d55fa9ac 100644 --- a/crates/binstalk/Cargo.toml +++ b/crates/binstalk/Cargo.toml @@ -31,6 +31,7 @@ normalize-path = { version = "0.2.1", path = "../normalize-path" } once_cell = "1.18.0" semver = { version = "1.0.17", features = ["serde"] } serde = { version = "1.0.163", features = ["derive"] } +serde_json = "1.0.99" strum = "0.25.0" target-lexicon = { version = "0.12.8", features = ["std"] } tempfile = "3.5.0" @@ -41,6 +42,9 @@ tracing = "0.1.37" url = { version = "2.3.1", features = ["serde"] } xz2 = "0.1.7" +[dev-dependencies] +toml_edit = { version = "0.19.11", features = ["serde"] } + [target.'cfg(target_os = "windows")'.dependencies] windows = { version = "0.48.0", features = ["Win32_Storage_FileSystem", "Win32_Foundation"] } diff --git a/crates/binstalk/src/drivers.rs b/crates/binstalk/src/drivers.rs index 16d43f03..1f07c099 100644 --- a/crates/binstalk/src/drivers.rs +++ b/crates/binstalk/src/drivers.rs @@ -1,2 +1,8 @@ -mod crates_io; -pub use crates_io::fetch_crate_cratesio; +mod registry; +pub use registry::{ + fetch_crate_cratesio, CratesIoRateLimit, InvalidRegistryError, Registry, RegistryError, + SparseRegistry, +}; + +#[cfg(feature = "git")] +pub use registry::GitRegistry; diff --git a/crates/binstalk/src/drivers/registry.rs b/crates/binstalk/src/drivers/registry.rs new file mode 100644 index 00000000..2e137e17 --- /dev/null +++ b/crates/binstalk/src/drivers/registry.rs @@ -0,0 +1,252 @@ +use std::{str::FromStr, sync::Arc}; + +use cargo_toml::Manifest; +use compact_str::CompactString; +use leon::{ParseError, RenderError}; +use miette::Diagnostic; +use semver::VersionReq; +use serde_json::Error as JsonError; +use thiserror::Error as ThisError; + +use crate::{ + errors::BinstallError, + helpers::remote::{Client, Error as RemoteError, Url, UrlParseError}, + manifests::cargo_toml_binstall::Meta, +}; + +#[cfg(feature = "git")] +use crate::helpers::git::{GitUrl, GitUrlParseError}; + +mod vfs; + +mod visitor; + +mod common; +use common::*; + +#[cfg(feature = "git")] +mod git_registry; +#[cfg(feature = "git")] +pub use git_registry::GitRegistry; + +mod crates_io_registry; +pub use crates_io_registry::{fetch_crate_cratesio, CratesIoRateLimit}; + +mod sparse_registry; +pub use sparse_registry::SparseRegistry; + +#[derive(Debug, ThisError, Diagnostic)] +#[diagnostic(severity(error), code(binstall::cargo_registry))] +#[non_exhaustive] +pub enum RegistryError { + #[error(transparent)] + Remote(#[from] RemoteError), + + #[error("{0} is not found")] + #[diagnostic( + help("Check that the crate name you provided is correct.\nYou can also search for a matching crate at: https://lib.rs/search?q={0}") + )] + NotFound(CompactString), + + #[error(transparent)] + Json(#[from] JsonError), + + #[error("Failed to parse dl config: {0}")] + ParseDlConfig(#[from] ParseError), + + #[error("Failed to render dl config: {0}")] + RenderDlConfig(#[from] RenderError), +} + +#[derive(Clone, Debug)] +#[non_exhaustive] +pub enum Registry { + CratesIo(Arc), + + Sparse(Arc), + + #[cfg(feature = "git")] + Git(GitRegistry), +} + +impl Default for Registry { + fn default() -> Self { + Self::CratesIo(Default::default()) + } +} + +#[derive(Debug, ThisError)] +#[error("Invalid registry `{src}`, {inner}")] +pub struct InvalidRegistryError { + src: CompactString, + #[source] + inner: InvalidRegistryErrorInner, +} + +#[derive(Debug, ThisError)] +enum InvalidRegistryErrorInner { + #[cfg(feature = "git")] + #[error("failed to parse git url {0}")] + GitUrlParseErr(#[from] Box), + + #[error("failed to parse sparse registry url: {0}")] + UrlParseErr(#[from] UrlParseError), + + #[error("expected protocol http(s), actual protocl {0}")] + InvalidScheme(CompactString), + + #[cfg(not(feature = "git"))] + #[error("git registry not supported")] + GitRegistryNotSupported, +} + +impl Registry { + fn from_str_inner(s: &str) -> Result { + if let Some(s) = s.strip_prefix("sparse+") { + let url = Url::parse(s)?; + + let scheme = url.scheme(); + if scheme != "http" && scheme != "https" { + Err(InvalidRegistryErrorInner::InvalidScheme(scheme.into())) + } else { + Ok(Self::Sparse(Arc::new(SparseRegistry::new(url)))) + } + } else { + #[cfg(not(feature = "git"))] + { + Err(InvalidRegistryErrorInner::GitRegistryNotSupported) + } + #[cfg(feature = "git")] + { + let url = GitUrl::from_str(s).map_err(Box::new)?; + Ok(Self::Git(GitRegistry::new(url))) + } + } + } + + /// Fetch the latest crate with `crate_name` and with version matching + /// `version_req`. + pub async fn fetch_crate_matched( + &self, + client: Client, + crate_name: &str, + version_req: &VersionReq, + ) -> Result, BinstallError> { + match self { + Self::CratesIo(rate_limit) => { + fetch_crate_cratesio(client, crate_name, version_req, rate_limit).await + } + Self::Sparse(sparse_registry) => { + sparse_registry + .fetch_crate_matched(client, crate_name, version_req) + .await + } + #[cfg(feature = "git")] + Self::Git(git_registry) => { + git_registry + .fetch_crate_matched(client, crate_name, version_req) + .await + } + } + } +} + +impl FromStr for Registry { + type Err = InvalidRegistryError; + + fn from_str(s: &str) -> Result { + Self::from_str_inner(s).map_err(|inner| InvalidRegistryError { + src: s.into(), + inner, + }) + } +} + +#[cfg(test)] +mod test { + use std::time::Duration; + + use toml_edit::ser::to_string; + + use super::*; + + /// Mark this as an async fn so that you won't accidentally use it in + /// sync context. + async fn create_client() -> Client { + Client::new( + concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")), + None, + Duration::from_millis(10), + 1.try_into().unwrap(), + [], + ) + .unwrap() + } + + #[tokio::test] + async fn test_crates_io_sparse_registry() { + let client = create_client().await; + + let sparse_registry: Registry = "sparse+https://index.crates.io/".parse().unwrap(); + assert!( + matches!(sparse_registry, Registry::Sparse(_)), + "{:?}", + sparse_registry + ); + + let crate_name = "cargo-binstall"; + let version_req = &VersionReq::parse("=1.0.0").unwrap(); + let manifest_from_sparse = sparse_registry + .fetch_crate_matched(client.clone(), crate_name, version_req) + .await + .unwrap(); + + let manifest_from_cratesio_api = Registry::default() + .fetch_crate_matched(client, crate_name, version_req) + .await + .unwrap(); + + let serialized_manifest_from_sparse = to_string(&manifest_from_sparse).unwrap(); + let serialized_manifest_from_cratesio_api = to_string(&manifest_from_cratesio_api).unwrap(); + + assert_eq!( + serialized_manifest_from_sparse, + serialized_manifest_from_cratesio_api + ); + } + + #[cfg(feature = "git")] + #[tokio::test] + async fn test_crates_io_git_registry() { + let client = create_client().await; + + let git_registry: Registry = "https://github.com/rust-lang/crates.io-index" + .parse() + .unwrap(); + assert!( + matches!(git_registry, Registry::Git(_)), + "{:?}", + git_registry + ); + + let crate_name = "cargo-binstall"; + let version_req = &VersionReq::parse("=1.0.0").unwrap(); + let manifest_from_git = git_registry + .fetch_crate_matched(client.clone(), crate_name, version_req) + .await + .unwrap(); + + let manifest_from_cratesio_api = Registry::default() + .fetch_crate_matched(client, crate_name, version_req) + .await + .unwrap(); + + let serialized_manifest_from_git = to_string(&manifest_from_git).unwrap(); + let serialized_manifest_from_cratesio_api = to_string(&manifest_from_cratesio_api).unwrap(); + + assert_eq!( + serialized_manifest_from_git, + serialized_manifest_from_cratesio_api + ); + } +} diff --git a/crates/binstalk/src/drivers/registry/common.rs b/crates/binstalk/src/drivers/registry/common.rs new file mode 100644 index 00000000..54d64fa7 --- /dev/null +++ b/crates/binstalk/src/drivers/registry/common.rs @@ -0,0 +1,168 @@ +use std::{borrow::Cow, path::PathBuf}; + +use cargo_toml::Manifest; +use compact_str::{format_compact, CompactString, ToCompactString}; +use leon::{Template, Values}; +use semver::{Version, VersionReq}; +use serde::Deserialize; +use serde_json::Error as JsonError; +use tracing::debug; + +use crate::{ + drivers::registry::{visitor::ManifestVisitor, RegistryError}, + errors::BinstallError, + helpers::{ + download::Download, + remote::{Client, Url}, + }, + manifests::cargo_toml_binstall::{Meta, TarBasedFmt}, +}; + +#[derive(Deserialize)] +pub(super) struct RegistryConfig { + pub(super) dl: CompactString, +} + +pub(super) async fn parse_manifest( + client: Client, + crate_name: &str, + version: &str, + crate_url: Url, +) -> Result, BinstallError> { + debug!("Fetching crate from: {crate_url} and extracting Cargo.toml from it"); + + let manifest_dir_path: PathBuf = format!("{crate_name}-{version}").into(); + + let mut manifest_visitor = ManifestVisitor::new(manifest_dir_path); + + Download::new(client, crate_url) + .and_visit_tar(TarBasedFmt::Tgz, &mut manifest_visitor) + .await?; + + manifest_visitor.load_manifest() +} + +/// Return components of crate prefix +pub(super) fn crate_prefix_components( + crate_name: &str, +) -> Result<(CompactString, Option), RegistryError> { + let mut chars = crate_name.chars(); + + match (chars.next(), chars.next(), chars.next(), chars.next()) { + (None, None, None, None) => Err(RegistryError::NotFound(crate_name.into())), + (Some(_), None, None, None) => Ok((CompactString::new("1"), None)), + (Some(_), Some(_), None, None) => Ok((CompactString::new("2"), None)), + (Some(ch), Some(_), Some(_), None) => Ok(( + CompactString::new("3"), + Some(ch.to_lowercase().to_compact_string()), + )), + (Some(a), Some(b), Some(c), Some(d)) => Ok(( + format_compact!("{}{}", a.to_lowercase(), b.to_lowercase()), + Some(format_compact!("{}{}", c.to_lowercase(), d.to_lowercase())), + )), + _ => unreachable!(), + } +} + +pub(super) fn render_dl_template( + dl_template: &str, + crate_name: &str, + (c1, c2): &(CompactString, Option), + version: &str, + cksum: &str, +) -> Result { + let template = Template::parse(dl_template)?; + if template.keys().next().is_some() { + let mut crate_prefix = c1.clone(); + if let Some(c2) = c2 { + crate_prefix.push('/'); + crate_prefix.push_str(c2); + } + + struct Context<'a> { + crate_name: &'a str, + crate_prefix: CompactString, + crate_lowerprefix: String, + version: &'a str, + cksum: &'a str, + } + impl Values for Context<'_> { + fn get_value(&self, key: &str) -> Option> { + match key { + "crate" => Some(Cow::Borrowed(self.crate_name)), + "version" => Some(Cow::Borrowed(self.version)), + "prefix" => Some(Cow::Borrowed(&self.crate_prefix)), + "lowerprefix" => Some(Cow::Borrowed(&self.crate_lowerprefix)), + "sha256-checksum" => Some(Cow::Borrowed(self.cksum)), + _ => None, + } + } + } + Ok(template.render(&Context { + crate_name, + crate_lowerprefix: crate_prefix.to_lowercase(), + crate_prefix, + version, + cksum, + })?) + } else { + Ok(format!("{dl_template}/{crate_name}/{version}/download")) + } +} + +#[derive(Deserialize)] +pub(super) struct RegistryIndexEntry { + vers: CompactString, + yanked: bool, + cksum: CompactString, +} + +pub(super) struct MatchedVersion { + pub(super) version: CompactString, + pub(super) cksum: CompactString, +} + +impl MatchedVersion { + pub(super) fn find( + it: &mut dyn Iterator>, + version_req: &VersionReq, + ) -> Result { + let mut ret = Option::<(Self, Version)>::None; + + for res in it { + let entry = res.map_err(RegistryError::from)?; + + if entry.yanked { + continue; + } + + let num = entry.vers; + + // Parse out version + let Ok(ver) = Version::parse(&num) else { continue }; + + // Filter by version match + if !version_req.matches(&ver) { + continue; + } + + let matched = Self { + version: num, + cksum: entry.cksum, + }; + + if let Some((_, max_ver)) = &ret { + if ver > *max_ver { + ret = Some((matched, ver)); + } + } else { + ret = Some((matched, ver)); + } + } + + ret.map(|(num, _)| num) + .ok_or_else(|| BinstallError::VersionMismatch { + req: version_req.clone(), + }) + } +} diff --git a/crates/binstalk/src/drivers/crates_io.rs b/crates/binstalk/src/drivers/registry/crates_io_registry.rs similarity index 55% rename from crates/binstalk/src/drivers/crates_io.rs rename to crates/binstalk/src/drivers/registry/crates_io_registry.rs index 441a44a1..b2fa25ec 100644 --- a/crates/binstalk/src/drivers/crates_io.rs +++ b/crates/binstalk/src/drivers/registry/crates_io_registry.rs @@ -1,31 +1,49 @@ -use std::path::PathBuf; - +use binstalk_downloader::remote::Error as RemoteError; use cargo_toml::Manifest; use compact_str::{CompactString, ToCompactString}; use semver::{Comparator, Op as ComparatorOp, Version as SemVersion, VersionReq}; use serde::Deserialize; +use tokio::{ + sync::Mutex, + time::{interval, Duration, Interval, MissedTickBehavior}, +}; use tracing::debug; use crate::{ - errors::{BinstallError, CratesIoApiError}, - helpers::{ - download::Download, - remote::{Client, Url}, - }, - manifests::cargo_toml_binstall::{Meta, TarBasedFmt}, - ops::CratesIoRateLimit, + drivers::registry::{parse_manifest, RegistryError}, + errors::BinstallError, + helpers::remote::{Client, Url}, + manifests::cargo_toml_binstall::Meta, }; -mod vfs; +#[derive(Debug)] +pub struct CratesIoRateLimit(Mutex); -mod visitor; -use visitor::ManifestVisitor; +impl Default for CratesIoRateLimit { + fn default() -> Self { + let mut interval = interval(Duration::from_secs(1)); + // If somehow one tick is delayed, then next tick should be at least + // 1s later than the current tick. + // + // Other MissedTickBehavior including Burst (default), which will + // tick as fast as possible to catch up, and Skip, which will + // skip the current tick for the next one. + // + // Both Burst and Skip is not the expected behavior for rate limit: + // ticking as fast as possible would violate crates.io crawler + // policy, and skipping the current one will slow down the resolution + // process. + interval.set_missed_tick_behavior(MissedTickBehavior::Delay); + Self(Mutex::new(interval)) + } +} -async fn is_crate_yanked( - client: &Client, - name: &str, - version: &str, -) -> Result { +impl CratesIoRateLimit { + pub(super) async fn tick(&self) { + self.0.lock().await.tick().await; + } +} +async fn is_crate_yanked(client: &Client, url: Url) -> Result { #[derive(Deserialize)] struct CrateInfo { version: Inner, @@ -39,29 +57,16 @@ async fn is_crate_yanked( // Fetch / update index debug!("Looking up crate information"); - let response = client - .get(Url::parse(&format!( - "https://crates.io/api/v1/crates/{name}/{version}" - ))?) - .send(true) - .await - .map_err(|err| { - BinstallError::CratesIoApi(Box::new(CratesIoApiError { - crate_name: name.into(), - err, - })) - })?; - - let info: CrateInfo = response.json().await?; + let info: CrateInfo = client.get(url).send(true).await?.json().await?; Ok(info.version.yanked) } async fn fetch_crate_cratesio_version_matched( client: &Client, - name: &str, + url: Url, version_req: &VersionReq, -) -> Result { +) -> Result, RemoteError> { #[derive(Deserialize)] struct CrateInfo { #[serde(rename = "crate")] @@ -87,22 +92,11 @@ async fn fetch_crate_cratesio_version_matched( // Fetch / update index debug!("Looking up crate information"); - let response = client - .get(Url::parse(&format!( - "https://crates.io/api/v1/crates/{name}" - ))?) - .send(true) - .await - .map_err(|err| { - BinstallError::CratesIoApi(Box::new(CratesIoApiError { - crate_name: name.into(), - err, - })) - })?; + let response = client.get(url).send(true).await?; let version = if version_req == &VersionReq::STAR { let crate_info: CrateInfo = response.json().await?; - crate_info.inner.max_stable_version + Some(crate_info.inner.max_stable_version) } else { let response: Versions = response.json().await?; response @@ -128,14 +122,9 @@ async fn fetch_crate_cratesio_version_matched( }) // Return highest version .max_by(|(_ver_str_x, ver_x), (_ver_str_y, ver_y)| ver_x.cmp(ver_y)) - .ok_or_else(|| BinstallError::VersionMismatch { - req: version_req.clone(), - })? - .0 + .map(|(ver_str, _)| ver_str) }; - debug!("Found information for crate version: '{version}'"); - Ok(version) } @@ -150,6 +139,8 @@ pub async fn fetch_crate_cratesio( // Wait until we can make another request to crates.io crates_io_rate_limit.tick().await; + let url = Url::parse(&format!("https://crates.io/api/v1/crates/{name}"))?; + let version = match version_req.comparators.as_slice() { [Comparator { op: ComparatorOp::Exact, @@ -167,29 +158,32 @@ pub async fn fetch_crate_cratesio( } .to_compact_string(); - if is_crate_yanked(&client, name, &version).await? { - return Err(BinstallError::VersionMismatch { - req: version_req.clone(), - }); - } + let mut url = url.clone(); + url.path_segments_mut().unwrap().push(&version); - version + is_crate_yanked(&client, url) + .await + .map(|yanked| (!yanked).then_some(version)) } - _ => fetch_crate_cratesio_version_matched(&client, name, version_req).await?, - }; + _ => fetch_crate_cratesio_version_matched(&client, url.clone(), version_req).await, + } + .map_err(|e| match e { + RemoteError::Http(e) if e.is_status() => RegistryError::NotFound(name.into()), + e => e.into(), + })? + .ok_or_else(|| BinstallError::VersionMismatch { + req: version_req.clone(), + })?; + + debug!("Found information for crate version: '{version}'"); // Download crate to temporary dir (crates.io or git?) - let crate_url = format!("https://crates.io/api/v1/crates/{name}/{version}/download"); + let mut crate_url = url; + crate_url + .path_segments_mut() + .unwrap() + .push(&version) + .push("download"); - debug!("Fetching crate from: {crate_url} and extracting Cargo.toml from it"); - - let manifest_dir_path: PathBuf = format!("{name}-{version}").into(); - - let mut manifest_visitor = ManifestVisitor::new(manifest_dir_path); - - Download::new(client, Url::parse(&crate_url)?) - .and_visit_tar(TarBasedFmt::Tgz, &mut manifest_visitor) - .await?; - - manifest_visitor.load_manifest() + parse_manifest(client, name, &version, crate_url).await } diff --git a/crates/binstalk/src/drivers/registry/git_registry.rs b/crates/binstalk/src/drivers/registry/git_registry.rs new file mode 100644 index 00000000..3922bbd0 --- /dev/null +++ b/crates/binstalk/src/drivers/registry/git_registry.rs @@ -0,0 +1,136 @@ +use std::{ + fs::File, + io::{self, BufReader, Read}, + path::PathBuf, + sync::Arc, +}; + +use cargo_toml::Manifest; +use compact_str::{CompactString, ToCompactString}; +use once_cell::sync::OnceCell; +use semver::VersionReq; +use serde_json::{from_slice as json_from_slice, Deserializer as JsonDeserializer}; +use tempfile::TempDir; +use tokio::task::spawn_blocking; +use url::Url; + +use crate::{ + drivers::registry::{ + crate_prefix_components, parse_manifest, render_dl_template, MatchedVersion, + RegistryConfig, RegistryError, + }, + errors::BinstallError, + helpers::{ + git::{GitUrl, Repository}, + remote::Client, + }, + manifests::cargo_toml_binstall::Meta, +}; + +#[derive(Debug)] +struct GitIndex { + path: TempDir, + dl_template: CompactString, +} + +impl GitIndex { + fn new(url: GitUrl) -> Result { + let tempdir = TempDir::new()?; + + Repository::shallow_clone(url, tempdir.as_ref())?; + + let mut v = Vec::with_capacity(100); + File::open(tempdir.as_ref().join("config.json"))?.read_to_end(&mut v)?; + + let config: RegistryConfig = json_from_slice(&v).map_err(RegistryError::from)?; + + Ok(Self { + path: tempdir, + dl_template: config.dl, + }) + } +} + +#[derive(Debug)] +struct GitRegistryInner { + url: GitUrl, + git_index: OnceCell, +} + +#[derive(Clone, Debug)] +pub struct GitRegistry(Arc); + +impl GitRegistry { + pub fn new(url: GitUrl) -> Self { + Self(Arc::new(GitRegistryInner { + url, + git_index: Default::default(), + })) + } + + /// WARNING: This is a blocking operation. + fn find_crate_matched_ver( + mut path: PathBuf, + crate_name: &str, + (c1, c2): &(CompactString, Option), + version_req: &VersionReq, + ) -> Result { + path.push(&**c1); + if let Some(c2) = c2 { + path.push(&**c2); + } + + path.push(&*crate_name.to_lowercase()); + + let f = File::open(path) + .map_err(|e| match e.kind() { + io::ErrorKind::NotFound => RegistryError::NotFound(crate_name.into()).into(), + _ => BinstallError::from(e), + }) + .map(BufReader::new)?; + + MatchedVersion::find( + &mut JsonDeserializer::from_reader(f).into_iter(), + version_req, + ) + } + + pub async fn fetch_crate_matched( + &self, + client: Client, + name: &str, + version_req: &VersionReq, + ) -> Result, BinstallError> { + let crate_prefix = crate_prefix_components(name)?; + let crate_name = name.to_compact_string(); + let version_req = version_req.clone(); + let this = self.clone(); + + let (version, dl_url) = spawn_blocking(move || { + let GitIndex { path, dl_template } = this + .0 + .git_index + .get_or_try_init(|| GitIndex::new(this.0.url.clone()))?; + + let MatchedVersion { version, cksum } = Self::find_crate_matched_ver( + path.as_ref().to_owned(), + &crate_name, + &crate_prefix, + &version_req, + )?; + + let url = Url::parse(&render_dl_template( + dl_template, + &crate_name, + &crate_prefix, + &version, + &cksum, + )?)?; + + Ok::<_, BinstallError>((version, url)) + }) + .await??; + + parse_manifest(client, name, &version, dl_url).await + } +} diff --git a/crates/binstalk/src/drivers/registry/sparse_registry.rs b/crates/binstalk/src/drivers/registry/sparse_registry.rs new file mode 100644 index 00000000..13acb057 --- /dev/null +++ b/crates/binstalk/src/drivers/registry/sparse_registry.rs @@ -0,0 +1,109 @@ +use cargo_toml::Manifest; +use compact_str::CompactString; +use semver::VersionReq; +use serde_json::Deserializer as JsonDeserializer; +use tokio::sync::OnceCell; +use url::Url; + +use crate::{ + drivers::registry::{ + crate_prefix_components, parse_manifest, render_dl_template, MatchedVersion, + RegistryConfig, RegistryError, + }, + errors::BinstallError, + helpers::remote::{Client, Error as RemoteError}, + manifests::cargo_toml_binstall::Meta, +}; + +#[derive(Debug)] +pub struct SparseRegistry { + url: Url, + dl_template: OnceCell, +} + +impl SparseRegistry { + /// * `url` - `url.cannot_be_a_base()` must be `false` + pub fn new(url: Url) -> Self { + Self { + url, + dl_template: Default::default(), + } + } + + async fn get_dl_template(&self, client: &Client) -> Result<&str, RegistryError> { + self.dl_template + .get_or_try_init(|| { + Box::pin(async { + let mut url = self.url.clone(); + url.path_segments_mut().unwrap().push("config.json"); + let config: RegistryConfig = client.get(url).send(true).await?.json().await?; + Ok(config.dl) + }) + }) + .await + .map(AsRef::as_ref) + } + + /// `url` must be a valid http(s) url. + async fn find_crate_matched_ver( + client: &Client, + mut url: Url, + crate_name: &str, + (c1, c2): &(CompactString, Option), + version_req: &VersionReq, + ) -> Result { + { + let mut path = url.path_segments_mut().unwrap(); + + path.push(c1); + if let Some(c2) = c2 { + path.push(c2); + } + + path.push(&crate_name.to_lowercase()); + } + + let body = client + .get(url) + .send(true) + .await + .map_err(|e| match e { + RemoteError::Http(e) if e.is_status() => RegistryError::NotFound(crate_name.into()), + e => e.into(), + })? + .bytes() + .await + .map_err(RegistryError::from)?; + MatchedVersion::find( + &mut JsonDeserializer::from_slice(&body).into_iter(), + version_req, + ) + } + + pub async fn fetch_crate_matched( + &self, + client: Client, + crate_name: &str, + version_req: &VersionReq, + ) -> Result, BinstallError> { + let crate_prefix = crate_prefix_components(crate_name)?; + let dl_template = self.get_dl_template(&client).await?; + let MatchedVersion { version, cksum } = Self::find_crate_matched_ver( + &client, + self.url.clone(), + crate_name, + &crate_prefix, + version_req, + ) + .await?; + let dl_url = Url::parse(&render_dl_template( + dl_template, + crate_name, + &crate_prefix, + &version, + &cksum, + )?)?; + + parse_manifest(client, crate_name, &version, dl_url).await + } +} diff --git a/crates/binstalk/src/drivers/crates_io/vfs.rs b/crates/binstalk/src/drivers/registry/vfs.rs similarity index 100% rename from crates/binstalk/src/drivers/crates_io/vfs.rs rename to crates/binstalk/src/drivers/registry/vfs.rs diff --git a/crates/binstalk/src/drivers/crates_io/visitor.rs b/crates/binstalk/src/drivers/registry/visitor.rs similarity index 100% rename from crates/binstalk/src/drivers/crates_io/visitor.rs rename to crates/binstalk/src/drivers/registry/visitor.rs diff --git a/crates/binstalk/src/errors.rs b/crates/binstalk/src/errors.rs index aa14d953..fa050304 100644 --- a/crates/binstalk/src/errors.rs +++ b/crates/binstalk/src/errors.rs @@ -15,15 +15,7 @@ use thiserror::Error; use tokio::task; use tracing::{error, warn}; -use crate::helpers::cargo_toml_workspace::LoadManifestFromWSError; - -#[derive(Debug, Error)] -#[error("crates.io API error for {crate_name}: {err}")] -pub struct CratesIoApiError { - pub crate_name: CompactString, - #[source] - pub err: RemoteError, -} +use crate::{drivers::RegistryError, helpers::cargo_toml_workspace::LoadManifestFromWSError}; #[derive(Debug, Error)] #[error("version string '{v}' is not semver: {err}")] @@ -145,15 +137,11 @@ pub enum BinstallError { /// /// This could either be a "not found" or a server/transport error. /// - /// - Code: `binstall::crates_io_api` + /// - Code: `binstall::cargo_registry` /// - Exit: 76 #[error(transparent)] - #[diagnostic( - severity(error), - code(binstall::crates_io_api), - help("Check that the crate name you provided is correct.\nYou can also search for a matching crate at: https://lib.rs/search?q={}", .0.crate_name) - )] - CratesIoApi(#[from] Box), + #[diagnostic(transparent)] + RegistryError(#[from] Box), /// The override path to the cargo manifest is invalid or cannot be resolved. /// @@ -360,7 +348,7 @@ impl BinstallError { Download(_) => 68, SubProcess { .. } => 70, Io(_) => 74, - CratesIoApi { .. } => 76, + RegistryError { .. } => 76, CargoManifestPath => 77, CargoManifest { .. } => 78, VersionParse { .. } => 80, @@ -479,3 +467,9 @@ impl From for BinstallError { BinstallError::TargetTripleParseError(Box::new(e)) } } + +impl From for BinstallError { + fn from(e: RegistryError) -> Self { + BinstallError::RegistryError(Box::new(e)) + } +} diff --git a/crates/binstalk/src/helpers/git.rs b/crates/binstalk/src/helpers/git.rs index 1509182f..f4ebb85b 100644 --- a/crates/binstalk/src/helpers/git.rs +++ b/crates/binstalk/src/helpers/git.rs @@ -8,6 +8,8 @@ use tracing::debug; mod progress_tracing; use progress_tracing::TracingProgress; +pub use gix::url::parse::Error as GitUrlParseError; + #[derive(Debug, ThisError)] #[non_exhaustive] pub enum GitError { @@ -43,7 +45,7 @@ impl From for GitError { pub struct GitUrl(Url); impl FromStr for GitUrl { - type Err = gix::url::parse::Error; + type Err = GitUrlParseError; fn from_str(s: &str) -> Result { Url::try_from(s).map(Self) diff --git a/crates/binstalk/src/helpers/remote.rs b/crates/binstalk/src/helpers/remote.rs index b03a9c3c..e794c744 100644 --- a/crates/binstalk/src/helpers/remote.rs +++ b/crates/binstalk/src/helpers/remote.rs @@ -1,4 +1,5 @@ pub use binstalk_downloader::remote::*; +pub use url::ParseError as UrlParseError; use binstalk_downloader::gh_api_client::{GhApiClient, GhReleaseArtifact, HasReleaseArtifact}; use tracing::{debug, warn}; diff --git a/crates/binstalk/src/ops.rs b/crates/binstalk/src/ops.rs index 382d3fc5..c28ee280 100644 --- a/crates/binstalk/src/ops.rs +++ b/crates/binstalk/src/ops.rs @@ -3,12 +3,9 @@ use std::{path::PathBuf, sync::Arc}; use semver::VersionReq; -use tokio::{ - sync::Mutex, - time::{interval, Duration, Interval, MissedTickBehavior}, -}; use crate::{ + drivers::Registry, fetchers::{Data, Fetcher, TargetData}, helpers::{ self, gh_api_client::GhApiClient, jobserver_client::LazyJobserverClient, remote::Client, @@ -51,32 +48,5 @@ pub struct Options { pub client: Client, pub gh_api_client: GhApiClient, pub jobserver_client: LazyJobserverClient, - pub crates_io_rate_limit: CratesIoRateLimit, -} - -pub struct CratesIoRateLimit(Mutex); - -impl Default for CratesIoRateLimit { - fn default() -> Self { - let mut interval = interval(Duration::from_secs(1)); - // If somehow one tick is delayed, then next tick should be at least - // 1s later than the current tick. - // - // Other MissedTickBehavior including Burst (default), which will - // tick as fast as possible to catch up, and Skip, which will - // skip the current tick for the next one. - // - // Both Burst and Skip is not the expected behavior for rate limit: - // ticking as fast as possible would violate crates.io crawler - // policy, and skipping the current one will slow down the resolution - // process. - interval.set_missed_tick_behavior(MissedTickBehavior::Delay); - Self(Mutex::new(interval)) - } -} - -impl CratesIoRateLimit { - pub(super) async fn tick(&self) { - self.0.lock().await.tick().await; - } + pub registry: Registry, } diff --git a/crates/binstalk/src/ops/resolve.rs b/crates/binstalk/src/ops/resolve.rs index 6bc87ad2..47b1d359 100644 --- a/crates/binstalk/src/ops/resolve.rs +++ b/crates/binstalk/src/ops/resolve.rs @@ -19,7 +19,6 @@ use tracing::{debug, info, instrument, warn}; use crate::{ bins, - drivers::fetch_crate_cratesio, errors::{BinstallError, VersionParseError}, fetchers::{Data, Fetcher, TargetData}, helpers::{self, download::ExtractedFiles, remote::Client, target_triple::TargetTriple}, @@ -379,12 +378,10 @@ impl PackageInfo { .await?? } None => { - Box::pin(fetch_crate_cratesio( - client, - &name, - version_req, - &opts.crates_io_rate_limit, - )) + Box::pin( + opts.registry + .fetch_crate_matched(client, &name, version_req), + ) .await? } };