From 083811856b656981dbb87708118aaea75a967ba9 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Wed, 5 Jun 2024 00:40:08 +1000 Subject: [PATCH] feature: Support private repository Signed-off-by: Jiahao XU --- crates/binstalk-fetchers/src/common.rs | 58 ++++++++----- crates/binstalk-fetchers/src/gh_crate_meta.rs | 81 +++++++++++++++---- crates/binstalk-fetchers/src/lib.rs | 54 +++++++++---- 3 files changed, 140 insertions(+), 53 deletions(-) diff --git a/crates/binstalk-fetchers/src/common.rs b/crates/binstalk-fetchers/src/common.rs index 6cc291d7..b3a491ae 100644 --- a/crates/binstalk-fetchers/src/common.rs +++ b/crates/binstalk-fetchers/src/common.rs @@ -8,7 +8,7 @@ pub(super) use binstalk_downloader::{ remote::{Client, Url}, }; pub(super) use binstalk_git_repo_api::gh_api_client::GhApiClient; -use binstalk_git_repo_api::gh_api_client::{GhApiError, GhReleaseArtifact}; +use binstalk_git_repo_api::gh_api_client::{GhApiError, GhReleaseArtifact, GhReleaseArtifactUrl}; pub(super) use binstalk_types::cargo_toml_binstall::{PkgFmt, PkgMeta}; pub(super) use compact_str::CompactString; pub(super) use tokio::task::JoinHandle; @@ -16,6 +16,39 @@ pub(super) use tracing::{debug, instrument, warn}; use crate::FetchError; +static WARN_RATE_LIMIT_ONCE: Once = Once::new(); +static WARN_UNAUTHORIZED_ONCE: Once = Once::new(); + +pub(super) async fn get_gh_release_artifact_url( + gh_api_client: GhApiClient, + artifact: GhReleaseArtifact, +) -> Result, GhApiError> { + debug!("Using GitHub API to check for existence of artifact, which will also cache the API response"); + + // The future returned has the same size as a pointer + match gh_api_client.has_release_artifact(artifact).await { + Ok(ret) => Ok(ret), + Err(GhApiError::NotFound) => Ok(None), + + Err(GhApiError::RateLimit { retry_after }) => { + WARN_RATE_LIMIT_ONCE.call_once(|| { + warn!("Your GitHub API token (if any) has reached its rate limit and cannot be used again until {retry_after:?}, so we will fallback to HEAD/GET on the url."); + warn!("If you did not supply a github token, consider doing so: GitHub limits unauthorized users to 60 requests per hour per origin IP address."); + }); + Err(GhApiError::RateLimit { retry_after }) + } + Err(GhApiError::Unauthorized) => { + WARN_UNAUTHORIZED_ONCE.call_once(|| { + warn!("GitHub API somehow requires a token for the API access, so we will fallback to HEAD/GET on the url."); + warn!("Please consider supplying a token to cargo-binstall to speedup resolution."); + }); + Err(GhApiError::Unauthorized) + } + + Err(err) => Err(err), + } +} + /// This function returns a future where its size should be at most size of /// 2-4 pointers. pub(super) async fn does_url_exist( @@ -24,32 +57,15 @@ pub(super) async fn does_url_exist( url: &Url, ) -> Result { static GH_API_CLIENT_FAILED: AtomicBool = AtomicBool::new(false); - static WARN_RATE_LIMIT_ONCE: Once = Once::new(); - static WARN_UNAUTHORIZED_ONCE: Once = Once::new(); debug!("Checking for package at: '{url}'"); if !GH_API_CLIENT_FAILED.load(Relaxed) { if let Some(artifact) = GhReleaseArtifact::try_extract_from_url(url) { - debug!("Using GitHub API to check for existence of artifact, which will also cache the API response"); + match get_gh_release_artifact_url(gh_api_client, artifact).await { + Ok(ret) => return Ok(ret.is_some()), - // The future returned has the same size as a pointer - match gh_api_client.has_release_artifact(artifact).await { - Ok(Some(_)) => return Ok(true), - Ok(None) | Err(GhApiError::NotFound) => return Ok(false), - - Err(GhApiError::RateLimit { retry_after }) => { - WARN_RATE_LIMIT_ONCE.call_once(|| { - warn!("Your GitHub API token (if any) has reached its rate limit and cannot be used again until {retry_after:?}, so we will fallback to HEAD/GET on the url."); - warn!("If you did not supply a github token, consider doing so: GitHub limits unauthorized users to 60 requests per hour per origin IP address."); - }); - } - Err(GhApiError::Unauthorized) => { - WARN_UNAUTHORIZED_ONCE.call_once(|| { - warn!("GitHub API somehow requires a token for the API access, so we will fallback to HEAD/GET on the url."); - warn!("Please consider supplying a token to cargo-binstall to speedup resolution."); - }); - } + Err(GhApiError::RateLimit { .. }) | Err(GhApiError::Unauthorized) => {} Err(err) => return Err(err.into()), } diff --git a/crates/binstalk-fetchers/src/gh_crate_meta.rs b/crates/binstalk-fetchers/src/gh_crate_meta.rs index 68c66f00..015debbc 100644 --- a/crates/binstalk-fetchers/src/gh_crate_meta.rs +++ b/crates/binstalk-fetchers/src/gh_crate_meta.rs @@ -1,5 +1,6 @@ use std::{borrow::Cow, fmt, iter, path::Path, sync::Arc}; +use binstalk_git_repo_api::gh_api_client::{GhApiError, GhReleaseArtifact, GhReleaseArtifactUrl}; use compact_str::{CompactString, ToCompactString}; use either::Either; use leon::Template; @@ -31,6 +32,8 @@ struct Resolved { archive_suffix: Option, repo: Option, subcrate: Option, + gh_release_artifact_url: Option, + is_repo_private: bool, } impl GhCrateMeta { @@ -41,6 +44,7 @@ impl GhCrateMeta { pkg_url: &Template<'_>, repo: Option<&str>, subcrate: Option<&str>, + is_repo_private: bool, ) { let render_url = |ext| { let ctx = Context::from_data_with_repo( @@ -82,16 +86,39 @@ impl GhCrateMeta { let repo = repo.map(ToString::to_string); let subcrate = subcrate.map(ToString::to_string); let archive_suffix = ext.map(ToString::to_string); + let gh_release_artifact = GhReleaseArtifact::try_extract_from_url(&url); + async move { - Ok(does_url_exist(client, gh_api_client, &url) + debug!("Checking for package at: '{url}'"); + + let mut resolved = Resolved { + url: url.clone(), + pkg_fmt, + repo, + subcrate, + archive_suffix, + is_repo_private, + gh_release_artifact_url: None, + }; + + if let Some(artifact) = gh_release_artifact { + match get_gh_release_artifact_url(gh_api_client, artifact).await { + Ok(Some(artifact_url)) => { + resolved.gh_release_artifact_url = Some(artifact_url); + return Ok(Some(resolved)); + } + Ok(None) => return Ok(None), + + Err(GhApiError::RateLimit { .. }) => (), + Err(GhApiError::Unauthorized) if !is_repo_private => (), + + Err(err) => return Err(err.into()), + } + } + + Ok(Box::pin(client.remote_gettable(url)) .await? - .then_some(Resolved { - url, - pkg_fmt, - repo, - subcrate, - archive_suffix, - })) + .then_some(resolved)) } })); } @@ -118,10 +145,11 @@ impl super::Fetcher for GhCrateMeta { fn find(self: Arc) -> JoinHandle> { tokio::spawn(async move { - let info = self.data.get_repo_info(&self.client).await?.as_ref(); + let info = self.data.get_repo_info(&self.gh_api_client).await?; let repo = info.map(|info| &info.repo); let subcrate = info.and_then(|info| info.subcrate.as_deref()); + let is_repo_private = info.map(|info| info.is_private).unwrap_or_default(); let mut pkg_fmt = self.target_data.meta.pkg_fmt; @@ -230,13 +258,22 @@ impl super::Fetcher for GhCrateMeta { // basically cartesian product. // | for pkg_fmt in pkg_fmts.clone() { - this.launch_baseline_find_tasks(&resolver, pkg_fmt, &pkg_url, repo, subcrate); + this.launch_baseline_find_tasks( + &resolver, + pkg_fmt, + &pkg_url, + repo, + subcrate, + is_repo_private, + ); } } if let Some(resolved) = resolver.resolve().await? { debug!(?resolved, "Winning URL found!"); - self.resolution.set(resolved).unwrap(); // find() is called first + self.resolution + .set(resolved) + .expect("find() should be only called once"); Ok(true) } else { Ok(false) @@ -245,7 +282,10 @@ impl super::Fetcher for GhCrateMeta { } async fn fetch_and_extract(&self, dst: &Path) -> Result { - let resolved = self.resolution.get().unwrap(); // find() is called first + let resolved = self + .resolution + .get() + .expect("find() should be called once before fetch_and_extract()"); trace!(?resolved, "preparing to fetch"); let verifier = match (self.signature_policy, &self.target_data.meta.signing) { @@ -290,11 +330,18 @@ impl super::Fetcher for GhCrateMeta { "Downloading package", ); let mut data_verifier = verifier.data_verifier()?; - let files = Download::new_with_data_verifier( - self.client.clone(), - resolved.url.clone(), - data_verifier.as_mut(), - ) + let files = match resolved.gh_release_artifact_url.as_ref() { + Some(artifact_url) if resolved.is_repo_private => self + .gh_api_client + .download_artifact(artifact_url.clone()) + .await? + .with_data_verifier(data_verifier.as_mut()), + _ => Download::new_with_data_verifier( + self.client.clone(), + resolved.url.clone(), + data_verifier.as_mut(), + ), + } .and_extract(resolved.pkg_fmt, dst) .await?; trace!("validating signature (if any)"); diff --git a/crates/binstalk-fetchers/src/lib.rs b/crates/binstalk-fetchers/src/lib.rs index 1ed1ad8e..f40e1b4a 100644 --- a/crates/binstalk-fetchers/src/lib.rs +++ b/crates/binstalk-fetchers/src/lib.rs @@ -3,7 +3,7 @@ use std::{path::Path, sync::Arc}; use binstalk_downloader::{download::DownloadError, remote::Error as RemoteError}; -use binstalk_git_repo_api::gh_api_client::GhApiError; +use binstalk_git_repo_api::gh_api_client::{GhApiError, GhRepo}; use binstalk_types::cargo_toml_binstall::SigningAlgorithm; use thiserror::Error as ThisError; use tokio::sync::OnceCell; @@ -144,6 +144,7 @@ struct RepoInfo { repo: Url, repository_host: RepositoryHost, subcrate: Option, + is_private: bool, } /// What to do about package signatures @@ -179,29 +180,52 @@ impl Data { } #[instrument(level = "debug")] - async fn get_repo_info(&self, client: &Client) -> Result<&Option, FetchError> { + async fn get_repo_info(&self, client: &GhApiClient) -> Result, FetchError> { self.repo_info .get_or_try_init(move || { Box::pin(async move { - if let Some(repo) = self.repo.as_deref() { - let mut repo = client.get_redirected_final_url(Url::parse(repo)?).await?; - let repository_host = RepositoryHost::guess_git_hosting_services(&repo); + let Some(repo) = self.repo.as_deref() else { + return Ok(None); + }; - let repo_info = RepoInfo { - subcrate: RepoInfo::detect_subcrate(&mut repo, repository_host), - repo, - repository_host, - }; + let mut repo = Url::parse(&repo)?; + let mut repository_host = RepositoryHost::guess_git_hosting_services(&repo); - debug!("Resolved repo_info = {repo_info:#?}"); - - Ok(Some(repo_info)) - } else { - Ok(None) + if repository_host == RepositoryHost::Unknown { + repo = client + .remote_client() + .get_redirected_final_url(repo) + .await?; + repository_host = RepositoryHost::guess_git_hosting_services(&repo); } + + let subcrate = RepoInfo::detect_subcrate(&mut repo, repository_host); + + let mut is_private = false; + if repository_host == RepositoryHost::GitHub && client.has_gh_token() { + if let Some(gh_repo) = GhRepo::try_extract_from_url(&repo) { + let Some(gh_repo_info) = client.get_repo_info(&gh_repo).await? else { + return Err(GhApiError::NotFound.into()); + }; + + is_private = gh_repo_info.is_private(); + } + } + + let repo_info = RepoInfo { + subcrate, + repo, + repository_host, + is_private, + }; + + debug!("Resolved repo_info = {repo_info:#?}"); + + Ok(Some(repo_info)) }) }) .await + .map(Option::as_ref) } }