diff --git a/.github/workflows/release-pr.yml b/.github/workflows/release-pr.yml index b5398fd0..9777ad89 100644 --- a/.github/workflows/release-pr.yml +++ b/.github/workflows/release-pr.yml @@ -12,6 +12,7 @@ on: - binstalk - binstalk-bins - binstalk-fetchers + - binstalk-git-repo-api - binstalk-registry - binstalk-manifests - binstalk-types diff --git a/Cargo.lock b/Cargo.lock index d8bcb685..bf75d89e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -252,6 +252,7 @@ dependencies = [ "binstalk-bins", "binstalk-downloader", "binstalk-fetchers", + "binstalk-git-repo-api", "binstalk-registry", "binstalk-types", "cargo-toml-workspace", @@ -309,10 +310,8 @@ dependencies = [ "httpdate", "ipconfig", "once_cell", - "percent-encoding", "reqwest", "serde", - "serde-tuple-vec-map", "serde_json", "tempfile", "thiserror", @@ -331,6 +330,7 @@ version = "0.4.1" dependencies = [ "async-trait", "binstalk-downloader", + "binstalk-git-repo-api", "binstalk-types", "bytes", "compact_str", @@ -348,6 +348,23 @@ dependencies = [ "url", ] +[[package]] +name = "binstalk-git-repo-api" +version = "0.0.0" +dependencies = [ + "binstalk-downloader", + "compact_str", + "percent-encoding", + "serde", + "serde-tuple-vec-map", + "serde_json", + "thiserror", + "tokio", + "tracing", + "tracing-subscriber", + "url", +] + [[package]] name = "binstalk-manifests" version = "0.13.0" @@ -4106,8 +4123,10 @@ dependencies = [ "serde", "serde_json", "sharded-slab", + "smallvec", "thread_local", "tracing-core", + "tracing-log", "tracing-serde", ] diff --git a/Cargo.toml b/Cargo.toml index 8fda04af..3ed0edbb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ members = [ "crates/fs-lock", "crates/normalize-path", "crates/detect-targets", + "crates/binstalk-git-repo-api", ] [profile.release] diff --git a/crates/binstalk-downloader/Cargo.toml b/crates/binstalk-downloader/Cargo.toml index f176a872..b7a55be1 100644 --- a/crates/binstalk-downloader/Cargo.toml +++ b/crates/binstalk-downloader/Cargo.toml @@ -43,9 
+43,7 @@ reqwest = { version = "0.12.4", features = [ "brotli", "deflate", ], default-features = false } -percent-encoding = "2.2.0" serde = { version = "1.0.163", features = ["derive"], optional = true } -serde-tuple-vec-map = "1.0.1" serde_json = { version = "1.0.107", optional = true } # Use a fork here since we need PAX support, but the upstream # does not hav the PR merged yet. @@ -127,7 +125,6 @@ zstd-thin = ["zstd/thin"] cross-lang-fat-lto = ["zstd/fat-lto"] -gh-api-client = ["json"] json = ["serde", "serde_json"] [target."cfg(windows)".dependencies] diff --git a/crates/binstalk-downloader/src/download.rs b/crates/binstalk-downloader/src/download.rs index c1b7f59e..a2ee5cde 100644 --- a/crates/binstalk-downloader/src/download.rs +++ b/crates/binstalk-downloader/src/download.rs @@ -1,4 +1,4 @@ -use std::{fmt, io, marker::PhantomData, path::Path}; +use std::{fmt, io, path::Path}; use binstalk_types::cargo_toml_binstall::PkgFmtDecomposed; use bytes::Bytes; @@ -8,7 +8,7 @@ use tracing::{debug, error, instrument}; pub use binstalk_types::cargo_toml_binstall::{PkgFmt, TarBasedFmt}; -use crate::remote::{Client, Error as RemoteError, Url}; +use crate::remote::{Client, Error as RemoteError, Response, Url}; mod async_extracter; use async_extracter::*; @@ -90,38 +90,43 @@ impl DataVerifier for () { } } +#[derive(Debug)] +enum DownloadContent { + ToIssue { client: Client, url: Url }, + Response(Response), +} + +impl DownloadContent { + async fn into_response(self) -> Result { + Ok(match self { + DownloadContent::ToIssue { client, url } => client.get(url).send(true).await?, + DownloadContent::Response(response) => response, + }) + } +} + pub struct Download<'a> { - client: Client, - url: Url, + content: DownloadContent, data_verifier: Option<&'a mut dyn DataVerifier>, } impl fmt::Debug for Download<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - #[allow(dead_code, clippy::type_complexity)] - #[derive(Debug)] - struct Download<'a> { - client: &'a 
Client, - url: &'a Url, - data_verifier: Option>, - } - - fmt::Debug::fmt( - &Download { - client: &self.client, - url: &self.url, - data_verifier: self.data_verifier.as_ref().map(|_| PhantomData), - }, - f, - ) + fmt::Debug::fmt(&self.content, f) } } impl Download<'static> { pub fn new(client: Client, url: Url) -> Self { Self { - client, - url, + content: DownloadContent::ToIssue { client, url }, + data_verifier: None, + } + } + + pub fn from_response(response: Response) -> Self { + Self { + content: DownloadContent::Response(response), data_verifier: None, } } @@ -134,8 +139,24 @@ impl<'a> Download<'a> { data_verifier: &'a mut dyn DataVerifier, ) -> Self { Self { - client, - url, + content: DownloadContent::ToIssue { client, url }, + data_verifier: Some(data_verifier), + } + } + + pub fn from_response_with_data_verifier( + response: Response, + data_verifier: &'a mut dyn DataVerifier, + ) -> Self { + Self { + content: DownloadContent::Response(response), + data_verifier: Some(data_verifier), + } + } + + pub fn with_data_verifier(self, data_verifier: &mut dyn DataVerifier) -> Download<'_> { + Download { + content: self.content, data_verifier: Some(data_verifier), } } @@ -148,9 +169,10 @@ impl<'a> Download<'a> { > { let mut data_verifier = self.data_verifier; Ok(self - .client - .get_stream(self.url) + .content + .into_response() .await? 
+ .bytes_stream() .map(move |res| { let bytes = res?; @@ -257,7 +279,7 @@ impl Download<'_> { #[instrument] pub async fn into_bytes(self) -> Result { - let bytes = self.client.get(self.url).send(true).await?.bytes().await?; + let bytes = self.content.into_response().await?.bytes().await?; if let Some(verifier) = self.data_verifier { verifier.update(&bytes); } diff --git a/crates/binstalk-downloader/src/gh_api_client.rs b/crates/binstalk-downloader/src/gh_api_client.rs deleted file mode 100644 index 92dac5f8..00000000 --- a/crates/binstalk-downloader/src/gh_api_client.rs +++ /dev/null @@ -1,506 +0,0 @@ -use std::{ - collections::HashMap, - ops::Deref, - sync::{ - atomic::{AtomicBool, Ordering::Relaxed}, - Arc, Mutex, RwLock, - }, - time::{Duration, Instant}, -}; - -use compact_str::CompactString; -use percent_encoding::{ - percent_decode_str, utf8_percent_encode, AsciiSet, PercentEncode, CONTROLS, -}; -use tokio::sync::OnceCell; - -use crate::remote; - -mod request; -pub use request::{GhApiContextError, GhApiError, GhGraphQLErrors}; - -/// default retry duration if x-ratelimit-reset is not found in response header -const DEFAULT_RETRY_DURATION: Duration = Duration::from_secs(10 * 60); - -fn percent_encode_http_url_path(path: &str) -> PercentEncode<'_> { - /// https://url.spec.whatwg.org/#fragment-percent-encode-set - const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`'); - - /// https://url.spec.whatwg.org/#path-percent-encode-set - const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}'); - - const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%'); - - // The backslash (\) character is treated as a path separator in special URLs - // so it needs to be additionally escaped in that case. - // - // http is considered to have special path. 
- const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT.add(b'\\'); - - utf8_percent_encode(path, SPECIAL_PATH_SEGMENT) -} - -fn percent_decode_http_url_path(input: &str) -> CompactString { - if input.contains('%') { - percent_decode_str(input).decode_utf8_lossy().into() - } else { - // No '%', no need to decode. - CompactString::new(input) - } -} - -/// The keys required to identify a github release. -#[derive(Clone, Eq, PartialEq, Hash, Debug)] -pub struct GhRelease { - pub owner: CompactString, - pub repo: CompactString, - pub tag: CompactString, -} - -/// The Github Release and one of its artifact. -#[derive(Clone, Eq, PartialEq, Hash, Debug)] -pub struct GhReleaseArtifact { - pub release: GhRelease, - pub artifact_name: CompactString, -} - -impl GhReleaseArtifact { - /// Create [`GhReleaseArtifact`] from url. - pub fn try_extract_from_url(url: &remote::Url) -> Option { - if url.domain() != Some("github.com") { - return None; - } - - let mut path_segments = url.path_segments()?; - - let owner = path_segments.next()?; - let repo = path_segments.next()?; - - if (path_segments.next()?, path_segments.next()?) 
!= ("releases", "download") { - return None; - } - - let tag = path_segments.next()?; - let artifact_name = path_segments.next()?; - - (path_segments.next().is_none() && url.fragment().is_none() && url.query().is_none()).then( - || Self { - release: GhRelease { - owner: percent_decode_http_url_path(owner), - repo: percent_decode_http_url_path(repo), - tag: percent_decode_http_url_path(tag), - }, - artifact_name: percent_decode_http_url_path(artifact_name), - }, - ) - } -} - -#[derive(Debug)] -struct Map(RwLock>>); - -impl Default for Map { - fn default() -> Self { - Self(Default::default()) - } -} - -impl Map -where - K: Eq + std::hash::Hash, - V: Default, -{ - fn get(&self, k: K) -> Arc { - let optional_value = self.0.read().unwrap().deref().get(&k).cloned(); - optional_value.unwrap_or_else(|| Arc::clone(self.0.write().unwrap().entry(k).or_default())) - } -} - -#[derive(Debug)] -struct Inner { - client: remote::Client, - release_artifacts: Map>>, - retry_after: Mutex>, - - auth_token: Option, - is_auth_token_valid: AtomicBool, -} - -/// Github API client for querying whether a release artifact exitsts. -/// Can only handle github.com for now. 
-#[derive(Clone, Debug)] -pub struct GhApiClient(Arc); - -impl GhApiClient { - pub fn new(client: remote::Client, auth_token: Option) -> Self { - Self(Arc::new(Inner { - client, - release_artifacts: Default::default(), - retry_after: Default::default(), - - auth_token, - is_auth_token_valid: AtomicBool::new(true), - })) - } -} - -enum FetchReleaseArtifactError { - Error(GhApiError), - RateLimit { retry_after: Instant }, - Unauthorized, -} - -impl GhApiClient { - async fn do_fetch_release_artifacts( - &self, - release: &GhRelease, - auth_token: Option<&str>, - ) -> Result, FetchReleaseArtifactError> { - use request::FetchReleaseRet::*; - use FetchReleaseArtifactError as Error; - - match request::fetch_release_artifacts(&self.0.client, release, auth_token).await { - Ok(ReleaseNotFound) => Ok(None), - Ok(Artifacts(artifacts)) => Ok(Some(artifacts)), - Ok(ReachedRateLimit { retry_after }) => { - let retry_after = retry_after.unwrap_or(DEFAULT_RETRY_DURATION); - - let now = Instant::now(); - let retry_after = now - .checked_add(retry_after) - .unwrap_or_else(|| now + DEFAULT_RETRY_DURATION); - - Err(Error::RateLimit { retry_after }) - } - Ok(Unauthorized) => Err(Error::Unauthorized), - Err(err) => Err(Error::Error(err)), - } - } - - /// The returned future is guaranteed to be pointer size. - pub async fn has_release_artifact( - &self, - GhReleaseArtifact { - release, - artifact_name, - }: GhReleaseArtifact, - ) -> Result { - use FetchReleaseArtifactError as Error; - - let once_cell = self.0.release_artifacts.get(release.clone()); - let res = once_cell - .get_or_try_init(|| { - Box::pin(async { - { - let mut guard = self.0.retry_after.lock().unwrap(); - - if let Some(retry_after) = *guard { - if retry_after.elapsed().is_zero() { - return Err(Error::RateLimit { retry_after }); - } else { - // Instant retry_after is already reached. 
- *guard = None; - } - }; - } - - if self.0.is_auth_token_valid.load(Relaxed) { - match self - .do_fetch_release_artifacts(&release, self.0.auth_token.as_deref()) - .await - { - Err(Error::Unauthorized) => { - self.0.is_auth_token_valid.store(false, Relaxed); - } - res => return res, - } - } - - self.do_fetch_release_artifacts(&release, None).await - }) - }) - .await; - - match res { - Ok(Some(artifacts)) => { - let has_artifact = artifacts.contains(&artifact_name); - Ok(if has_artifact { - HasReleaseArtifact::Yes - } else { - HasReleaseArtifact::No - }) - } - Ok(None) => Ok(HasReleaseArtifact::NoSuchRelease), - Err(Error::Unauthorized) => Ok(HasReleaseArtifact::Unauthorized), - Err(Error::RateLimit { retry_after }) => { - *self.0.retry_after.lock().unwrap() = Some(retry_after); - - Ok(HasReleaseArtifact::RateLimit { retry_after }) - } - Err(Error::Error(err)) => Err(err), - } - } -} - -#[derive(Eq, PartialEq, Copy, Clone, Debug)] -pub enum HasReleaseArtifact { - Yes, - No, - NoSuchRelease, - /// GitHub returns 401 requiring a token. - /// In this case, it makes sense to fallback to HEAD/GET. - Unauthorized, - - /// GitHub rate limit is applied per hour, so in case of reaching the rate - /// limit, [`GhApiClient`] will return this variant and let the user decide - /// what to do. - /// - /// Usually it is more sensible to fallback to directly HEAD/GET the - /// artifact url than waiting until `retry_after`. - /// - /// If you encounter this frequently, then you should consider getting an - /// authentication token (can be personal access or oath access token), - /// which should give you 5000 requests per hour per user. - /// - /// Rate limit for unauthorized user is 60 requests per hour per originating - /// IP address, so it is very easy to be rate limited. 
- RateLimit { - retry_after: Instant, - }, -} - -#[cfg(test)] -mod test { - use super::*; - use compact_str::{CompactString, ToCompactString}; - use std::{env, num::NonZeroU16}; - - mod cargo_binstall_v0_20_1 { - use super::{CompactString, GhRelease}; - - pub(super) const RELEASE: GhRelease = GhRelease { - owner: CompactString::new_inline("cargo-bins"), - repo: CompactString::new_inline("cargo-binstall"), - tag: CompactString::new_inline("v0.20.1"), - }; - - pub(super) const ARTIFACTS: &[&str] = &[ - "cargo-binstall-aarch64-apple-darwin.full.zip", - "cargo-binstall-aarch64-apple-darwin.zip", - "cargo-binstall-aarch64-pc-windows-msvc.full.zip", - "cargo-binstall-aarch64-pc-windows-msvc.zip", - "cargo-binstall-aarch64-unknown-linux-gnu.full.tgz", - "cargo-binstall-aarch64-unknown-linux-gnu.tgz", - "cargo-binstall-aarch64-unknown-linux-musl.full.tgz", - "cargo-binstall-aarch64-unknown-linux-musl.tgz", - "cargo-binstall-armv7-unknown-linux-gnueabihf.full.tgz", - "cargo-binstall-armv7-unknown-linux-gnueabihf.tgz", - "cargo-binstall-armv7-unknown-linux-musleabihf.full.tgz", - "cargo-binstall-armv7-unknown-linux-musleabihf.tgz", - "cargo-binstall-universal-apple-darwin.full.zip", - "cargo-binstall-universal-apple-darwin.zip", - "cargo-binstall-x86_64-apple-darwin.full.zip", - "cargo-binstall-x86_64-apple-darwin.zip", - "cargo-binstall-x86_64-pc-windows-msvc.full.zip", - "cargo-binstall-x86_64-pc-windows-msvc.zip", - "cargo-binstall-x86_64-unknown-linux-gnu.full.tgz", - "cargo-binstall-x86_64-unknown-linux-gnu.tgz", - "cargo-binstall-x86_64-unknown-linux-musl.full.tgz", - "cargo-binstall-x86_64-unknown-linux-musl.tgz", - ]; - } - - fn try_extract_artifact_from_str(s: &str) -> Option { - GhReleaseArtifact::try_extract_from_url(&url::Url::parse(s).unwrap()) - } - - fn assert_extract_gh_release_artifacts_failures(urls: &[&str]) { - for url in urls { - assert_eq!(try_extract_artifact_from_str(url), None); - } - } - - #[test] - fn extract_gh_release_artifacts_failure() { - use 
cargo_binstall_v0_20_1::*; - - let GhRelease { owner, repo, tag } = RELEASE; - - assert_extract_gh_release_artifacts_failures(&[ - "https://examle.com", - "https://github.com", - &format!("https://github.com/{owner}"), - &format!("https://github.com/{owner}/{repo}"), - &format!("https://github.com/{owner}/{repo}/123e"), - &format!("https://github.com/{owner}/{repo}/releases/21343"), - &format!("https://github.com/{owner}/{repo}/releases/download"), - &format!("https://github.com/{owner}/{repo}/releases/download/{tag}"), - &format!("https://github.com/{owner}/{repo}/releases/download/{tag}/a/23"), - &format!("https://github.com/{owner}/{repo}/releases/download/{tag}/a#a=12"), - &format!("https://github.com/{owner}/{repo}/releases/download/{tag}/a?page=3"), - ]); - } - - #[test] - fn extract_gh_release_artifacts_success() { - use cargo_binstall_v0_20_1::*; - - let GhRelease { owner, repo, tag } = RELEASE; - - for artifact in ARTIFACTS { - let GhReleaseArtifact { - release, - artifact_name, - } = try_extract_artifact_from_str(&format!( - "https://github.com/{owner}/{repo}/releases/download/{tag}/{artifact}" - )) - .unwrap(); - - assert_eq!(release, RELEASE); - assert_eq!(artifact_name, artifact); - } - } - - /// Mark this as an async fn so that you won't accidentally use it in - /// sync context. 
- async fn create_client() -> Vec { - let client = remote::Client::new( - concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")), - None, - NonZeroU16::new(10).unwrap(), - 1.try_into().unwrap(), - [], - ) - .unwrap(); - - let mut gh_clients = vec![GhApiClient::new(client.clone(), None)]; - - if let Ok(token) = env::var("GITHUB_TOKEN") { - gh_clients.push(GhApiClient::new(client, Some(token.into()))); - } - - gh_clients - } - - async fn test_specific_release(release: &GhRelease, artifacts: &[&str]) { - for client in create_client().await { - eprintln!("In client {client:?}"); - - for artifact_name in artifacts { - let ret = client - .has_release_artifact(GhReleaseArtifact { - release: release.clone(), - artifact_name: artifact_name.to_compact_string(), - }) - .await - .unwrap(); - - assert!( - matches!( - ret, - HasReleaseArtifact::Yes | HasReleaseArtifact::RateLimit { .. } - ), - "for '{artifact_name}': answer is {:#?}", - ret - ); - } - - let ret = client - .has_release_artifact(GhReleaseArtifact { - release: release.clone(), - artifact_name: "123z".to_compact_string(), - }) - .await - .unwrap(); - - assert!( - matches!( - ret, - HasReleaseArtifact::No | HasReleaseArtifact::RateLimit { .. 
} - ), - "ret = {:#?}", - ret - ); - } - } - - #[tokio::test] - async fn test_gh_api_client_cargo_binstall_v0_20_1() { - test_specific_release( - &cargo_binstall_v0_20_1::RELEASE, - cargo_binstall_v0_20_1::ARTIFACTS, - ) - .await - } - - #[tokio::test] - async fn test_gh_api_client_cargo_binstall_no_such_release() { - for client in create_client().await { - let release = GhRelease { - owner: "cargo-bins".to_compact_string(), - repo: "cargo-binstall".to_compact_string(), - // We are currently at v0.20.1 and we would never release - // anything older than v0.20.1 - tag: "v0.18.2".to_compact_string(), - }; - - let ret = client - .has_release_artifact(GhReleaseArtifact { - release, - artifact_name: "1234".to_compact_string(), - }) - .await - .unwrap(); - - assert!( - matches!( - ret, - HasReleaseArtifact::NoSuchRelease | HasReleaseArtifact::RateLimit { .. } - ), - "ret = {:#?}", - ret - ); - } - } - - mod cargo_audit_v_0_17_6 { - use super::*; - - const RELEASE: GhRelease = GhRelease { - owner: CompactString::new_inline("rustsec"), - repo: CompactString::new_inline("rustsec"), - tag: CompactString::new_inline("cargo-audit/v0.17.6"), - }; - - const ARTIFACTS: &[&str] = &[ - "cargo-audit-aarch64-unknown-linux-gnu-v0.17.6.tgz", - "cargo-audit-armv7-unknown-linux-gnueabihf-v0.17.6.tgz", - "cargo-audit-x86_64-apple-darwin-v0.17.6.tgz", - "cargo-audit-x86_64-pc-windows-msvc-v0.17.6.zip", - "cargo-audit-x86_64-unknown-linux-gnu-v0.17.6.tgz", - "cargo-audit-x86_64-unknown-linux-musl-v0.17.6.tgz", - ]; - - #[test] - fn extract_with_escaped_characters() { - let release_artifact = try_extract_artifact_from_str( -"https://github.com/rustsec/rustsec/releases/download/cargo-audit%2Fv0.17.6/cargo-audit-aarch64-unknown-linux-gnu-v0.17.6.tgz" - ).unwrap(); - - assert_eq!( - release_artifact, - GhReleaseArtifact { - release: RELEASE, - artifact_name: CompactString::from( - "cargo-audit-aarch64-unknown-linux-gnu-v0.17.6.tgz", - ) - } - ); - } - - #[tokio::test] - async fn 
test_gh_api_client_cargo_audit_v_0_17_6() { - test_specific_release(&RELEASE, ARTIFACTS).await - } - } -} diff --git a/crates/binstalk-downloader/src/gh_api_client/request.rs b/crates/binstalk-downloader/src/gh_api_client/request.rs deleted file mode 100644 index 59e0d140..00000000 --- a/crates/binstalk-downloader/src/gh_api_client/request.rs +++ /dev/null @@ -1,447 +0,0 @@ -use std::{ - borrow::Borrow, - collections::HashSet, - error, fmt, - hash::{Hash, Hasher}, - io, - sync::OnceLock, - time::Duration, -}; - -use compact_str::{CompactString, ToCompactString}; -use reqwest::{header::HeaderMap, StatusCode}; -use serde::{de::Deserializer, Deserialize, Serialize}; -use serde_json::to_string as to_json_string; -use thiserror::Error as ThisError; -use tracing::debug; -use url::Url; - -use super::{percent_encode_http_url_path, remote, GhRelease}; - -#[derive(ThisError, Debug)] -#[error("Context: '{context}', err: '{err}'")] -pub struct GhApiContextError { - context: CompactString, - #[source] - err: GhApiError, -} - -#[derive(ThisError, Debug)] -#[non_exhaustive] -pub enum GhApiError { - #[error("IO Error: {0}")] - Io(#[from] io::Error), - - #[error("Remote Error: {0}")] - Remote(#[from] remote::Error), - - #[error("Failed to parse url: {0}")] - InvalidUrl(#[from] url::ParseError), - - /// A wrapped error providing the context the error is about. 
- #[error(transparent)] - Context(Box), - - #[error("Remote failed to process GraphQL query: {0}")] - GraphQLErrors(#[from] GhGraphQLErrors), -} - -impl GhApiError { - /// Attach context to [`GhApiError`] - pub fn context(self, context: impl fmt::Display) -> Self { - Self::Context(Box::new(GhApiContextError { - context: context.to_compact_string(), - err: self, - })) - } -} - -// Only include fields we do care about - -#[derive(Eq, Deserialize, Debug)] -struct Artifact { - name: CompactString, -} - -// Manually implement PartialEq and Hash to ensure it will always produce the -// same hash as a str with the same content, and that the comparison will be -// the same to coparing a string. - -impl PartialEq for Artifact { - fn eq(&self, other: &Self) -> bool { - self.name.eq(&other.name) - } -} - -impl Hash for Artifact { - fn hash(&self, state: &mut H) - where - H: Hasher, - { - let s: &str = self.name.as_str(); - s.hash(state) - } -} - -// Implement Borrow so that we can use call -// `HashSet::contains::` - -impl Borrow for Artifact { - fn borrow(&self) -> &str { - &self.name - } -} - -#[derive(Debug, Default, Deserialize)] -pub(super) struct Artifacts { - assets: HashSet, -} - -impl Artifacts { - pub(super) fn contains(&self, artifact_name: &str) -> bool { - self.assets.contains(artifact_name) - } -} - -pub(super) enum FetchReleaseRet { - ReachedRateLimit { retry_after: Option }, - ReleaseNotFound, - Artifacts(Artifacts), - Unauthorized, -} - -fn check_for_status(status: StatusCode, headers: &HeaderMap) -> Option { - match status { - remote::StatusCode::FORBIDDEN - if headers - .get("x-ratelimit-remaining") - .map(|val| val == "0") - .unwrap_or(false) => - { - Some(FetchReleaseRet::ReachedRateLimit { - retry_after: headers.get("x-ratelimit-reset").and_then(|value| { - let secs = value.to_str().ok()?.parse().ok()?; - Some(Duration::from_secs(secs)) - }), - }) - } - - remote::StatusCode::UNAUTHORIZED => Some(FetchReleaseRet::Unauthorized), - 
remote::StatusCode::NOT_FOUND => Some(FetchReleaseRet::ReleaseNotFound), - - _ => None, - } -} - -async fn fetch_release_artifacts_restful_api( - client: &remote::Client, - GhRelease { owner, repo, tag }: &GhRelease, - auth_token: Option<&str>, -) -> Result { - let mut request_builder = client - .get(Url::parse(&format!( - "https://api.github.com/repos/{owner}/{repo}/releases/tags/{tag}", - owner = percent_encode_http_url_path(owner), - repo = percent_encode_http_url_path(repo), - tag = percent_encode_http_url_path(tag), - ))?) - .header("Accept", "application/vnd.github+json") - .header("X-GitHub-Api-Version", "2022-11-28"); - - if let Some(auth_token) = auth_token { - request_builder = request_builder.bearer_auth(&auth_token); - } - - let response = request_builder.send(false).await?; - - if let Some(ret) = check_for_status(response.status(), response.headers()) { - Ok(ret) - } else { - Ok(FetchReleaseRet::Artifacts(response.json().await?)) - } -} - -#[derive(Deserialize)] -enum GraphQLResponse { - #[serde(rename = "data")] - Data(GraphQLData), - - #[serde(rename = "errors")] - Errors(GhGraphQLErrors), -} - -#[derive(Debug, Deserialize)] -pub struct GhGraphQLErrors(Box<[GraphQLError]>); - -impl GhGraphQLErrors { - fn is_rate_limited(&self) -> bool { - self.0 - .iter() - .any(|error| matches!(error.error_type, GraphQLErrorType::RateLimited)) - } -} - -impl error::Error for GhGraphQLErrors {} - -impl fmt::Display for GhGraphQLErrors { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let last_error_index = self.0.len() - 1; - - for (i, error) in self.0.iter().enumerate() { - write!( - f, - "type: '{error_type}', msg: '{msg}'", - error_type = error.error_type, - msg = error.message, - )?; - - for location in error.locations.as_deref().into_iter().flatten() { - write!( - f, - ", occured on query line {line} col {col}", - line = location.line, - col = location.column - )?; - } - - for (k, v) in &error.others { - write!(f, ", {k}: {v}")?; - } - - if i < 
last_error_index { - f.write_str("\n")?; - } - } - - Ok(()) - } -} - -#[derive(Debug, Deserialize)] -struct GraphQLError { - message: CompactString, - locations: Option>, - - #[serde(rename = "type")] - error_type: GraphQLErrorType, - - #[serde(flatten, with = "tuple_vec_map")] - others: Vec<(CompactString, serde_json::Value)>, -} - -#[derive(Debug)] -enum GraphQLErrorType { - RateLimited, - Other(CompactString), -} - -impl fmt::Display for GraphQLErrorType { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(match self { - GraphQLErrorType::RateLimited => "RATE_LIMITED", - GraphQLErrorType::Other(s) => s, - }) - } -} - -impl<'de> Deserialize<'de> for GraphQLErrorType { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let s = CompactString::deserialize(deserializer)?; - Ok(match &*s { - "RATE_LIMITED" => GraphQLErrorType::RateLimited, - _ => GraphQLErrorType::Other(s), - }) - } -} - -#[derive(Debug, Deserialize)] -struct GraphQLLocation { - line: u64, - column: u64, -} - -#[derive(Deserialize)] -struct GraphQLData { - repository: Option, -} - -#[derive(Deserialize)] -struct GraphQLRepo { - release: Option, -} - -#[derive(Deserialize)] -struct GraphQLRelease { - #[serde(rename = "releaseAssets")] - assets: GraphQLReleaseAssets, -} - -#[derive(Deserialize)] -struct GraphQLReleaseAssets { - nodes: Vec, - #[serde(rename = "pageInfo")] - page_info: GraphQLPageInfo, -} - -#[derive(Deserialize)] -struct GraphQLPageInfo { - #[serde(rename = "endCursor")] - end_cursor: Option, - #[serde(rename = "hasNextPage")] - has_next_page: bool, -} - -enum FilterCondition { - Init, - After(CompactString), -} - -impl fmt::Display for FilterCondition { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - // GitHub imposes a limit of 100 for the value passed to param "first" - FilterCondition::Init => f.write_str("first:100"), - FilterCondition::After(end_cursor) => write!(f, 
r#"first:100,after:"{end_cursor}""#), - } - } -} - -#[derive(Serialize)] -struct GraphQLQuery { - query: String, -} - -async fn fetch_release_artifacts_graphql_api( - client: &remote::Client, - GhRelease { owner, repo, tag }: &GhRelease, - auth_token: &str, -) -> Result { - static GRAPHQL_ENDPOINT: OnceLock = OnceLock::new(); - - let graphql_endpoint = GRAPHQL_ENDPOINT.get_or_init(|| { - Url::parse("https://api.github.com/graphql").expect("Literal provided must be a valid url") - }); - - let mut artifacts = Artifacts::default(); - let mut cond = FilterCondition::Init; - - loop { - let query = format!( - r#" -query {{ - repository(owner:"{owner}",name:"{repo}") {{ - release(tagName:"{tag}") {{ - releaseAssets({cond}) {{ - nodes {{ name }} - pageInfo {{ endCursor hasNextPage }} - }} - }} - }} -}}"# - ); - - let graphql_query = to_json_string(&GraphQLQuery { query }).map_err(remote::Error::from)?; - - debug!("Sending graphql query to https://api.github.com/graphql: '{graphql_query}'"); - - let request_builder = client - .post(graphql_endpoint.clone(), graphql_query) - .header("Accept", "application/vnd.github+json") - .bearer_auth(&auth_token); - - let response = request_builder.send(false).await?; - - if let Some(ret) = check_for_status(response.status(), response.headers()) { - return Ok(ret); - } - - let response: GraphQLResponse = response.json().await?; - - let data = match response { - GraphQLResponse::Data(data) => data, - GraphQLResponse::Errors(errors) if errors.is_rate_limited() => { - return Ok(FetchReleaseRet::ReachedRateLimit { retry_after: None }) - } - GraphQLResponse::Errors(errors) => return Err(errors.into()), - }; - - let assets = data - .repository - .and_then(|repository| repository.release) - .map(|release| release.assets); - - if let Some(assets) = assets { - artifacts.assets.extend(assets.nodes); - - match assets.page_info { - GraphQLPageInfo { - end_cursor: Some(end_cursor), - has_next_page: true, - } => { - cond = 
FilterCondition::After(end_cursor); - } - _ => break Ok(FetchReleaseRet::Artifacts(artifacts)), - } - } else { - break Ok(FetchReleaseRet::ReleaseNotFound); - } - } -} - -pub(super) async fn fetch_release_artifacts( - client: &remote::Client, - release: &GhRelease, - auth_token: Option<&str>, -) -> Result { - if let Some(auth_token) = auth_token { - let res = fetch_release_artifacts_graphql_api(client, release, auth_token) - .await - .map_err(|err| err.context("GraphQL API")); - - match res { - // Fallback to Restful API - Ok(FetchReleaseRet::Unauthorized) => (), - res => return res, - } - } - - fetch_release_artifacts_restful_api(client, release, auth_token) - .await - .map_err(|err| err.context("Restful API")) -} - -#[cfg(test)] -mod test { - use super::*; - use serde::de::value::{BorrowedStrDeserializer, Error}; - - macro_rules! assert_matches { - ($expression:expr, $pattern:pat $(if $guard:expr)? $(,)?) => { - match $expression { - $pattern $(if $guard)? => true, - expr => { - panic!( - "assertion failed: `{expr:?}` does not match `{}`", - stringify!($pattern $(if $guard)?) - ) - } - } - } - } - - #[test] - fn test_graph_ql_error_type() { - let deserialize = |input: &str| { - GraphQLErrorType::deserialize(BorrowedStrDeserializer::<'_, Error>::new(input)).unwrap() - }; - - assert_matches!(deserialize("RATE_LIMITED"), GraphQLErrorType::RateLimited); - assert_matches!( - deserialize("rATE_LIMITED"), - GraphQLErrorType::Other(val) if val == CompactString::new("rATE_LIMITED") - ); - } -} diff --git a/crates/binstalk-downloader/src/lib.rs b/crates/binstalk-downloader/src/lib.rs index c4a23c84..1e4cfad7 100644 --- a/crates/binstalk-downloader/src/lib.rs +++ b/crates/binstalk-downloader/src/lib.rs @@ -1,15 +1,6 @@ #![cfg_attr(docsrs, feature(doc_auto_cfg))] pub use bytes; - pub mod download; - -/// Github API client. -/// Currently only support github.com and does not support other enterprise -/// github. 
-#[cfg(feature = "gh-api-client")] -pub mod gh_api_client; - pub mod remote; - mod utils; diff --git a/crates/binstalk-fetchers/Cargo.toml b/crates/binstalk-fetchers/Cargo.toml index 5f48a7b2..67816eae 100644 --- a/crates/binstalk-fetchers/Cargo.toml +++ b/crates/binstalk-fetchers/Cargo.toml @@ -12,7 +12,8 @@ license = "GPL-3.0-only" [dependencies] async-trait = "0.1.68" -binstalk-downloader = { version = "0.10.3", path = "../binstalk-downloader", default-features = false, features = ["gh-api-client"] } +binstalk-downloader = { version = "0.10.3", path = "../binstalk-downloader", default-features = false } +binstalk-git-repo-api = { version = "0.0.0", path = "../binstalk-git-repo-api" } binstalk-types = { version = "0.7.0", path = "../binstalk-types" } bytes = "1.4.0" compact_str = { version = "0.7.0" } @@ -25,7 +26,10 @@ minisign-verify = "0.2.1" once_cell = "1.18.0" strum = "0.26.1" thiserror = "1.0.61" -tokio = { version = "1.35.0", features = ["rt", "sync"], default-features = false } +tokio = { version = "1.35.0", features = [ + "rt", + "sync", +], default-features = false } tracing = "0.1.39" url = "2.3.1" diff --git a/crates/binstalk-fetchers/src/common.rs b/crates/binstalk-fetchers/src/common.rs index ae789929..b3a491ae 100644 --- a/crates/binstalk-fetchers/src/common.rs +++ b/crates/binstalk-fetchers/src/common.rs @@ -3,12 +3,12 @@ use std::sync::{ Once, }; -use binstalk_downloader::gh_api_client::{GhReleaseArtifact, HasReleaseArtifact}; pub(super) use binstalk_downloader::{ download::{Download, ExtractedFiles}, - gh_api_client::GhApiClient, remote::{Client, Url}, }; +pub(super) use binstalk_git_repo_api::gh_api_client::GhApiClient; +use binstalk_git_repo_api::gh_api_client::{GhApiError, GhReleaseArtifact, GhReleaseArtifactUrl}; pub(super) use binstalk_types::cargo_toml_binstall::{PkgFmt, PkgMeta}; pub(super) use compact_str::CompactString; pub(super) use tokio::task::JoinHandle; @@ -16,6 +16,39 @@ pub(super) use tracing::{debug, instrument, warn}; use 
crate::FetchError; +static WARN_RATE_LIMIT_ONCE: Once = Once::new(); +static WARN_UNAUTHORIZED_ONCE: Once = Once::new(); + +pub(super) async fn get_gh_release_artifact_url( + gh_api_client: GhApiClient, + artifact: GhReleaseArtifact, +) -> Result, GhApiError> { + debug!("Using GitHub API to check for existence of artifact, which will also cache the API response"); + + // The future returned has the same size as a pointer + match gh_api_client.has_release_artifact(artifact).await { + Ok(ret) => Ok(ret), + Err(GhApiError::NotFound) => Ok(None), + + Err(GhApiError::RateLimit { retry_after }) => { + WARN_RATE_LIMIT_ONCE.call_once(|| { + warn!("Your GitHub API token (if any) has reached its rate limit and cannot be used again until {retry_after:?}, so we will fallback to HEAD/GET on the url."); + warn!("If you did not supply a github token, consider doing so: GitHub limits unauthorized users to 60 requests per hour per origin IP address."); + }); + Err(GhApiError::RateLimit { retry_after }) + } + Err(GhApiError::Unauthorized) => { + WARN_UNAUTHORIZED_ONCE.call_once(|| { + warn!("GitHub API somehow requires a token for the API access, so we will fallback to HEAD/GET on the url."); + warn!("Please consider supplying a token to cargo-binstall to speedup resolution."); + }); + Err(GhApiError::Unauthorized) + } + + Err(err) => Err(err), + } +} + /// This function returns a future where its size should be at most size of /// 2-4 pointers. 
pub(super) async fn does_url_exist( @@ -24,32 +57,17 @@ pub(super) async fn does_url_exist( url: &Url, ) -> Result { static GH_API_CLIENT_FAILED: AtomicBool = AtomicBool::new(false); - static WARN_RATE_LIMIT_ONCE: Once = Once::new(); - static WARN_UNAUTHORIZED_ONCE: Once = Once::new(); debug!("Checking for package at: '{url}'"); if !GH_API_CLIENT_FAILED.load(Relaxed) { if let Some(artifact) = GhReleaseArtifact::try_extract_from_url(url) { - debug!("Using GitHub API to check for existence of artifact, which will also cache the API response"); + match get_gh_release_artifact_url(gh_api_client, artifact).await { + Ok(ret) => return Ok(ret.is_some()), - // The future returned has the same size as a pointer - match gh_api_client.has_release_artifact(artifact).await? { - HasReleaseArtifact::Yes => return Ok(true), - HasReleaseArtifact::No | HasReleaseArtifact::NoSuchRelease => return Ok(false), + Err(GhApiError::RateLimit { .. }) | Err(GhApiError::Unauthorized) => {} - HasReleaseArtifact::RateLimit { retry_after } => { - WARN_RATE_LIMIT_ONCE.call_once(|| { - warn!("Your GitHub API token (if any) has reached its rate limit and cannot be used again until {retry_after:?}, so we will fallback to HEAD/GET on the url."); - warn!("If you did not supply a github token, consider doing so: GitHub limits unauthorized users to 60 requests per hour per origin IP address."); - }); - } - HasReleaseArtifact::Unauthorized => { - WARN_UNAUTHORIZED_ONCE.call_once(|| { - warn!("GitHub API somehow requires a token for the API access, so we will fallback to HEAD/GET on the url."); - warn!("Please consider supplying a token to cargo-binstall to speedup resolution."); - }); - } + Err(err) => return Err(err.into()), } GH_API_CLIENT_FAILED.store(true, Relaxed); diff --git a/crates/binstalk-fetchers/src/gh_crate_meta.rs b/crates/binstalk-fetchers/src/gh_crate_meta.rs index 68c66f00..d2d85f50 100644 --- a/crates/binstalk-fetchers/src/gh_crate_meta.rs +++ 
b/crates/binstalk-fetchers/src/gh_crate_meta.rs @@ -1,16 +1,18 @@ use std::{borrow::Cow, fmt, iter, path::Path, sync::Arc}; +use binstalk_git_repo_api::gh_api_client::{GhApiError, GhReleaseArtifact, GhReleaseArtifactUrl}; use compact_str::{CompactString, ToCompactString}; use either::Either; use leon::Template; use once_cell::sync::OnceCell; use strum::IntoEnumIterator; +use tokio::time::sleep; use tracing::{debug, info, trace, warn}; use url::Url; use crate::{ common::*, futures_resolver::FuturesResolver, Data, FetchError, InvalidPkgFmtError, RepoInfo, - SignaturePolicy, SignatureVerifier, TargetDataErased, + SignaturePolicy, SignatureVerifier, TargetDataErased, DEFAULT_GH_API_RETRY_DURATION, }; pub(crate) mod hosting; @@ -31,6 +33,8 @@ struct Resolved { archive_suffix: Option, repo: Option, subcrate: Option, + gh_release_artifact_url: Option, + is_repo_private: bool, } impl GhCrateMeta { @@ -41,6 +45,7 @@ impl GhCrateMeta { pkg_url: &Template<'_>, repo: Option<&str>, subcrate: Option<&str>, + is_repo_private: bool, ) { let render_url = |ext| { let ctx = Context::from_data_with_repo( @@ -82,16 +87,45 @@ impl GhCrateMeta { let repo = repo.map(ToString::to_string); let subcrate = subcrate.map(ToString::to_string); let archive_suffix = ext.map(ToString::to_string); + let gh_release_artifact = GhReleaseArtifact::try_extract_from_url(&url); + async move { - Ok(does_url_exist(client, gh_api_client, &url) + debug!("Checking for package at: '{url}'"); + + let mut resolved = Resolved { + url: url.clone(), + pkg_fmt, + repo, + subcrate, + archive_suffix, + is_repo_private, + gh_release_artifact_url: None, + }; + + if let Some(artifact) = gh_release_artifact { + loop { + match get_gh_release_artifact_url(gh_api_client.clone(), artifact.clone()) + .await + { + Ok(Some(artifact_url)) => { + resolved.gh_release_artifact_url = Some(artifact_url); + return Ok(Some(resolved)); + } + Ok(None) => return Ok(None), + + Err(GhApiError::RateLimit { retry_after }) => { + 
sleep(retry_after.unwrap_or(DEFAULT_GH_API_RETRY_DURATION)).await; + } + Err(GhApiError::Unauthorized) if !is_repo_private => break, + + Err(err) => return Err(err.into()), + } + } + } + + Ok(Box::pin(client.remote_gettable(url)) .await? - .then_some(Resolved { - url, - pkg_fmt, - repo, - subcrate, - archive_suffix, - })) + .then_some(resolved)) } })); } @@ -118,10 +152,11 @@ impl super::Fetcher for GhCrateMeta { fn find(self: Arc) -> JoinHandle> { tokio::spawn(async move { - let info = self.data.get_repo_info(&self.client).await?.as_ref(); + let info = self.data.get_repo_info(&self.gh_api_client).await?; let repo = info.map(|info| &info.repo); let subcrate = info.and_then(|info| info.subcrate.as_deref()); + let is_repo_private = info.map(|info| info.is_private).unwrap_or_default(); let mut pkg_fmt = self.target_data.meta.pkg_fmt; @@ -230,13 +265,22 @@ impl super::Fetcher for GhCrateMeta { // basically cartesian product. // | for pkg_fmt in pkg_fmts.clone() { - this.launch_baseline_find_tasks(&resolver, pkg_fmt, &pkg_url, repo, subcrate); + this.launch_baseline_find_tasks( + &resolver, + pkg_fmt, + &pkg_url, + repo, + subcrate, + is_repo_private, + ); } } if let Some(resolved) = resolver.resolve().await? 
{ debug!(?resolved, "Winning URL found!"); - self.resolution.set(resolved).unwrap(); // find() is called first + self.resolution + .set(resolved) + .expect("find() should be only called once"); Ok(true) } else { Ok(false) @@ -245,7 +289,10 @@ impl super::Fetcher for GhCrateMeta { } async fn fetch_and_extract(&self, dst: &Path) -> Result { - let resolved = self.resolution.get().unwrap(); // find() is called first + let resolved = self + .resolution + .get() + .expect("find() should be called once before fetch_and_extract()"); trace!(?resolved, "preparing to fetch"); let verifier = match (self.signature_policy, &self.target_data.meta.signing) { @@ -290,11 +337,18 @@ impl super::Fetcher for GhCrateMeta { "Downloading package", ); let mut data_verifier = verifier.data_verifier()?; - let files = Download::new_with_data_verifier( - self.client.clone(), - resolved.url.clone(), - data_verifier.as_mut(), - ) + let files = match resolved.gh_release_artifact_url.as_ref() { + Some(artifact_url) if resolved.is_repo_private => self + .gh_api_client + .download_artifact(artifact_url.clone()) + .await? 
+ .with_data_verifier(data_verifier.as_mut()), + _ => Download::new_with_data_verifier( + self.client.clone(), + resolved.url.clone(), + data_verifier.as_mut(), + ), + } .and_extract(resolved.pkg_fmt, dst) .await?; trace!("validating signature (if any)"); diff --git a/crates/binstalk-fetchers/src/lib.rs b/crates/binstalk-fetchers/src/lib.rs index a742e87e..688bfbbb 100644 --- a/crates/binstalk-fetchers/src/lib.rs +++ b/crates/binstalk-fetchers/src/lib.rs @@ -1,13 +1,12 @@ #![cfg_attr(docsrs, feature(doc_auto_cfg))] -use std::{path::Path, sync::Arc}; +use std::{path::Path, sync::Arc, time::Duration}; -use binstalk_downloader::{ - download::DownloadError, gh_api_client::GhApiError, remote::Error as RemoteError, -}; +use binstalk_downloader::{download::DownloadError, remote::Error as RemoteError}; +use binstalk_git_repo_api::gh_api_client::{GhApiError, GhRepo}; use binstalk_types::cargo_toml_binstall::SigningAlgorithm; use thiserror::Error as ThisError; -use tokio::sync::OnceCell; +use tokio::{sync::OnceCell, time::sleep}; pub use url::ParseError as UrlParseError; mod gh_crate_meta; @@ -28,6 +27,8 @@ mod futures_resolver; use gh_crate_meta::hosting::RepositoryHost; +static DEFAULT_GH_API_RETRY_DURATION: Duration = Duration::from_secs(1); + #[derive(Debug, ThisError)] #[error("Invalid pkg-url {pkg_url} for {crate_name}@{version} on {target}: {reason}")] pub struct InvalidPkgFmtError { @@ -145,6 +146,7 @@ struct RepoInfo { repo: Url, repository_host: RepositoryHost, subcrate: Option, + is_private: bool, } /// What to do about package signatures @@ -180,29 +182,61 @@ impl Data { } #[instrument(level = "debug")] - async fn get_repo_info(&self, client: &Client) -> Result<&Option, FetchError> { + async fn get_repo_info(&self, client: &GhApiClient) -> Result, FetchError> { self.repo_info .get_or_try_init(move || { Box::pin(async move { - if let Some(repo) = self.repo.as_deref() { - let mut repo = client.get_redirected_final_url(Url::parse(repo)?).await?; - let 
repository_host = RepositoryHost::guess_git_hosting_services(&repo); + let Some(repo) = self.repo.as_deref() else { + return Ok(None); + }; - let repo_info = RepoInfo { - subcrate: RepoInfo::detect_subcrate(&mut repo, repository_host), - repo, - repository_host, - }; + let mut repo = Url::parse(repo)?; + let mut repository_host = RepositoryHost::guess_git_hosting_services(&repo); - debug!("Resolved repo_info = {repo_info:#?}"); - - Ok(Some(repo_info)) - } else { - Ok(None) + if repository_host == RepositoryHost::Unknown { + repo = client + .remote_client() + .get_redirected_final_url(repo) + .await?; + repository_host = RepositoryHost::guess_git_hosting_services(&repo); } + + let subcrate = RepoInfo::detect_subcrate(&mut repo, repository_host); + + let mut is_private = false; + if repository_host == RepositoryHost::GitHub && client.has_gh_token() { + if let Some(gh_repo) = GhRepo::try_extract_from_url(&repo) { + loop { + match client.get_repo_info(&gh_repo).await { + Ok(Some(gh_repo_info)) => { + is_private = gh_repo_info.is_private(); + break; + } + Ok(None) => return Err(GhApiError::NotFound.into()), + Err(GhApiError::RateLimit { retry_after }) => { + sleep(retry_after.unwrap_or(DEFAULT_GH_API_RETRY_DURATION)) + .await + } + Err(err) => return Err(err.into()), + } + } + } + } + + let repo_info = RepoInfo { + subcrate, + repo, + repository_host, + is_private, + }; + + debug!("Resolved repo_info = {repo_info:#?}"); + + Ok(Some(repo_info)) }) }) .await + .map(Option::as_ref) } } diff --git a/crates/binstalk-git-repo-api/Cargo.toml b/crates/binstalk-git-repo-api/Cargo.toml new file mode 100644 index 00000000..32a7279e --- /dev/null +++ b/crates/binstalk-git-repo-api/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "binstalk-git-repo-api" +description = "The binstall toolkit for accessing API for git repository" +repository = "https://github.com/cargo-bins/cargo-binstall" +documentation = "https://docs.rs/binstalk-git-repo-api" +version = "0.0.0" +rust-version = 
"1.70.0" +authors = ["Jiahao XU "] +edition = "2021" +license = "Apache-2.0 OR MIT" + +[dependencies] +binstalk-downloader = { version = "0.10.3", path = "../binstalk-downloader", default-features = false, features = [ + "json", +] } +compact_str = "0.7.0" +percent-encoding = "2.2.0" +serde = { version = "1.0.163", features = ["derive"] } +serde-tuple-vec-map = "1.0.1" +serde_json = { version = "1.0.107" } +thiserror = "1.0.52" +tokio = { version = "1.35.0", features = ["sync"], default-features = false } +tracing = "0.1.39" +url = "2.3.1" + +[dev-dependencies] +binstalk-downloader = { version = "0.10.3", path = "../binstalk-downloader" } +tracing-subscriber = "0.3" diff --git a/crates/binstalk-git-repo-api/LICENSE-APACHE b/crates/binstalk-git-repo-api/LICENSE-APACHE new file mode 100644 index 00000000..1b5ec8b7 --- /dev/null +++ b/crates/binstalk-git-repo-api/LICENSE-APACHE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS diff --git a/crates/binstalk-git-repo-api/LICENSE-MIT b/crates/binstalk-git-repo-api/LICENSE-MIT new file mode 100644 index 00000000..31aa7938 --- /dev/null +++ b/crates/binstalk-git-repo-api/LICENSE-MIT @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/crates/binstalk-git-repo-api/src/gh_api_client.rs b/crates/binstalk-git-repo-api/src/gh_api_client.rs new file mode 100644 index 00000000..d7238dd6 --- /dev/null +++ b/crates/binstalk-git-repo-api/src/gh_api_client.rs @@ -0,0 +1,703 @@ +use std::{ + collections::HashMap, + future::Future, + ops::Deref, + sync::{ + atomic::{AtomicBool, Ordering::Relaxed}, + Arc, Mutex, RwLock, + }, + time::{Duration, Instant}, +}; + +use binstalk_downloader::{download::Download, remote}; +use compact_str::{format_compact, CompactString, ToCompactString}; +use tokio::sync::OnceCell; +use tracing::instrument; +use url::Url; + +mod common; +mod error; +mod release_artifacts; +mod repo_info; + +use common::{check_http_status_and_header, percent_decode_http_url_path}; +pub use error::{GhApiContextError, GhApiError, GhGraphQLErrors}; +pub use repo_info::RepoInfo; + +/// default retry duration if x-ratelimit-reset is not found in response header +const DEFAULT_RETRY_DURATION: Duration = Duration::from_secs(10 * 60); + +#[derive(Clone, Eq, PartialEq, Hash, Debug)] +pub struct GhRepo { + pub owner: CompactString, + pub repo: CompactString, +} +impl GhRepo { + pub fn repo_url(&self) -> Result { + Url::parse(&format_compact!( + "https://github.com/{}/{}", + self.owner, + self.repo + )) + } + + pub fn try_extract_from_url(url: &Url) -> Option { + if url.domain() != Some("github.com") { + return None; + } + + let mut path_segments = url.path_segments()?; + + Some(Self { + owner: path_segments.next()?.to_compact_string(), + repo: path_segments.next()?.to_compact_string(), + }) + } +} + +/// The keys required to identify a github release. +#[derive(Clone, Eq, PartialEq, Hash, Debug)] +pub struct GhRelease { + pub repo: GhRepo, + pub tag: CompactString, +} + +/// The Github Release and one of its artifact. 
+#[derive(Clone, Eq, PartialEq, Hash, Debug)] +pub struct GhReleaseArtifact { + pub release: GhRelease, + pub artifact_name: CompactString, +} + +impl GhReleaseArtifact { + /// Create [`GhReleaseArtifact`] from url. + pub fn try_extract_from_url(url: &remote::Url) -> Option { + if url.domain() != Some("github.com") { + return None; + } + + let mut path_segments = url.path_segments()?; + + let owner = path_segments.next()?; + let repo = path_segments.next()?; + + if (path_segments.next()?, path_segments.next()?) != ("releases", "download") { + return None; + } + + let tag = path_segments.next()?; + let artifact_name = path_segments.next()?; + + (path_segments.next().is_none() && url.fragment().is_none() && url.query().is_none()).then( + || Self { + release: GhRelease { + repo: GhRepo { + owner: percent_decode_http_url_path(owner), + repo: percent_decode_http_url_path(repo), + }, + tag: percent_decode_http_url_path(tag), + }, + artifact_name: percent_decode_http_url_path(artifact_name), + }, + ) + } +} + +#[derive(Debug)] +struct Map(RwLock>>); + +impl Default for Map { + fn default() -> Self { + Self(Default::default()) + } +} + +impl Map +where + K: Eq + std::hash::Hash, + V: Default, +{ + fn get(&self, k: K) -> Arc { + let optional_value = self.0.read().unwrap().deref().get(&k).cloned(); + optional_value.unwrap_or_else(|| Arc::clone(self.0.write().unwrap().entry(k).or_default())) + } +} + +#[derive(Debug)] +struct Inner { + client: remote::Client, + release_artifacts: Map>>, + retry_after: Mutex>, + + auth_token: Option, + is_auth_token_valid: AtomicBool, +} + +/// Github API client for querying whether a release artifact exists. +/// Can only handle github.com for now. 
+#[derive(Clone, Debug)] +pub struct GhApiClient(Arc); + +impl GhApiClient { + pub fn new(client: remote::Client, auth_token: Option) -> Self { + Self(Arc::new(Inner { + client, + release_artifacts: Default::default(), + retry_after: Default::default(), + + auth_token, + is_auth_token_valid: AtomicBool::new(true), + })) + } + + pub fn remote_client(&self) -> &remote::Client { + &self.0.client + } +} + +impl GhApiClient { + fn check_retry_after(&self) -> Result<(), GhApiError> { + let mut guard = self.0.retry_after.lock().unwrap(); + + if let Some(retry_after) = *guard { + if retry_after.elapsed().is_zero() { + return Err(GhApiError::RateLimit { + retry_after: Some(retry_after - Instant::now()), + }); + } else { + // Instant retry_after is already reached. + *guard = None; + } + } + + Ok(()) + } + + fn get_auth_token(&self) -> Option<&str> { + if self.0.is_auth_token_valid.load(Relaxed) { + self.0.auth_token.as_deref() + } else { + None + } + } + + pub fn has_gh_token(&self) -> bool { + self.get_auth_token().is_some() + } + + async fn do_fetch( + &self, + graphql_func: GraphQLFn, + restful_func: RestfulFn, + data: &T, + ) -> Result + where + GraphQLFn: Fn(&remote::Client, &T, &str) -> GraphQLFut, + RestfulFn: Fn(&remote::Client, &T) -> RestfulFut, + GraphQLFut: Future> + Send + Sync + 'static, + RestfulFut: Future> + Send + Sync + 'static, + { + self.check_retry_after()?; + + if let Some(auth_token) = self.get_auth_token() { + match graphql_func(&self.0.client, data, auth_token).await { + Err(GhApiError::Unauthorized) => { + self.0.is_auth_token_valid.store(false, Relaxed); + } + res => return res.map_err(|err| err.context("GraphQL API")), + } + } + + restful_func(&self.0.client, data) + .await + .map_err(|err| err.context("Restful API")) + } + + #[instrument(level = "debug", skip(self), ret)] + pub async fn get_repo_info(&self, repo: &GhRepo) -> Result, GhApiError> { + match self + .do_fetch( + repo_info::fetch_repo_info_graphql_api, + 
repo_info::fetch_repo_info_restful_api, + repo, + ) + .await + { + Ok(repo_info) => Ok(repo_info), + Err(GhApiError::NotFound) => Ok(None), + Err(err) => Err(err), + } + } +} + +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub struct GhReleaseArtifactUrl(Url); + +impl GhApiClient { + /// Return `Ok(Some(api_artifact_url))` if exists. + /// + /// The returned future is guaranteed to be pointer size. + #[instrument(level = "debug", skip(self), ret)] + pub async fn has_release_artifact( + &self, + GhReleaseArtifact { + release, + artifact_name, + }: GhReleaseArtifact, + ) -> Result, GhApiError> { + let once_cell = self.0.release_artifacts.get(release.clone()); + let res = once_cell + .get_or_try_init(|| { + Box::pin(async { + match self + .do_fetch( + release_artifacts::fetch_release_artifacts_graphql_api, + release_artifacts::fetch_release_artifacts_restful_api, + &release, + ) + .await + { + Ok(artifacts) => Ok(Some(artifacts)), + Err(GhApiError::NotFound) => Ok(None), + Err(err) => Err(err), + } + }) + }) + .await; + + match res { + Ok(Some(artifacts)) => Ok(artifacts + .get_artifact_url(&artifact_name) + .map(GhReleaseArtifactUrl)), + Ok(None) => Ok(None), + Err(GhApiError::RateLimit { retry_after }) => { + *self.0.retry_after.lock().unwrap() = + Some(Instant::now() + retry_after.unwrap_or(DEFAULT_RETRY_DURATION)); + + Err(GhApiError::RateLimit { retry_after }) + } + Err(err) => Err(err), + } + } + + pub async fn download_artifact( + &self, + artifact_url: GhReleaseArtifactUrl, + ) -> Result, GhApiError> { + self.check_retry_after()?; + + let Some(auth_token) = self.get_auth_token() else { + return Err(GhApiError::Unauthorized); + }; + + let response = self + .0 + .client + .get(artifact_url.0) + .header("Accept", "application/octet-stream") + .bearer_auth(&auth_token) + .send(false) + .await?; + + match check_http_status_and_header(&response) { + Err(GhApiError::Unauthorized) => { + self.0.is_auth_token_valid.store(false, Relaxed); + } + res => res?, + } + + 
Ok(Download::from_response(response)) + } +} + +#[cfg(test)] +mod test { + use super::*; + use compact_str::{CompactString, ToCompactString}; + use std::{env, num::NonZeroU16, time::Duration}; + use tokio::time::sleep; + use tracing::subscriber::set_global_default; + use tracing_subscriber::{filter::LevelFilter, fmt::fmt}; + + static DEFAULT_RETRY_AFTER: Duration = Duration::from_secs(1); + + mod cargo_binstall_v0_20_1 { + use super::{CompactString, GhRelease, GhRepo}; + + pub(super) const RELEASE: GhRelease = GhRelease { + repo: GhRepo { + owner: CompactString::new_inline("cargo-bins"), + repo: CompactString::new_inline("cargo-binstall"), + }, + tag: CompactString::new_inline("v0.20.1"), + }; + + pub(super) const ARTIFACTS: &[&str] = &[ + "cargo-binstall-aarch64-apple-darwin.full.zip", + "cargo-binstall-aarch64-apple-darwin.zip", + "cargo-binstall-aarch64-pc-windows-msvc.full.zip", + "cargo-binstall-aarch64-pc-windows-msvc.zip", + "cargo-binstall-aarch64-unknown-linux-gnu.full.tgz", + "cargo-binstall-aarch64-unknown-linux-gnu.tgz", + "cargo-binstall-aarch64-unknown-linux-musl.full.tgz", + "cargo-binstall-aarch64-unknown-linux-musl.tgz", + "cargo-binstall-armv7-unknown-linux-gnueabihf.full.tgz", + "cargo-binstall-armv7-unknown-linux-gnueabihf.tgz", + "cargo-binstall-armv7-unknown-linux-musleabihf.full.tgz", + "cargo-binstall-armv7-unknown-linux-musleabihf.tgz", + "cargo-binstall-universal-apple-darwin.full.zip", + "cargo-binstall-universal-apple-darwin.zip", + "cargo-binstall-x86_64-apple-darwin.full.zip", + "cargo-binstall-x86_64-apple-darwin.zip", + "cargo-binstall-x86_64-pc-windows-msvc.full.zip", + "cargo-binstall-x86_64-pc-windows-msvc.zip", + "cargo-binstall-x86_64-unknown-linux-gnu.full.tgz", + "cargo-binstall-x86_64-unknown-linux-gnu.tgz", + "cargo-binstall-x86_64-unknown-linux-musl.full.tgz", + "cargo-binstall-x86_64-unknown-linux-musl.tgz", + ]; + } + + mod cargo_audit_v_0_17_6 { + use super::*; + + pub(super) const RELEASE: GhRelease = GhRelease { + 
repo: GhRepo { + owner: CompactString::new_inline("rustsec"), + repo: CompactString::new_inline("rustsec"), + }, + tag: CompactString::new_inline("cargo-audit/v0.17.6"), + }; + + pub(super) const ARTIFACTS: &[&str] = &[ + "cargo-audit-aarch64-unknown-linux-gnu-v0.17.6.tgz", + "cargo-audit-armv7-unknown-linux-gnueabihf-v0.17.6.tgz", + "cargo-audit-x86_64-apple-darwin-v0.17.6.tgz", + "cargo-audit-x86_64-pc-windows-msvc-v0.17.6.zip", + "cargo-audit-x86_64-unknown-linux-gnu-v0.17.6.tgz", + "cargo-audit-x86_64-unknown-linux-musl-v0.17.6.tgz", + ]; + + #[test] + fn extract_with_escaped_characters() { + let release_artifact = try_extract_artifact_from_str( +"https://github.com/rustsec/rustsec/releases/download/cargo-audit%2Fv0.17.6/cargo-audit-aarch64-unknown-linux-gnu-v0.17.6.tgz" + ).unwrap(); + + assert_eq!( + release_artifact, + GhReleaseArtifact { + release: RELEASE, + artifact_name: CompactString::from( + "cargo-audit-aarch64-unknown-linux-gnu-v0.17.6.tgz", + ) + } + ); + } + } + + #[test] + fn gh_repo_extract_from_and_to_url() { + [ + "https://github.com/cargo-bins/cargo-binstall", + "https://github.com/rustsec/rustsec", + ] + .into_iter() + .for_each(|url| { + let url = Url::parse(&url).unwrap(); + assert_eq!( + GhRepo::try_extract_from_url(&url) + .unwrap() + .repo_url() + .unwrap(), + url + ); + }) + } + + fn try_extract_artifact_from_str(s: &str) -> Option { + GhReleaseArtifact::try_extract_from_url(&url::Url::parse(s).unwrap()) + } + + fn assert_extract_gh_release_artifacts_failures(urls: &[&str]) { + for url in urls { + assert_eq!(try_extract_artifact_from_str(url), None); + } + } + + #[test] + fn extract_gh_release_artifacts_failure() { + use cargo_binstall_v0_20_1::*; + + let GhRelease { + repo: GhRepo { owner, repo }, + tag, + } = RELEASE; + + assert_extract_gh_release_artifacts_failures(&[ + "https://examle.com", + "https://github.com", + &format!("https://github.com/{owner}"), + &format!("https://github.com/{owner}/{repo}"), + 
&format!("https://github.com/{owner}/{repo}/123e"), + &format!("https://github.com/{owner}/{repo}/releases/21343"), + &format!("https://github.com/{owner}/{repo}/releases/download"), + &format!("https://github.com/{owner}/{repo}/releases/download/{tag}"), + &format!("https://github.com/{owner}/{repo}/releases/download/{tag}/a/23"), + &format!("https://github.com/{owner}/{repo}/releases/download/{tag}/a#a=12"), + &format!("https://github.com/{owner}/{repo}/releases/download/{tag}/a?page=3"), + ]); + } + + #[test] + fn extract_gh_release_artifacts_success() { + use cargo_binstall_v0_20_1::*; + + let GhRelease { + repo: GhRepo { owner, repo }, + tag, + } = RELEASE; + + for artifact in ARTIFACTS { + let GhReleaseArtifact { + release, + artifact_name, + } = try_extract_artifact_from_str(&format!( + "https://github.com/{owner}/{repo}/releases/download/{tag}/{artifact}" + )) + .unwrap(); + + assert_eq!(release, RELEASE); + assert_eq!(artifact_name, artifact); + } + } + + fn init_logger() { + // Disable time, target, file, line_num, thread name/ids to make the + // output more readable + let subscriber = fmt() + .without_time() + .with_target(false) + .with_file(false) + .with_line_number(false) + .with_thread_names(false) + .with_thread_ids(false) + .with_test_writer() + .with_max_level(LevelFilter::DEBUG) + .finish(); + + // Setup global subscriber + let _ = set_global_default(subscriber); + } + + fn create_remote_client() -> remote::Client { + remote::Client::new( + concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")), + None, + NonZeroU16::new(10).unwrap(), + 1.try_into().unwrap(), + [], + ) + .unwrap() + } + + /// Mark this as an async fn so that you won't accidentally use it in + /// sync context. 
+ fn create_client() -> Vec { + let client = create_remote_client(); + + let mut gh_clients = vec![GhApiClient::new(client.clone(), None)]; + + if let Ok(token) = env::var("GITHUB_TOKEN") { + gh_clients.push(GhApiClient::new(client, Some(token.into()))); + } + + gh_clients + } + + #[tokio::test] + async fn test_get_repo_info() { + const PUBLIC_REPOS: [GhRepo; 1] = [GhRepo { + owner: CompactString::new_inline("cargo-bins"), + repo: CompactString::new_inline("cargo-binstall"), + }]; + const PRIVATE_REPOS: [GhRepo; 1] = [GhRepo { + owner: CompactString::new_inline("cargo-bins"), + repo: CompactString::new_inline("private-repo-for-testing"), + }]; + const NON_EXISTENT_REPOS: [GhRepo; 1] = [GhRepo { + owner: CompactString::new_inline("cargo-bins"), + repo: CompactString::new_inline("ttt"), + }]; + + init_logger(); + + let mut tests: Vec<(_, _)> = Vec::new(); + + for client in create_client() { + for repo in PUBLIC_REPOS { + let client = client.clone(); + + tests.push(( + Some(RepoInfo::new(repo.clone(), false)), + tokio::spawn(async move { client.get_repo_info(&repo).await }), + )); + } + + for repo in NON_EXISTENT_REPOS { + let client = client.clone(); + + tests.push(( + None, + tokio::spawn(async move { client.get_repo_info(&repo).await }), + )); + } + + if client.has_gh_token() { + for repo in PRIVATE_REPOS { + let client = client.clone(); + + tests.push(( + Some(RepoInfo::new(repo.clone(), true)), + tokio::spawn(async move { + loop { + match client.get_repo_info(&repo).await { + Err(GhApiError::RateLimit { retry_after }) => { + sleep(retry_after.unwrap_or(DEFAULT_RETRY_AFTER)).await + } + res => break res, + } + } + }), + )); + } + } + } + + for (expected, task) in tests { + assert_eq!(task.await.unwrap().unwrap(), expected); + } + } + + #[tokio::test] + async fn test_has_release_artifact_and_download_artifacts() { + const RELEASES: [(GhRelease, &[&str]); 2] = [ + ( + cargo_binstall_v0_20_1::RELEASE, + cargo_binstall_v0_20_1::ARTIFACTS, + ), + ( + 
cargo_audit_v_0_17_6::RELEASE, + cargo_audit_v_0_17_6::ARTIFACTS, + ), + ]; + const NON_EXISTENT_RELEASES: [GhRelease; 1] = [GhRelease { + repo: GhRepo { + owner: CompactString::new_inline("cargo-bins"), + repo: CompactString::new_inline("cargo-binstall"), + }, + // We are currently at v0.20.1 and we would never release + // anything older than v0.20.1 + tag: CompactString::new_inline("v0.18.2"), + }]; + + init_logger(); + + let mut tasks = Vec::new(); + + for client in create_client() { + for (release, artifacts) in RELEASES { + for artifact_name in artifacts { + let client = client.clone(); + let release = release.clone(); + tasks.push(tokio::spawn(async move { + let artifact = GhReleaseArtifact { + release, + artifact_name: artifact_name.to_compact_string(), + }; + + let browser_download_task = client.get_auth_token().map(|_| { + tokio::spawn( + Download::new( + client.remote_client().clone(), + Url::parse(&format!( + "https://github.com/{}/{}/releases/download/{}/{}", + artifact.release.repo.owner, + artifact.release.repo.repo, + artifact.release.tag, + artifact.artifact_name, + )) + .unwrap(), + ) + .into_bytes(), + ) + }); + + let artifact_url = loop { + match client.has_release_artifact(artifact.clone()).await { + Err(GhApiError::RateLimit { retry_after }) => { + sleep(retry_after.unwrap_or(DEFAULT_RETRY_AFTER)).await + } + res => break res.unwrap().unwrap(), + } + }; + + if let Some(browser_download_task) = browser_download_task { + let artifact_download_data = loop { + match client.download_artifact(artifact_url.clone()).await { + Err(GhApiError::RateLimit { retry_after }) => { + sleep(retry_after.unwrap_or(DEFAULT_RETRY_AFTER)).await + } + res => break res.unwrap(), + } + } + .into_bytes() + .await + .unwrap(); + + let browser_download_data = + browser_download_task.await.unwrap().unwrap(); + + assert_eq!(artifact_download_data, browser_download_data); + } + })); + } + + let client = client.clone(); + tasks.push(tokio::spawn(async move { + assert_eq!( + 
client + .has_release_artifact(GhReleaseArtifact { + release, + artifact_name: "123z".to_compact_string(), + }) + .await + .unwrap(), + None + ); + })); + } + + for release in NON_EXISTENT_RELEASES { + let client = client.clone(); + + tasks.push(tokio::spawn(async move { + assert_eq!( + client + .has_release_artifact(GhReleaseArtifact { + release, + artifact_name: "1234".to_compact_string(), + }) + .await + .unwrap(), + None + ); + })); + } + } + + for task in tasks { + task.await.unwrap(); + } + } +} diff --git a/crates/binstalk-git-repo-api/src/gh_api_client/common.rs b/crates/binstalk-git-repo-api/src/gh_api_client/common.rs new file mode 100644 index 00000000..b09823e9 --- /dev/null +++ b/crates/binstalk-git-repo-api/src/gh_api_client/common.rs @@ -0,0 +1,143 @@ +use std::{fmt::Debug, future::Future, sync::OnceLock, time::Duration}; + +use binstalk_downloader::remote::{self, Response, Url}; +use compact_str::CompactString; +use percent_encoding::percent_decode_str; +use serde::{de::DeserializeOwned, Deserialize, Serialize}; +use serde_json::to_string as to_json_string; +use tracing::debug; + +use super::{GhApiError, GhGraphQLErrors}; + +pub(super) fn percent_decode_http_url_path(input: &str) -> CompactString { + if input.contains('%') { + percent_decode_str(input).decode_utf8_lossy().into() + } else { + // No '%', no need to decode. 
+ CompactString::new(input) + } +} + +pub(super) fn check_http_status_and_header(response: &Response) -> Result<(), GhApiError> { + let headers = response.headers(); + + match response.status() { + remote::StatusCode::FORBIDDEN + if headers + .get("x-ratelimit-remaining") + .map(|val| val == "0") + .unwrap_or(false) => + { + Err(GhApiError::RateLimit { + retry_after: headers.get("x-ratelimit-reset").and_then(|value| { + let secs = value.to_str().ok()?.parse().ok()?; + Some(Duration::from_secs(secs)) + }), + }) + } + + remote::StatusCode::UNAUTHORIZED => Err(GhApiError::Unauthorized), + remote::StatusCode::NOT_FOUND => Err(GhApiError::NotFound), + + _ => Ok(()), + } +} + +fn get_api_endpoint() -> &'static Url { + static API_ENDPOINT: OnceLock = OnceLock::new(); + + API_ENDPOINT.get_or_init(|| { + Url::parse("https://api.github.com/").expect("Literal provided must be a valid url") + }) +} + +pub(super) fn issue_restful_api( + client: &remote::Client, + path: &[&str], +) -> impl Future> + Send + Sync + 'static +where + T: DeserializeOwned, +{ + let mut url = get_api_endpoint().clone(); + + url.path_segments_mut() + .expect("get_api_endpoint() should return a https url") + .extend(path); + + debug!("Getting restful API: {url}"); + + let future = client + .get(url) + .header("Accept", "application/vnd.github+json") + .header("X-GitHub-Api-Version", "2022-11-28") + .send(false); + + async move { + let response = future.await?; + + check_http_status_and_header(&response)?; + + Ok(response.json().await?) 
+ } +} + +#[derive(Debug, Deserialize)] +struct GraphQLResponse { + data: T, + errors: Option, +} + +#[derive(Serialize)] +struct GraphQLQuery { + query: String, +} + +fn get_graphql_endpoint() -> Url { + let mut graphql_endpoint = get_api_endpoint().clone(); + + graphql_endpoint + .path_segments_mut() + .expect("get_api_endpoint() should return a https url") + .push("graphql"); + + graphql_endpoint +} + +pub(super) fn issue_graphql_query( + client: &remote::Client, + query: String, + auth_token: &str, +) -> impl Future> + Send + Sync + 'static +where + T: DeserializeOwned + Debug, +{ + let res = to_json_string(&GraphQLQuery { query }) + .map_err(remote::Error::from) + .map(|graphql_query| { + let graphql_endpoint = get_graphql_endpoint(); + + debug!("Sending graphql query to {graphql_endpoint}: '{graphql_query}'"); + + let request_builder = client + .post(graphql_endpoint, graphql_query) + .header("Accept", "application/vnd.github+json") + .bearer_auth(&auth_token); + + request_builder.send(false) + }); + + async move { + let response = res?.await?; + check_http_status_and_header(&response)?; + + let mut response: GraphQLResponse = response.json().await?; + + debug!("response = {response:?}"); + + if let Some(error) = response.errors.take() { + Err(error.into()) + } else { + Ok(response.data) + } + } +} diff --git a/crates/binstalk-git-repo-api/src/gh_api_client/error.rs b/crates/binstalk-git-repo-api/src/gh_api_client/error.rs new file mode 100644 index 00000000..1787a372 --- /dev/null +++ b/crates/binstalk-git-repo-api/src/gh_api_client/error.rs @@ -0,0 +1,203 @@ +use std::{error, fmt, io, time::Duration}; + +use binstalk_downloader::remote; +use compact_str::{CompactString, ToCompactString}; +use serde::{de::Deserializer, Deserialize}; +use thiserror::Error as ThisError; + +#[derive(ThisError, Debug)] +#[error("Context: '{context}', err: '{err}'")] +pub struct GhApiContextError { + context: CompactString, + #[source] + err: GhApiError, +} + +#[derive(ThisError, 
Debug)] +#[non_exhaustive] +pub enum GhApiError { + #[error("IO Error: {0}")] + Io(#[from] io::Error), + + #[error("Remote Error: {0}")] + Remote(#[from] remote::Error), + + #[error("Failed to parse url: {0}")] + InvalidUrl(#[from] url::ParseError), + + /// A wrapped error providing the context the error is about. + #[error(transparent)] + Context(Box), + + #[error("Remote failed to process GraphQL query: {0}")] + GraphQLErrors(GhGraphQLErrors), + + #[error("Hit rate-limit, retry after {retry_after:?}")] + RateLimit { retry_after: Option }, + + #[error("Corresponding resource is not found")] + NotFound, + + #[error("Does not have permission to access the API")] + Unauthorized, +} + +impl GhApiError { + /// Attach context to [`GhApiError`] + pub fn context(self, context: impl fmt::Display) -> Self { + use GhApiError::*; + + if matches!(self, RateLimit { .. } | NotFound | Unauthorized) { + self + } else { + Self::Context(Box::new(GhApiContextError { + context: context.to_compact_string(), + err: self, + })) + } + } +} + +impl From for GhApiError { + fn from(e: GhGraphQLErrors) -> Self { + if e.is_rate_limited() { + Self::RateLimit { retry_after: None } + } else if e.is_not_found_error() { + Self::NotFound + } else { + Self::GraphQLErrors(e) + } + } +} + +#[derive(Debug, Deserialize)] +pub struct GhGraphQLErrors(Box<[GraphQLError]>); + +impl GhGraphQLErrors { + fn is_rate_limited(&self) -> bool { + self.0 + .iter() + .any(|error| matches!(error.error_type, GraphQLErrorType::RateLimited)) + } + + fn is_not_found_error(&self) -> bool { + self.0 + .iter() + .any(|error| matches!(&error.error_type, GraphQLErrorType::Other(error_type) if *error_type == "NOT_FOUND")) + } +} + +impl error::Error for GhGraphQLErrors {} + +impl fmt::Display for GhGraphQLErrors { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let last_error_index = self.0.len() - 1; + + for (i, error) in self.0.iter().enumerate() { + write!( + f, + "type: '{error_type}', msg: '{msg}'", + 
error_type = error.error_type, + msg = error.message, + )?; + + for location in error.locations.as_deref().into_iter().flatten() { + write!( + f, + ", occured on query line {line} col {col}", + line = location.line, + col = location.column + )?; + } + + for (k, v) in &error.others { + write!(f, ", {k}: {v}")?; + } + + if i < last_error_index { + f.write_str("\n")?; + } + } + + Ok(()) + } +} + +#[derive(Debug, Deserialize)] +struct GraphQLError { + message: CompactString, + locations: Option>, + + #[serde(rename = "type")] + error_type: GraphQLErrorType, + + #[serde(flatten, with = "tuple_vec_map")] + others: Vec<(CompactString, serde_json::Value)>, +} + +#[derive(Debug)] +pub(super) enum GraphQLErrorType { + RateLimited, + Other(CompactString), +} + +impl fmt::Display for GraphQLErrorType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(match self { + GraphQLErrorType::RateLimited => "RATE_LIMITED", + GraphQLErrorType::Other(s) => s, + }) + } +} + +impl<'de> Deserialize<'de> for GraphQLErrorType { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let s = CompactString::deserialize(deserializer)?; + Ok(match &*s { + "RATE_LIMITED" => GraphQLErrorType::RateLimited, + _ => GraphQLErrorType::Other(s), + }) + } +} + +#[derive(Debug, Deserialize)] +struct GraphQLLocation { + line: u64, + column: u64, +} + +#[cfg(test)] +mod test { + use super::*; + use serde::de::value::{BorrowedStrDeserializer, Error}; + + macro_rules! assert_matches { + ($expression:expr, $pattern:pat $(if $guard:expr)? $(,)?) => { + match $expression { + $pattern $(if $guard)? => true, + expr => { + panic!( + "assertion failed: `{expr:?}` does not match `{}`", + stringify!($pattern $(if $guard)?) 
+ ) + } + } + } + } + + #[test] + fn test_graph_ql_error_type() { + let deserialize = |input: &str| { + GraphQLErrorType::deserialize(BorrowedStrDeserializer::<'_, Error>::new(input)).unwrap() + }; + + assert_matches!(deserialize("RATE_LIMITED"), GraphQLErrorType::RateLimited); + assert_matches!( + deserialize("rATE_LIMITED"), + GraphQLErrorType::Other(val) if val == CompactString::new("rATE_LIMITED") + ); + } +} diff --git a/crates/binstalk-git-repo-api/src/gh_api_client/release_artifacts.rs b/crates/binstalk-git-repo-api/src/gh_api_client/release_artifacts.rs new file mode 100644 index 00000000..312d8a73 --- /dev/null +++ b/crates/binstalk-git-repo-api/src/gh_api_client/release_artifacts.rs @@ -0,0 +1,187 @@ +use std::{ + borrow::Borrow, + collections::HashSet, + fmt, + future::Future, + hash::{Hash, Hasher}, +}; + +use binstalk_downloader::remote::{self}; +use compact_str::{CompactString, ToCompactString}; +use serde::Deserialize; +use url::Url; + +use super::{ + common::{issue_graphql_query, issue_restful_api}, + GhApiError, GhRelease, GhRepo, +}; + +// Only include fields we do care about + +#[derive(Eq, Deserialize, Debug)] +struct Artifact { + name: CompactString, + url: Url, +} + +// Manually implement PartialEq and Hash to ensure it will always produce the +// same hash as a str with the same content, and that the comparison will be +// the same to coparing a string. 
+ +impl PartialEq for Artifact { + fn eq(&self, other: &Self) -> bool { + self.name.eq(&other.name) + } +} + +impl Hash for Artifact { + fn hash(&self, state: &mut H) + where + H: Hasher, + { + let s: &str = self.name.as_str(); + s.hash(state) + } +} + +// Implement Borrow so that we can use call +// `HashSet::contains::` + +impl Borrow for Artifact { + fn borrow(&self) -> &str { + &self.name + } +} + +#[derive(Debug, Default, Deserialize)] +pub(super) struct Artifacts { + assets: HashSet, +} + +impl Artifacts { + /// get url for downloading the artifact using GitHub API (for private repository). + pub(super) fn get_artifact_url(&self, artifact_name: &str) -> Option { + self.assets + .get(artifact_name) + .map(|artifact| artifact.url.clone()) + } +} + +pub(super) fn fetch_release_artifacts_restful_api( + client: &remote::Client, + GhRelease { + repo: GhRepo { owner, repo }, + tag, + }: &GhRelease, +) -> impl Future> + Send + Sync + 'static { + issue_restful_api(client, &["repos", owner, repo, "releases", "tags", tag]) +} + +#[derive(Debug, Deserialize)] +struct GraphQLData { + repository: Option, +} + +#[derive(Debug, Deserialize)] +struct GraphQLRepo { + release: Option, +} + +#[derive(Debug, Deserialize)] +struct GraphQLRelease { + #[serde(rename = "releaseAssets")] + assets: GraphQLReleaseAssets, +} + +#[derive(Debug, Deserialize)] +struct GraphQLReleaseAssets { + nodes: Vec, + #[serde(rename = "pageInfo")] + page_info: GraphQLPageInfo, +} + +#[derive(Debug, Deserialize)] +struct GraphQLPageInfo { + #[serde(rename = "endCursor")] + end_cursor: Option, + #[serde(rename = "hasNextPage")] + has_next_page: bool, +} + +enum FilterCondition { + Init, + After(CompactString), +} + +impl fmt::Display for FilterCondition { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + // GitHub imposes a limit of 100 for the value passed to param "first" + FilterCondition::Init => f.write_str("first:100"), + FilterCondition::After(end_cursor) => write!(f, 
r#"first:100,after:"{end_cursor}""#), + } + } +} + +pub(super) fn fetch_release_artifacts_graphql_api( + client: &remote::Client, + GhRelease { + repo: GhRepo { owner, repo }, + tag, + }: &GhRelease, + auth_token: &str, +) -> impl Future> + Send + Sync + 'static { + let client = client.clone(); + let auth_token = auth_token.to_compact_string(); + + let base_query_prefix = format!( + r#" +query {{ + repository(owner:"{owner}",name:"{repo}") {{ + release(tagName:"{tag}") {{"# + ); + + let base_query_suffix = r#" + nodes { name url } + pageInfo { endCursor hasNextPage } +}}}}"# + .trim(); + + async move { + let mut artifacts = Artifacts::default(); + let mut cond = FilterCondition::Init; + let base_query_prefix = base_query_prefix.trim(); + + loop { + let query = format!( + r#" +{base_query_prefix} +releaseAssets({cond}) {{ +{base_query_suffix}"# + ); + + let data: GraphQLData = issue_graphql_query(&client, query, &auth_token).await?; + + let assets = data + .repository + .and_then(|repository| repository.release) + .map(|release| release.assets); + + if let Some(assets) = assets { + artifacts.assets.extend(assets.nodes); + + match assets.page_info { + GraphQLPageInfo { + end_cursor: Some(end_cursor), + has_next_page: true, + } => { + cond = FilterCondition::After(end_cursor); + } + _ => break Ok(artifacts), + } + } else { + break Err(GhApiError::NotFound); + } + } + } +} diff --git a/crates/binstalk-git-repo-api/src/gh_api_client/repo_info.rs b/crates/binstalk-git-repo-api/src/gh_api_client/repo_info.rs new file mode 100644 index 00000000..ddc6d152 --- /dev/null +++ b/crates/binstalk-git-repo-api/src/gh_api_client/repo_info.rs @@ -0,0 +1,80 @@ +use std::future::Future; + +use compact_str::CompactString; +use serde::Deserialize; + +use super::{ + common::{issue_graphql_query, issue_restful_api}, + remote, GhApiError, GhRepo, +}; + +#[derive(Clone, Eq, PartialEq, Hash, Debug, Deserialize)] +struct Owner { + login: CompactString, +} + +#[derive(Clone, Eq, PartialEq, 
Hash, Debug, Deserialize)] +pub struct RepoInfo { + owner: Owner, + name: CompactString, + private: bool, +} + +impl RepoInfo { + #[cfg(test)] + pub(crate) fn new(GhRepo { owner, repo }: GhRepo, private: bool) -> Self { + Self { + owner: Owner { login: owner }, + name: repo, + private, + } + } + pub fn repo(&self) -> GhRepo { + GhRepo { + owner: self.owner.login.clone(), + repo: self.name.clone(), + } + } + + pub fn is_private(&self) -> bool { + self.private + } +} + +pub(super) fn fetch_repo_info_restful_api( + client: &remote::Client, + GhRepo { owner, repo }: &GhRepo, +) -> impl Future, GhApiError>> + Send + Sync + 'static { + issue_restful_api(client, &["repos", owner, repo]) +} + +#[derive(Debug, Deserialize)] +struct GraphQLData { + repository: Option, +} + +pub(super) fn fetch_repo_info_graphql_api( + client: &remote::Client, + GhRepo { owner, repo }: &GhRepo, + auth_token: &str, +) -> impl Future, GhApiError>> + Send + Sync + 'static { + let query = format!( + r#" +query {{ + repository(owner:"{owner}",name:"{repo}") {{ + owner {{ + login + }} + name + private: isPrivate + }} +}}"# + ); + + let future = issue_graphql_query(client, query, auth_token); + + async move { + let data: GraphQLData = future.await?; + Ok(data.repository) + } +} diff --git a/crates/binstalk-git-repo-api/src/lib.rs b/crates/binstalk-git-repo-api/src/lib.rs new file mode 100644 index 00000000..7d7dd52c --- /dev/null +++ b/crates/binstalk-git-repo-api/src/lib.rs @@ -0,0 +1 @@ +pub mod gh_api_client; diff --git a/crates/binstalk/Cargo.toml b/crates/binstalk/Cargo.toml index 12409e0c..e14816bd 100644 --- a/crates/binstalk/Cargo.toml +++ b/crates/binstalk/Cargo.toml @@ -11,9 +11,8 @@ license = "GPL-3.0-only" [dependencies] binstalk-bins = { version = "0.4.0", path = "../binstalk-bins" } -binstalk-downloader = { version = "0.10.3", path = "../binstalk-downloader", default-features = false, features = [ - "gh-api-client", -] } +binstalk-downloader = { version = "0.10.3", path = 
"../binstalk-downloader", default-features = false } +binstalk-git-repo-api = { version = "0.0.0", path = "../binstalk-git-repo-api" } binstalk-fetchers = { version = "0.4.1", path = "../binstalk-fetchers", features = [ "quickinstall", ] } diff --git a/crates/binstalk/src/errors.rs b/crates/binstalk/src/errors.rs index c5afd7a6..f367299d 100644 --- a/crates/binstalk/src/errors.rs +++ b/crates/binstalk/src/errors.rs @@ -4,9 +4,7 @@ use std::{ process::{ExitCode, ExitStatus, Termination}, }; -use binstalk_downloader::{ - download::DownloadError, gh_api_client::GhApiError, remote::Error as RemoteError, -}; +use binstalk_downloader::{download::DownloadError, remote::Error as RemoteError}; use binstalk_fetchers::FetchError; use compact_str::CompactString; use itertools::Itertools; @@ -19,7 +17,8 @@ use tracing::{error, warn}; use crate::{ bins, helpers::{ - cargo_toml::Error as CargoTomlError, cargo_toml_workspace::Error as LoadManifestFromWSError, + cargo_toml::Error as CargoTomlError, + cargo_toml_workspace::Error as LoadManifestFromWSError, gh_api_client::GhApiError, }, registry::{InvalidRegistryError, RegistryError}, }; diff --git a/crates/binstalk/src/helpers.rs b/crates/binstalk/src/helpers.rs index 058813de..4e5792e6 100644 --- a/crates/binstalk/src/helpers.rs +++ b/crates/binstalk/src/helpers.rs @@ -1,10 +1,13 @@ pub mod jobserver_client; -pub mod remote; +pub mod remote { + pub use binstalk_downloader::remote::*; + pub use url::ParseError as UrlParseError; +} pub(crate) mod target_triple; pub mod tasks; pub(crate) use binstalk_downloader::download; -pub use binstalk_downloader::gh_api_client; +pub use binstalk_git_repo_api::gh_api_client; pub(crate) use cargo_toml_workspace::{self, cargo_toml}; #[cfg(feature = "git")] diff --git a/crates/binstalk/src/helpers/remote.rs b/crates/binstalk/src/helpers/remote.rs deleted file mode 100644 index 24635b89..00000000 --- a/crates/binstalk/src/helpers/remote.rs +++ /dev/null @@ -1,54 +0,0 @@ -pub use 
binstalk_downloader::remote::*; -pub use url::ParseError as UrlParseError; - -use binstalk_downloader::gh_api_client::{GhApiClient, GhReleaseArtifact, HasReleaseArtifact}; -use std::sync::{ - atomic::{AtomicBool, Ordering::Relaxed}, - Once, -}; -use tracing::{debug, warn}; - -use crate::errors::BinstallError; - -/// This function returns a future where its size should be at most size of -/// 2 pointers. -pub async fn does_url_exist( - client: Client, - gh_api_client: GhApiClient, - url: &Url, -) -> Result { - static GH_API_CLIENT_FAILED: AtomicBool = AtomicBool::new(false); - static WARN_RATE_LIMIT_ONCE: Once = Once::new(); - static WARN_UNAUTHORIZED_ONCE: Once = Once::new(); - - debug!("Checking for package at: '{url}'"); - - if !GH_API_CLIENT_FAILED.load(Relaxed) { - if let Some(artifact) = GhReleaseArtifact::try_extract_from_url(url) { - debug!("Using GitHub API to check for existence of artifact, which will also cache the API response"); - - // The future returned has the same size as a pointer - match gh_api_client.has_release_artifact(artifact).await? 
{ - HasReleaseArtifact::Yes => return Ok(true), - HasReleaseArtifact::No | HasReleaseArtifact::NoSuchRelease => return Ok(false), - - HasReleaseArtifact::RateLimit { retry_after } => { - WARN_RATE_LIMIT_ONCE.call_once(|| { - warn!("Your GitHub API token (if any) has reached its rate limit and cannot be used again until {retry_after:?}, so we will fallback to HEAD/GET on the url."); - warn!("If you did not supply a github token, consider doing so: GitHub limits unauthorized users to 60 requests per hour per origin IP address."); - }); - } - HasReleaseArtifact::Unauthorized => { - WARN_UNAUTHORIZED_ONCE.call_once(|| { - warn!("GitHub API somehow requires a token for the API access, so we will fallback to HEAD/GET on the url."); - warn!("Please consider supplying a token to cargo-binstall to speedup resolution."); - }); - } - } - - GH_API_CLIENT_FAILED.store(true, Relaxed); - } - } - - Ok(Box::pin(client.remote_gettable(url.clone())).await?) -} diff --git a/crates/binstalk/src/ops.rs b/crates/binstalk/src/ops.rs index 2124c14e..66366be6 100644 --- a/crates/binstalk/src/ops.rs +++ b/crates/binstalk/src/ops.rs @@ -6,9 +6,7 @@ use semver::VersionReq; use crate::{ fetchers::{Data, Fetcher, SignaturePolicy, TargetDataErased}, - helpers::{ - self, gh_api_client::GhApiClient, jobserver_client::LazyJobserverClient, remote::Client, - }, + helpers::{gh_api_client::GhApiClient, jobserver_client::LazyJobserverClient, remote::Client}, manifests::cargo_toml_binstall::PkgOverride, registry::Registry, DesiredTargets, @@ -23,7 +21,7 @@ pub type Resolver = #[non_exhaustive] pub enum CargoTomlFetchOverride { #[cfg(feature = "git")] - Git(helpers::git::GitUrl), + Git(crate::helpers::git::GitUrl), Path(PathBuf), } diff --git a/crates/binstalk/src/ops/resolve.rs b/crates/binstalk/src/ops/resolve.rs index 994bc6fd..0fa65d9b 100644 --- a/crates/binstalk/src/ops/resolve.rs +++ b/crates/binstalk/src/ops/resolve.rs @@ -12,7 +12,6 @@ use itertools::Itertools; use leon::Template; use 
maybe_owned::MaybeOwned; use semver::{Version, VersionReq}; -use tempfile::TempDir; use tokio::task::spawn_blocking; use tracing::{debug, error, info, instrument, warn}; @@ -21,7 +20,7 @@ use crate::{ errors::{BinstallError, VersionParseError}, fetchers::{Data, Fetcher, TargetData}, helpers::{ - self, cargo_toml::Manifest, cargo_toml_workspace::load_manifest_from_workspace, + cargo_toml::Manifest, cargo_toml_workspace::load_manifest_from_workspace, download::ExtractedFiles, remote::Client, target_triple::TargetTriple, tasks::AutoAbortJoinHandle, }, @@ -361,7 +360,7 @@ impl PackageInfo { } #[cfg(feature = "git")] Some(Git(git_url)) => { - use helpers::git::{GitCancellationToken, Repository as GitRepository}; + use crate::helpers::git::{GitCancellationToken, Repository as GitRepository}; let git_url = git_url.clone(); let name = name.clone(); @@ -370,7 +369,7 @@ impl PackageInfo { let cancel_on_drop = cancellation_token.clone().cancel_on_drop(); let ret = spawn_blocking(move || { - let dir = TempDir::new()?; + let dir = tempfile::TempDir::new()?; GitRepository::shallow_clone(git_url, dir.as_ref(), Some(cancellation_token))?; load_manifest_from_workspace(dir.as_ref(), &name).map_err(BinstallError::from) diff --git a/e2e-tests/manifests/private-github-repo-test-Cargo.toml b/e2e-tests/manifests/private-github-repo-test-Cargo.toml new file mode 100644 index 00000000..cf5cf9e6 --- /dev/null +++ b/e2e-tests/manifests/private-github-repo-test-Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "cargo-binstall" +description = "Rust binary package installer for CI integration" +repository = "https://github.com/cargo-bins/private-repo-for-testing" +version = "0.12.0" +rust-version = "1.61.0" +authors = ["ryan "] +edition = "2021" +license = "GPL-3.0" + +[[bin]] +name = "cargo-binstall" +path = "src/main.rs" diff --git a/e2e-tests/private-github-repo.sh b/e2e-tests/private-github-repo.sh new file mode 100755 index 00000000..e0050101 --- /dev/null +++ 
b/e2e-tests/private-github-repo.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +set -euxo pipefail + +unset CARGO_INSTALL_ROOT + +CARGO_HOME=$(mktemp -d 2>/dev/null || mktemp -d -t 'cargo-home') +export CARGO_HOME +export PATH="$CARGO_HOME/bin:$PATH" + +# Install binaries using `--manifest-path` +# Also test default github template +"./$1" binstall --force --manifest-path "manifests/private-github-repo-test-Cargo.toml" --no-confirm cargo-binstall --strategies crate-meta-data + +# Test that the installed binaries can be run +cargo binstall --help >/dev/null + +cargo_binstall_version="$(cargo binstall -V)" +echo "$cargo_binstall_version" + +[ "$cargo_binstall_version" = "cargo-binstall 0.12.0" ] diff --git a/justfile b/justfile index d01a9510..d80759d0 100644 --- a/justfile +++ b/justfile @@ -219,7 +219,7 @@ check: print-env {{cargo-bin}} check -p cargo-binstall --no-default-features --features rustls {{cargo-check-args}} --profile check-only cargo-hack hack check -p binstalk-downloader \ --feature-powerset \ - --include-features default,json,gh-api-client \ + --include-features default,json \ --profile check-only \ {{cargo-check-args}} @@ -248,6 +248,7 @@ e2e-test-git: (e2e-test "git") e2e-test-registries: (e2e-test "registries") e2e-test-signing: (e2e-test "signing") e2e-test-continue-on-failure: (e2e-test "continue-on-failure") +e2e-test-private-github-repo: (e2e-test "private-github-repo") # WinTLS (Windows in CI) does not have TLS 1.3 support [windows] @@ -256,7 +257,7 @@ e2e-test-tls: (e2e-test "tls" "1.2") [macos] e2e-test-tls: (e2e-test "tls" "1.2") (e2e-test "tls" "1.3") -e2e-tests: e2e-test-live e2e-test-manifest-path e2e-test-git e2e-test-other-repos e2e-test-strategies e2e-test-version-syntax e2e-test-upgrade e2e-test-tls e2e-test-self-upgrade-no-symlink e2e-test-uninstall e2e-test-subcrate e2e-test-no-track e2e-test-registries e2e-test-signing e2e-test-continue-on-failure +e2e-tests: e2e-test-live e2e-test-manifest-path e2e-test-git e2e-test-other-repos 
e2e-test-strategies e2e-test-version-syntax e2e-test-upgrade e2e-test-tls e2e-test-self-upgrade-no-symlink e2e-test-uninstall e2e-test-subcrate e2e-test-no-track e2e-test-registries e2e-test-signing e2e-test-continue-on-failure e2e-test-private-github-repo unit-tests: print-env {{cargo-bin}} test {{cargo-build-args}}