Refactor: Extract new crate binstalk-fetchers (#1291)

To reduce `binstalk` codegen and enable better reuse.

Signed-off-by: Jiahao XU <Jiahao_XU@outlook.com>
This commit is contained in:
Jiahao XU 2023-08-14 13:20:34 +10:00 committed by GitHub
parent 623f7ff4ed
commit 76c72469eb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 1008 additions and 122 deletions

View file

@ -0,0 +1,60 @@
use std::sync::{
atomic::{AtomicBool, Ordering::Relaxed},
Once,
};
use binstalk_downloader::gh_api_client::{GhReleaseArtifact, HasReleaseArtifact};
pub(super) use binstalk_downloader::{
download::{Download, ExtractedFiles},
gh_api_client::GhApiClient,
remote::{Client, Method, Url},
};
pub(super) use binstalk_types::cargo_toml_binstall::{PkgFmt, PkgMeta};
pub(super) use compact_str::CompactString;
pub(super) use tokio::task::JoinHandle;
pub(super) use tracing::{debug, instrument, warn};
use crate::FetchError;
/// This function returns a future where its size should be at most size of
/// 2-4 pointers.
///
/// Checks whether `url` points at an existing artifact, preferring the
/// GitHub API (cached, avoids a request per candidate) and falling back
/// to a plain HEAD/GET probe.
pub(super) async fn does_url_exist(
    client: Client,
    gh_api_client: GhApiClient,
    url: &Url,
) -> Result<bool, FetchError> {
    // Process-wide sticky flag: once the GitHub API reports rate-limit or
    // auth failure, skip it for the rest of this run.
    static GH_API_CLIENT_FAILED: AtomicBool = AtomicBool::new(false);
    // Emit each fallback warning at most once per process.
    static WARN_RATE_LIMIT_ONCE: Once = Once::new();
    static WARN_UNAUTHORIZED_ONCE: Once = Once::new();

    debug!("Checking for package at: '{url}'");

    if !GH_API_CLIENT_FAILED.load(Relaxed) {
        if let Some(artifact) = GhReleaseArtifact::try_extract_from_url(url) {
            debug!("Using GitHub API to check for existence of artifact, which will also cache the API response");

            // The future returned has the same size as a pointer
            match gh_api_client.has_release_artifact(artifact).await? {
                HasReleaseArtifact::Yes => return Ok(true),
                HasReleaseArtifact::No | HasReleaseArtifact::NoSuchRelease => return Ok(false),

                HasReleaseArtifact::RateLimit { retry_after } => {
                    WARN_RATE_LIMIT_ONCE.call_once(|| {
                        warn!("Your GitHub API token (if any) has reached its rate limit and cannot be used again until {retry_after:?}, so we will fallback to HEAD/GET on the url.");
                        warn!("If you did not supply a github token, consider doing so: GitHub limits unauthorized users to 60 requests per hour per origin IP address.");
                    });
                }
                HasReleaseArtifact::Unauthorized => {
                    WARN_UNAUTHORIZED_ONCE.call_once(|| {
                        warn!("GitHub API somehow requires a token for the API access, so we will fallback to HEAD/GET on the url.");
                        warn!("Please consider supplying a token to cargo-binstall to speedup resolution.");
                    });
                }
            }

            // Only reached on RateLimit/Unauthorized: disable further API
            // checks and fall through to the HEAD/GET probe below.
            GH_API_CLIENT_FAILED.store(true, Relaxed);
        }
    }

    // Fallback probe; boxed to keep this future's size small (see doc
    // comment above).
    Ok(Box::pin(client.remote_gettable(url.clone())).await?)
}

View file

@ -0,0 +1,76 @@
use std::{future::Future, pin::Pin};
use tokio::sync::mpsc;
/// Given multiple futures with output = `Result<Option<T>, E>`, yields
/// the first one that resolves to either `Err(_)` or `Ok(Some(_))`.
pub struct FuturesResolver<T, E> {
    rx: mpsc::Receiver<Result<T, E>>,
    tx: mpsc::Sender<Result<T, E>>,
}

impl<T, E> Default for FuturesResolver<T, E> {
    fn default() -> Self {
        // Only the first resolution is ever consumed, so a channel of
        // capacity 1 suffices.
        let (tx, rx) = mpsc::channel(1);

        Self { rx, tx }
    }
}
impl<T: Send + 'static, E: Send + 'static> FuturesResolver<T, E> {
    /// Insert new future into this resolver, they will start running
    /// right away (each is spawned onto the tokio runtime immediately).
    pub fn push<Fut>(&self, fut: Fut)
    where
        Fut: Future<Output = Result<Option<T>, E>> + Send + 'static,
    {
        let tx = self.tx.clone();
        tokio::spawn(async move {
            // Pin the future on the task so spawn_inner can take it as a
            // `Pin<&mut dyn Future>`, avoiding monomorphising the select!
            // below once per future type.
            tokio::pin!(fut);
            Self::spawn_inner(fut, tx).await;
        });
    }

    /// Drive `fut` to completion unless the receiving side is already
    /// gone (some other future resolved first, or the resolver was
    /// dropped), then forward any `Err(_)`/`Ok(Some(_))` outcome.
    async fn spawn_inner(
        fut: Pin<&mut (dyn Future<Output = Result<Option<T>, E>> + Send)>,
        tx: mpsc::Sender<Result<T, E>>,
    ) {
        let res = tokio::select! {
            biased;

            // Prefer noticing a closed channel so we stop polling `fut`
            // as soon as a winner exists.
            _ = tx.closed() => return,
            res = fut => res,
        };

        // `Ok(None)` transposes to `None`: nothing to report.
        if let Some(res) = res.transpose() {
            // try_send can only fail due to being full or being closed.
            //
            // In both cases, this could mean some other future has
            // completed first.
            //
            // For closed, it could additionally mean that the task
            // is cancelled.
            tx.try_send(res).ok();
        }
    }

    /// Insert multiple futures into this resolver, they will start running
    /// right away.
    pub fn extend<Fut, Iter>(&self, iter: Iter)
    where
        Fut: Future<Output = Result<Option<T>, E>> + Send + 'static,
        Iter: IntoIterator<Item = Fut>,
    {
        iter.into_iter().for_each(|fut| self.push(fut));
    }

    /// Return the resolution.
    ///
    /// Resolves to `Ok(None)` when every pushed future returned
    /// `Ok(None)` (all senders dropped without sending).
    pub fn resolve(self) -> impl Future<Output = Result<Option<T>, E>> {
        let mut rx = self.rx;
        // Drop our own sender so `recv()` returns `None` once all spawned
        // tasks finish without producing a result.
        drop(self.tx);

        async move { rx.recv().await.transpose() }
    }
}

View file

@ -0,0 +1,543 @@
use std::{borrow::Cow, fmt, iter, marker::PhantomData, path::Path, sync::Arc};
use compact_str::{CompactString, ToCompactString};
use either::Either;
use leon::Template;
use once_cell::sync::OnceCell;
use strum::IntoEnumIterator;
use tracing::{debug, warn};
use url::Url;
use crate::{
common::*, futures_resolver::FuturesResolver, Data, FetchError, InvalidPkgFmtError, RepoInfo,
TargetDataErased,
};
pub(crate) mod hosting;
/// Fetcher that resolves a download URL from the crate's `pkg-url`
/// template (or, failing that, the repository host's default templates).
pub struct GhCrateMeta {
    client: Client,
    gh_api_client: GhApiClient,
    data: Arc<Data>,
    target_data: Arc<TargetDataErased>,
    // Winning (url, fmt); set exactly once by `find()`, read by
    // `fetch_and_extract`/`pkg_fmt`/`source_name`.
    resolution: OnceCell<(Url, PkgFmt)>,
}
impl GhCrateMeta {
    /// Spawn one existence-check future per candidate URL for the given
    /// (`pkg_fmt`, `pkg_url` template) pair onto `futures_resolver`.
    ///
    /// If the template references format/archive-format/archive-suffix,
    /// one URL is rendered per extension of `pkg_fmt`; otherwise the
    /// template is rendered once with no extension.
    fn launch_baseline_find_tasks(
        &self,
        futures_resolver: &FuturesResolver<(Url, PkgFmt), FetchError>,
        pkg_fmt: PkgFmt,
        pkg_url: &Template<'_>,
        repo: Option<&str>,
        subcrate: Option<&str>,
    ) {
        // Render the template for one archive extension; render failures
        // are logged and skipped rather than treated as fatal.
        let render_url = |ext| {
            let ctx = Context::from_data_with_repo(
                &self.data,
                &self.target_data.target,
                &self.target_data.target_related_info,
                ext,
                repo,
                subcrate,
            );
            match ctx.render_url_with_compiled_tt(pkg_url) {
                Ok(url) => Some(url),
                Err(err) => {
                    warn!("Failed to render url for {ctx:#?}: {err}");
                    None
                }
            }
        };

        let is_windows = self.target_data.target.contains("windows");

        let urls = if pkg_url.has_any_of_keys(&["format", "archive-format", "archive-suffix"]) {
            // build up list of potential URLs, one per extension of pkg_fmt
            Either::Left(
                pkg_fmt
                    .extensions(is_windows)
                    .iter()
                    .filter_map(|ext| render_url(Some(ext))),
            )
        } else {
            Either::Right(render_url(None).into_iter())
        };

        // go check all potential URLs at once
        futures_resolver.extend(urls.map(move |url| {
            let client = self.client.clone();
            let gh_api_client = self.gh_api_client.clone();

            async move {
                Ok(does_url_exist(client, gh_api_client, &url)
                    .await?
                    .then_some((url, pkg_fmt)))
            }
        }));
    }
}
#[async_trait::async_trait]
impl super::Fetcher for GhCrateMeta {
    fn new(
        client: Client,
        gh_api_client: GhApiClient,
        data: Arc<Data>,
        target_data: Arc<TargetDataErased>,
    ) -> Arc<dyn super::Fetcher> {
        Arc::new(Self {
            client,
            gh_api_client,
            data,
            target_data,
            resolution: OnceCell::new(),
        })
    }

    /// Resolve the download URL and package format, storing the winner in
    /// `self.resolution`.  Returns `Ok(false)` when no candidate exists.
    fn find(self: Arc<Self>) -> JoinHandle<Result<bool, FetchError>> {
        tokio::spawn(async move {
            let info = self.data.get_repo_info(&self.client).await?.as_ref();

            let repo = info.map(|info| &info.repo);
            let subcrate = info.and_then(|info| info.subcrate.as_deref());

            let mut pkg_fmt = self.target_data.meta.pkg_fmt;

            // Candidate templates: the crate's own pkg-url if present,
            // otherwise the hosting service's default templates.
            let pkg_urls = if let Some(pkg_url) = self.target_data.meta.pkg_url.as_deref() {
                let template = Template::parse(pkg_url)?;

                if pkg_fmt.is_none()
                    && !template.has_any_of_keys(&["format", "archive-format", "archive-suffix"])
                {
                    // The crate does not specify the pkg-fmt, yet its pkg-url
                    // template doesn't contains format, archive-format or
                    // archive-suffix which is required for automatically
                    // deducing the pkg-fmt.
                    //
                    // We will attempt to guess the pkg-fmt there, but this is
                    // just a best-effort
                    pkg_fmt = PkgFmt::guess_pkg_format(pkg_url);

                    let crate_name = &self.data.name;
                    let version = &self.data.version;
                    let target = &self.target_data.target;

                    if pkg_fmt.is_none() {
                        return Err(InvalidPkgFmtError {
                            crate_name: crate_name.clone(),
                            version: version.clone(),
                            target: target.into(),
                            pkg_url: pkg_url.into(),
                            reason:
                                &"pkg-fmt is not specified, yet pkg-url does not contain format, \
                                archive-format or archive-suffix which is required for automatically deducing pkg-fmt",
                        }
                        .into());
                    }

                    warn!(
                        "Crate {crate_name}@{version} on target {target} does not specify pkg-fmt \
                        but its pkg-url also does not contain key format, archive-format or \
                        archive-suffix.\nbinstall was able to guess that from pkg-url, but \
                        just note that it could be wrong:\npkg-fmt=\"{pkg_fmt}\", pkg-url=\"{pkg_url}\"",
                        pkg_fmt = pkg_fmt.unwrap(),
                    );
                }

                Either::Left(iter::once(template))
            } else if let Some(RepoInfo {
                repo,
                repository_host,
                ..
            }) = info
            {
                if let Some(pkg_urls) = repository_host.get_default_pkg_url_template() {
                    let has_subcrate = subcrate.is_some();

                    Either::Right(
                        pkg_urls
                            .map(Template::cast)
                            // If subcrate is Some, then all templates will be included.
                            // Otherwise, only templates without key "subcrate" will be
                            // included.
                            .filter(move |template| has_subcrate || !template.has_key("subcrate")),
                    )
                } else {
                    warn!(
                        concat!(
                            "Unknown repository {}, cargo-binstall cannot provide default pkg_url for it.\n",
                            "Please ask the upstream to provide it for target {}."
                        ),
                        repo, self.target_data.target
                    );

                    return Ok(false);
                }
            } else {
                warn!(
                    concat!(
                        "Package does not specify repository, cargo-binstall cannot provide default pkg_url for it.\n",
                        "Please ask the upstream to provide it for target {}."
                    ),
                    self.target_data.target
                );

                return Ok(false);
            };

            // Convert Option<&Url> to Option<&str> (without trailing '/')
            // to reduce size of the future.
            let repo = repo.map(|u| u.as_str().trim_end_matches('/'));

            // Borrow self so the loop below doesn't try to move `self`
            // into launch_baseline_find_tasks' closures.
            let this = &self;

            let pkg_fmts = if let Some(pkg_fmt) = pkg_fmt {
                Either::Left(iter::once(pkg_fmt))
            } else {
                // pkg-fmt unknown: try every supported format.
                Either::Right(PkgFmt::iter())
            };

            let resolver = FuturesResolver::default();

            // Iterate over pkg_urls first to avoid String::clone.
            for pkg_url in pkg_urls {
                // Clone iter pkg_fmts to ensure all pkg_fmts is
                // iterated over for each pkg_url, which is
                // basically cartesian product.
                for pkg_fmt in pkg_fmts.clone() {
                    this.launch_baseline_find_tasks(&resolver, pkg_fmt, &pkg_url, repo, subcrate);
                }
            }

            if let Some((url, pkg_fmt)) = resolver.resolve().await? {
                debug!("Winning URL is {url}, with pkg_fmt {pkg_fmt}");
                self.resolution.set((url, pkg_fmt)).unwrap(); // find() is called first
                Ok(true)
            } else {
                Ok(false)
            }
        })
    }

    async fn fetch_and_extract(&self, dst: &Path) -> Result<ExtractedFiles, FetchError> {
        let (url, pkg_fmt) = self.resolution.get().unwrap(); // find() is called first
        debug!(
            "Downloading package from: '{url}' dst:{} fmt:{pkg_fmt:?}",
            dst.display()
        );
        Ok(Download::new(self.client.clone(), url.clone())
            .and_extract(*pkg_fmt, dst)
            .await?)
    }

    fn pkg_fmt(&self) -> PkgFmt {
        self.resolution.get().unwrap().1
    }

    /// Finalized metadata: the resolved pkg_fmt overrides the crate's.
    fn target_meta(&self) -> PkgMeta {
        let mut meta = self.target_data.meta.clone();
        meta.pkg_fmt = Some(self.pkg_fmt());
        meta
    }

    /// Domain (or host, or the whole URL) of the resolved download URL.
    fn source_name(&self) -> CompactString {
        self.resolution
            .get()
            .map(|(url, _pkg_fmt)| {
                if let Some(domain) = url.domain() {
                    domain.to_compact_string()
                } else if let Some(host) = url.host_str() {
                    host.to_compact_string()
                } else {
                    url.to_compact_string()
                }
            })
            .unwrap_or_else(|| "invalid url".into())
    }

    fn fetcher_name(&self) -> &'static str {
        "GhCrateMeta"
    }

    fn is_third_party(&self) -> bool {
        false
    }

    fn target(&self) -> &str {
        &self.target_data.target
    }

    fn target_data(&self) -> &Arc<TargetDataErased> {
        &self.target_data
    }
}
/// Template for constructing download paths
#[derive(Clone)]
struct Context<'c> {
    name: &'c str,
    repo: Option<&'c str>,
    target: &'c str,
    version: &'c str,

    /// Archive format e.g. tar.gz, zip
    archive_format: Option<&'c str>,

    /// Same as `archive_format` but with the leading dot kept
    /// (e.g. ".tgz"); empty string denotes a bare binary.
    archive_suffix: Option<&'c str>,

    /// Filename extension on the binary, i.e. .exe on Windows, nothing otherwise
    binary_ext: &'c str,

    /// Workspace of the crate inside the repository.
    subcrate: Option<&'c str>,

    /// Fallback lookup for keys not handled above (e.g. target_family).
    target_related_info: &'c dyn leon::Values,
}
impl fmt::Debug for Context<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `&dyn leon::Values` is not `Debug`, so mirror the fields into a
        // shadow struct with the trait object replaced by `PhantomData`.
        // The shadow is deliberately also named `Context` so the output
        // still reads `Context { .. }`.
        #[allow(dead_code)]
        #[derive(Debug)]
        struct Context<'c> {
            name: &'c str,
            repo: Option<&'c str>,
            target: &'c str,
            version: &'c str,
            archive_format: Option<&'c str>,
            archive_suffix: Option<&'c str>,
            binary_ext: &'c str,
            subcrate: Option<&'c str>,
            target_related_info: PhantomData<&'c dyn leon::Values>,
        }

        fmt::Debug::fmt(
            &Context {
                name: self.name,
                repo: self.repo,
                target: self.target,
                version: self.version,
                archive_format: self.archive_format,
                archive_suffix: self.archive_suffix,
                binary_ext: self.binary_ext,
                subcrate: self.subcrate,
                target_related_info: PhantomData,
            },
            f,
        )
    }
}
impl leon::Values for Context<'_> {
    /// Key lookup used during template rendering; any key not handled
    /// here is delegated to `target_related_info`.
    fn get_value<'s>(&'s self, key: &str) -> Option<Cow<'s, str>> {
        match key {
            "name" => Some(Cow::Borrowed(self.name)),
            "repo" => self.repo.map(Cow::Borrowed),
            "target" => Some(Cow::Borrowed(self.target)),
            "version" => Some(Cow::Borrowed(self.version)),
            // "format" is a soft-deprecated alias for "archive-format".
            "archive-format" | "format" => self.archive_format.map(Cow::Borrowed),
            "archive-suffix" => self.archive_suffix.map(Cow::Borrowed),
            "binary-ext" => Some(Cow::Borrowed(self.binary_ext)),
            "subcrate" => self.subcrate.map(Cow::Borrowed),
            other => self.target_related_info.get_value(other),
        }
    }
}
impl<'c> Context<'c> {
    /// Build a rendering context from `data`.
    ///
    /// `archive_suffix` semantics:
    /// - `None`: no archive keys are made available;
    /// - `Some("")`: a bare binary (`PkgFmt::Bin`), rendered as "bin";
    /// - `Some(".ext")`: `archive-format` becomes "ext" (dot stripped).
    fn from_data_with_repo(
        data: &'c Data,
        target: &'c str,
        target_related_info: &'c dyn leon::Values,
        archive_suffix: Option<&'c str>,
        repo: Option<&'c str>,
        subcrate: Option<&'c str>,
    ) -> Self {
        let archive_format = archive_suffix.map(|suffix| match suffix {
            // Empty archive_suffix means PkgFmt::Bin
            "" => "bin",
            suffix => {
                debug_assert!(suffix.starts_with('.'), "{suffix}");
                &suffix[1..]
            }
        });

        let binary_ext = if target.contains("windows") { ".exe" } else { "" };

        Self {
            name: &data.name,
            repo,
            target,
            version: &data.version,
            archive_format,
            archive_suffix,
            binary_ext,
            subcrate,
            target_related_info,
        }
    }

    /// * `tt` - must have added a template named "pkg_url".
    fn render_url_with_compiled_tt(&self, tt: &Template<'_>) -> Result<Url, FetchError> {
        debug!("Render {tt:#?} using context: {self:?}");

        let rendered = tt.render(self)?;
        Ok(Url::parse(&rendered)?)
    }

    /// Test-only convenience: parse `template` then render it.
    #[cfg(test)]
    fn render_url(&self, template: &str) -> Result<Url, FetchError> {
        debug!("Render {template} using context in render_url: {self:?}");

        self.render_url_with_compiled_tt(&Template::parse(template)?)
    }
}
#[cfg(test)]
mod test {
    use super::{super::Data, Context};
    use compact_str::ToCompactString;
    use url::Url;

    /// Mirrors the default GitHub-style pkg-url template.
    const DEFAULT_PKG_URL: &str = "{ repo }/releases/download/v{ version }/{ name }-{ target }-v{ version }.{ archive-format }";

    /// Render `template` under a context built from the arguments and
    /// assert the result parses to exactly `expected_url`.
    fn assert_context_rendering(
        data: &Data,
        target: &str,
        archive_format: &str,
        template: &str,
        expected_url: &str,
    ) {
        // The template provided doesn't need this, so just returning None
        // is OK.
        let target_info = leon::vals(|_| None);

        let ctx = Context::from_data_with_repo(
            data,
            target,
            &target_info,
            Some(archive_format),
            data.repo.as_deref(),
            None,
        );

        let expected_url = Url::parse(expected_url).unwrap();
        assert_eq!(ctx.render_url(template).unwrap(), expected_url);
    }

    #[test]
    fn defaults() {
        assert_context_rendering(
            &Data::new(
                "cargo-binstall".to_compact_string(),
                "1.2.3".to_compact_string(),
                Some("https://github.com/ryankurte/cargo-binstall".to_string()),
            ),
            "x86_64-unknown-linux-gnu",
            ".tgz",
            DEFAULT_PKG_URL,
            "https://github.com/ryankurte/cargo-binstall/releases/download/v1.2.3/cargo-binstall-x86_64-unknown-linux-gnu-v1.2.3.tgz"
        );
    }

    // `{ repo }` is optional as long as the template renders without it
    // (here the leading "{ repo }" — 8 chars — is replaced by a full host).
    #[test]
    fn no_repo_but_full_url() {
        assert_context_rendering(
            &Data::new(
                "cargo-binstall".to_compact_string(),
                "1.2.3".to_compact_string(),
                None,
            ),
            "x86_64-unknown-linux-gnu",
            ".tgz",
            &format!("https://example.com{}", &DEFAULT_PKG_URL[8..]),
            "https://example.com/releases/download/v1.2.3/cargo-binstall-x86_64-unknown-linux-gnu-v1.2.3.tgz"
        );
    }

    // Binary name in the template may differ from the crate name.
    #[test]
    fn different_url() {
        assert_context_rendering(
            &Data::new(
                "radio-sx128x".to_compact_string(),
                "0.14.1-alpha.5".to_compact_string(),
                Some("https://github.com/rust-iot/rust-radio-sx128x".to_string()),
            ),
            "x86_64-unknown-linux-gnu",
            ".tgz",
            "{ repo }/releases/download/v{ version }/sx128x-util-{ target }-v{ version }.{ archive-format }",
            "https://github.com/rust-iot/rust-radio-sx128x/releases/download/v0.14.1-alpha.5/sx128x-util-x86_64-unknown-linux-gnu-v0.14.1-alpha.5.tgz"
        );
    }

    // `{ format }` is the soft-deprecated alias of `{ archive-format }`.
    #[test]
    fn deprecated_format() {
        assert_context_rendering(
            &Data::new(
                "radio-sx128x".to_compact_string(),
                "0.14.1-alpha.5".to_compact_string(),
                Some("https://github.com/rust-iot/rust-radio-sx128x".to_string()),
            ),
            "x86_64-unknown-linux-gnu",
            ".tgz",
            "{ repo }/releases/download/v{ version }/sx128x-util-{ target }-v{ version }.{ format }",
            "https://github.com/rust-iot/rust-radio-sx128x/releases/download/v0.14.1-alpha.5/sx128x-util-x86_64-unknown-linux-gnu-v0.14.1-alpha.5.tgz"
        );
    }

    // Extension hard-coded in the template; archive_format is unused.
    #[test]
    fn different_ext() {
        assert_context_rendering(
            &Data::new(
                "cargo-watch".to_compact_string(),
                "9.0.0".to_compact_string(),
                Some("https://github.com/watchexec/cargo-watch".to_string()),
            ),
            "aarch64-apple-darwin",
            ".txz",
            "{ repo }/releases/download/v{ version }/{ name }-v{ version }-{ target }.tar.xz",
            "https://github.com/watchexec/cargo-watch/releases/download/v9.0.0/cargo-watch-v9.0.0-aarch64-apple-darwin.tar.xz"
        );
    }

    // `{ binary-ext }` renders ".exe" for windows targets.
    #[test]
    fn no_archive() {
        assert_context_rendering(
            &Data::new(
                "cargo-watch".to_compact_string(),
                "9.0.0".to_compact_string(),
                Some("https://github.com/watchexec/cargo-watch".to_string()),
            ),
            "aarch64-pc-windows-msvc",
            ".bin",
            "{ repo }/releases/download/v{ version }/{ name }-v{ version }-{ target }{ binary-ext }",
            "https://github.com/watchexec/cargo-watch/releases/download/v9.0.0/cargo-watch-v9.0.0-aarch64-pc-windows-msvc.exe"
        );
    }
}

View file

@ -0,0 +1,117 @@
use itertools::Itertools;
use leon::{Item, Template};
use leon_macros::template;
use url::Url;
/// Hosting services for which default `pkg-url` templates are known.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum RepositoryHost {
    GitHub,
    GitLab,
    BitBucket,
    SourceForge,
    /// Unrecognised host: no default templates can be provided.
    Unknown,
}
/// Make sure to update possible_dirs in `bins::infer_bin_dir_template`
/// if you modified FULL_FILENAMES or NOVERSION_FILENAMES.
pub const FULL_FILENAMES: &[Template<'_>] = &[
    template!("/{ name }-{ target }-v{ version }{ archive-suffix }"),
    template!("/{ name }-{ target }-{ version }{ archive-suffix }"),
    template!("/{ name }-{ version }-{ target }{ archive-suffix }"),
    template!("/{ name }-v{ version }-{ target }{ archive-suffix }"),
    template!("/{ name }_{ target }_v{ version }{ archive-suffix }"),
    template!("/{ name }_{ target }_{ version }{ archive-suffix }"),
    template!("/{ name }_{ version }_{ target }{ archive-suffix }"),
    template!("/{ name }_v{ version }_{ target }{ archive-suffix }"),
];

/// Filename templates that don't embed the version.
pub const NOVERSION_FILENAMES: &[Template<'_>] = &[
    template!("/{ name }-{ target }{ archive-suffix }"),
    template!("/{ name }_{ target }{ archive-suffix }"),
];

/// Release directory layouts, one list per hosting service.
const GITHUB_RELEASE_PATHS: &[Template<'_>] = &[
    template!("{ repo }/releases/download/{ version }"),
    template!("{ repo }/releases/download/v{ version }"),
    // %2F is escaped form of '/'
    template!("{ repo }/releases/download/{ subcrate }%2F{ version }"),
    template!("{ repo }/releases/download/{ subcrate }%2Fv{ version }"),
];

const GITLAB_RELEASE_PATHS: &[Template<'_>] = &[
    template!("{ repo }/-/releases/{ version }/downloads/binaries"),
    template!("{ repo }/-/releases/v{ version }/downloads/binaries"),
    // %2F is escaped form of '/'
    template!("{ repo }/-/releases/{ subcrate }%2F{ version }/downloads/binaries"),
    template!("{ repo }/-/releases/{ subcrate }%2Fv{ version }/downloads/binaries"),
];

const BITBUCKET_RELEASE_PATHS: &[Template<'_>] = &[template!("{ repo }/downloads")];

const SOURCEFORGE_RELEASE_PATHS: &[Template<'_>] = &[
    template!("{ repo }/files/binaries/{ version }"),
    template!("{ repo }/files/binaries/v{ version }"),
    // %2F is escaped form of '/'
    template!("{ repo }/files/binaries/{ subcrate }%2F{ version }"),
    template!("{ repo }/files/binaries/{ subcrate }%2Fv{ version }"),
];
impl RepositoryHost {
pub fn guess_git_hosting_services(repo: &Url) -> Self {
use RepositoryHost::*;
match repo.domain() {
Some(domain) if domain.starts_with("github") => GitHub,
Some(domain) if domain.starts_with("gitlab") => GitLab,
Some(domain) if domain == "bitbucket.org" => BitBucket,
Some(domain) if domain == "sourceforge.net" => SourceForge,
_ => Unknown,
}
}
pub fn get_default_pkg_url_template(
self,
) -> Option<impl Iterator<Item = Template<'static>> + Clone + 'static> {
use RepositoryHost::*;
match self {
GitHub => Some(apply_filenames_to_paths(
GITHUB_RELEASE_PATHS,
&[FULL_FILENAMES, NOVERSION_FILENAMES],
"",
)),
GitLab => Some(apply_filenames_to_paths(
GITLAB_RELEASE_PATHS,
&[FULL_FILENAMES, NOVERSION_FILENAMES],
"",
)),
BitBucket => Some(apply_filenames_to_paths(
BITBUCKET_RELEASE_PATHS,
&[FULL_FILENAMES],
"",
)),
SourceForge => Some(apply_filenames_to_paths(
SOURCEFORGE_RELEASE_PATHS,
&[FULL_FILENAMES, NOVERSION_FILENAMES],
"/download",
)),
Unknown => None,
}
}
}
/// Cross product of every filename template with every release-path
/// template, with `suffix` (e.g. SourceForge's "/download") appended to
/// each result.
fn apply_filenames_to_paths(
    paths: &'static [Template<'static>],
    filenames: &'static [&'static [Template<'static>]],
    suffix: &'static str,
) -> impl Iterator<Item = Template<'static>> + Clone + 'static {
    filenames
        .iter()
        .flat_map(|fs| fs.iter())
        .cartesian_product(paths.iter())
        .map(move |(filename, path)| {
            // Resulting template reads "{path}{filename}{suffix}".
            let mut template = path.clone() + filename;
            template += Item::Text(suffix);

            template
        })
}

View file

@ -0,0 +1,324 @@
use std::{path::Path, sync::Arc};
use binstalk_downloader::{
download::DownloadError, gh_api_client::GhApiError, remote::Error as RemoteError,
};
use thiserror::Error as ThisError;
use tokio::sync::OnceCell;
pub use url::ParseError as UrlParseError;
mod gh_crate_meta;
pub use gh_crate_meta::*;
mod quickinstall;
pub use quickinstall::*;
mod common;
use common::*;
mod futures_resolver;
use gh_crate_meta::hosting::RepositoryHost;
/// The crate specified no pkg-fmt and it could not be deduced from its
/// pkg-url template.
#[derive(Debug, ThisError)]
#[error("Invalid pkg-url {pkg_url} for {crate_name}@{version} on {target}: {reason}")]
pub struct InvalidPkgFmtError {
    pub crate_name: CompactString,
    pub version: CompactString,
    pub target: CompactString,
    pub pkg_url: Box<str>,
    // NOTE(review): double reference presumably keeps the field one
    // pointer wide instead of two — confirm this size tradeoff is
    // intended before simplifying to `&'static str`.
    pub reason: &'static &'static str,
}
#[derive(Debug, ThisError, miette::Diagnostic)]
#[non_exhaustive]
#[cfg_attr(feature = "miette", derive(miette::Diagnostic))]
pub enum FetchError {
#[error(transparent)]
Download(#[from] DownloadError),
#[error("Failed to parse template: {0}")]
#[diagnostic(transparent)]
TemplateParse(#[from] leon::ParseError),
#[error("Failed to render template: {0}")]
#[diagnostic(transparent)]
TemplateRender(#[from] leon::RenderError),
#[error("Failed to render template: {0}")]
GhApi(#[from] GhApiError),
#[error(transparent)]
InvalidPkgFmt(Box<InvalidPkgFmtError>),
#[error("Failed to parse url: {0}")]
UrlParse(#[from] UrlParseError),
}
impl From<RemoteError> for FetchError {
    /// Remote errors are routed through [`DownloadError`] and surface as
    /// the `Download` variant.
    fn from(e: RemoteError) -> Self {
        Self::Download(DownloadError::from(e))
    }
}
impl From<InvalidPkgFmtError> for FetchError {
fn from(e: InvalidPkgFmtError) -> Self {
Self::InvalidPkgFmt(Box::new(e))
}
}
#[async_trait::async_trait]
pub trait Fetcher: Send + Sync {
    /// Create a new fetcher from some data
    #[allow(clippy::new_ret_no_self)]
    fn new(
        client: Client,
        gh_api_client: GhApiClient,
        data: Arc<Data>,
        target_data: Arc<TargetDataErased>,
    ) -> Arc<dyn Fetcher>
    where
        Self: Sized;

    /// Fetch a package and extract
    async fn fetch_and_extract(&self, dst: &Path) -> Result<ExtractedFiles, FetchError>;

    /// Find the package, if it is available for download
    ///
    /// This may look for multiple remote targets, but must write (using some form of interior
    /// mutability) the best one to the implementing struct in some way so `fetch_and_extract` can
    /// proceed without additional work.
    ///
    /// Must return `true` if a package is available, `false` if none is, and reserve errors to
    /// fatal conditions only.
    fn find(self: Arc<Self>) -> JoinHandle<Result<bool, FetchError>>;

    /// Report to upstream that cargo-binstall tries to use this fetcher.
    /// Currently it is only overridden by `quickinstall::QuickInstall`;
    /// the default implementation is a no-op.
    fn report_to_upstream(self: Arc<Self>) {}

    /// Return the package format
    fn pkg_fmt(&self) -> PkgFmt;

    /// Return finalized target meta.
    fn target_meta(&self) -> PkgMeta;

    /// A short human-readable name or descriptor for the package source
    fn source_name(&self) -> CompactString;

    /// A short human-readable name, must contain only characters
    /// and numbers and it also must be unique.
    ///
    /// It is used to create a temporary dir where it is used for
    /// [`Fetcher::fetch_and_extract`].
    fn fetcher_name(&self) -> &'static str;

    /// Should return true if the remote is from a third-party source
    fn is_third_party(&self) -> bool;

    /// Return the target for this fetcher
    fn target(&self) -> &str;

    /// Borrow the target-specific data this fetcher was created with.
    fn target_data(&self) -> &Arc<TargetDataErased>;
}
/// Repository information derived from the crate's (redirect-resolved)
/// repository URL.
#[derive(Clone, Debug)]
struct RepoInfo {
    // Repository URL with any subcrate path already stripped.
    repo: Url,
    repository_host: RepositoryHost,
    // Set when the URL pointed into a workspace member, e.g.
    // ".../tree/main/crates/cli" => Some("cli").
    subcrate: Option<CompactString>,
}
/// Data required to fetch a package
#[derive(Clone, Debug)]
pub struct Data {
    name: CompactString,
    version: CompactString,
    repo: Option<String>,
    // Cache for `get_repo_info`; resolved at most once per `Data`.
    repo_info: OnceCell<Option<RepoInfo>>,
}
impl Data {
    pub fn new(name: CompactString, version: CompactString, repo: Option<String>) -> Self {
        Self {
            name,
            version,
            repo,
            repo_info: OnceCell::new(),
        }
    }

    /// Resolve (and cache) repository information: follows redirects on
    /// the repo URL, guesses the hosting service and extracts any
    /// subcrate path.  Returns `Ok(&None)` when no repository is set.
    #[instrument(level = "debug")]
    async fn get_repo_info(&self, client: &Client) -> Result<&Option<RepoInfo>, FetchError> {
        self.repo_info
            .get_or_try_init(move || {
                // Boxed to keep the outer future small.
                Box::pin(async move {
                    if let Some(repo) = self.repo.as_deref() {
                        let mut repo = client.get_redirected_final_url(Url::parse(repo)?).await?;
                        let repository_host = RepositoryHost::guess_git_hosting_services(&repo);

                        let repo_info = RepoInfo {
                            // Note: detect_subcrate may rewrite `repo` to
                            // strip the subcrate path.
                            subcrate: RepoInfo::detect_subcrate(&mut repo, repository_host),
                            repo,
                            repository_host,
                        };

                        debug!("Resolved repo_info = {repo_info:#?}");

                        Ok(Some(repo_info))
                    } else {
                        Ok(None)
                    }
                })
            })
            .await
    }
}
impl RepoInfo {
    /// If `repo` contains a subcrate, then extracts and returns it.
    /// It will also remove that subcrate path from `repo` to match
    /// `scheme:/{repo_owner}/{repo_name}`
    fn detect_subcrate(repo: &mut Url, repository_host: RepositoryHost) -> Option<CompactString> {
        match repository_host {
            // GitHub layout: /{owner}/{name}/tree/{branch}/...
            RepositoryHost::GitHub => Self::detect_subcrate_common(repo, &["tree"]),
            // GitLab layout: /{owner}/{name}/-/blob/{branch}/...
            RepositoryHost::GitLab => Self::detect_subcrate_common(repo, &["-", "blob"]),
            _ => None,
        }
    }

    /// Shared detection: expects the path to be
    /// `/{owner}/{name}/{seps...}/{branch}/[crates/]{subcrate}`.
    fn detect_subcrate_common(repo: &mut Url, seps: &[&str]) -> Option<CompactString> {
        let mut path_segments = repo.path_segments()?;

        let _repo_owner = path_segments.next()?;
        let _repo_name = path_segments.next()?;

        // Skip separators
        for sep in seps.iter().copied() {
            if path_segments.next()? != sep {
                return None;
            }
        }

        // Skip branch name
        let _branch_name = path_segments.next()?;

        let (subcrate, is_crate_present) = match path_segments.next()? {
            // subcrate url is of path /crates/$subcrate_name, e.g. wasm-bindgen-cli
            "crates" => (path_segments.next()?, true),
            // subcrate url is of path $subcrate_name, e.g. cargo-audit
            subcrate => (subcrate, false),
        };

        if path_segments.next().is_some() {
            // A subcrate url should not contain anything more.
            None
        } else {
            let subcrate = subcrate.into();

            // Pop subcrate path to match regular repo style:
            //
            // scheme:/{addr}/{repo_owner}/{repo_name}
            //
            // path_segments() succeeded, so path_segments_mut()
            // must also succeed.
            let mut paths = repo.path_segments_mut().unwrap();

            paths.pop(); // pop subcrate
            if is_crate_present {
                paths.pop(); // pop crate
            }
            paths.pop(); // pop branch name
            seps.iter().for_each(|_| {
                paths.pop();
            }); // pop separators

            Some(subcrate)
        }
    }
}
/// Target specific data required to fetch a package
#[derive(Clone, Debug)]
pub struct TargetData<T: leon::Values + ?Sized> {
    pub target: String,
    pub meta: PkgMeta,
    /// More target related info, it's recommended to provide the following keys:
    /// - target_family,
    /// - target_arch
    /// - target_libc
    /// - target_vendor
    pub target_related_info: T,
}

/// Type-erased [`TargetData`], so fetchers need not be generic over the
/// concrete `leon::Values` implementation.
pub type TargetDataErased = TargetData<dyn leon::Values + Send + Sync + 'static>;
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_detect_subcrate_github() {
        // cargo-audit: subcrate directly under the branch
        // (".../tree/{branch}/{subcrate}").
        let urls = [
            "https://github.com/RustSec/rustsec/tree/main/cargo-audit",
            "https://github.com/RustSec/rustsec/tree/master/cargo-audit",
        ];
        for url in urls {
            let mut repo = Url::parse(url).unwrap();
            let repository_host = RepositoryHost::guess_git_hosting_services(&repo);
            assert_eq!(repository_host, RepositoryHost::GitHub);

            let subcrate_prefix = RepoInfo::detect_subcrate(&mut repo, repository_host).unwrap();
            assert_eq!(subcrate_prefix, "cargo-audit");

            // detect_subcrate must also strip the subcrate path off `repo`.
            assert_eq!(
                repo,
                Url::parse("https://github.com/RustSec/rustsec").unwrap()
            );
        }

        // wasm-bindgen-cli: subcrate nested under "crates/"
        // (".../tree/{branch}/crates/{subcrate}").
        let urls = [
            "https://github.com/rustwasm/wasm-bindgen/tree/main/crates/cli",
            "https://github.com/rustwasm/wasm-bindgen/tree/master/crates/cli",
        ];
        for url in urls {
            let mut repo = Url::parse(url).unwrap();
            let repository_host = RepositoryHost::guess_git_hosting_services(&repo);
            assert_eq!(repository_host, RepositoryHost::GitHub);

            let subcrate_prefix = RepoInfo::detect_subcrate(&mut repo, repository_host).unwrap();
            assert_eq!(subcrate_prefix, "cli");

            assert_eq!(
                repo,
                Url::parse("https://github.com/rustwasm/wasm-bindgen").unwrap()
            );
        }
    }

    #[test]
    fn test_detect_subcrate_gitlab() {
        // GitLab uses "/-/blob/{branch}/{subcrate}" as separator path.
        let urls = [
            "https://gitlab.kitware.com/NobodyXu/hello/-/blob/main/cargo-binstall",
            "https://gitlab.kitware.com/NobodyXu/hello/-/blob/master/cargo-binstall",
        ];
        for url in urls {
            let mut repo = Url::parse(url).unwrap();
            let repository_host = RepositoryHost::guess_git_hosting_services(&repo);
            assert_eq!(repository_host, RepositoryHost::GitLab);

            let subcrate_prefix = RepoInfo::detect_subcrate(&mut repo, repository_host).unwrap();
            assert_eq!(subcrate_prefix, "cargo-binstall");

            assert_eq!(
                repo,
                Url::parse("https://gitlab.kitware.com/NobodyXu/hello").unwrap()
            );
        }
    }
}

View file

@ -0,0 +1,242 @@
use std::{path::Path, sync::Arc};
use binstalk_types::cargo_toml_binstall::{PkgFmt, PkgMeta};
use tokio::sync::OnceCell;
use url::Url;
use crate::{common::*, Data, FetchError, TargetDataErased};
/// Release-download base URL for pre-built quickinstall packages.
const BASE_URL: &str = "https://github.com/cargo-bins/cargo-quickinstall/releases/download";
/// Endpoint that records which packages users attempted to install.
const STATS_URL: &str = "https://warehouse-clerk-tmp.vercel.app/api/crate";

/// Plain-text, whitespace-separated list of supported target triples.
const QUICKINSTALL_SUPPORTED_TARGETS_URL: &str =
    "https://raw.githubusercontent.com/cargo-bins/cargo-quickinstall/main/supported-targets";
/// Returns `true` for the two "universal" macOS target names, which
/// quickinstall does not build for.
fn is_universal_macos(target: &str) -> bool {
    matches!(target, "universal-apple-darwin" | "universal2-apple-darwin")
}
/// Download (at most once per process) and cache the sorted, deduplicated
/// list of targets quickinstall provides pre-built packages for.
async fn get_quickinstall_supported_targets(
    client: &Client,
) -> Result<&'static [CompactString], FetchError> {
    // Process-wide cache; `const_new` allows use in a static.
    static SUPPORTED_TARGETS: OnceCell<Box<[CompactString]>> = OnceCell::const_new();

    SUPPORTED_TARGETS
        .get_or_try_init(|| async {
            let bytes = client
                .get(Url::parse(QUICKINSTALL_SUPPORTED_TARGETS_URL)?)
                .send(true)
                .await?
                .bytes()
                .await?;

            let mut v: Vec<CompactString> = String::from_utf8_lossy(&bytes)
                .split_whitespace()
                .map(CompactString::new)
                .collect();
            // Sorted + deduped so callers can use binary_search.
            v.sort_unstable();
            v.dedup();

            Ok(v.into())
        })
        .await
        .map(Box::as_ref)
}
/// Fetcher for pre-built packages published by cargo-quickinstall.
pub struct QuickInstall {
    client: Client,
    gh_api_client: GhApiClient,
    // Lazily determined: whether the target appears in quickinstall's
    // supported-targets list.
    is_supported_v: OnceCell<bool>,

    // "{crate}-{version}-{target}"; used in URLs and log messages.
    package: String,
    package_url: Url,
    stats_url: Url,

    target_data: Arc<TargetDataErased>,
}
impl QuickInstall {
    /// Whether quickinstall provides pre-built packages for this target;
    /// the answer is fetched once and cached in `is_supported_v`.
    async fn is_supported(&self) -> Result<bool, FetchError> {
        self.is_supported_v
            .get_or_try_init(|| async {
                Ok(get_quickinstall_supported_targets(&self.client)
                    .await?
                    // List is sorted + deduped, so binary_search is an
                    // exact membership test.
                    .binary_search(&CompactString::new(&self.target_data.target))
                    .is_ok())
            })
            .await
            .copied()
    }
}
#[async_trait::async_trait]
impl super::Fetcher for QuickInstall {
    fn new(
        client: Client,
        gh_api_client: GhApiClient,
        data: Arc<Data>,
        target_data: Arc<TargetDataErased>,
    ) -> Arc<dyn super::Fetcher> {
        let crate_name = &data.name;
        let version = &data.version;
        let target = &target_data.target;
        // Package naming scheme used by quickinstall releases.
        let package = format!("{crate_name}-{version}-{target}");

        Arc::new(Self {
            client,
            gh_api_client,
            is_supported_v: OnceCell::new(),

            package_url: Url::parse(&format!(
                "{BASE_URL}/{crate_name}-{version}/{package}.tar.gz",
            ))
            .expect("package_url is pre-generated and should never be invalid url"),
            stats_url: Url::parse(&format!("{STATS_URL}/{package}.tar.gz",))
                .expect("stats_url is pre-generated and should never be invalid url"),
            package,
            target_data,
        })
    }

    /// Check whether the pre-built tarball exists; short-circuits to
    /// `false` for targets quickinstall does not support.
    fn find(self: Arc<Self>) -> JoinHandle<Result<bool, FetchError>> {
        tokio::spawn(async move {
            if !self.is_supported().await? {
                return Ok(false);
            }

            does_url_exist(
                self.client.clone(),
                self.gh_api_client.clone(),
                &self.package_url,
            )
            .await
        })
    }

    /// Fire-and-forget install report; skipped in debug builds, for
    /// universal macOS targets, and for known-unsupported targets.
    fn report_to_upstream(self: Arc<Self>) {
        if cfg!(debug_assertions) {
            debug!("Not sending quickinstall report in debug mode");
        } else if is_universal_macos(&self.target_data.target) {
            debug!(
                r#"Not sending quickinstall report for universal-apple-darwin
and universal2-apple-darwin.
Quickinstall does not support these targets, it only supports targets supported
by rust officially."#,
            );
        } else if self.is_supported_v.get().copied() != Some(false) {
            // Supported, or support is not yet determined: report anyway.
            tokio::spawn(async move {
                if let Err(err) = self.report().await {
                    warn!(
                        "Failed to send quickinstall report for package {}: {err}",
                        self.package
                    )
                }
            });
        }
    }

    async fn fetch_and_extract(&self, dst: &Path) -> Result<ExtractedFiles, FetchError> {
        let url = &self.package_url;
        debug!("Downloading package from: '{url}'");
        Ok(Download::new(self.client.clone(), url.clone())
            .and_extract(self.pkg_fmt(), dst)
            .await?)
    }

    /// Quickinstall packages are always .tar.gz tarballs.
    fn pkg_fmt(&self) -> PkgFmt {
        PkgFmt::Tgz
    }

    /// Finalized metadata: forces the tarball pkg_fmt and the flat
    /// bin-dir layout quickinstall packages use.
    fn target_meta(&self) -> PkgMeta {
        let mut meta = self.target_data.meta.clone();
        meta.pkg_fmt = Some(self.pkg_fmt());
        meta.bin_dir = Some("{ bin }{ binary-ext }".to_string());
        meta
    }

    fn source_name(&self) -> CompactString {
        CompactString::from("QuickInstall")
    }

    fn fetcher_name(&self) -> &'static str {
        "QuickInstall"
    }

    /// Quickinstall builds are not produced by the crate authors.
    fn is_third_party(&self) -> bool {
        true
    }

    fn target(&self) -> &str {
        &self.target_data.target
    }

    fn target_data(&self) -> &Arc<TargetDataErased> {
        &self.target_data
    }
}
impl QuickInstall {
    /// Send an installation report (a HEAD request) to the quickinstall
    /// stats endpoint; a no-op for unsupported targets.
    pub async fn report(&self) -> Result<(), FetchError> {
        if self.is_supported().await? {
            let url = self.stats_url.clone();
            debug!("Sending installation report to quickinstall ({url})");

            self.client.request(Method::HEAD, url).send(true).await?;
        } else {
            debug!(
                "Not sending quickinstall report for {} since Quickinstall does not support these targets.",
                self.target_data.target
            );
        }

        Ok(())
    }
}
#[cfg(test)]
mod test {
    use super::{get_quickinstall_supported_targets, Client, CompactString};
    use std::num::NonZeroU16;

    /// Mark this as an async fn so that you won't accidentally use it in
    /// sync context.
    async fn create_client() -> Client {
        Client::new(
            concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")),
            None,
            NonZeroU16::new(10).unwrap(),
            1.try_into().unwrap(),
            [],
        )
        .unwrap()
    }

    /// NOTE: performs real network I/O against raw.githubusercontent.com.
    #[tokio::test]
    async fn test_get_quickinstall_supported_targets() {
        let supported_targets = get_quickinstall_supported_targets(&create_client().await)
            .await
            .unwrap();

        // Every well-known target must appear in the fetched list
        // (unwrap on binary_search panics on a miss).
        [
            "x86_64-pc-windows-msvc",
            "x86_64-apple-darwin",
            "aarch64-apple-darwin",
            "x86_64-unknown-linux-gnu",
            "x86_64-unknown-linux-musl",
            "aarch64-unknown-linux-gnu",
            "aarch64-unknown-linux-musl",
            "aarch64-pc-windows-msvc",
            "armv7-unknown-linux-musleabihf",
            "armv7-unknown-linux-gnueabihf",
        ]
        .into_iter()
        .for_each(|known_supported_target| {
            supported_targets
                .binary_search(&CompactString::new(known_supported_target))
                .unwrap();
        });
    }
}
}