Support subcrate in repo (#991)

Fixed #838

 - Add new key `subcrate` for rendering `pkg-url`
 - Add new release paths in GitHub, GitLab & SourceForge using key `subcrate` for auto-detection
 - Add subcrate detection for GitHub and GitLab
 - Add `debug!` when using gh api token in `GhApiClient::new`
 - Add subcrate testing to `e2e-tests/subcrate.sh`
 - Bump cargo-release to 0.24.9 in e2e-tests/live.sh
   to fix test failure on MacOS without libssl installed in `/usr/local/`.
 - Optimize GhCrateMeta: Detect subcrate and repo-host in `Data::get_repo_info`
    to cache the result and avoid duplicate work. This also makes the code
    more ergonomic by removing the need for some `unwrap()` calls, and
    more efficient since we don't need to clone the url just to modify it.
 - Add instrument to `Data::get_repo_info`
 - Fix `shellcheck` err in `e2e-tests/*.sh`

Signed-off-by: Jiahao XU <Jiahao_XU@outlook.com>
This commit is contained in:
Jiahao XU 2023-04-24 19:41:20 +10:00 committed by GitHub
parent 5e269193c0
commit 0261d12d9d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 234 additions and 40 deletions

View file

@ -7,7 +7,7 @@ use std::{
use compact_str::{CompactString, ToCompactString};
use tokio::sync::OnceCell;
use tracing::warn;
use tracing::{debug, warn};
use crate::remote;
@ -106,6 +106,7 @@ impl GhApiClient {
pub fn new(client: remote::Client, auth_token: Option<CompactString>) -> Self {
let auth_token = auth_token.and_then(|auth_token| {
if gh_prefixed(&auth_token) {
debug!("Using gh api token");
Some(auth_token)
} else {
warn!("Invalid auth_token, expected 'gh*_' or `github_*`, fallback to unauthorized mode");

View file

@ -4,6 +4,7 @@ use compact_str::CompactString;
pub use gh_crate_meta::*;
pub use quickinstall::*;
use tokio::sync::OnceCell;
use tracing::{debug, instrument};
use url::Url;
use crate::{
@ -18,6 +19,8 @@ use crate::{
pub(crate) mod gh_crate_meta;
pub(crate) mod quickinstall;
use gh_crate_meta::hosting::RepositoryHost;
#[async_trait::async_trait]
pub trait Fetcher: Send + Sync {
/// Create a new fetcher from some data
@ -71,13 +74,20 @@ pub trait Fetcher: Send + Sync {
fn target(&self) -> &str;
}
/// Repository information derived from `Data::repo`.
///
/// Built inside `Data::get_repo_info` and stored in the `Data::repo_info`
/// cell, so a successfully resolved value is reused by later calls.
#[derive(Clone, Debug)]
struct RepoInfo {
/// Repository URL after following redirects. If a subcrate was detected,
/// the separator, branch and subcrate path segments have been removed.
repo: Url,
/// Hosting service guessed from `repo` via
/// `RepositoryHost::guess_git_hosting_services`.
repository_host: RepositoryHost,
/// Subcrate path segment extracted by `RepoInfo::detect_subcrate`, or
/// `None` when the URL does not point at a subcrate.
subcrate: Option<String>,
}
/// Data required to fetch a package
#[derive(Clone, Debug)]
pub struct Data {
/// Name of the package (presumably the crate name; confirm against callers).
name: CompactString,
/// Version of the package.
version: CompactString,
/// Repository URL as an unparsed string, if one is known. It is parsed
/// with `Url::parse` when the repository is resolved.
repo: Option<String>,
/// Lazily initialised cache of the redirected repository URL, filled by
/// `resolve_final_repo_url`.
repo_final_url: OnceCell<Option<Url>>,
/// Lazily initialised cache of the resolved `RepoInfo`, filled by
/// `get_repo_info`. Holds `None` when `repo` is `None`.
repo_info: OnceCell<Option<RepoInfo>>,
}
impl Data {
@ -86,18 +96,28 @@ impl Data {
name,
version,
repo,
repo_final_url: OnceCell::new(),
repo_info: OnceCell::new(),
}
}
async fn resolve_final_repo_url(&self, client: &Client) -> Result<&Option<Url>, BinstallError> {
self.repo_final_url
#[instrument(level = "debug")]
async fn get_repo_info(&self, client: &Client) -> Result<&Option<RepoInfo>, BinstallError> {
self.repo_info
.get_or_try_init(move || {
Box::pin(async move {
if let Some(repo) = self.repo.as_deref() {
Ok(Some(
client.get_redirected_final_url(Url::parse(repo)?).await?,
))
let mut repo = client.get_redirected_final_url(Url::parse(repo)?).await?;
let repository_host = RepositoryHost::guess_git_hosting_services(&repo);
let repo_info = RepoInfo {
subcrate: RepoInfo::detect_subcrate(&mut repo, repository_host),
repo,
repository_host,
};
debug!("Resolved repo_info = {repo_info:#?}");
Ok(Some(repo_info))
} else {
Ok(None)
}
@ -107,9 +127,113 @@ impl Data {
}
}
impl RepoInfo {
    /// If `repo` contains a subcrate, then extracts and returns it.
    /// It will also remove that subcrate path from `repo` to match
    /// `scheme:/{repo_owner}/{repo_name}`
    ///
    /// Only GitHub and GitLab URLs are inspected; for any other host this
    /// returns `None` and leaves `repo` untouched.
    fn detect_subcrate(repo: &mut Url, repository_host: RepositoryHost) -> Option<String> {
        match repository_host {
            // .../{repo_owner}/{repo_name}/tree/{branch}/{subcrate}
            RepositoryHost::GitHub => Self::detect_subcrate_common(repo, &["tree"]),
            // .../{repo_owner}/{repo_name}/-/blob/{branch}/{subcrate}
            RepositoryHost::GitLab => Self::detect_subcrate_common(repo, &["-", "blob"]),
            _ => None,
        }
    }

    /// Shared implementation of [`RepoInfo::detect_subcrate`].
    ///
    /// Expects the path of `repo` to have exactly this shape:
    ///
    /// ```text
    /// /{repo_owner}/{repo_name}/{seps...}/{branch}/{subcrate}[/]
    /// ```
    ///
    /// where `seps` are the host specific separator segments that sit between
    /// the repository name and the branch name.
    ///
    /// On a match the subcrate segment is returned and everything after
    /// `{repo_name}` is removed from `repo`. On any mismatch `None` is
    /// returned and `repo` is left unmodified.
    ///
    /// A single trailing slash is ignored, so `.../{branch}/{subcrate}/` is
    /// treated exactly like `.../{branch}/{subcrate}`, and `.../{branch}/` is
    /// treated like `.../{branch}` (no subcrate) instead of reporting an
    /// empty subcrate name.
    fn detect_subcrate_common(repo: &mut Url, seps: &[&str]) -> Option<String> {
        let mut path_segments = repo.path_segments()?;

        let _repo_owner = path_segments.next()?;
        let _repo_name = path_segments.next()?;

        // Skip separators
        for sep in seps.iter().copied() {
            if path_segments.next()? != sep {
                return None;
            }
        }

        // Skip branch name
        let _branch_name = path_segments.next()?;

        let subcrate = path_segments.next()?;
        if subcrate.is_empty() {
            // The path ended with `{branch}/`: the empty segment is produced
            // by the trailing slash, it is not a subcrate.
            return None;
        }

        // A subcrate url should not contain anything more, except for the
        // single empty segment produced by a trailing slash.
        let has_trailing_slash = match path_segments.next() {
            None => false,
            Some("") => {
                if path_segments.next().is_some() {
                    return None;
                }
                true
            }
            Some(_) => return None,
        };

        // Copy the subcrate out so the shared borrow of `repo` ends before
        // the path is modified below.
        let subcrate = subcrate.to_string();

        // Pop subcrate path to match regular repo style:
        //
        // scheme:/{addr}/{repo_owner}/{repo_name}
        let mut paths = repo
            .path_segments_mut()
            .expect("path_segments() succeeded, so path_segments_mut() must also succeed");

        if has_trailing_slash {
            paths.pop(); // pop empty segment left by the trailing slash
        }
        paths.pop(); // pop subcrate
        paths.pop(); // pop branch name
        for _ in seps {
            paths.pop(); // pop separators
        }

        Some(subcrate)
    }
}
/// Target specific data required to fetch a package
#[derive(Clone, Debug)]
pub struct TargetData {
/// Target this data applies to (presumably a target triple; confirm against
/// callers). It is passed as the `target` when building the URL rendering
/// context.
pub target: String,
/// Package metadata for this target; the fetcher reads `pkg_fmt` from it.
pub meta: PkgMeta,
}
#[cfg(test)]
mod test {
    use super::*;

    /// Parses `url`, checks that it is attributed to `expected_host`, then
    /// runs subcrate detection and checks both the extracted subcrate and the
    /// repository URL that is left behind.
    fn assert_subcrate_detected(
        url: &str,
        expected_host: RepositoryHost,
        expected_subcrate: &str,
        expected_repo: &str,
    ) {
        let mut repo = Url::parse(url).unwrap();

        let repository_host = RepositoryHost::guess_git_hosting_services(&repo);
        assert_eq!(repository_host, expected_host);

        let subcrate_prefix = RepoInfo::detect_subcrate(&mut repo, repository_host).unwrap();
        assert_eq!(subcrate_prefix, expected_subcrate);

        assert_eq!(repo, Url::parse(expected_repo).unwrap());
    }

    /// GitHub subcrate URLs use the `tree/{branch}/{subcrate}` layout.
    #[test]
    fn test_detect_subcrate_github() {
        [
            "https://github.com/RustSec/rustsec/tree/main/cargo-audit",
            "https://github.com/RustSec/rustsec/tree/master/cargo-audit",
        ]
        .into_iter()
        .for_each(|url| {
            assert_subcrate_detected(
                url,
                RepositoryHost::GitHub,
                "cargo-audit",
                "https://github.com/RustSec/rustsec",
            )
        });
    }

    /// GitLab subcrate URLs use the `-/blob/{branch}/{subcrate}` layout.
    #[test]
    fn test_detect_subcrate_gitlab() {
        [
            "https://gitlab.kitware.com/NobodyXu/hello/-/blob/main/cargo-binstall",
            "https://gitlab.kitware.com/NobodyXu/hello/-/blob/master/cargo-binstall",
        ]
        .into_iter()
        .for_each(|url| {
            assert_subcrate_detected(
                url,
                RepositoryHost::GitLab,
                "cargo-binstall",
                "https://gitlab.kitware.com/NobodyXu/hello",
            )
        });
    }
}

View file

@ -20,10 +20,9 @@ use crate::{
manifests::cargo_toml_binstall::{PkgFmt, PkgMeta},
};
use super::{Data, TargetData};
use super::{Data, RepoInfo, TargetData};
pub(crate) mod hosting;
use hosting::RepositoryHost;
pub struct GhCrateMeta {
client: Client,
@ -40,9 +39,16 @@ impl GhCrateMeta {
pkg_fmt: PkgFmt,
pkg_url: &Template<'_>,
repo: Option<&str>,
subcrate: Option<&str>,
) {
let render_url = |ext| {
let ctx = Context::from_data_with_repo(&self.data, &self.target_data.target, ext, repo);
let ctx = Context::from_data_with_repo(
&self.data,
&self.target_data.target,
ext,
repo,
subcrate,
);
match ctx.render_url_with_compiled_tt(pkg_url) {
Ok(url) => Some(url),
Err(err) => {
@ -99,7 +105,10 @@ impl super::Fetcher for GhCrateMeta {
fn find(self: Arc<Self>) -> AutoAbortJoinHandle<Result<bool, BinstallError>> {
AutoAbortJoinHandle::spawn(async move {
let repo = self.data.resolve_final_repo_url(&self.client).await?;
let info = self.data.get_repo_info(&self.client).await?.as_ref();
let repo = info.map(|info| &info.repo);
let subcrate = info.and_then(|info| info.subcrate.as_deref());
let mut pkg_fmt = self.target_data.meta.pkg_fmt;
@ -143,11 +152,23 @@ impl super::Fetcher for GhCrateMeta {
}
Either::Left(iter::once(template))
} else if let Some(repo) = repo.as_ref() {
if let Some(pkg_urls) =
RepositoryHost::guess_git_hosting_services(repo)?.get_default_pkg_url_template()
{
Either::Right(pkg_urls.map(Template::cast))
} else if let Some(RepoInfo {
repo,
repository_host,
..
}) = info
{
if let Some(pkg_urls) = repository_host.get_default_pkg_url_template() {
let has_subcrate = subcrate.is_some();
Either::Right(
pkg_urls
.map(Template::cast)
// If subcrate is Some, then all templates will be included.
// Otherwise, only templates without key "subcrate" will be
// included.
.filter(move |template| has_subcrate || !template.has_key("subcrate")),
)
} else {
warn!(
concat!(
@ -172,7 +193,7 @@ impl super::Fetcher for GhCrateMeta {
};
// Convert Option<Url> to Option<String> to reduce size of future.
let repo = repo.as_ref().map(|u| u.as_str().trim_end_matches('/'));
let repo = repo.map(|u| u.as_str().trim_end_matches('/'));
// Use reference to self to fix error of closure
// launch_baseline_find_tasks which moves `this`
@ -193,7 +214,7 @@ impl super::Fetcher for GhCrateMeta {
// basically cartesian product.
// |
for pkg_fmt in pkg_fmts.clone() {
this.launch_baseline_find_tasks(&resolver, pkg_fmt, &pkg_url, repo);
this.launch_baseline_find_tasks(&resolver, pkg_fmt, &pkg_url, repo, subcrate);
}
}
@ -271,6 +292,9 @@ struct Context<'c> {
/// Filename extension on the binary, i.e. .exe on Windows, nothing otherwise
pub binary_ext: &'c str,
/// Workspace of the crate inside the repository.
pub subcrate: Option<&'c str>,
}
impl leon::Values for Context<'_> {
@ -290,6 +314,8 @@ impl leon::Values for Context<'_> {
"binary-ext" => Some(Cow::Borrowed(self.binary_ext)),
"subcrate" => self.subcrate.map(Cow::Borrowed),
_ => None,
}
}
@ -301,6 +327,7 @@ impl<'c> Context<'c> {
target: &'c str,
archive_suffix: Option<&'c str>,
repo: Option<&'c str>,
subcrate: Option<&'c str>,
) -> Self {
let archive_format = archive_suffix.map(|archive_suffix| {
if archive_suffix.is_empty() {
@ -325,12 +352,19 @@ impl<'c> Context<'c> {
} else {
""
},
subcrate,
}
}
#[cfg(test)]
pub(self) fn from_data(data: &'c Data, target: &'c str, archive_format: &'c str) -> Self {
Self::from_data_with_repo(data, target, Some(archive_format), data.repo.as_deref())
Self::from_data_with_repo(
data,
target,
Some(archive_format),
data.repo.as_deref(),
None,
)
}
/// * `tt` - must have added a template named "pkg_url".

View file

@ -3,9 +3,7 @@ use leon::{Item, Template};
use leon_macros::template;
use url::Url;
use crate::errors::BinstallError;
#[derive(Copy, Clone, Debug)]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum RepositoryHost {
GitHub,
GitLab,
@ -35,11 +33,17 @@ pub const NOVERSION_FILENAMES: &[Template<'_>] = &[
/// Release path templates for GitHub.
///
/// The first two entries only need `repo` and `version`; the last two also
/// need the `subcrate` key.
const GITHUB_RELEASE_PATHS: &[Template<'_>] = &[
template!("{ repo }/releases/download/{ version }"),
template!("{ repo }/releases/download/v{ version }"),
// %2F is escaped form of '/'
// so these render `{ subcrate }/{ version }` and `{ subcrate }/v{ version }`
// with the slash percent-encoded.
template!("{ repo }/releases/download/{ subcrate }%2F{ version }"),
template!("{ repo }/releases/download/{ subcrate }%2Fv{ version }"),
];
/// Release path templates for GitLab.
///
/// The first two entries only need `repo` and `version`; the last two also
/// need the `subcrate` key.
const GITLAB_RELEASE_PATHS: &[Template<'_>] = &[
template!("{ repo }/-/releases/{ version }/downloads/binaries"),
template!("{ repo }/-/releases/v{ version }/downloads/binaries"),
// %2F is escaped form of '/'
// so these render `{ subcrate }/{ version }` and `{ subcrate }/v{ version }`
// with the slash percent-encoded.
template!("{ repo }/-/releases/{ subcrate }%2F{ version }/downloads/binaries"),
template!("{ repo }/-/releases/{ subcrate }%2Fv{ version }/downloads/binaries"),
];
/// Release path template for BitBucket. It only uses the `repo` key, so it
/// contains neither the version nor a subcrate.
const BITBUCKET_RELEASE_PATHS: &[Template<'_>] = &[template!("{ repo }/downloads")];
@ -47,18 +51,21 @@ const BITBUCKET_RELEASE_PATHS: &[Template<'_>] = &[template!("{ repo }/downloads
/// Release path templates for SourceForge.
///
/// The first two entries only need `repo` and `version`; the last two also
/// need the `subcrate` key.
const SOURCEFORGE_RELEASE_PATHS: &[Template<'_>] = &[
template!("{ repo }/files/binaries/{ version }"),
template!("{ repo }/files/binaries/v{ version }"),
// %2F is escaped form of '/'
// so these render `{ subcrate }/{ version }` and `{ subcrate }/v{ version }`
// with the slash percent-encoded.
template!("{ repo }/files/binaries/{ subcrate }%2F{ version }"),
template!("{ repo }/files/binaries/{ subcrate }%2Fv{ version }"),
];
impl RepositoryHost {
pub fn guess_git_hosting_services(repo: &Url) -> Result<Self, BinstallError> {
pub fn guess_git_hosting_services(repo: &Url) -> Self {
use RepositoryHost::*;
match repo.domain() {
Some(domain) if domain.starts_with("github") => Ok(GitHub),
Some(domain) if domain.starts_with("gitlab") => Ok(GitLab),
Some(domain) if domain == "bitbucket.org" => Ok(BitBucket),
Some(domain) if domain == "sourceforge.net" => Ok(SourceForge),
_ => Ok(Unknown),
Some(domain) if domain.starts_with("github") => GitHub,
Some(domain) if domain.starts_with("gitlab") => GitLab,
Some(domain) if domain == "bitbucket.org" => BitBucket,
Some(domain) if domain == "sourceforge.net" => SourceForge,
_ => Unknown,
}
}