From 5acfda9379c43c9b8fda32f36fb8baed79ad5ef1 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 17 Jul 2023 17:46:12 +0200 Subject: [PATCH] avoid worktree checkouts (#1207) * perform a bare git cloen to avoid worktree checkouts This will be way faster on windows * feat: use the git repository directly to obtain crate information * Apply code review changes Signed-off-by: Jiahao XU * Impl `fmt::Display` for `helpers::git::GitUrl` and also refactor implementation of `git::Repository::{shallow_clone, shallow_clone_bare}`. Signed-off-by: Jiahao XU * Fix clippy lint warnings Signed-off-by: Jiahao XU * Fix typo Signed-off-by: Jiahao XU --------- Signed-off-by: Jiahao XU Co-authored-by: Jiahao XU --- .../src/drivers/registry/git_registry.rs | 58 ++++---- crates/binstalk/src/helpers/git.rs | 130 ++++++++++++++---- 2 files changed, 137 insertions(+), 51 deletions(-) diff --git a/crates/binstalk/src/drivers/registry/git_registry.rs b/crates/binstalk/src/drivers/registry/git_registry.rs index 3922bbd0..f551f5b8 100644 --- a/crates/binstalk/src/drivers/registry/git_registry.rs +++ b/crates/binstalk/src/drivers/registry/git_registry.rs @@ -1,9 +1,4 @@ -use std::{ - fs::File, - io::{self, BufReader, Read}, - path::PathBuf, - sync::Arc, -}; +use std::{io, path::PathBuf, sync::Arc}; use cargo_toml::Manifest; use compact_str::{CompactString, ToCompactString}; @@ -29,7 +24,8 @@ use crate::{ #[derive(Debug)] struct GitIndex { - path: TempDir, + _tempdir: TempDir, + repo: Repository, dl_template: CompactString, } @@ -37,15 +33,24 @@ impl GitIndex { fn new(url: GitUrl) -> Result { let tempdir = TempDir::new()?; - Repository::shallow_clone(url, tempdir.as_ref())?; + let repo = Repository::shallow_clone_bare(url.clone(), tempdir.as_ref())?; - let mut v = Vec::with_capacity(100); - File::open(tempdir.as_ref().join("config.json"))?.read_to_end(&mut v)?; + let config: RegistryConfig = { + let config = repo + .get_head_commit_entry_data_by_path("config.json")? + .ok_or_else(|| { + io::Error::new( + io::ErrorKind::NotFound, + format!("config.json not found in repository `{url}`"), + ) + })?; - let config: RegistryConfig = json_from_slice(&v).map_err(RegistryError::from)?; + json_from_slice(&config).map_err(RegistryError::from)? + }; Ok(Self { - path: tempdir, + _tempdir: tempdir, + repo, dl_template: config.dl, }) } @@ -70,27 +75,24 @@ impl GitRegistry { /// WARNING: This is a blocking operation. fn find_crate_matched_ver( - mut path: PathBuf, + repo: &Repository, crate_name: &str, (c1, c2): &(CompactString, Option), version_req: &VersionReq, ) -> Result { + let mut path = PathBuf::with_capacity(128); path.push(&**c1); if let Some(c2) = c2 { path.push(&**c2); } path.push(&*crate_name.to_lowercase()); - - let f = File::open(path) - .map_err(|e| match e.kind() { - io::ErrorKind::NotFound => RegistryError::NotFound(crate_name.into()).into(), - _ => BinstallError::from(e), - }) - .map(BufReader::new)?; + let crate_versions = repo + .get_head_commit_entry_data_by_path(path)? + .ok_or_else(|| RegistryError::NotFound(crate_name.into()))?; MatchedVersion::find( - &mut JsonDeserializer::from_reader(f).into_iter(), + &mut JsonDeserializer::from_slice(&crate_versions).into_iter(), version_req, ) } @@ -107,17 +109,17 @@ impl GitRegistry { let this = self.clone(); let (version, dl_url) = spawn_blocking(move || { - let GitIndex { path, dl_template } = this + let GitIndex { + _tempdir: _, + repo, + dl_template, + } = this .0 .git_index .get_or_try_init(|| GitIndex::new(this.0.url.clone()))?; - let MatchedVersion { version, cksum } = Self::find_crate_matched_ver( - path.as_ref().to_owned(), - &crate_name, - &crate_prefix, - &version_req, - )?; + let MatchedVersion { version, cksum } = + Self::find_crate_matched_ver(repo, &crate_name, &crate_prefix, &version_req)?; let url = Url::parse(&render_dl_template( dl_template, diff --git a/crates/binstalk/src/helpers/git.rs b/crates/binstalk/src/helpers/git.rs index f4ebb85b..b84519a4 100644 --- a/crates/binstalk/src/helpers/git.rs +++ b/crates/binstalk/src/helpers/git.rs @@ -1,4 +1,4 @@ -use std::{num::NonZeroU32, path::Path, str::FromStr, sync::atomic::AtomicBool}; +use std::{fmt, mem, num::NonZeroU32, path::Path, str::FromStr, sync::atomic::AtomicBool}; use compact_str::CompactString; use gix::{clone, create, open, remote, Url}; @@ -21,6 +21,15 @@ pub enum GitError { #[error("Failed to checkout: {0}")] CheckOutError(#[source] Box), + + #[error("HEAD ref was corrupt in crates-io index repository clone")] + HeadCommit(#[source] Box), + + #[error("tree of head commit wasn't present in crates-io index repository clone")] + GetTreeOfCommit(#[source] Box), + + #[error("An object was missing in the crates-io index repository clone")] + ObjectLookup(#[source] Box), } impl From for GitError { @@ -41,9 +50,36 @@ impl From for GitError { } } +impl From for GitError { + fn from(e: gix::reference::head_commit::Error) -> Self { + Self::HeadCommit(Box::new(e)) + } +} + +impl From for GitError { + fn from(e: gix::object::commit::Error) -> Self { + Self::GetTreeOfCommit(Box::new(e)) + } +} + +impl From for GitError { + fn from(e: gix::object::find::existing::Error) -> Self { + Self::ObjectLookup(Box::new(e)) + } +} + #[derive(Clone, Debug)] pub struct GitUrl(Url); +impl fmt::Display for GitUrl { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let url_bstr = self.0.to_bstring(); + let url_str = String::from_utf8_lossy(&url_bstr); + + f.write_str(&url_str) + } +} + impl FromStr for GitUrl { type Err = GitUrlParseError; @@ -53,39 +89,87 @@ impl FromStr for GitUrl { } #[derive(Debug)] -pub struct Repository(gix::Repository); +pub struct Repository(gix::ThreadSafeRepository); impl Repository { + fn prepare_fetch( + url: GitUrl, + path: &Path, + kind: create::Kind, + ) -> Result { + Ok(clone::PrepareFetch::new( + url.0, + path, + kind, + create::Options { + destination_must_be_empty: true, + ..Default::default() + }, + open::Options::isolated(), + )? + .with_shallow(remote::fetch::Shallow::DepthAtRemote( + NonZeroU32::new(1).unwrap(), + ))) + } + + /// WARNING: This is a blocking operation, if you want to use it in + /// async context then you must wrap the call in [`tokio::task::spawn_blocking`]. + /// + /// WARNING: This function must be called after tokio runtime is initialized. + pub fn shallow_clone_bare(url: GitUrl, path: &Path) -> Result { + debug!("Shallow cloning {url} to {}", path.display()); + + Ok(Self( + Self::prepare_fetch(url, path, create::Kind::Bare)? + .fetch_only( + &mut TracingProgress::new(CompactString::new("Cloning")), + &AtomicBool::new(false), + )? + .0 + .into(), + )) + } + /// WARNING: This is a blocking operation, if you want to use it in /// async context then you must wrap the call in [`tokio::task::spawn_blocking`]. /// /// WARNING: This function must be called after tokio runtime is initialized. pub fn shallow_clone(url: GitUrl, path: &Path) -> Result { - let url_bstr = url.0.to_bstring(); - let url_str = String::from_utf8_lossy(&url_bstr); - - debug!("Shallow cloning {url_str} to {}", path.display()); + debug!("Shallow cloning {url} to {} with worktree", path.display()); let mut progress = TracingProgress::new(CompactString::new("Cloning")); Ok(Self( - clone::PrepareFetch::new( - url.0, - path, - create::Kind::WithWorktree, - create::Options { - destination_must_be_empty: true, - ..Default::default() - }, - open::Options::isolated(), - )? - .with_shallow(remote::fetch::Shallow::DepthAtRemote( - NonZeroU32::new(1).unwrap(), - )) - .fetch_then_checkout(&mut progress, &AtomicBool::new(false))? - .0 - .main_worktree(&mut progress, &AtomicBool::new(false))? - .0, + Self::prepare_fetch(url, path, create::Kind::WithWorktree)? + .fetch_then_checkout(&mut progress, &AtomicBool::new(false))? + .0 + .main_worktree(&mut progress, &AtomicBool::new(false))? + .0 + .into(), )) } + + #[inline(always)] + pub fn get_head_commit_entry_data_by_path( + &self, + path: impl AsRef, + ) -> Result>, GitError> { + fn inner(this: &Repository, path: &Path) -> Result>, GitError> { + Ok( + if let Some(entry) = this + .0 + .to_thread_local() + .head_commit()? + .tree()? + .lookup_entry_by_path(path)? + { + Some(mem::take(&mut entry.object()?.data)) + } else { + None + }, + ) + } + + inner(self, path.as_ref()) + } }