avoid worktree checkouts (#1207)

* perform a bare git cloen to avoid worktree checkouts

This will be way faster on windows

* feat: use the git repository directly to obtain crate information

* Apply code review changes

Signed-off-by: Jiahao XU <Jiahao_XU@outlook.com>

* Impl `fmt::Display` for `helpers::git::GitUrl`

and also refactor implementation of `git::Repository::{shallow_clone,
shallow_clone_bare}`.

Signed-off-by: Jiahao XU <Jiahao_XU@outlook.com>

* Fix clippy lint warnings

Signed-off-by: Jiahao XU <Jiahao_XU@outlook.com>

* Fix typo

Signed-off-by: Jiahao XU <Jiahao_XU@outlook.com>

---------

Signed-off-by: Jiahao XU <Jiahao_XU@outlook.com>
Co-authored-by: Jiahao XU <Jiahao_XU@outlook.com>
This commit is contained in:
Sebastian Thiel 2023-07-17 17:46:12 +02:00 committed by GitHub
parent b08849b7ce
commit 5acfda9379
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 137 additions and 51 deletions

View file

@ -1,9 +1,4 @@
use std::{
fs::File,
io::{self, BufReader, Read},
path::PathBuf,
sync::Arc,
};
use std::{io, path::PathBuf, sync::Arc};
use cargo_toml::Manifest;
use compact_str::{CompactString, ToCompactString};
@ -29,7 +24,8 @@ use crate::{
#[derive(Debug)]
struct GitIndex {
path: TempDir,
_tempdir: TempDir,
repo: Repository,
dl_template: CompactString,
}
@ -37,15 +33,24 @@ impl GitIndex {
fn new(url: GitUrl) -> Result<Self, BinstallError> {
let tempdir = TempDir::new()?;
Repository::shallow_clone(url, tempdir.as_ref())?;
let repo = Repository::shallow_clone_bare(url.clone(), tempdir.as_ref())?;
let mut v = Vec::with_capacity(100);
File::open(tempdir.as_ref().join("config.json"))?.read_to_end(&mut v)?;
let config: RegistryConfig = {
let config = repo
.get_head_commit_entry_data_by_path("config.json")?
.ok_or_else(|| {
io::Error::new(
io::ErrorKind::NotFound,
format!("config.json not found in repository `{url}`"),
)
})?;
let config: RegistryConfig = json_from_slice(&v).map_err(RegistryError::from)?;
json_from_slice(&config).map_err(RegistryError::from)?
};
Ok(Self {
path: tempdir,
_tempdir: tempdir,
repo,
dl_template: config.dl,
})
}
@ -70,27 +75,24 @@ impl GitRegistry {
/// WARNING: This is a blocking operation.
fn find_crate_matched_ver(
mut path: PathBuf,
repo: &Repository,
crate_name: &str,
(c1, c2): &(CompactString, Option<CompactString>),
version_req: &VersionReq,
) -> Result<MatchedVersion, BinstallError> {
let mut path = PathBuf::with_capacity(128);
path.push(&**c1);
if let Some(c2) = c2 {
path.push(&**c2);
}
path.push(&*crate_name.to_lowercase());
let f = File::open(path)
.map_err(|e| match e.kind() {
io::ErrorKind::NotFound => RegistryError::NotFound(crate_name.into()).into(),
_ => BinstallError::from(e),
})
.map(BufReader::new)?;
let crate_versions = repo
.get_head_commit_entry_data_by_path(path)?
.ok_or_else(|| RegistryError::NotFound(crate_name.into()))?;
MatchedVersion::find(
&mut JsonDeserializer::from_reader(f).into_iter(),
&mut JsonDeserializer::from_slice(&crate_versions).into_iter(),
version_req,
)
}
@ -107,17 +109,17 @@ impl GitRegistry {
let this = self.clone();
let (version, dl_url) = spawn_blocking(move || {
let GitIndex { path, dl_template } = this
let GitIndex {
_tempdir: _,
repo,
dl_template,
} = this
.0
.git_index
.get_or_try_init(|| GitIndex::new(this.0.url.clone()))?;
let MatchedVersion { version, cksum } = Self::find_crate_matched_ver(
path.as_ref().to_owned(),
&crate_name,
&crate_prefix,
&version_req,
)?;
let MatchedVersion { version, cksum } =
Self::find_crate_matched_ver(repo, &crate_name, &crate_prefix, &version_req)?;
let url = Url::parse(&render_dl_template(
dl_template,

View file

@ -1,4 +1,4 @@
use std::{num::NonZeroU32, path::Path, str::FromStr, sync::atomic::AtomicBool};
use std::{fmt, mem, num::NonZeroU32, path::Path, str::FromStr, sync::atomic::AtomicBool};
use compact_str::CompactString;
use gix::{clone, create, open, remote, Url};
@ -21,6 +21,15 @@ pub enum GitError {
#[error("Failed to checkout: {0}")]
CheckOutError(#[source] Box<clone::checkout::main_worktree::Error>),
#[error("HEAD ref was corrupt in crates-io index repository clone")]
HeadCommit(#[source] Box<gix::reference::head_commit::Error>),
#[error("tree of head commit wasn't present in crates-io index repository clone")]
GetTreeOfCommit(#[source] Box<gix::object::commit::Error>),
#[error("An object was missing in the crates-io index repository clone")]
ObjectLookup(#[source] Box<gix::object::find::existing::Error>),
}
impl From<clone::Error> for GitError {
@ -41,9 +50,36 @@ impl From<clone::checkout::main_worktree::Error> for GitError {
}
}
impl From<gix::reference::head_commit::Error> for GitError {
fn from(e: gix::reference::head_commit::Error) -> Self {
Self::HeadCommit(Box::new(e))
}
}
impl From<gix::object::commit::Error> for GitError {
fn from(e: gix::object::commit::Error) -> Self {
Self::GetTreeOfCommit(Box::new(e))
}
}
impl From<gix::object::find::existing::Error> for GitError {
fn from(e: gix::object::find::existing::Error) -> Self {
Self::ObjectLookup(Box::new(e))
}
}
#[derive(Clone, Debug)]
pub struct GitUrl(Url);
impl fmt::Display for GitUrl {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let url_bstr = self.0.to_bstring();
let url_str = String::from_utf8_lossy(&url_bstr);
f.write_str(&url_str)
}
}
impl FromStr for GitUrl {
type Err = GitUrlParseError;
@ -53,39 +89,87 @@ impl FromStr for GitUrl {
}
#[derive(Debug)]
pub struct Repository(gix::Repository);
pub struct Repository(gix::ThreadSafeRepository);
impl Repository {
fn prepare_fetch(
url: GitUrl,
path: &Path,
kind: create::Kind,
) -> Result<clone::PrepareFetch, GitError> {
Ok(clone::PrepareFetch::new(
url.0,
path,
kind,
create::Options {
destination_must_be_empty: true,
..Default::default()
},
open::Options::isolated(),
)?
.with_shallow(remote::fetch::Shallow::DepthAtRemote(
NonZeroU32::new(1).unwrap(),
)))
}
/// WARNING: This is a blocking operation, if you want to use it in
/// async context then you must wrap the call in [`tokio::task::spawn_blocking`].
///
/// WARNING: This function must be called after tokio runtime is initialized.
pub fn shallow_clone_bare(url: GitUrl, path: &Path) -> Result<Self, GitError> {
debug!("Shallow cloning {url} to {}", path.display());
Ok(Self(
Self::prepare_fetch(url, path, create::Kind::Bare)?
.fetch_only(
&mut TracingProgress::new(CompactString::new("Cloning")),
&AtomicBool::new(false),
)?
.0
.into(),
))
}
/// WARNING: This is a blocking operation, if you want to use it in
/// async context then you must wrap the call in [`tokio::task::spawn_blocking`].
///
/// WARNING: This function must be called after tokio runtime is initialized.
pub fn shallow_clone(url: GitUrl, path: &Path) -> Result<Self, GitError> {
let url_bstr = url.0.to_bstring();
let url_str = String::from_utf8_lossy(&url_bstr);
debug!("Shallow cloning {url_str} to {}", path.display());
debug!("Shallow cloning {url} to {} with worktree", path.display());
let mut progress = TracingProgress::new(CompactString::new("Cloning"));
Ok(Self(
clone::PrepareFetch::new(
url.0,
path,
create::Kind::WithWorktree,
create::Options {
destination_must_be_empty: true,
..Default::default()
},
open::Options::isolated(),
)?
.with_shallow(remote::fetch::Shallow::DepthAtRemote(
NonZeroU32::new(1).unwrap(),
))
.fetch_then_checkout(&mut progress, &AtomicBool::new(false))?
.0
.main_worktree(&mut progress, &AtomicBool::new(false))?
.0,
Self::prepare_fetch(url, path, create::Kind::WithWorktree)?
.fetch_then_checkout(&mut progress, &AtomicBool::new(false))?
.0
.main_worktree(&mut progress, &AtomicBool::new(false))?
.0
.into(),
))
}
#[inline(always)]
pub fn get_head_commit_entry_data_by_path(
&self,
path: impl AsRef<Path>,
) -> Result<Option<Vec<u8>>, GitError> {
fn inner(this: &Repository, path: &Path) -> Result<Option<Vec<u8>>, GitError> {
Ok(
if let Some(entry) = this
.0
.to_thread_local()
.head_commit()?
.tree()?
.lookup_entry_by_path(path)?
{
Some(mem::take(&mut entry.object()?.data))
} else {
None
},
)
}
inner(self, path.as_ref())
}
}