From bd686134484069d9b1dedaa997217f3e032746bc Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 11 Jun 2022 18:40:21 +1000 Subject: [PATCH 01/65] Refactor and add new enum `PkgFmtDecomposed` Signed-off-by: Jiahao XU --- src/fmt.rs | 74 ++++++++++++++++++++++++++++++++++ src/helpers/async_extracter.rs | 2 +- src/lib.rs | 31 ++------------ 3 files changed, 78 insertions(+), 29 deletions(-) create mode 100644 src/fmt.rs diff --git a/src/fmt.rs b/src/fmt.rs new file mode 100644 index 00000000..34a9dc1e --- /dev/null +++ b/src/fmt.rs @@ -0,0 +1,74 @@ +use serde::{Deserialize, Serialize}; +use strum_macros::{Display, EnumString, EnumVariantNames}; + +/// Binary format enumeration +#[derive( + Debug, Copy, Clone, PartialEq, Serialize, Deserialize, Display, EnumString, EnumVariantNames, +)] +#[strum(serialize_all = "snake_case")] +#[serde(rename_all = "snake_case")] +pub enum PkgFmt { + /// Download format is TAR (uncompressed) + Tar, + /// Download format is TGZ (TAR + GZip) + Tgz, + /// Download format is TAR + XZ + Txz, + /// Download format is TAR + Zstd + Tzstd, + /// Download format is Zip + Zip, + /// Download format is raw / binary + Bin, +} + +impl Default for PkgFmt { + fn default() -> Self { + Self::Tgz + } +} + +impl PkgFmt { + /// If self is one of the tar based formats, + /// return Some. + pub fn decompose(self) -> PkgFmtDecomposed { + match self { + PkgFmt::Tar => PkgFmtDecomposed::Tar(TarBasedFmt::Tar), + PkgFmt::Tgz => PkgFmtDecomposed::Tar(TarBasedFmt::Tgz), + PkgFmt::Txz => PkgFmtDecomposed::Tar(TarBasedFmt::Txz), + PkgFmt::Tzstd => PkgFmtDecomposed::Tar(TarBasedFmt::Tzstd), + PkgFmt::Bin => PkgFmtDecomposed::Bin, + PkgFmt::Zip => PkgFmtDecomposed::Zip, + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum PkgFmtDecomposed { + Tar(TarBasedFmt), + Bin, + Zip, +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum TarBasedFmt { + /// Download format is TAR (uncompressed) + Tar, + /// Download format is TGZ (TAR + GZip) + Tgz, + /// Download format is TAR + XZ + Txz, + /// Download format is TAR + Zstd + Tzstd, +} + +impl From for PkgFmt { + fn from(fmt: TarBasedFmt) -> Self { + match fmt { + TarBasedFmt::Tar => PkgFmt::Tar, + TarBasedFmt::Tgz => PkgFmt::Tgz, + TarBasedFmt::Txz => PkgFmt::Txz, + TarBasedFmt::Tzstd => PkgFmt::Tzstd, + } + } +} diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 7f858a1b..6286eddd 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -12,7 +12,7 @@ use tokio::{ }; use super::{extracter::*, readable_rx::*}; -use crate::{BinstallError, PkgFmt}; +use crate::{BinstallError, PkgFmt, TarBasedFmt}; pub(crate) enum Content { /// Data to write to file diff --git a/src/lib.rs b/src/lib.rs index 0ad83f42..ca2b019a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,6 @@ use std::collections::HashMap; use serde::{Deserialize, Serialize}; -use strum_macros::{Display, EnumString, EnumVariantNames}; pub mod drivers; pub use drivers::*; @@ -18,6 +17,9 @@ pub mod fetchers; mod target; pub use target::*; +mod fmt; +pub use fmt::*; + /// Default package path template (may be overridden in package Cargo.toml) pub const DEFAULT_PKG_URL: &str = "{ repo }/releases/download/v{ version }/{ name }-{ target }-v{ version }.{ archive-format }"; @@ -25,33 +27,6 @@ pub const DEFAULT_PKG_URL: &str = /// Default binary name template (may be overridden in package Cargo.toml) pub const DEFAULT_BIN_DIR: &str = "{ name }-{ target }-v{ version }/{ bin }{ binary-ext }"; -/// Binary format enumeration -#[derive( - Debug, Copy, Clone, PartialEq, Serialize, Deserialize, Display, EnumString, EnumVariantNames, -)] -#[strum(serialize_all = "snake_case")] -#[serde(rename_all = "snake_case")] -pub enum PkgFmt { - /// Download format is TAR (uncompressed) - Tar, - /// Download format is TGZ (TAR + GZip) - Tgz, - /// Download format is TAR + XZ - Txz, - /// Download format is TAR + Zstd - Tzstd, - /// Download format is Zip - Zip, - /// Download format is raw / binary - Bin, -} - -impl Default for PkgFmt { - fn default() -> Self { - Self::Tgz - } -} - /// `binstall` metadata container /// /// Required to nest metadata under `package.metadata.binstall` From cbd57a1bcea75016f12b44f13eb4cef7a38cde68 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 11 Jun 2022 19:28:22 +1000 Subject: [PATCH 02/65] Refactor `async_extracter`: Create multi extracters dedicated to different tasks Signed-off-by: Jiahao XU --- src/helpers.rs | 59 ++++++++++++++++++++-------- src/helpers/async_extracter.rs | 70 ++++++++++++++++++++++++++++------ 2 files changed, 101 insertions(+), 28 deletions(-) diff --git a/src/helpers.rs b/src/helpers.rs index 5d6d350a..55089076 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -1,18 +1,16 @@ -use std::{ - path::{Path, PathBuf}, -}; +use std::path::{Path, PathBuf}; use cargo_toml::Manifest; use log::debug; -use reqwest::Method; +use reqwest::{Method, Response}; use serde::Serialize; use tinytemplate::TinyTemplate; use url::Url; -use crate::{BinstallError, Meta, PkgFmt}; +use crate::{BinstallError, Meta, PkgFmt, PkgFmtDecomposed}; mod async_extracter; -pub use async_extracter::extract_archive_stream; +pub use async_extracter::*; mod auto_abort_join_handle; pub use auto_abort_join_handle::AutoAbortJoinHandle; @@ -45,13 +43,41 @@ pub async fn remote_exists(url: Url, method: Method) -> Result Result { + reqwest::get(url.clone()) + .await + .and_then(|r| r.error_for_status()) + .map_err(|err| BinstallError::Http { + method: Method::GET, + url, + err, + }) +} + /// Download a file from the provided URL and extract it to the provided path. pub async fn download_and_extract>( url: Url, fmt: PkgFmt, path: P, ) -> Result<(), BinstallError> { - download_and_extract_with_filter:: bool, _>(url, fmt, path.as_ref(), None).await + debug!("Downloading from: '{url}'"); + + let resp = create_request(url).await?; + + let path = path.as_ref(); + debug!("Downloading to file: '{}'", path.display()); + + let stream = resp.bytes_stream(); + + match fmt.decompose() { + PkgFmtDecomposed::Tar(fmt) => extract_tar_based_stream(stream, path, fmt).await?, + PkgFmtDecomposed::Bin => extract_bin(stream, path).await?, + PkgFmtDecomposed::Zip => extract_zip(stream, path).await?, + } + + debug!("Download OK, written to file: '{}'", path.display()); + + Ok(()) } /// Download a file from the provided URL and extract part of it to @@ -72,19 +98,20 @@ pub async fn download_and_extract_with_filter< ) -> Result<(), BinstallError> { debug!("Downloading from: '{url}'"); - let resp = reqwest::get(url.clone()) - .await - .and_then(|r| r.error_for_status()) - .map_err(|err| BinstallError::Http { - method: Method::GET, - url, - err, - })?; + let resp = create_request(url).await?; let path = path.as_ref(); debug!("Downloading to file: '{}'", path.display()); - extract_archive_stream(resp.bytes_stream(), path, fmt, filter).await?; + let stream = resp.bytes_stream(); + + match fmt.decompose() { + PkgFmtDecomposed::Tar(fmt) => { + extract_tar_based_stream_with_filter(stream, path, fmt, filter).await? + } + PkgFmtDecomposed::Bin => extract_bin(stream, path).await?, + PkgFmtDecomposed::Zip => extract_zip(stream, path).await?, + } debug!("Download OK, written to file: '{}'", path.display()); diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 6286eddd..7d0b4542 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -199,24 +199,70 @@ impl AsyncExtracter { } } -/// * `output` - If `fmt` is `PkgFmt::Bin`, then this is the filename -/// for the bin. -/// Otherwise, it is the directory where the extracted content will be put. -/// * `fmt` - The format of the archive to feed in. -/// * `filter` - If Some, then it will pass the path of the file to it -/// and only extract ones which filter returns `true`. -/// Note that this is a best-effort and it only works when `fmt` -/// is not `PkgFmt::Bin` or `PkgFmt::Zip`. -pub async fn extract_archive_stream bool + Send + 'static, E>( +pub async fn extract_bin( mut stream: impl Stream> + Unpin, output: &Path, - fmt: PkgFmt, - filter: Option, ) -> Result<(), BinstallError> where BinstallError: From, { - let mut extracter = AsyncExtracter::new(output, fmt, filter); + let mut extracter = AsyncExtracter::new:: bool>(output, PkgFmt::Bin, None); + + while let Some(res) = stream.next().await { + extracter.feed(res?).await?; + } + + extracter.done().await +} + +pub async fn extract_zip( + mut stream: impl Stream> + Unpin, + output: &Path, +) -> Result<(), BinstallError> +where + BinstallError: From, +{ + let mut extracter = AsyncExtracter::new:: bool>(output, PkgFmt::Zip, None); + + while let Some(res) = stream.next().await { + extracter.feed(res?).await?; + } + + extracter.done().await +} + +/// * `filter` - If Some, then it will pass the path of the file to it +/// and only extract ones which filter returns `true`. +pub async fn extract_tar_based_stream_with_filter< + Filter: FnMut(&Path) -> bool + Send + 'static, + E, +>( + mut stream: impl Stream> + Unpin, + output: &Path, + fmt: TarBasedFmt, + filter: Option, +) -> Result<(), BinstallError> +where + BinstallError: From, +{ + let mut extracter = AsyncExtracter::new(output, fmt.into(), filter); + + while let Some(res) = stream.next().await { + extracter.feed(res?).await?; + } + + extracter.done().await +} + +pub async fn extract_tar_based_stream( + mut stream: impl Stream> + Unpin, + output: &Path, + fmt: TarBasedFmt, +) -> Result<(), BinstallError> +where + BinstallError: From, +{ + let mut extracter = AsyncExtracter::new:: bool>(output, fmt.into(), None); while let Some(res) = stream.next().await { extracter.feed(res?).await?; From b1b79921b2818930ee4088ff87e0fc3cf3db828e Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 11 Jun 2022 19:30:06 +1000 Subject: [PATCH 03/65] Simplify `download_and_extract_with_filter`: Take `TarBasedFmt` instead of `PkgFmt` Signed-off-by: Jiahao XU --- src/drivers.rs | 4 ++-- src/helpers.rs | 12 +++--------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/drivers.rs b/src/drivers.rs index b16513d9..db520d4e 100644 --- a/src/drivers.rs +++ b/src/drivers.rs @@ -7,7 +7,7 @@ use log::debug; use semver::{Version, VersionReq}; use url::Url; -use crate::{helpers::*, BinstallError, PkgFmt}; +use crate::{helpers::*, BinstallError, TarBasedFmt}; fn find_version<'a, V: Iterator>( requirement: &str, @@ -112,7 +112,7 @@ pub async fn fetch_crate_cratesio( download_and_extract_with_filter( Url::parse(&crate_url)?, - PkgFmt::Tgz, + TarBasedFmt::Tgz, &temp_dir, Some(move |path: &Path| path == cargo_toml || path == main || path.starts_with(&bin)), ) diff --git a/src/helpers.rs b/src/helpers.rs index 55089076..210cb00c 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -7,7 +7,7 @@ use serde::Serialize; use tinytemplate::TinyTemplate; use url::Url; -use crate::{BinstallError, Meta, PkgFmt, PkgFmtDecomposed}; +use crate::{BinstallError, Meta, PkgFmt, PkgFmtDecomposed, TarBasedFmt}; mod async_extracter; pub use async_extracter::*; @@ -92,7 +92,7 @@ pub async fn download_and_extract_with_filter< P: AsRef, >( url: Url, - fmt: PkgFmt, + fmt: TarBasedFmt, path: P, filter: Option, ) -> Result<(), BinstallError> { @@ -105,13 +105,7 @@ pub async fn download_and_extract_with_filter< let stream = resp.bytes_stream(); - match fmt.decompose() { - PkgFmtDecomposed::Tar(fmt) => { - extract_tar_based_stream_with_filter(stream, path, fmt, filter).await? - } - PkgFmtDecomposed::Bin => extract_bin(stream, path).await?, - PkgFmtDecomposed::Zip => extract_zip(stream, path).await?, - } + extract_tar_based_stream_with_filter(stream, path, fmt, filter).await?; debug!("Download OK, written to file: '{}'", path.display()); From d1033758a774d3a820eb6b1fba6270b5417cc0b0 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 11 Jun 2022 19:30:31 +1000 Subject: [PATCH 04/65] Update doc of `download_and_extract_with_filter` Signed-off-by: Jiahao XU --- src/helpers.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/helpers.rs b/src/helpers.rs index 210cb00c..12cf35c7 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -85,8 +85,6 @@ pub async fn download_and_extract>( /// /// * `filter` - If Some, then it will pass the path of the file to it /// and only extract ones which filter returns `true`. -/// Note that this is a best-effort and it only works when `fmt` -/// is not `PkgFmt::Bin` or `PkgFmt::Zip`. pub async fn download_and_extract_with_filter< Filter: FnMut(&Path) -> bool + Send + 'static, P: AsRef, From 7b52eaad5b3be1b143fe686bf4f3190fed73183c Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 11 Jun 2022 20:10:46 +1000 Subject: [PATCH 05/65] Rewrite `AsyncExtracter`: Extract fmt logic as callback fn Signed-off-by: Jiahao XU --- src/helpers/async_extracter.rs | 232 ++++++++++++++++++--------------- src/helpers/extracter.rs | 4 +- 2 files changed, 128 insertions(+), 108 deletions(-) diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 7d0b4542..26274d65 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -1,3 +1,4 @@ +use std::fmt::Debug; use std::fs; use std::io::{self, Seek, Write}; use std::path::Path; @@ -22,28 +23,47 @@ pub(crate) enum Content { Abort, } +/// AsyncExtracter will pass the `Bytes` you give to another thread via +/// a `mpsc` and decompress and unpack it if needed. +/// +/// After all write is done, you must call `AsyncExtracter::done`, +/// otherwise the extracted content will be removed on drop. +/// +/// # Advantages +/// +/// `download_and_extract` has the following advantages over downloading +/// plus extracting in on the same thread: +/// +/// - The code is pipelined instead of storing the downloaded file in memory +/// and extract it, except for `PkgFmt::Zip`, since `ZipArchiver::new` +/// requires `std::io::Seek`, so it fallbacks to writing the a file then +/// unzip it. +/// - The async part (downloading) and the extracting part runs in parallel +/// using `tokio::spawn_nonblocking`. +/// - Compressing/writing which takes a lot of CPU time will not block +/// the runtime anymore. +/// - For any PkgFmt except for `PkgFmt::Zip` and `PkgFmt::Bin` (basically +/// all `tar` based formats), it can extract only specified files. +/// This means that `super::drivers::fetch_crate_cratesio` no longer need to +/// extract the whole crate and write them to disk, it now only extract the +/// relevant part (`Cargo.toml`) out to disk and open it. #[derive(Debug)] -struct AsyncExtracterInner { +struct AsyncExtracterInner { /// Use AutoAbortJoinHandle so that the task /// will be cancelled on failure. - handle: JoinHandle>, + handle: JoinHandle>, tx: mpsc::Sender, } -impl AsyncExtracterInner { - /// * `filter` - If Some, then it will pass the path of the file to it - /// and only extract ones which filter returns `true`. - /// Note that this is a best-effort and it only works when `fmt` - /// is not `PkgFmt::Bin` or `PkgFmt::Zip`. - fn new bool + Send + 'static>( - path: &Path, - fmt: PkgFmt, - filter: Option, +impl AsyncExtracterInner { + fn new) -> Result + Send + 'static>( + f: F, ) -> Self { - let path = path.to_owned(); - let (tx, mut rx) = mpsc::channel::(100); + let (tx, rx) = mpsc::channel::(100); let handle = spawn_blocking(move || { + f(rx) + /* fs::create_dir_all(path.parent().unwrap())?; match fmt { @@ -78,29 +98,12 @@ impl AsyncExtracterInner { } Ok(()) + */ }); Self { handle, tx } } - fn read_into_file( - file: &mut fs::File, - rx: &mut mpsc::Receiver, - ) -> Result<(), BinstallError> { - while let Some(content) = rx.blocking_recv() { - match content { - Content::Data(bytes) => file.write_all(&*bytes)?, - Content::Abort => { - return Err(io::Error::new(io::ErrorKind::Other, "Aborted").into()) - } - } - } - - file.flush()?; - - Ok(()) - } - /// Upon error, this extracter shall not be reused. /// Otherwise, `Self::done` would panic. async fn feed(&mut self, bytes: Bytes) -> Result<(), BinstallError> { @@ -114,7 +117,7 @@ impl AsyncExtracterInner { } } - async fn done(mut self) -> Result<(), BinstallError> { + async fn done(mut self) -> Result { // Drop tx as soon as possible so that the task would wrap up what it // was doing and flush out all the pending data. drop(self.tx); @@ -122,7 +125,7 @@ impl AsyncExtracterInner { Self::wait(&mut self.handle).await } - async fn wait(handle: &mut JoinHandle>) -> Result<(), BinstallError> { + async fn wait(handle: &mut JoinHandle>) -> Result { match handle.await { Ok(res) => res, Err(join_err) => Err(io::Error::new(io::ErrorKind::Other, join_err).into()), @@ -143,92 +146,98 @@ impl AsyncExtracterInner { } } -/// AsyncExtracter will pass the `Bytes` you give to another thread via -/// a `mpsc` and decompress and unpack it if needed. -/// -/// After all write is done, you must call `AsyncExtracter::done`, -/// otherwise the extracted content will be removed on drop. -/// -/// # Advantages -/// -/// `download_and_extract` has the following advantages over downloading -/// plus extracting in on the same thread: -/// -/// - The code is pipelined instead of storing the downloaded file in memory -/// and extract it, except for `PkgFmt::Zip`, since `ZipArchiver::new` -/// requires `std::io::Seek`, so it fallbacks to writing the a file then -/// unzip it. -/// - The async part (downloading) and the extracting part runs in parallel -/// using `tokio::spawn_nonblocking`. -/// - Compressing/writing which takes a lot of CPU time will not block -/// the runtime anymore. -/// - For any PkgFmt except for `PkgFmt::Zip` and `PkgFmt::Bin` (basically -/// all `tar` based formats), it can extract only specified files. -/// This means that `super::drivers::fetch_crate_cratesio` no longer need to -/// extract the whole crate and write them to disk, it now only extract the -/// relevant part (`Cargo.toml`) out to disk and open it. -#[derive(Debug)] -struct AsyncExtracter(ScopeGuard); +async fn extract_impl< + F: FnOnce(mpsc::Receiver) -> Result + Send + 'static, + T: Debug + Send + 'static, + S: Stream> + Unpin, + E, +>( + mut stream: S, + f: F, +) -> Result +where + BinstallError: From, +{ + let mut extracter = guard(AsyncExtracterInner::new(f), AsyncExtracterInner::abort); -impl AsyncExtracter { - /// * `path` - If `fmt` is `PkgFmt::Bin`, then this is the filename - /// for the bin. - /// Otherwise, it is the directory where the extracted content will be put. - /// * `fmt` - The format of the archive to feed in. - /// * `filter` - If Some, then it will pass the path of the file to it - /// and only extract ones which filter returns `true`. - /// Note that this is a best-effort and it only works when `fmt` - /// is not `PkgFmt::Bin` or `PkgFmt::Zip`. - fn new bool + Send + 'static>( - path: &Path, - fmt: PkgFmt, - filter: Option, - ) -> Self { - let inner = AsyncExtracterInner::new(path, fmt, filter); - Self(guard(inner, AsyncExtracterInner::abort)) + while let Some(res) = stream.next().await { + extracter.feed(res?).await?; } - /// Upon error, this extracter shall not be reused. - /// Otherwise, `Self::done` would panic. - async fn feed(&mut self, bytes: Bytes) -> Result<(), BinstallError> { - self.0.feed(bytes).await + ScopeGuard::into_inner(extracter).done().await +} + +fn read_into_file( + file: &mut fs::File, + rx: &mut mpsc::Receiver, +) -> Result<(), BinstallError> { + while let Some(content) = rx.blocking_recv() { + match content { + Content::Data(bytes) => file.write_all(&*bytes)?, + Content::Abort => return Err(io::Error::new(io::ErrorKind::Other, "Aborted").into()), + } } - async fn done(self) -> Result<(), BinstallError> { - ScopeGuard::into_inner(self.0).done().await - } + file.flush()?; + + Ok(()) } pub async fn extract_bin( - mut stream: impl Stream> + Unpin, + stream: impl Stream> + Unpin, output: &Path, ) -> Result<(), BinstallError> where BinstallError: From, { - let mut extracter = AsyncExtracter::new:: bool>(output, PkgFmt::Bin, None); + let path = output.to_owned(); - while let Some(res) = stream.next().await { - extracter.feed(res?).await?; - } + extract_impl(stream, move |mut rx| { + fs::create_dir_all(path.parent().unwrap())?; - extracter.done().await + let mut file = fs::File::create(&path)?; + + // remove it unless the operation isn't aborted and no write + // fails. + let remove_guard = guard(&path, |path| { + fs::remove_file(path).ok(); + }); + + read_into_file(&mut file, &mut rx)?; + + // Operation isn't aborted and all writes succeed, + // disarm the remove_guard. + ScopeGuard::into_inner(remove_guard); + + Ok(()) + }) + .await } pub async fn extract_zip( - mut stream: impl Stream> + Unpin, + stream: impl Stream> + Unpin, output: &Path, ) -> Result<(), BinstallError> where BinstallError: From, { - let mut extracter = AsyncExtracter::new:: bool>(output, PkgFmt::Zip, None); + let path = output.to_owned(); - while let Some(res) = stream.next().await { - extracter.feed(res?).await?; - } + extract_impl(stream, move |mut rx| { + fs::create_dir_all(path.parent().unwrap())?; - extracter.done().await + let mut file = tempfile()?; + + read_into_file(&mut file, &mut rx)?; + + // rewind it so that we can pass it to unzip + file.rewind()?; + + unzip(file, &path)?; + + Ok(()) + }) + .await } /// * `filter` - If Some, then it will pass the path of the file to it @@ -237,7 +246,7 @@ pub async fn extract_tar_based_stream_with_filter< Filter: FnMut(&Path) -> bool + Send + 'static, E, >( - mut stream: impl Stream> + Unpin, + stream: impl Stream> + Unpin, output: &Path, fmt: TarBasedFmt, filter: Option, @@ -245,28 +254,39 @@ pub async fn extract_tar_based_stream_with_filter< where BinstallError: From, { - let mut extracter = AsyncExtracter::new(output, fmt.into(), filter); + let path = output.to_owned(); - while let Some(res) = stream.next().await { - extracter.feed(res?).await?; - } + extract_impl(stream, move |mut rx| { + fs::create_dir_all(path.parent().unwrap())?; - extracter.done().await + extract_compressed_from_readable(ReadableRx::new(&mut rx), fmt.into(), &path, filter)?; + + Ok(()) + }) + .await } pub async fn extract_tar_based_stream( - mut stream: impl Stream> + Unpin, + stream: impl Stream> + Unpin, output: &Path, fmt: TarBasedFmt, ) -> Result<(), BinstallError> where BinstallError: From, { - let mut extracter = AsyncExtracter::new:: bool>(output, fmt.into(), None); + let path = output.to_owned(); - while let Some(res) = stream.next().await { - extracter.feed(res?).await?; - } + extract_impl(stream, move |mut rx| { + fs::create_dir_all(path.parent().unwrap())?; - extracter.done().await + extract_compressed_from_readable:: bool, _>( + ReadableRx::new(&mut rx), + fmt.into(), + &path, + None, + )?; + + Ok(()) + }) + .await } diff --git a/src/helpers/extracter.rs b/src/helpers/extracter.rs index 42426693..a393c935 100644 --- a/src/helpers/extracter.rs +++ b/src/helpers/extracter.rs @@ -56,8 +56,8 @@ fn untar bool>( /// and only extract ones which filter returns `true`. /// Note that this is a best-effort and it only works when `fmt` /// is not `PkgFmt::Bin` or `PkgFmt::Zip`. -pub(crate) fn extract_compressed_from_readable bool>( - dat: impl BufRead, +pub(crate) fn extract_compressed_from_readable bool, R: BufRead>( + dat: R, fmt: PkgFmt, path: &Path, filter: Option, From 57b40d809eb6ee8682b2b5554ab6214fad0f76cc Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 11 Jun 2022 20:13:10 +1000 Subject: [PATCH 06/65] Cleanup mod `async_extracter` Signed-off-by: Jiahao XU --- src/helpers/async_extracter.rs | 56 ++++------------------------------ 1 file changed, 6 insertions(+), 50 deletions(-) diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 26274d65..b592db02 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -5,7 +5,7 @@ use std::path::Path; use bytes::Bytes; use futures_util::stream::{Stream, StreamExt}; -use scopeguard::{guard, Always, ScopeGuard}; +use scopeguard::{guard, ScopeGuard}; use tempfile::tempfile; use tokio::{ sync::mpsc, @@ -13,7 +13,7 @@ use tokio::{ }; use super::{extracter::*, readable_rx::*}; -use crate::{BinstallError, PkgFmt, TarBasedFmt}; +use crate::{BinstallError, TarBasedFmt}; pub(crate) enum Content { /// Data to write to file @@ -61,45 +61,7 @@ impl AsyncExtracterInner { ) -> Self { let (tx, rx) = mpsc::channel::(100); - let handle = spawn_blocking(move || { - f(rx) - /* - fs::create_dir_all(path.parent().unwrap())?; - - match fmt { - PkgFmt::Bin => { - let mut file = fs::File::create(&path)?; - - // remove it unless the operation isn't aborted and no write - // fails. - let remove_guard = guard(&path, |path| { - fs::remove_file(path).ok(); - }); - - Self::read_into_file(&mut file, &mut rx)?; - - // Operation isn't aborted and all writes succeed, - // disarm the remove_guard. - ScopeGuard::into_inner(remove_guard); - } - PkgFmt::Zip => { - let mut file = tempfile()?; - - Self::read_into_file(&mut file, &mut rx)?; - - // rewind it so that we can pass it to unzip - file.rewind()?; - - unzip(file, &path)?; - } - _ => { - extract_compressed_from_readable(ReadableRx::new(&mut rx), fmt, &path, filter)? - } - } - - Ok(()) - */ - }); + let handle = spawn_blocking(move || f(rx)); Self { handle, tx } } @@ -233,9 +195,7 @@ where // rewind it so that we can pass it to unzip file.rewind()?; - unzip(file, &path)?; - - Ok(()) + unzip(file, &path) }) .await } @@ -259,9 +219,7 @@ where extract_impl(stream, move |mut rx| { fs::create_dir_all(path.parent().unwrap())?; - extract_compressed_from_readable(ReadableRx::new(&mut rx), fmt.into(), &path, filter)?; - - Ok(()) + extract_compressed_from_readable(ReadableRx::new(&mut rx), fmt.into(), &path, filter) }) .await } @@ -284,9 +242,7 @@ where fmt.into(), &path, None, - )?; - - Ok(()) + ) }) .await } From 5a43ee2681ea1df7457bdd046eb9169f4ef320f0 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 11 Jun 2022 20:16:41 +1000 Subject: [PATCH 07/65] Simplify `extract_compressed_from_readable` impl Signed-off-by: Jiahao XU --- src/helpers/async_extracter.rs | 4 ++-- src/helpers/extracter.rs | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index b592db02..ac84ab16 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -219,7 +219,7 @@ where extract_impl(stream, move |mut rx| { fs::create_dir_all(path.parent().unwrap())?; - extract_compressed_from_readable(ReadableRx::new(&mut rx), fmt.into(), &path, filter) + extract_compressed_from_readable(ReadableRx::new(&mut rx), fmt, &path, filter) }) .await } @@ -239,7 +239,7 @@ where extract_compressed_from_readable:: bool, _>( ReadableRx::new(&mut rx), - fmt.into(), + fmt, &path, None, ) diff --git a/src/helpers/extracter.rs b/src/helpers/extracter.rs index a393c935..cda13ef8 100644 --- a/src/helpers/extracter.rs +++ b/src/helpers/extracter.rs @@ -9,7 +9,7 @@ use xz2::bufread::XzDecoder; use zip::read::ZipArchive; use zstd::stream::Decoder as ZstdDecoder; -use crate::{BinstallError, PkgFmt}; +use crate::{BinstallError, TarBasedFmt}; /// * `filter` - If Some, then it will pass the path of the file to it /// and only extract ones which filter returns `true`. @@ -58,32 +58,34 @@ fn untar bool>( /// is not `PkgFmt::Bin` or `PkgFmt::Zip`. pub(crate) fn extract_compressed_from_readable bool, R: BufRead>( dat: R, - fmt: PkgFmt, + fmt: TarBasedFmt, path: &Path, filter: Option, ) -> Result<(), BinstallError> { + use TarBasedFmt::*; + match fmt { - PkgFmt::Tar => { + Tar => { // Extract to install dir debug!("Extracting from tar archive to `{path:?}`"); untar(dat, path, filter)? } - PkgFmt::Tgz => { + Tgz => { // Extract to install dir debug!("Decompressing from tgz archive to `{path:?}`"); let tar = GzDecoder::new(dat); untar(tar, path, filter)?; } - PkgFmt::Txz => { + Txz => { // Extract to install dir debug!("Decompressing from txz archive to `{path:?}`"); let tar = XzDecoder::new(dat); untar(tar, path, filter)?; } - PkgFmt::Tzstd => { + Tzstd => { // Extract to install dir debug!("Decompressing from tzstd archive to `{path:?}`"); @@ -94,8 +96,6 @@ pub(crate) fn extract_compressed_from_readable bool, R: let tar = ZstdDecoder::with_buffer(dat)?; untar(tar, path, filter)?; } - PkgFmt::Zip => panic!("Unexpected PkgFmt::Zip!"), - PkgFmt::Bin => panic!("Unexpected PkgFmt::Bin!"), }; Ok(()) From 90a96cabc98cea1ba539164987cad65051b0ec3b Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 11 Jun 2022 20:31:46 +1000 Subject: [PATCH 08/65] Rewrite `untar` to take a visitor & simplify signature of `download_and_extract_with_filter` Signed-off-by: Jiahao XU --- src/drivers.rs | 2 +- src/helpers.rs | 2 +- src/helpers/async_extracter.rs | 48 ++++++++++++++++++++++++++++---- src/helpers/extracter.rs | 50 ++++++++++++++-------------------- 4 files changed, 64 insertions(+), 38 deletions(-) diff --git a/src/drivers.rs b/src/drivers.rs index db520d4e..1c6a5eba 100644 --- a/src/drivers.rs +++ b/src/drivers.rs @@ -114,7 +114,7 @@ pub async fn fetch_crate_cratesio( Url::parse(&crate_url)?, TarBasedFmt::Tgz, &temp_dir, - Some(move |path: &Path| path == cargo_toml || path == main || path.starts_with(&bin)), + move |path: &Path| path == cargo_toml || path == main || path.starts_with(&bin), ) .await?; diff --git a/src/helpers.rs b/src/helpers.rs index 12cf35c7..bd2d904f 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -92,7 +92,7 @@ pub async fn download_and_extract_with_filter< url: Url, fmt: TarBasedFmt, path: P, - filter: Option, + filter: Filter, ) -> Result<(), BinstallError> { debug!("Downloading from: '{url}'"); diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index ac84ab16..5a4051ac 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -1,11 +1,14 @@ use std::fmt::Debug; use std::fs; -use std::io::{self, Seek, Write}; -use std::path::Path; +use std::io::{self, Read, Seek, Write}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; use bytes::Bytes; use futures_util::stream::{Stream, StreamExt}; +use log::debug; use scopeguard::{guard, ScopeGuard}; +use tar::Entries; use tempfile::tempfile; use tokio::{ sync::mpsc, @@ -209,17 +212,42 @@ pub async fn extract_tar_based_stream_with_filter< stream: impl Stream> + Unpin, output: &Path, fmt: TarBasedFmt, - filter: Option, + filter: Filter, ) -> Result<(), BinstallError> where BinstallError: From, { - let path = output.to_owned(); + struct Visitor(F, Arc); + + impl bool + Send + 'static> TarEntriesVisitor for Visitor { + fn visit(&mut self, entries: Entries<'_, R>) -> Result<(), BinstallError> { + for res in entries { + let mut entry = res?; + let entry_path = entry.path()?; + + if self.0(&entry_path) { + debug!("Extracting {entry_path:#?}"); + + let dst = self.1.join(entry_path); + + fs::create_dir_all(dst.parent().unwrap())?; + + entry.unpack(dst)?; + } + } + + Ok(()) + } + } + + let path = Arc::new(output.to_owned()); + + let visitor = Visitor(filter, path.clone()); extract_impl(stream, move |mut rx| { fs::create_dir_all(path.parent().unwrap())?; - extract_compressed_from_readable(ReadableRx::new(&mut rx), fmt, &path, filter) + extract_compressed_from_readable(ReadableRx::new(&mut rx), fmt, &*path, Some(visitor)) }) .await } @@ -232,12 +260,20 @@ pub async fn extract_tar_based_stream( where BinstallError: From, { + struct DummyVisitor; + + impl TarEntriesVisitor for DummyVisitor { + fn visit(&mut self, _entries: Entries<'_, R>) -> Result<(), BinstallError> { + unimplemented!() + } + } + let path = output.to_owned(); extract_impl(stream, move |mut rx| { fs::create_dir_all(path.parent().unwrap())?; - extract_compressed_from_readable:: bool, _>( + extract_compressed_from_readable::( ReadableRx::new(&mut rx), fmt, &path, diff --git a/src/helpers/extracter.rs b/src/helpers/extracter.rs index cda13ef8..969a1556 100644 --- a/src/helpers/extracter.rs +++ b/src/helpers/extracter.rs @@ -1,44 +1,34 @@ -use std::fs::{self, File}; +use std::fs::File; use std::io::{BufRead, Read}; use std::path::Path; use flate2::bufread::GzDecoder; use log::debug; -use tar::Archive; +use tar::{Archive, Entries}; use xz2::bufread::XzDecoder; use zip::read::ZipArchive; use zstd::stream::Decoder as ZstdDecoder; use crate::{BinstallError, TarBasedFmt}; -/// * `filter` - If Some, then it will pass the path of the file to it -/// and only extract ones which filter returns `true`. -/// Note that this is a best-effort and it only works when `fmt` -/// is not `PkgFmt::Bin` or `PkgFmt::Zip`. -fn untar bool>( - dat: impl Read, +pub trait TarEntriesVisitor { + fn visit(&mut self, entries: Entries<'_, R>) -> Result<(), BinstallError>; +} + +/// * `f` - If Some, then this function will pass +/// the entries of the `dat` to it and let it decides +/// what to do with the tar. +fn untar( + dat: R, path: &Path, - filter: Option, + visitor: Option, ) -> Result<(), BinstallError> { let mut tar = Archive::new(dat); - if let Some(mut filter) = filter { + if let Some(mut visitor) = visitor { debug!("Untaring with filter"); - for res in tar.entries()? { - let mut entry = res?; - let entry_path = entry.path()?; - - if filter(&entry_path) { - debug!("Extracting {entry_path:#?}"); - - let dst = path.join(entry_path); - - fs::create_dir_all(dst.parent().unwrap())?; - - entry.unpack(dst)?; - } - } + visitor.visit(tar.entries()?)?; } else { debug!("Untaring entire tar"); tar.unpack(path)?; @@ -56,11 +46,11 @@ fn untar bool>( /// and only extract ones which filter returns `true`. /// Note that this is a best-effort and it only works when `fmt` /// is not `PkgFmt::Bin` or `PkgFmt::Zip`. -pub(crate) fn extract_compressed_from_readable bool, R: BufRead>( +pub(crate) fn extract_compressed_from_readable( dat: R, fmt: TarBasedFmt, path: &Path, - filter: Option, + visitor: Option, ) -> Result<(), BinstallError> { use TarBasedFmt::*; @@ -69,21 +59,21 @@ pub(crate) fn extract_compressed_from_readable bool, R: // Extract to install dir debug!("Extracting from tar archive to `{path:?}`"); - untar(dat, path, filter)? + untar(dat, path, visitor)? } Tgz => { // Extract to install dir debug!("Decompressing from tgz archive to `{path:?}`"); let tar = GzDecoder::new(dat); - untar(tar, path, filter)?; + untar(tar, path, visitor)?; } Txz => { // Extract to install dir debug!("Decompressing from txz archive to `{path:?}`"); let tar = XzDecoder::new(dat); - untar(tar, path, filter)?; + untar(tar, path, visitor)?; } Tzstd => { // Extract to install dir @@ -94,7 +84,7 @@ pub(crate) fn extract_compressed_from_readable bool, R: // as &[] by ZstdDecoder::new, thus ZstdDecoder::new // should not return any error. let tar = ZstdDecoder::with_buffer(dat)?; - untar(tar, path, filter)?; + untar(tar, path, visitor)?; } }; From 4892d8bf3a5d35a88aed6d65d2d9b0de22b64aaf Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 11 Jun 2022 20:35:02 +1000 Subject: [PATCH 09/65] Impl forward of `&mut T` to `T` for `TarEntriesVisitor` Signed-off-by: Jiahao XU --- src/helpers/extracter.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/helpers/extracter.rs b/src/helpers/extracter.rs index 969a1556..1175da8a 100644 --- a/src/helpers/extracter.rs +++ b/src/helpers/extracter.rs @@ -15,6 +15,12 @@ pub trait TarEntriesVisitor { fn visit(&mut self, entries: Entries<'_, R>) -> Result<(), BinstallError>; } +impl TarEntriesVisitor for &mut V { + fn visit(&mut self, entries: Entries<'_, R>) -> Result<(), BinstallError> { + (*self).visit(entries) + } +} + /// * `f` - If Some, then this function will pass /// the entries of the `dat` to it and let it decides /// what to do with the tar. From f8c8c66f57f4c1f718becd19d6f788538bfb050e Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 11 Jun 2022 20:38:11 +1000 Subject: [PATCH 10/65] Impl new fn `helpers::download_tar_based_and_visit` Signed-off-by: Jiahao XU --- src/helpers.rs | 35 +++++++++++++++++++++++++++++++++- src/helpers/async_extracter.rs | 26 +++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/src/helpers.rs b/src/helpers.rs index bd2d904f..7670b15b 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -1,3 +1,4 @@ +use std::fmt::Debug; use std::path::{Path, PathBuf}; use cargo_toml::Manifest; @@ -19,6 +20,8 @@ mod ui_thread; pub use ui_thread::UIThread; mod extracter; +pub use extracter::TarEntriesVisitor; + mod readable_rx; /// Load binstall metadata from the crate `Cargo.toml` at the provided path @@ -83,7 +86,7 @@ pub async fn download_and_extract>( /// Download a file from the provided URL and extract part of it to /// the provided path. /// -/// * `filter` - If Some, then it will pass the path of the file to it +/// * `filter` - It will pass the path of the file to it /// and only extract ones which filter returns `true`. pub async fn download_and_extract_with_filter< Filter: FnMut(&Path) -> bool + Send + 'static, @@ -110,6 +113,36 @@ pub async fn download_and_extract_with_filter< Ok(()) } +/// Download a file from the provided URL and extract part of it to +/// the provided path. +/// +/// * `filter` - If Some, then it will pass the path of the file to it +/// and only extract ones which filter returns `true`. +pub async fn download_tar_based_and_visit< + V: TarEntriesVisitor + Debug + Send + 'static, + P: AsRef, +>( + url: Url, + fmt: TarBasedFmt, + path: P, + visitor: V, +) -> Result { + debug!("Downloading from: '{url}'"); + + let resp = create_request(url).await?; + + let path = path.as_ref(); + debug!("Downloading to file: '{}'", path.display()); + + let stream = resp.bytes_stream(); + + let visitor = extract_tar_based_stream_and_visit(stream, path, fmt, visitor).await?; + + debug!("Download OK, written to file: '{}'", path.display()); + + Ok(visitor) +} + /// Fetch install path from environment /// roughly follows pub fn get_install_path>(install_path: Option

) -> Option { diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 5a4051ac..cdf8ffea 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -282,3 +282,29 @@ where }) .await } + +pub async fn extract_tar_based_stream_and_visit( + stream: impl Stream> + Unpin, + output: &Path, + fmt: TarBasedFmt, + mut visitor: V, +) -> Result +where + BinstallError: From, +{ + let path = output.to_owned(); + + extract_impl(stream, move |mut rx| { + fs::create_dir_all(path.parent().unwrap())?; + + extract_compressed_from_readable( + ReadableRx::new(&mut rx), + fmt, + &*path, + Some(&mut visitor), + )?; + + Ok(visitor) + }) + .await +} From 6c6055da69875625a33a9cdd38c0f2542295ef22 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 11 Jun 2022 22:43:45 +1000 Subject: [PATCH 11/65] Refactor mod `drivers`: Extract out sub mods Signed-off-by: Jiahao XU --- src/drivers.rs | 124 ++-------------------------------------- src/drivers/cratesio.rs | 80 ++++++++++++++++++++++++++ src/drivers/version.rs | 48 ++++++++++++++++ 3 files changed, 133 insertions(+), 119 deletions(-) create mode 100644 src/drivers/cratesio.rs create mode 100644 src/drivers/version.rs diff --git a/src/drivers.rs b/src/drivers.rs index 1c6a5eba..c63d3044 100644 --- a/src/drivers.rs +++ b/src/drivers.rs @@ -1,126 +1,12 @@ -use std::collections::BTreeSet; use std::path::{Path, PathBuf}; -use std::time::Duration; -use crates_io_api::AsyncClient; -use log::debug; -use semver::{Version, VersionReq}; -use url::Url; +use crate::BinstallError; -use crate::{helpers::*, BinstallError, TarBasedFmt}; +mod cratesio; +pub use cratesio::*; -fn find_version<'a, V: Iterator>( - requirement: &str, - version_iter: V, -) -> Result { - // Parse version requirement - let version_req = VersionReq::parse(requirement).map_err(|err| BinstallError::VersionReq { - req: requirement.into(), - err, - })?; - - // Filter for matching versions - let filtered: BTreeSet<_> = version_iter - .filter_map(|v| { - // Remove leading `v` for git tags - let ver_str = match v.strip_prefix('s') { - Some(v) => v, - None => v, - }; - - // Parse out version - let ver = Version::parse(ver_str).ok()?; - debug!("Version: {:?}", ver); - - // Filter by version match - if version_req.matches(&ver) { - Some(ver) - } else { - None - } - }) - .collect(); - - debug!("Filtered: {:?}", filtered); - - // Return highest version - filtered - .iter() - .max() - .cloned() - .ok_or(BinstallError::VersionMismatch { req: version_req }) -} - -/// Fetch a crate Cargo.toml by name and version from crates.io -pub async fn fetch_crate_cratesio( - name: &str, - version_req: &str, - temp_dir: &Path, -) -> Result { - // Fetch / update index - debug!("Looking up crate information"); - - // Build crates.io api client - let api_client = AsyncClient::new( - "cargo-binstall (https://github.com/ryankurte/cargo-binstall)", - Duration::from_millis(100), - ) - .expect("bug: invalid user agent"); - - // Fetch online crate information - let base_info = - api_client - .get_crate(name.as_ref()) - .await - .map_err(|err| BinstallError::CratesIoApi { - crate_name: name.into(), - err, - })?; - - // Locate matching version - let version_iter = - base_info - .versions - .iter() - .filter_map(|v| if !v.yanked { Some(&v.num) } else { None }); - let version_name = find_version(version_req, version_iter)?; - - // Fetch information for the filtered version - let version = base_info - .versions - .iter() - .find(|v| v.num == version_name.to_string()) - .ok_or_else(|| BinstallError::VersionUnavailable { - crate_name: name.into(), - v: version_name.clone(), - })?; - - debug!("Found information for crate version: '{}'", version.num); - - // Download crate to temporary dir (crates.io or git?) - let crate_url = format!("https://crates.io/{}", version.dl_path); - - debug!("Fetching crate from: {crate_url} and extracting Cargo.toml from it"); - - let crate_dir: PathBuf = format!("{name}-{version_name}").into(); - let crate_path = temp_dir.join(&crate_dir); - - let cargo_toml = crate_dir.join("Cargo.toml"); - let src = crate_dir.join("src"); - let main = src.join("main.rs"); - let bin = src.join("bin"); - - download_and_extract_with_filter( - Url::parse(&crate_url)?, - TarBasedFmt::Tgz, - &temp_dir, - move |path: &Path| path == cargo_toml || path == main || path.starts_with(&bin), - ) - .await?; - - // Return crate directory - Ok(crate_path) -} +mod version; +use version::find_version; /// Fetch a crate by name and version from github /// TODO: implement this diff --git a/src/drivers/cratesio.rs b/src/drivers/cratesio.rs new file mode 100644 index 00000000..82b5044d --- /dev/null +++ b/src/drivers/cratesio.rs @@ -0,0 +1,80 @@ +use std::path::{Path, PathBuf}; +use std::time::Duration; + +use crates_io_api::AsyncClient; +use log::debug; +use url::Url; + +use super::find_version; +use crate::{helpers::*, BinstallError, TarBasedFmt}; + +/// Fetch a crate Cargo.toml by name and version from crates.io +pub async fn fetch_crate_cratesio( + name: &str, + version_req: &str, + temp_dir: &Path, +) -> Result { + // Fetch / update index + debug!("Looking up crate information"); + + // Build crates.io api client + let api_client = AsyncClient::new( + "cargo-binstall (https://github.com/ryankurte/cargo-binstall)", + Duration::from_millis(100), + ) + .expect("bug: invalid user agent"); + + // Fetch online crate information + let base_info = + api_client + .get_crate(name.as_ref()) + .await + .map_err(|err| BinstallError::CratesIoApi { + crate_name: name.into(), + err, + })?; + + // Locate matching version + let version_iter = + base_info + .versions + .iter() + .filter_map(|v| if !v.yanked { Some(&v.num) } else { None }); + let version_name = find_version(version_req, version_iter)?; + + // Fetch information for the filtered version + let version = base_info + .versions + .iter() + .find(|v| v.num == version_name.to_string()) + .ok_or_else(|| BinstallError::VersionUnavailable { + crate_name: name.into(), + v: version_name.clone(), + })?; + + debug!("Found information for crate version: '{}'", version.num); + + // Download crate to temporary dir (crates.io or git?) + let crate_url = format!("https://crates.io/{}", version.dl_path); + + debug!("Fetching crate from: {crate_url} and extracting Cargo.toml from it"); + + let crate_dir: PathBuf = format!("{name}-{version_name}").into(); + let crate_path = temp_dir.join(&crate_dir); + + let cargo_toml = crate_dir.join("Cargo.toml"); + let src = crate_dir.join("src"); + let main = src.join("main.rs"); + let bin = src.join("bin"); + + download_and_extract_with_filter( + Url::parse(&crate_url)?, + TarBasedFmt::Tgz, + &temp_dir, + move |path: &Path| path == cargo_toml || path == main || path.starts_with(&bin), + ) + .await?; + + // Return crate directory + Ok(crate_path) +} diff --git a/src/drivers/version.rs b/src/drivers/version.rs new file mode 100644 index 00000000..d1d9fdf8 --- /dev/null +++ b/src/drivers/version.rs @@ -0,0 +1,48 @@ +use std::collections::BTreeSet; + +use log::debug; +use semver::{Version, VersionReq}; + +use crate::BinstallError; + +pub(crate) fn find_version<'a, V: Iterator>( + requirement: &str, + version_iter: V, +) -> Result { + // Parse version requirement + let version_req = VersionReq::parse(requirement).map_err(|err| BinstallError::VersionReq { + req: requirement.into(), + err, + })?; + + // Filter for matching versions + let filtered: BTreeSet<_> = version_iter + .filter_map(|v| { + // Remove leading `v` for git tags + let ver_str = match v.strip_prefix('s') { + Some(v) => v, + None => v, + }; + + // Parse out version + let ver = Version::parse(ver_str).ok()?; + debug!("Version: {:?}", ver); + + // Filter by version match + if version_req.matches(&ver) { + Some(ver) + } else { + None + } + }) + .collect(); + + debug!("Filtered: {:?}", filtered); + + // Return highest version + filtered + .iter() + .max() + .cloned() + .ok_or(BinstallError::VersionMismatch { req: version_req }) +} From 0eb9424f1774c6f22e7bce16cef5d0c0306de35e Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 11 Jun 2022 22:47:47 +1000 Subject: [PATCH 12/65] Set vis of `find_version` to `pub(super)` Signed-off-by: Jiahao XU --- src/drivers/version.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/drivers/version.rs b/src/drivers/version.rs index d1d9fdf8..7d5f4a74 100644 --- a/src/drivers/version.rs +++ b/src/drivers/version.rs @@ -5,7 +5,7 @@ use semver::{Version, VersionReq}; use crate::BinstallError; -pub(crate) fn find_version<'a, V: Iterator>( +pub(super) fn find_version<'a, V: Iterator>( requirement: &str, version_iter: V, ) -> Result { From 0162f5f462789aa613f1b8edce202531d492b055 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 11 Jun 2022 22:53:34 +1000 Subject: [PATCH 13/65] Add doc for `TarEntriesVisitor` Signed-off-by: Jiahao XU --- src/helpers/extracter.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/helpers/extracter.rs b/src/helpers/extracter.rs index 1175da8a..78d858c3 100644 --- a/src/helpers/extracter.rs +++ b/src/helpers/extracter.rs @@ -11,6 +11,8 @@ use zstd::stream::Decoder as ZstdDecoder; use crate::{BinstallError, TarBasedFmt}; +/// Visitor must iterate over all entries. +/// Entires can be in arbitary order. pub trait TarEntriesVisitor { fn visit(&mut self, entries: Entries<'_, R>) -> Result<(), BinstallError>; } From cb2be5a882018c10bcd90b9045ab8b2c48e0678e Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 01:46:10 +1000 Subject: [PATCH 14/65] Add new trait `PathExt` & impl for `Path` Signed-off-by: Jiahao XU --- src/helpers.rs | 3 +++ src/helpers/path_ext.rs | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 src/helpers/path_ext.rs diff --git a/src/helpers.rs b/src/helpers.rs index 7670b15b..643eda60 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -24,6 +24,9 @@ pub use extracter::TarEntriesVisitor; mod readable_rx; +mod path_ext; +pub use path_ext::*; + /// Load binstall metadata from the crate `Cargo.toml` at the provided path pub fn load_manifest_path>( manifest_path: P, diff --git a/src/helpers/path_ext.rs b/src/helpers/path_ext.rs new file mode 100644 index 00000000..d11c0e7e --- /dev/null +++ b/src/helpers/path_ext.rs @@ -0,0 +1,34 @@ +use std::path::{Component, Path, PathBuf}; + +trait PathExt { + fn normalize_path(&self) -> PathBuf; +} + +impl PathExt for Path { + fn normalize_path(&self) -> PathBuf { + let mut components = self.components().peekable(); + let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() { + components.next(); + PathBuf::from(c.as_os_str()) + } else { + PathBuf::new() + }; + + for component in components { + match component { + Component::Prefix(..) => unreachable!(), + Component::RootDir => { + ret.push(component.as_os_str()); + } + Component::CurDir => {} + Component::ParentDir => { + ret.pop(); + } + Component::Normal(c) => { + ret.push(c); + } + } + } + ret + } +} From c6687edf481eb0c7186805aa7752a2e1e6bda8ce Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 01:54:15 +1000 Subject: [PATCH 15/65] Fix visbility of `PathExt`: Mark it as `pub` Signed-off-by: Jiahao XU --- src/helpers/path_ext.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/helpers/path_ext.rs b/src/helpers/path_ext.rs index d11c0e7e..c1261f96 100644 --- a/src/helpers/path_ext.rs +++ b/src/helpers/path_ext.rs @@ -1,6 +1,6 @@ use std::path::{Component, Path, PathBuf}; -trait PathExt { +pub trait PathExt { fn normalize_path(&self) -> PathBuf; } From 3c30722a06d60c35c40b46634cb2b52fb8ee0d2e Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 01:57:34 +1000 Subject: [PATCH 16/65] Impl new type `Vfs` which impl `AbstractFilesystem` Signed-off-by: Jiahao XU --- src/drivers.rs | 2 ++ src/drivers/vfs.rs | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 src/drivers/vfs.rs diff --git a/src/drivers.rs b/src/drivers.rs index c63d3044..5b053f25 100644 --- a/src/drivers.rs +++ b/src/drivers.rs @@ -8,6 +8,8 @@ pub use cratesio::*; mod version; use version::find_version; +mod vfs; + /// Fetch a crate by name and version from github /// TODO: implement this pub async fn fetch_crate_gh_releases( diff --git a/src/drivers/vfs.rs b/src/drivers/vfs.rs new file mode 100644 index 00000000..0051c44e --- /dev/null +++ b/src/drivers/vfs.rs @@ -0,0 +1,39 @@ +use std::collections::{hash_map::HashMap, hash_set::HashSet}; +use std::io; +use std::path::Path; + +use cargo_toml::AbstractFilesystem; + +use crate::helpers::PathExt; + +#[derive(Debug)] +pub(super) struct Vfs(HashMap, HashSet>>); + +impl Vfs { + pub(super) fn new() -> Self { + Self(HashMap::with_capacity(16)) + } + + /// * `path` - must be canonical, must not be empty and must + /// start with a prefix. + pub(super) fn add_path(&mut self, mut path: &Path) { + while let Some(parent) = path.parent() { + if let Some(path_str) = path.to_str() { + self.0 + .entry(parent.into()) + .or_insert_with(|| HashSet::with_capacity(4)) + .insert(path_str.into()); + } + + path = parent; + } + } +} + +impl AbstractFilesystem for Vfs { + fn file_names_in(&self, rel_path: &str) -> io::Result>> { + let rel_path = Path::new(rel_path).normalize_path(); + + Ok(self.0.get(&*rel_path).map(Clone::clone).unwrap_or_default()) + } +} From 44d43113f4eae01ea77a143fbf451f4dd088dbf8 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 16:08:41 +1000 Subject: [PATCH 17/65] Forward impl `AbstractFilesystem` for `&Vfs` Signed-off-by: Jiahao XU --- src/drivers/vfs.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/drivers/vfs.rs b/src/drivers/vfs.rs index 0051c44e..56aa5f54 100644 --- a/src/drivers/vfs.rs +++ b/src/drivers/vfs.rs @@ -37,3 +37,9 @@ impl AbstractFilesystem for Vfs { Ok(self.0.get(&*rel_path).map(Clone::clone).unwrap_or_default()) } } + +impl AbstractFilesystem for &Vfs { + fn file_names_in(&self, rel_path: &str) -> io::Result>> { + (*self).file_names_in(rel_path) + } +} From f3d3c488e31ff92957e2a3d814b105ee4a226920 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 16:31:47 +1000 Subject: [PATCH 18/65] Impl new type `ManifestVisitor Signed-off-by: Jiahao XU --- src/drivers.rs | 3 ++ src/drivers/visitor.rs | 80 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 src/drivers/visitor.rs diff --git a/src/drivers.rs b/src/drivers.rs index 5b053f25..8c5a9724 100644 --- a/src/drivers.rs +++ b/src/drivers.rs @@ -10,6 +10,9 @@ use version::find_version; mod vfs; +mod visitor; +pub use visitor::*; + /// Fetch a crate by name and version from github /// TODO: implement this pub async fn fetch_crate_gh_releases( diff --git a/src/drivers/visitor.rs b/src/drivers/visitor.rs new file mode 100644 index 00000000..005dbace --- /dev/null +++ b/src/drivers/visitor.rs @@ -0,0 +1,80 @@ +use std::io::Read; +use std::path::{Path, PathBuf}; + +use cargo_toml::Manifest; +use log::debug; +use tar::Entries; + +use super::vfs::Vfs; +use crate::{ + helpers::{PathExt, TarEntriesVisitor}, + BinstallError, Meta, +}; + +#[derive(Debug)] +pub struct ManifestVisitor { + cargo_toml_content: Vec, + /// manifest_dir_path is treated as the current dir. + manifest_dir_path: PathBuf, + + vfs: Vfs, +} + +impl ManifestVisitor { + pub(super) fn new(manifest_dir_path: PathBuf) -> Self { + Self { + // Cargo.toml is quite large usually. + cargo_toml_content: Vec::with_capacity(2000), + manifest_dir_path, + vfs: Vfs::new(), + } + } + + /// Load binstall metadata using the extracted information stored in memory. + pub fn load_manifest(&self) -> Result, BinstallError> { + debug!("Loading manifest directly from extracted file"); + + // Load and parse manifest + let mut manifest = Manifest::::from_slice_with_metadata(&self.cargo_toml_content)?; + + // Checks vfs for binary output names + manifest.complete_from_abstract_filesystem(&self.vfs)?; + + // Return metadata + Ok(manifest) + } +} + +impl TarEntriesVisitor for ManifestVisitor { + fn visit(&mut self, entries: Entries<'_, R>) -> Result<(), BinstallError> { + for res in entries { + let mut entry = res?; + let path = entry.path()?.normalize_path(); + + let path = if let Ok(path) = path.strip_prefix(&self.manifest_dir_path) { + path + } else { + // The path is outside of the curr dir (manifest dir), + // ignore it. + continue; + }; + + if path == Path::new("Cargo.toml") + || path == Path::new("src/main.rs") + || path.starts_with("src/bin") + { + self.vfs.add_path(path); + } + + if path == Path::new("Cargo.toml") { + // Since it is possible for the same Cargo.toml to appear + // multiple times using `tar --keep-old-files`, here we + // clear the buffer first before reading into it. + self.cargo_toml_content.clear(); + entry.read_to_end(&mut self.cargo_toml_content)?; + } + } + + Ok(()) + } +} From 5bb5d12949ef6b73448e849cc5ae1e0f2aa11159 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 16:37:53 +1000 Subject: [PATCH 19/65] Optimize `fetch_crate_cratesio` using `ManifestVisitor` and `download_tar_based_and_visit`. By using these two items, we avoid any I/O altogether. Everything happens in memory, thus there will be no i/o related errors or cost. This commit does not regress anything because `helpers::load_manifest_path` calls `Manifest::from_path_with_metadata`, which read in the whole `Cargo.toml` file at once. Thus this commit would not cause any OOM when the the original code would not. Signed-off-by: Jiahao XU --- src/drivers/cratesio.rs | 25 +++++++++---------------- src/main.rs | 9 +++++---- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/src/drivers/cratesio.rs b/src/drivers/cratesio.rs index 82b5044d..861b49cd 100644 --- a/src/drivers/cratesio.rs +++ b/src/drivers/cratesio.rs @@ -1,19 +1,20 @@ use std::path::{Path, PathBuf}; use std::time::Duration; +use cargo_toml::Manifest; use crates_io_api::AsyncClient; use log::debug; use url::Url; -use super::find_version; -use crate::{helpers::*, BinstallError, TarBasedFmt}; +use super::{find_version, ManifestVisitor}; +use crate::{helpers::*, BinstallError, Meta, TarBasedFmt}; /// Fetch a crate Cargo.toml by name and version from crates.io pub async fn fetch_crate_cratesio( name: &str, version_req: &str, temp_dir: &Path, -) -> Result { +) -> Result, BinstallError> { // Fetch / update index debug!("Looking up crate information"); @@ -59,22 +60,14 @@ pub async fn fetch_crate_cratesio( debug!("Fetching crate from: {crate_url} and extracting Cargo.toml from it"); - let crate_dir: PathBuf = format!("{name}-{version_name}").into(); - let crate_path = temp_dir.join(&crate_dir); + let manifest_dir_path: PathBuf = format!("{name}-{version_name}").into(); - let cargo_toml = crate_dir.join("Cargo.toml"); - let src = crate_dir.join("src"); - let main = src.join("main.rs"); - let bin = src.join("bin"); - - download_and_extract_with_filter( + download_tar_based_and_visit( Url::parse(&crate_url)?, TarBasedFmt::Tgz, &temp_dir, - move |path: &Path| path == cargo_toml || path == main || path.starts_with(&bin), + ManifestVisitor::new(manifest_dir_path), ) - .await?; - - // Return crate directory - Ok(crate_path) + .await? + .load_manifest() } diff --git a/src/main.rs b/src/main.rs index 5b89353c..b95a7111 100644 --- a/src/main.rs +++ b/src/main.rs @@ -210,13 +210,14 @@ async fn entry() -> Result<()> { // Fetch crate via crates.io, git, or use a local manifest path // TODO: work out which of these to do based on `opts.name` // TODO: support git-based fetches (whole repo name rather than just crate name) - let manifest_path = match opts.manifest_path.clone() { - Some(p) => p, + let manifest = match opts.manifest_path.clone() { + Some(manifest_path) => { + debug!("Reading manifest: {}", manifest_path.display()); + load_manifest_path(manifest_path.join("Cargo.toml"))? + } None => fetch_crate_cratesio(&opts.name, &opts.version, temp_dir.path()).await?, }; - debug!("Reading manifest: {}", manifest_path.display()); - let manifest = load_manifest_path(manifest_path.join("Cargo.toml"))?; let package = manifest.package.unwrap(); let is_plain_version = semver::Version::from_str(&opts.version).is_ok(); From e68eea35fec6597cc8e000d0fcfdc031283a335c Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 16:40:51 +1000 Subject: [PATCH 20/65] Mark type `ManifestVisitor` as `pub(super)` Since mod `cretesio` is the only one need to have access to it. Signed-off-by: Jiahao XU --- src/drivers.rs | 1 - src/drivers/cratesio.rs | 2 +- src/drivers/visitor.rs | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/drivers.rs b/src/drivers.rs index 8c5a9724..8b8f362f 100644 --- a/src/drivers.rs +++ b/src/drivers.rs @@ -11,7 +11,6 @@ use version::find_version; mod vfs; mod visitor; -pub use visitor::*; /// Fetch a crate by name and version from github /// TODO: implement this diff --git a/src/drivers/cratesio.rs b/src/drivers/cratesio.rs index 861b49cd..28a157ca 100644 --- a/src/drivers/cratesio.rs +++ b/src/drivers/cratesio.rs @@ -6,7 +6,7 @@ use crates_io_api::AsyncClient; use log::debug; use url::Url; -use super::{find_version, ManifestVisitor}; +use super::{find_version, visitor::ManifestVisitor}; use crate::{helpers::*, BinstallError, Meta, TarBasedFmt}; /// Fetch a crate Cargo.toml by name and version from crates.io diff --git a/src/drivers/visitor.rs b/src/drivers/visitor.rs index 005dbace..850d7c39 100644 --- a/src/drivers/visitor.rs +++ b/src/drivers/visitor.rs @@ -12,7 +12,7 @@ use crate::{ }; #[derive(Debug)] -pub struct ManifestVisitor { +pub(super) struct ManifestVisitor { cargo_toml_content: Vec, /// manifest_dir_path is treated as the current dir. manifest_dir_path: PathBuf, @@ -31,7 +31,7 @@ impl ManifestVisitor { } /// Load binstall metadata using the extracted information stored in memory. - pub fn load_manifest(&self) -> Result, BinstallError> { + pub(super) fn load_manifest(&self) -> Result, BinstallError> { debug!("Loading manifest directly from extracted file"); // Load and parse manifest From f82890cba396a1f0eb696ae1cf51089e96a7c403 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 16:42:32 +1000 Subject: [PATCH 21/65] Rm `download_and_extract_with_filter` Signed-off-by: Jiahao XU --- src/helpers.rs | 30 ------------------- src/helpers/async_extracter.rs | 53 +--------------------------------- 2 files changed, 1 insertion(+), 82 deletions(-) diff --git a/src/helpers.rs b/src/helpers.rs index 643eda60..7a222d3f 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -86,36 +86,6 @@ pub async fn download_and_extract>( Ok(()) } -/// Download a file from the provided URL and extract part of it to -/// the provided path. -/// -/// * `filter` - It will pass the path of the file to it -/// and only extract ones which filter returns `true`. -pub async fn download_and_extract_with_filter< - Filter: FnMut(&Path) -> bool + Send + 'static, - P: AsRef, ->( - url: Url, - fmt: TarBasedFmt, - path: P, - filter: Filter, -) -> Result<(), BinstallError> { - debug!("Downloading from: '{url}'"); - - let resp = create_request(url).await?; - - let path = path.as_ref(); - debug!("Downloading to file: '{}'", path.display()); - - let stream = resp.bytes_stream(); - - extract_tar_based_stream_with_filter(stream, path, fmt, filter).await?; - - debug!("Download OK, written to file: '{}'", path.display()); - - Ok(()) -} - /// Download a file from the provided URL and extract part of it to /// the provided path. /// diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index cdf8ffea..8564d19e 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -1,12 +1,10 @@ use std::fmt::Debug; use std::fs; use std::io::{self, Read, Seek, Write}; -use std::path::{Path, PathBuf}; -use std::sync::Arc; +use std::path::Path; use bytes::Bytes; use futures_util::stream::{Stream, StreamExt}; -use log::debug; use scopeguard::{guard, ScopeGuard}; use tar::Entries; use tempfile::tempfile; @@ -203,55 +201,6 @@ where .await } -/// * `filter` - If Some, then it will pass the path of the file to it -/// and only extract ones which filter returns `true`. -pub async fn extract_tar_based_stream_with_filter< - Filter: FnMut(&Path) -> bool + Send + 'static, - E, ->( - stream: impl Stream> + Unpin, - output: &Path, - fmt: TarBasedFmt, - filter: Filter, -) -> Result<(), BinstallError> -where - BinstallError: From, -{ - struct Visitor(F, Arc); - - impl bool + Send + 'static> TarEntriesVisitor for Visitor { - fn visit(&mut self, entries: Entries<'_, R>) -> Result<(), BinstallError> { - for res in entries { - let mut entry = res?; - let entry_path = entry.path()?; - - if self.0(&entry_path) { - debug!("Extracting {entry_path:#?}"); - - let dst = self.1.join(entry_path); - - fs::create_dir_all(dst.parent().unwrap())?; - - entry.unpack(dst)?; - } - } - - Ok(()) - } - } - - let path = Arc::new(output.to_owned()); - - let visitor = Visitor(filter, path.clone()); - - extract_impl(stream, move |mut rx| { - fs::create_dir_all(path.parent().unwrap())?; - - extract_compressed_from_readable(ReadableRx::new(&mut rx), fmt, &*path, Some(visitor)) - }) - .await -} - pub async fn extract_tar_based_stream( stream: impl Stream> + Unpin, output: &Path, From b2c34137ccc70b9c2ea1bebf1161fff852996ec9 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 16:56:41 +1000 Subject: [PATCH 22/65] Mark `extract_compressed_from_readable` & `unzip` to be `pub(super)` Signed-off-by: Jiahao XU --- src/helpers/extracter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/helpers/extracter.rs b/src/helpers/extracter.rs index 78d858c3..c24634cf 100644 --- a/src/helpers/extracter.rs +++ b/src/helpers/extracter.rs @@ -54,7 +54,7 @@ fn untar( /// and only extract ones which filter returns `true`. /// Note that this is a best-effort and it only works when `fmt` /// is not `PkgFmt::Bin` or `PkgFmt::Zip`. -pub(crate) fn extract_compressed_from_readable( +pub(super) fn extract_compressed_from_readable( dat: R, fmt: TarBasedFmt, path: &Path, @@ -99,7 +99,7 @@ pub(crate) fn extract_compressed_from_readable Ok(()) } -pub(crate) fn unzip(dat: File, dst: &Path) -> Result<(), BinstallError> { +pub(super) fn unzip(dat: File, dst: &Path) -> Result<(), BinstallError> { debug!("Decompressing from zip archive to `{dst:?}`"); let mut zip = ZipArchive::new(dat)?; From 17fcac7e63828195728238c368fd6c547ed9059d Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 17:03:39 +1000 Subject: [PATCH 23/65] Refactor: Simplify `untar` with new enum `Op` Signed-off-by: Jiahao XU --- src/helpers/async_extracter.rs | 10 ++----- src/helpers/extracter.rs | 52 ++++++++++++++++++++-------------- 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 8564d19e..80ed397c 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -225,8 +225,7 @@ where extract_compressed_from_readable::( ReadableRx::new(&mut rx), fmt, - &path, - None, + Op::UnpackToPath(&path), ) }) .await @@ -246,12 +245,7 @@ where extract_impl(stream, move |mut rx| { fs::create_dir_all(path.parent().unwrap())?; - extract_compressed_from_readable( - ReadableRx::new(&mut rx), - fmt, - &*path, - Some(&mut visitor), - )?; + extract_compressed_from_readable(ReadableRx::new(&mut rx), fmt, Op::Visit(&mut visitor))?; Ok(visitor) }) diff --git a/src/helpers/extracter.rs b/src/helpers/extracter.rs index c24634cf..12133da2 100644 --- a/src/helpers/extracter.rs +++ b/src/helpers/extracter.rs @@ -23,23 +23,28 @@ impl TarEntriesVisitor for &mut V { } } +#[derive(Debug)] +pub(super) enum Op<'a, V: TarEntriesVisitor> { + UnpackToPath(&'a Path), + Visit(V), +} + /// * `f` - If Some, then this function will pass /// the entries of the `dat` to it and let it decides /// what to do with the tar. -fn untar( - dat: R, - path: &Path, - visitor: Option, -) -> Result<(), BinstallError> { +fn untar(dat: R, op: Op<'_, V>) -> Result<(), BinstallError> { let mut tar = Archive::new(dat); - if let Some(mut visitor) = visitor { - debug!("Untaring with filter"); + match op { + Op::Visit(mut visitor) => { + debug!("Untaring with callback"); - visitor.visit(tar.entries()?)?; - } else { - debug!("Untaring entire tar"); - tar.unpack(path)?; + visitor.visit(tar.entries()?)?; + } + Op::UnpackToPath(path) => { + debug!("Untaring entire tar"); + tar.unpack(path)?; + } } debug!("Untaring completed"); @@ -57,42 +62,47 @@ fn untar( pub(super) fn extract_compressed_from_readable( dat: R, fmt: TarBasedFmt, - path: &Path, - visitor: Option, + op: Op<'_, V>, ) -> Result<(), BinstallError> { use TarBasedFmt::*; + let msg = if let Op::UnpackToPath(path) = op { + format!("destination: {path:#?}") + } else { + "process in-memory".to_string() + }; + match fmt { Tar => { // Extract to install dir - debug!("Extracting from tar archive to `{path:?}`"); + debug!("Extracting from tar archive: {msg}"); - untar(dat, path, visitor)? + untar(dat, op)? } Tgz => { // Extract to install dir - debug!("Decompressing from tgz archive to `{path:?}`"); + debug!("Decompressing from tgz archive {msg}"); let tar = GzDecoder::new(dat); - untar(tar, path, visitor)?; + untar(tar, op)?; } Txz => { // Extract to install dir - debug!("Decompressing from txz archive to `{path:?}`"); + debug!("Decompressing from txz archive {msg}"); let tar = XzDecoder::new(dat); - untar(tar, path, visitor)?; + untar(tar, op)?; } Tzstd => { // Extract to install dir - debug!("Decompressing from tzstd archive to `{path:?}`"); + debug!("Decompressing from tzstd archive {msg}"); // The error can only come from raw::Decoder::with_dictionary // as of zstd 0.10.2 and 0.11.2, which is specified // as &[] by ZstdDecoder::new, thus ZstdDecoder::new // should not return any error. let tar = ZstdDecoder::with_buffer(dat)?; - untar(tar, path, visitor)?; + untar(tar, op)?; } }; From e376b71cf4d5f916be716096884b6a46ff006c0b Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 17:05:21 +1000 Subject: [PATCH 24/65] Simplify `extract_tar_based_stream_and_visit` Rm unused param `path` and the unnecessary `fs::create_dir_all` since the tar will not be extracted to disk. Signed-off-by: Jiahao XU --- src/helpers.rs | 2 +- src/helpers/async_extracter.rs | 10 ++-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/helpers.rs b/src/helpers.rs index 7a222d3f..a4899bf9 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -109,7 +109,7 @@ pub async fn download_tar_based_and_visit< let stream = resp.bytes_stream(); - let visitor = extract_tar_based_stream_and_visit(stream, path, fmt, visitor).await?; + let visitor = extract_tar_based_stream_and_visit(stream, fmt, visitor).await?; debug!("Download OK, written to file: '{}'", path.display()); diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 80ed397c..3d948de3 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -233,21 +233,15 @@ where pub async fn extract_tar_based_stream_and_visit( stream: impl Stream> + Unpin, - output: &Path, fmt: TarBasedFmt, mut visitor: V, ) -> Result where BinstallError: From, { - let path = output.to_owned(); - extract_impl(stream, move |mut rx| { - fs::create_dir_all(path.parent().unwrap())?; - - extract_compressed_from_readable(ReadableRx::new(&mut rx), fmt, Op::Visit(&mut visitor))?; - - Ok(visitor) + extract_compressed_from_readable(ReadableRx::new(&mut rx), fmt, Op::Visit(&mut visitor)) + .map(|_| visitor) }) .await } From f25306ff97de8320a9ba3a1835b57f6ca00aa695 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 17:07:29 +1000 Subject: [PATCH 25/65] Simplify `download_tar_based_and_visit`: Rm unused param `path` Signed-off-by: Jiahao XU --- src/drivers/cratesio.rs | 1 - src/helpers.rs | 11 +++-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/drivers/cratesio.rs b/src/drivers/cratesio.rs index 28a157ca..eb47a6b0 100644 --- a/src/drivers/cratesio.rs +++ b/src/drivers/cratesio.rs @@ -65,7 +65,6 @@ pub async fn fetch_crate_cratesio( download_tar_based_and_visit( Url::parse(&crate_url)?, TarBasedFmt::Tgz, - &temp_dir, ManifestVisitor::new(manifest_dir_path), ) .await? diff --git a/src/helpers.rs b/src/helpers.rs index a4899bf9..0667383c 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -91,27 +91,22 @@ pub async fn download_and_extract>( /// /// * `filter` - If Some, then it will pass the path of the file to it /// and only extract ones which filter returns `true`. -pub async fn download_tar_based_and_visit< - V: TarEntriesVisitor + Debug + Send + 'static, - P: AsRef, ->( +pub async fn download_tar_based_and_visit( url: Url, fmt: TarBasedFmt, - path: P, visitor: V, ) -> Result { debug!("Downloading from: '{url}'"); let resp = create_request(url).await?; - let path = path.as_ref(); - debug!("Downloading to file: '{}'", path.display()); + debug!("Downloading and extracting then in-memory processing"); let stream = resp.bytes_stream(); let visitor = extract_tar_based_stream_and_visit(stream, fmt, visitor).await?; - debug!("Download OK, written to file: '{}'", path.display()); + debug!("Download, extraction and in-memory procession OK"); Ok(visitor) } From e39549f470fd6ef7d968a9e4cc26581f57f37acf Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 17:08:22 +1000 Subject: [PATCH 26/65] Improve `debug!` logging in `download_and_extract` Signed-off-by: Jiahao XU --- src/helpers.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/helpers.rs b/src/helpers.rs index 0667383c..a088b99e 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -71,7 +71,7 @@ pub async fn download_and_extract>( let resp = create_request(url).await?; let path = path.as_ref(); - debug!("Downloading to file: '{}'", path.display()); + debug!("Downloading and extracting to: '{}'", path.display()); let stream = resp.bytes_stream(); @@ -81,7 +81,7 @@ pub async fn download_and_extract>( PkgFmtDecomposed::Zip => extract_zip(stream, path).await?, } - debug!("Download OK, written to file: '{}'", path.display()); + debug!("Download OK, extracted to: '{}'", path.display()); Ok(()) } From 24b1941c1a79eb29d00d9b19b5c9a971fc437c32 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 17:09:17 +1000 Subject: [PATCH 27/65] Simplify `fetch_crate_cratesio`: Rm unused param `temp_dir` Signed-off-by: Jiahao XU --- src/drivers/cratesio.rs | 3 +-- src/main.rs | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/drivers/cratesio.rs b/src/drivers/cratesio.rs index eb47a6b0..0bc6027b 100644 --- a/src/drivers/cratesio.rs +++ b/src/drivers/cratesio.rs @@ -1,4 +1,4 @@ -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use std::time::Duration; use cargo_toml::Manifest; @@ -13,7 +13,6 @@ use crate::{helpers::*, BinstallError, Meta, TarBasedFmt}; pub async fn fetch_crate_cratesio( name: &str, version_req: &str, - temp_dir: &Path, ) -> Result, BinstallError> { // Fetch / update index debug!("Looking up crate information"); diff --git a/src/main.rs b/src/main.rs index b95a7111..f9136157 100644 --- a/src/main.rs +++ b/src/main.rs @@ -215,7 +215,7 @@ async fn entry() -> Result<()> { debug!("Reading manifest: {}", manifest_path.display()); load_manifest_path(manifest_path.join("Cargo.toml"))? } - None => fetch_crate_cratesio(&opts.name, &opts.version, temp_dir.path()).await?, + None => fetch_crate_cratesio(&opts.name, &opts.version).await?, }; let package = manifest.package.unwrap(); From 989be49cb0faa620b6b6c56e1e30e5514fcfc590 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 17:13:48 +1000 Subject: [PATCH 28/65] Fix confusing doc of `Vfs::add_path` Signed-off-by: Jiahao XU --- src/drivers/vfs.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/drivers/vfs.rs b/src/drivers/vfs.rs index 56aa5f54..08178b06 100644 --- a/src/drivers/vfs.rs +++ b/src/drivers/vfs.rs @@ -14,8 +14,7 @@ impl Vfs { Self(HashMap::with_capacity(16)) } - /// * `path` - must be canonical, must not be empty and must - /// start with a prefix. + /// * `path` - must be canonical, must not be empty. pub(super) fn add_path(&mut self, mut path: &Path) { while let Some(parent) = path.parent() { if let Some(path_str) = path.to_str() { From b879c15c702acc817d6be7a9536b6afa6a764967 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 17:22:20 +1000 Subject: [PATCH 29/65] Update doc of `PathExt::normalize_path` Signed-off-by: Jiahao XU --- src/helpers/path_ext.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/helpers/path_ext.rs b/src/helpers/path_ext.rs index c1261f96..c4958539 100644 --- a/src/helpers/path_ext.rs +++ b/src/helpers/path_ext.rs @@ -1,6 +1,8 @@ use std::path::{Component, Path, PathBuf}; pub trait PathExt { + /// Similiar to `os.path.normpath`: It does not perform + /// any fs operation. fn normalize_path(&self) -> PathBuf; } From b88e384f9535f84ca782fe0eace1359542c4cca3 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 19:17:06 +1000 Subject: [PATCH 30/65] Fix `Vfs::add_path`: Add insert `filename` instead of `path` into the `HashSet>` Signed-off-by: Jiahao XU --- src/drivers/vfs.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/drivers/vfs.rs b/src/drivers/vfs.rs index 08178b06..d43b3a8a 100644 --- a/src/drivers/vfs.rs +++ b/src/drivers/vfs.rs @@ -17,11 +17,14 @@ impl Vfs { /// * `path` - must be canonical, must not be empty. pub(super) fn add_path(&mut self, mut path: &Path) { while let Some(parent) = path.parent() { - if let Some(path_str) = path.to_str() { + // Since path has parent, it must have a filename + let filename = path.file_name().unwrap(); + + if let Some(filename) = filename.to_str() { self.0 .entry(parent.into()) .or_insert_with(|| HashSet::with_capacity(4)) - .insert(path_str.into()); + .insert(filename.into()); } path = parent; From bd39ce754fa2c0a225d5634efa7943c527874f91 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 19:23:03 +1000 Subject: [PATCH 31/65] Fix `Vfs::add_path`: Use `to_string_lossy` instead of `to_str` to be compatible with the implementation in `cargo_toml`: https://docs.rs/cargo_toml/0.11.5/src/cargo_toml/afs.rs.html#24 Signed-off-by: Jiahao XU --- src/drivers/vfs.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/drivers/vfs.rs b/src/drivers/vfs.rs index d43b3a8a..18c422de 100644 --- a/src/drivers/vfs.rs +++ b/src/drivers/vfs.rs @@ -20,12 +20,14 @@ impl Vfs { // Since path has parent, it must have a filename let filename = path.file_name().unwrap(); - if let Some(filename) = filename.to_str() { - self.0 - .entry(parent.into()) - .or_insert_with(|| HashSet::with_capacity(4)) - .insert(filename.into()); - } + // `cargo_toml`'s implementation does the same thing. + // https://docs.rs/cargo_toml/0.11.5/src/cargo_toml/afs.rs.html#24 + let filename = filename.to_string_lossy(); + + self.0 + .entry(parent.into()) + .or_insert_with(|| HashSet::with_capacity(4)) + .insert(filename.into()); path = parent; } From 3a1038c80b0f69564d00ebd2cb6c57854c714f41 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 19:44:59 +1000 Subject: [PATCH 32/65] Optimize binary size/compilation time by reducing generics monomorphization using `Box`. Signed-off-by: Jiahao XU --- src/helpers/async_extracter.rs | 89 ++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 3d948de3..7073d824 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -109,14 +109,9 @@ impl AsyncExtracterInner { } } -async fn extract_impl< - F: FnOnce(mpsc::Receiver) -> Result + Send + 'static, - T: Debug + Send + 'static, - S: Stream> + Unpin, - E, ->( +async fn extract_impl> + Unpin, E>( mut stream: S, - f: F, + f: Box) -> Result + Send>, ) -> Result where BinstallError: From, @@ -155,25 +150,28 @@ where { let path = output.to_owned(); - extract_impl(stream, move |mut rx| { - fs::create_dir_all(path.parent().unwrap())?; + extract_impl( + stream, + Box::new(move |mut rx| { + fs::create_dir_all(path.parent().unwrap())?; - let mut file = fs::File::create(&path)?; + let mut file = fs::File::create(&path)?; - // remove it unless the operation isn't aborted and no write - // fails. - let remove_guard = guard(&path, |path| { - fs::remove_file(path).ok(); - }); + // remove it unless the operation isn't aborted and no write + // fails. + let remove_guard = guard(&path, |path| { + fs::remove_file(path).ok(); + }); - read_into_file(&mut file, &mut rx)?; + read_into_file(&mut file, &mut rx)?; - // Operation isn't aborted and all writes succeed, - // disarm the remove_guard. - ScopeGuard::into_inner(remove_guard); + // Operation isn't aborted and all writes succeed, + // disarm the remove_guard. + ScopeGuard::into_inner(remove_guard); - Ok(()) - }) + Ok(()) + }), + ) .await } @@ -186,18 +184,21 @@ where { let path = output.to_owned(); - extract_impl(stream, move |mut rx| { - fs::create_dir_all(path.parent().unwrap())?; + extract_impl( + stream, + Box::new(move |mut rx| { + fs::create_dir_all(path.parent().unwrap())?; - let mut file = tempfile()?; + let mut file = tempfile()?; - read_into_file(&mut file, &mut rx)?; + read_into_file(&mut file, &mut rx)?; - // rewind it so that we can pass it to unzip - file.rewind()?; + // rewind it so that we can pass it to unzip + file.rewind()?; - unzip(file, &path) - }) + unzip(file, &path) + }), + ) .await } @@ -219,15 +220,18 @@ where let path = output.to_owned(); - extract_impl(stream, move |mut rx| { - fs::create_dir_all(path.parent().unwrap())?; + extract_impl( + stream, + Box::new(move |mut rx| { + fs::create_dir_all(path.parent().unwrap())?; - extract_compressed_from_readable::( - ReadableRx::new(&mut rx), - fmt, - Op::UnpackToPath(&path), - ) - }) + extract_compressed_from_readable::( + ReadableRx::new(&mut rx), + fmt, + Op::UnpackToPath(&path), + ) + }), + ) .await } @@ -239,9 +243,12 @@ pub async fn extract_tar_based_stream_and_visit, { - extract_impl(stream, move |mut rx| { - extract_compressed_from_readable(ReadableRx::new(&mut rx), fmt, Op::Visit(&mut visitor)) - .map(|_| visitor) - }) + extract_impl( + stream, + Box::new(move |mut rx| { + extract_compressed_from_readable(ReadableRx::new(&mut rx), fmt, Op::Visit(&mut visitor)) + .map(|_| visitor) + }), + ) .await } From baf9784b82344e4349c27d252e0c0c88aa922125 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 19:52:03 +1000 Subject: [PATCH 33/65] Update doc of mod `async_extracter` Signed-off-by: Jiahao XU --- src/helpers/async_extracter.rs | 35 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 7073d824..16cf6c03 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -1,3 +1,19 @@ +//! # Advantages +//! +//! Using this mod has the following advantages over downloading +//! and extracting in on the async thread: +//! +//! - The code is pipelined instead of storing the downloaded file in memory +//! and extract it, except for `PkgFmt::Zip`, since `ZipArchiver::new` +//! requires `std::io::Seek`, so it fallbacks to writing the a file then +//! unzip it. +//! - The async part (downloading) and the extracting part runs in parallel +//! using `tokio::spawn_nonblocking`. +//! - Compressing/writing which takes a lot of CPU time will not block +//! the runtime anymore. +//! - For all `tar` based formats, it can extract only specified files and +//! process them in memory, without any disk I/O. + use std::fmt::Debug; use std::fs; use std::io::{self, Read, Seek, Write}; @@ -29,25 +45,6 @@ pub(crate) enum Content { /// /// After all write is done, you must call `AsyncExtracter::done`, /// otherwise the extracted content will be removed on drop. -/// -/// # Advantages -/// -/// `download_and_extract` has the following advantages over downloading -/// plus extracting in on the same thread: -/// -/// - The code is pipelined instead of storing the downloaded file in memory -/// and extract it, except for `PkgFmt::Zip`, since `ZipArchiver::new` -/// requires `std::io::Seek`, so it fallbacks to writing the a file then -/// unzip it. -/// - The async part (downloading) and the extracting part runs in parallel -/// using `tokio::spawn_nonblocking`. -/// - Compressing/writing which takes a lot of CPU time will not block -/// the runtime anymore. -/// - For any PkgFmt except for `PkgFmt::Zip` and `PkgFmt::Bin` (basically -/// all `tar` based formats), it can extract only specified files. -/// This means that `super::drivers::fetch_crate_cratesio` no longer need to -/// extract the whole crate and write them to disk, it now only extract the -/// relevant part (`Cargo.toml`) out to disk and open it. #[derive(Debug)] struct AsyncExtracterInner { /// Use AutoAbortJoinHandle so that the task From 225cf74cd9601ee5829bf89de24eacf801787f93 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 20:01:43 +1000 Subject: [PATCH 34/65] Refactor: Ret `impl Stream` in `create_request` Since both `download*` function takes a `impl Stream` and the `Response::bytes_stream` takes `Response` by value, thus there is no lifetime issue and we can return `impl Stream` instead of `Response` Signed-off-by: Jiahao XU --- src/helpers.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/helpers.rs b/src/helpers.rs index a088b99e..721716bb 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -1,7 +1,9 @@ use std::fmt::Debug; use std::path::{Path, PathBuf}; +use bytes::Bytes; use cargo_toml::Manifest; +use futures_util::stream::Stream; use log::debug; use reqwest::{Method, Response}; use serde::Serialize; @@ -49,7 +51,9 @@ pub async fn remote_exists(url: Url, method: Method) -> Result Result { +async fn create_request( + url: Url, +) -> Result>, BinstallError> { reqwest::get(url.clone()) .await .and_then(|r| r.error_for_status()) @@ -58,6 +62,7 @@ async fn create_request(url: Url) -> Result { url, err, }) + .map(Response::bytes_stream) } /// Download a file from the provided URL and extract it to the provided path. @@ -68,13 +73,11 @@ pub async fn download_and_extract>( ) -> Result<(), BinstallError> { debug!("Downloading from: '{url}'"); - let resp = create_request(url).await?; + let stream = create_request(url).await?; let path = path.as_ref(); debug!("Downloading and extracting to: '{}'", path.display()); - let stream = resp.bytes_stream(); - match fmt.decompose() { PkgFmtDecomposed::Tar(fmt) => extract_tar_based_stream(stream, path, fmt).await?, PkgFmtDecomposed::Bin => extract_bin(stream, path).await?, @@ -98,12 +101,10 @@ pub async fn download_tar_based_and_visit Result { debug!("Downloading from: '{url}'"); - let resp = create_request(url).await?; + let stream = create_request(url).await?; debug!("Downloading and extracting then in-memory processing"); - let stream = resp.bytes_stream(); - let visitor = extract_tar_based_stream_and_visit(stream, fmt, visitor).await?; debug!("Download, extraction and in-memory procession OK"); From 94c77c32b4946f256cc45edfc8672846122835fd Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 22:38:00 +1000 Subject: [PATCH 35/65] Make `debug!` message more consistent in `extract_compressed_from_readable` Signed-off-by: Jiahao XU --- src/helpers/extracter.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/helpers/extracter.rs b/src/helpers/extracter.rs index 12133da2..cbf65802 100644 --- a/src/helpers/extracter.rs +++ b/src/helpers/extracter.rs @@ -81,21 +81,21 @@ pub(super) fn extract_compressed_from_readable } Tgz => { // Extract to install dir - debug!("Decompressing from tgz archive {msg}"); + debug!("Decompressing from tgz archive: {msg}"); let tar = GzDecoder::new(dat); untar(tar, op)?; } Txz => { // Extract to install dir - debug!("Decompressing from txz archive {msg}"); + debug!("Decompressing from txz archive: {msg}"); let tar = XzDecoder::new(dat); untar(tar, op)?; } Tzstd => { // Extract to install dir - debug!("Decompressing from tzstd archive {msg}"); + debug!("Decompressing from tzstd archive: {msg}"); // The error can only come from raw::Decoder::with_dictionary // as of zstd 0.10.2 and 0.11.2, which is specified From c9b0c0c59c45b40065d7928b053dcec70f0203eb Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sun, 12 Jun 2022 22:38:34 +1000 Subject: [PATCH 36/65] Add `.DS_Store` to `.gitignore` Signed-off-by: Jiahao XU --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ea8c4bf7..05923927 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target +.DS_Store From b4e61161f2f5f650fa6e84cf018c639304de6778 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Mon, 13 Jun 2022 00:53:08 +1000 Subject: [PATCH 37/65] Derive `strum_macros::Display` on `TarBasedFmt` sp that it can be printed. Signed-off-by: Jiahao XU --- src/fmt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fmt.rs b/src/fmt.rs index 34a9dc1e..840ffc6a 100644 --- a/src/fmt.rs +++ b/src/fmt.rs @@ -50,7 +50,7 @@ pub enum PkgFmtDecomposed { Zip, } -#[derive(Debug, Copy, Clone, PartialEq)] +#[derive(Debug, Display, Copy, Clone, PartialEq)] pub enum TarBasedFmt { /// Download format is TAR (uncompressed) Tar, From 8ef1e56fcceba6adaec43826fa4a490b16ab1eb9 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Mon, 13 Jun 2022 01:05:07 +1000 Subject: [PATCH 38/65] Take `Receiver` by value in `ReadableRx::new` It would remove the lifetime and make reasoning the code much easier. It would also unblock the next commit I am going to make. Signed-off-by: Jiahao XU --- src/helpers/async_extracter.rs | 8 ++++---- src/helpers/readable_rx.rs | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 16cf6c03..04a6117b 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -219,11 +219,11 @@ where extract_impl( stream, - Box::new(move |mut rx| { + Box::new(move |rx| { fs::create_dir_all(path.parent().unwrap())?; extract_compressed_from_readable::( - ReadableRx::new(&mut rx), + ReadableRx::new(rx), fmt, Op::UnpackToPath(&path), ) @@ -242,8 +242,8 @@ where { extract_impl( stream, - Box::new(move |mut rx| { - extract_compressed_from_readable(ReadableRx::new(&mut rx), fmt, Op::Visit(&mut visitor)) + Box::new(move |rx| { + extract_compressed_from_readable(ReadableRx::new(rx), fmt, Op::Visit(&mut visitor)) .map(|_| visitor) }), ) diff --git a/src/helpers/readable_rx.rs b/src/helpers/readable_rx.rs index 545bc176..e2597aae 100644 --- a/src/helpers/readable_rx.rs +++ b/src/helpers/readable_rx.rs @@ -7,13 +7,13 @@ use tokio::sync::mpsc::Receiver; use super::async_extracter::Content; #[derive(Debug)] -pub(crate) struct ReadableRx<'a> { - rx: &'a mut Receiver, +pub(crate) struct ReadableRx { + rx: Receiver, bytes: Bytes, } -impl<'a> ReadableRx<'a> { - pub(crate) fn new(rx: &'a mut Receiver) -> Self { +impl ReadableRx { + pub(crate) fn new(rx: Receiver) -> Self { Self { rx, bytes: Bytes::new(), @@ -21,7 +21,7 @@ impl<'a> ReadableRx<'a> { } } -impl Read for ReadableRx<'_> { +impl Read for ReadableRx { fn read(&mut self, buf: &mut [u8]) -> io::Result { if buf.is_empty() { return Ok(0); @@ -43,7 +43,7 @@ impl Read for ReadableRx<'_> { } } -impl BufRead for ReadableRx<'_> { +impl BufRead for ReadableRx { fn fill_buf(&mut self) -> io::Result<&[u8]> { let bytes = &mut self.bytes; if !bytes.has_remaining() { From 9584c8d35e7c69939935b7b11e3befe0a50e5a86 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Mon, 13 Jun 2022 01:06:12 +1000 Subject: [PATCH 39/65] Refactor: Extract `create_tar_decoder` from `extract_compressed_from_readable`. Signed-off-by: Jiahao XU --- src/helpers/extracter.rs | 70 +++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/src/helpers/extracter.rs b/src/helpers/extracter.rs index cbf65802..5994882f 100644 --- a/src/helpers/extracter.rs +++ b/src/helpers/extracter.rs @@ -32,9 +32,10 @@ pub(super) enum Op<'a, V: TarEntriesVisitor> { /// * `f` - If Some, then this function will pass /// the entries of the `dat` to it and let it decides /// what to do with the tar. -fn untar(dat: R, op: Op<'_, V>) -> Result<(), BinstallError> { - let mut tar = Archive::new(dat); - +fn untar( + mut tar: Archive, + op: Op<'_, V>, +) -> Result<(), BinstallError> { match op { Op::Visit(mut visitor) => { debug!("Untaring with callback"); @@ -52,6 +53,28 @@ fn untar(dat: R, op: Op<'_, V>) -> Result<(), Bin Ok(()) } +pub(super) fn create_tar_decoder( + dat: impl BufRead + 'static, + fmt: TarBasedFmt, +) -> Result>, BinstallError> { + use TarBasedFmt::*; + + let r: Box = match fmt { + Tar => Box::new(dat), + Tgz => Box::new(GzDecoder::new(dat)), + Txz => Box::new(XzDecoder::new(dat)), + Tzstd => { + // The error can only come from raw::Decoder::with_dictionary + // as of zstd 0.10.2 and 0.11.2, which is specified + // as &[] by ZstdDecoder::new, thus ZstdDecoder::new + // should not return any error. + Box::new(ZstdDecoder::with_buffer(dat)?) + } + }; + + Ok(Archive::new(r)) +} + /// Extract files from the specified source onto the specified path. /// /// * `fmt` - must not be `PkgFmt::Bin` or `PkgFmt::Zip`. @@ -59,54 +82,21 @@ fn untar(dat: R, op: Op<'_, V>) -> Result<(), Bin /// and only extract ones which filter returns `true`. /// Note that this is a best-effort and it only works when `fmt` /// is not `PkgFmt::Bin` or `PkgFmt::Zip`. -pub(super) fn extract_compressed_from_readable( +pub(super) fn extract_compressed_from_readable( dat: R, fmt: TarBasedFmt, op: Op<'_, V>, ) -> Result<(), BinstallError> { - use TarBasedFmt::*; - let msg = if let Op::UnpackToPath(path) = op { format!("destination: {path:#?}") } else { "process in-memory".to_string() }; - match fmt { - Tar => { - // Extract to install dir - debug!("Extracting from tar archive: {msg}"); + debug!("Extracting from {fmt} archive: {msg}"); - untar(dat, op)? - } - Tgz => { - // Extract to install dir - debug!("Decompressing from tgz archive: {msg}"); - - let tar = GzDecoder::new(dat); - untar(tar, op)?; - } - Txz => { - // Extract to install dir - debug!("Decompressing from txz archive: {msg}"); - - let tar = XzDecoder::new(dat); - untar(tar, op)?; - } - Tzstd => { - // Extract to install dir - debug!("Decompressing from tzstd archive: {msg}"); - - // The error can only come from raw::Decoder::with_dictionary - // as of zstd 0.10.2 and 0.11.2, which is specified - // as &[] by ZstdDecoder::new, thus ZstdDecoder::new - // should not return any error. - let tar = ZstdDecoder::with_buffer(dat)?; - untar(tar, op)?; - } - }; - - Ok(()) + let tar = create_tar_decoder(dat, fmt)?; + untar(tar, op) } pub(super) fn unzip(dat: File, dst: &Path) -> Result<(), BinstallError> { From 467f7f68341245f56bcfda85568928c430b7bfc9 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Mon, 13 Jun 2022 01:12:21 +1000 Subject: [PATCH 40/65] Refactor: Call `create_tar_decoder` directly in `extract_tar_based_stream*`. Signed-off-by: Jiahao XU --- src/helpers/async_extracter.rs | 17 ++++++----- src/helpers/extracter.rs | 54 ---------------------------------- 2 files changed, 10 insertions(+), 61 deletions(-) diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 04a6117b..2ae31a23 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -21,6 +21,7 @@ use std::path::Path; use bytes::Bytes; use futures_util::stream::{Stream, StreamExt}; +use log::debug; use scopeguard::{guard, ScopeGuard}; use tar::Entries; use tempfile::tempfile; @@ -222,11 +223,10 @@ where Box::new(move |rx| { fs::create_dir_all(path.parent().unwrap())?; - extract_compressed_from_readable::( - ReadableRx::new(rx), - fmt, - Op::UnpackToPath(&path), - ) + debug!("Extracting from {fmt} archive to {path:#?}"); + create_tar_decoder(ReadableRx::new(rx), fmt)?.unpack(path)?; + + Ok(()) }), ) .await @@ -243,8 +243,11 @@ where extract_impl( stream, Box::new(move |rx| { - extract_compressed_from_readable(ReadableRx::new(rx), fmt, Op::Visit(&mut visitor)) - .map(|_| visitor) + debug!("Extracting from {fmt} archive to process it in memory"); + + let mut tar = create_tar_decoder(ReadableRx::new(rx), fmt)?; + visitor.visit(tar.entries()?)?; + Ok(visitor) }), ) .await diff --git a/src/helpers/extracter.rs b/src/helpers/extracter.rs index 5994882f..ee5129b3 100644 --- a/src/helpers/extracter.rs +++ b/src/helpers/extracter.rs @@ -23,36 +23,6 @@ impl TarEntriesVisitor for &mut V { } } -#[derive(Debug)] -pub(super) enum Op<'a, V: TarEntriesVisitor> { - UnpackToPath(&'a Path), - Visit(V), -} - -/// * `f` - If Some, then this function will pass -/// the entries of the `dat` to it and let it decides -/// what to do with the tar. -fn untar( - mut tar: Archive, - op: Op<'_, V>, -) -> Result<(), BinstallError> { - match op { - Op::Visit(mut visitor) => { - debug!("Untaring with callback"); - - visitor.visit(tar.entries()?)?; - } - Op::UnpackToPath(path) => { - debug!("Untaring entire tar"); - tar.unpack(path)?; - } - } - - debug!("Untaring completed"); - - Ok(()) -} - pub(super) fn create_tar_decoder( dat: impl BufRead + 'static, fmt: TarBasedFmt, @@ -75,30 +45,6 @@ pub(super) fn create_tar_decoder( Ok(Archive::new(r)) } -/// Extract files from the specified source onto the specified path. -/// -/// * `fmt` - must not be `PkgFmt::Bin` or `PkgFmt::Zip`. -/// * `filter` - If Some, then it will pass the path of the file to it -/// and only extract ones which filter returns `true`. -/// Note that this is a best-effort and it only works when `fmt` -/// is not `PkgFmt::Bin` or `PkgFmt::Zip`. -pub(super) fn extract_compressed_from_readable( - dat: R, - fmt: TarBasedFmt, - op: Op<'_, V>, -) -> Result<(), BinstallError> { - let msg = if let Op::UnpackToPath(path) = op { - format!("destination: {path:#?}") - } else { - "process in-memory".to_string() - }; - - debug!("Extracting from {fmt} archive: {msg}"); - - let tar = create_tar_decoder(dat, fmt)?; - untar(tar, op) -} - pub(super) fn unzip(dat: File, dst: &Path) -> Result<(), BinstallError> { debug!("Decompressing from zip archive to `{dst:?}`"); From 2091345ce029ecae1843c8ba0de7a35bb0960dae Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Mon, 13 Jun 2022 01:14:22 +1000 Subject: [PATCH 41/65] Refactor: Mv `TarEntriesVisitor` to mod `async_extracter` Signed-off-by: Jiahao XU --- src/helpers.rs | 1 - src/helpers/async_extracter.rs | 12 ++++++++++++ src/helpers/extracter.rs | 14 +------------- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/helpers.rs b/src/helpers.rs index 721716bb..060aad8c 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -22,7 +22,6 @@ mod ui_thread; pub use ui_thread::UIThread; mod extracter; -pub use extracter::TarEntriesVisitor; mod readable_rx; diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 2ae31a23..2b1298bf 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -232,6 +232,18 @@ where .await } +/// Visitor must iterate over all entries. +/// Entires can be in arbitary order. +pub trait TarEntriesVisitor { + fn visit(&mut self, entries: Entries<'_, R>) -> Result<(), BinstallError>; +} + +impl TarEntriesVisitor for &mut V { + fn visit(&mut self, entries: Entries<'_, R>) -> Result<(), BinstallError> { + (*self).visit(entries) + } +} + pub async fn extract_tar_based_stream_and_visit( stream: impl Stream> + Unpin, fmt: TarBasedFmt, diff --git a/src/helpers/extracter.rs b/src/helpers/extracter.rs index ee5129b3..b1737528 100644 --- a/src/helpers/extracter.rs +++ b/src/helpers/extracter.rs @@ -4,25 +4,13 @@ use std::path::Path; use flate2::bufread::GzDecoder; use log::debug; -use tar::{Archive, Entries}; +use tar::Archive; use xz2::bufread::XzDecoder; use zip::read::ZipArchive; use zstd::stream::Decoder as ZstdDecoder; use crate::{BinstallError, TarBasedFmt}; -/// Visitor must iterate over all entries. -/// Entires can be in arbitary order. -pub trait TarEntriesVisitor { - fn visit(&mut self, entries: Entries<'_, R>) -> Result<(), BinstallError>; -} - -impl TarEntriesVisitor for &mut V { - fn visit(&mut self, entries: Entries<'_, R>) -> Result<(), BinstallError> { - (*self).visit(entries) - } -} - pub(super) fn create_tar_decoder( dat: impl BufRead + 'static, fmt: TarBasedFmt, From 9eb1128f9fab6e90bf5e592a3b6966682a0b8cce Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Mon, 13 Jun 2022 19:32:55 +1000 Subject: [PATCH 42/65] Rm unused `DummyVisitor` in `extract_tar_based_stream` Signed-off-by: Jiahao XU --- src/helpers/async_extracter.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 2b1298bf..334812aa 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -208,14 +208,6 @@ pub async fn extract_tar_based_stream( where BinstallError: From, { - struct DummyVisitor; - - impl TarEntriesVisitor for DummyVisitor { - fn visit(&mut self, _entries: Entries<'_, R>) -> Result<(), BinstallError> { - unimplemented!() - } - } - let path = output.to_owned(); extract_impl( From 282805c3ac7677dd7506b30b346bcbe1bbb2eff5 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Wed, 15 Jun 2022 17:23:00 +1000 Subject: [PATCH 43/65] Add reference to the src of `path_ext` Signed-off-by: Jiahao XU --- src/helpers/path_ext.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/helpers/path_ext.rs b/src/helpers/path_ext.rs index c4958539..1e5d2cc7 100644 --- a/src/helpers/path_ext.rs +++ b/src/helpers/path_ext.rs @@ -1,3 +1,6 @@ +//! Shamelessly taken from: +//! https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61 + use std::path::{Component, Path, PathBuf}; pub trait PathExt { From 30b9a7852092f7201de5f764ecf5af9c78fff8a0 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Wed, 15 Jun 2022 17:45:50 +1000 Subject: [PATCH 44/65] Optimize `normalize_path`: Avoid copy if possible Signed-off-by: Jiahao XU --- src/drivers/visitor.rs | 3 ++- src/helpers/path_ext.rs | 26 ++++++++++++++++++++++---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/drivers/visitor.rs b/src/drivers/visitor.rs index 850d7c39..3f354771 100644 --- a/src/drivers/visitor.rs +++ b/src/drivers/visitor.rs @@ -49,7 +49,8 @@ impl TarEntriesVisitor for ManifestVisitor { fn visit(&mut self, entries: Entries<'_, R>) -> Result<(), BinstallError> { for res in entries { let mut entry = res?; - let path = entry.path()?.normalize_path(); + let path = entry.path()?; + let path = path.normalize_path(); let path = if let Ok(path) = path.strip_prefix(&self.manifest_dir_path) { path diff --git a/src/helpers/path_ext.rs b/src/helpers/path_ext.rs index 1e5d2cc7..bf223280 100644 --- a/src/helpers/path_ext.rs +++ b/src/helpers/path_ext.rs @@ -1,16 +1,34 @@ -//! Shamelessly taken from: +//! Shamelessly adapted from: //! https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61 +use std::borrow::Cow; use std::path::{Component, Path, PathBuf}; pub trait PathExt { /// Similiar to `os.path.normpath`: It does not perform /// any fs operation. - fn normalize_path(&self) -> PathBuf; + fn normalize_path(&self) -> Cow<'_, Path>; +} + +fn is_normalized(path: &Path) -> bool { + for component in path.components() { + match component { + Component::CurDir | Component::ParentDir => { + return false; + } + _ => continue, + } + } + + true } impl PathExt for Path { - fn normalize_path(&self) -> PathBuf { + fn normalize_path(&self) -> Cow<'_, Path> { + if is_normalized(self) { + return Cow::Borrowed(self); + } + let mut components = self.components().peekable(); let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() { components.next(); @@ -34,6 +52,6 @@ impl PathExt for Path { } } } - ret + Cow::Owned(ret) } } From 39ab334da5e7b82c15ae160c38907a02964f15f3 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Wed, 15 Jun 2022 17:52:49 +1000 Subject: [PATCH 45/65] Add a simple optimization to `normalize_path` Signed-off-by: Jiahao XU --- src/helpers/path_ext.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/helpers/path_ext.rs b/src/helpers/path_ext.rs index bf223280..78f26687 100644 --- a/src/helpers/path_ext.rs +++ b/src/helpers/path_ext.rs @@ -30,9 +30,10 @@ impl PathExt for Path { } let mut components = self.components().peekable(); - let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() { + let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek() { + let buf = PathBuf::from(c.as_os_str()); components.next(); - PathBuf::from(c.as_os_str()) + buf } else { PathBuf::new() }; From 0480e9946015ad4ceddbb5ecf492a2a15c2fc099 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 18 Jun 2022 17:29:50 +1000 Subject: [PATCH 46/65] Impl newtype `StreamReadable` It wraps a `Stream>` and implements `Read` and `BufRead` on it so that it can be used on sync context. Signed-off-by: Jiahao XU --- src/helpers.rs | 1 + src/helpers/stream_readable.rs | 81 ++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 src/helpers/stream_readable.rs diff --git a/src/helpers.rs b/src/helpers.rs index 060aad8c..4a30d986 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -22,6 +22,7 @@ mod ui_thread; pub use ui_thread::UIThread; mod extracter; +mod stream_readable; mod readable_rx; diff --git a/src/helpers/stream_readable.rs b/src/helpers/stream_readable.rs new file mode 100644 index 00000000..17113591 --- /dev/null +++ b/src/helpers/stream_readable.rs @@ -0,0 +1,81 @@ +use std::cmp::min; +use std::io::{self, BufRead, Read}; + +use bytes::{Buf, Bytes}; +use futures_util::stream::{Stream, StreamExt}; +use tokio::runtime::Handle; + +use super::BinstallError; + +/// This wraps an AsyncIterator as a `Read`able. +/// It must be used in non-async context only, +/// meaning you have to use it with +/// `tokio::task::{block_in_place, spawn_blocking}` or +/// `std::thread::spawn`. +#[derive(Debug)] +pub(super) struct StreamReadable { + stream: S, + handle: Handle, + bytes: Bytes, +} + +impl StreamReadable { + pub(super) async fn new(stream: S) -> Self { + Self { + stream, + handle: Handle::current(), + bytes: Bytes::new(), + } + } +} + +impl Read for StreamReadable +where + S: Stream> + Unpin, + BinstallError: From, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + if buf.is_empty() { + return Ok(0); + } + + if self.fill_buf()?.is_empty() { + return Ok(0); + } + + let bytes = &mut self.bytes; + + // copy_to_slice requires the bytes to have enough remaining bytes + // to fill buf. + let n = min(buf.len(), bytes.remaining()); + + bytes.copy_to_slice(&mut buf[..n]); + + Ok(n) + } +} +impl BufRead for StreamReadable +where + S: Stream> + Unpin, + BinstallError: From, +{ + fn fill_buf(&mut self) -> io::Result<&[u8]> { + let bytes = &mut self.bytes; + + if !bytes.has_remaining() { + match self.handle.block_on(async { self.stream.next().await }) { + Some(Ok(new_bytes)) => *bytes = new_bytes, + Some(Err(e)) => { + let e: BinstallError = e.into(); + return Err(io::Error::new(io::ErrorKind::Other, e)); + } + None => (), + } + } + Ok(&*bytes) + } + + fn consume(&mut self, amt: usize) { + self.bytes.advance(amt); + } +} From 1161a60968b85ab19987c88f6671f843995a05b0 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 18 Jun 2022 17:56:17 +1000 Subject: [PATCH 47/65] Simplify `create_tar_decoder`: Ret `io::Result` instead of `Result` Signed-off-by: Jiahao XU --- src/helpers/extracter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/helpers/extracter.rs b/src/helpers/extracter.rs index b1737528..13f018b6 100644 --- a/src/helpers/extracter.rs +++ b/src/helpers/extracter.rs @@ -1,5 +1,5 @@ use std::fs::File; -use std::io::{BufRead, Read}; +use std::io::{self, BufRead, Read}; use std::path::Path; use flate2::bufread::GzDecoder; @@ -14,7 +14,7 @@ use crate::{BinstallError, TarBasedFmt}; pub(super) fn create_tar_decoder( dat: impl BufRead + 'static, fmt: TarBasedFmt, -) -> Result>, BinstallError> { +) -> io::Result>> { use TarBasedFmt::*; let r: Box = match fmt { From aba1ba7b6d9f46121df4301b1601dd827f8c654d Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 18 Jun 2022 18:02:42 +1000 Subject: [PATCH 48/65] Manually impl `From for BinstallError` so that if the `io::Error` wraps a `BinstallError`, we would just unwrap it and return the inner `BinstallError`. Otherwise, just wrap the `io::Error` in a `BinstallError`. Signed-off-by: Jiahao XU --- src/errors.rs | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/errors.rs b/src/errors.rs index 070c91ce..7ffc20e3 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -75,7 +75,7 @@ pub enum BinstallError { /// - Exit: 74 #[error(transparent)] #[diagnostic(severity(error), code(binstall::io))] - Io(#[from] std::io::Error), + Io(std::io::Error), /// An error interacting with the crates.io API. /// @@ -231,3 +231,23 @@ impl Termination for BinstallError { code } } + +impl From for BinstallError { + fn from(err: std::io::Error) -> Self { + let is_inner_binstall_err = err + .get_ref() + .map(|e| e.is::()) + .unwrap_or_default(); + + if is_inner_binstall_err { + let inner = err + .into_inner() + .expect("err.get_ref() returns Some, so err.into_inner() should als return Some"); + *inner + .downcast() + .expect("The inner err is tested to be BinstallError") + } else { + BinstallError::Io(err) + } + } +} From c15d99c6f0a2a399c14f648212b5f9e371d5334a Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 18 Jun 2022 18:07:46 +1000 Subject: [PATCH 49/65] Run downloader & extracter on the same thread This have the following advantage: - Remove the mpsc channel, which: - Remove synchronization required for mpsc. - Remove the internal buffering of the mpsc channel, which avoid potentially OOM situation. - Improve data locality since it no longer needs to be sent over thread. - It uses `block_in_place` to avoid creating an additional blocking thread. The disadvantages would be that the downloader can no longer be run in parallel to the extracter. If the bottleneck is the decompressor, then the downloader should also pause and wait for the decompressor to consume the data. But if the bottleneck is the network, then that might be an issue. Signed-off-by: Jiahao XU --- src/helpers/async_extracter.rs | 108 ++++++++++++++------------------- 1 file changed, 46 insertions(+), 62 deletions(-) diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 334812aa..d37d1db0 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -16,7 +16,7 @@ use std::fmt::Debug; use std::fs; -use std::io::{self, Read, Seek, Write}; +use std::io::{self, copy, Read, Seek, Write}; use std::path::Path; use bytes::Bytes; @@ -27,10 +27,10 @@ use tar::Entries; use tempfile::tempfile; use tokio::{ sync::mpsc, - task::{spawn_blocking, JoinHandle}, + task::{block_in_place, spawn_blocking, JoinHandle}, }; -use super::{extracter::*, readable_rx::*}; +use super::{extracter::*, readable_rx::*, stream_readable::StreamReadable}; use crate::{BinstallError, TarBasedFmt}; pub(crate) enum Content { @@ -107,20 +107,15 @@ impl AsyncExtracterInner { } } -async fn extract_impl> + Unpin, E>( - mut stream: S, - f: Box) -> Result + Send>, -) -> Result +async fn extract_impl(stream: S, f: F) -> Result where + T: Debug + Send + 'static, + S: Stream> + Unpin, + F: FnOnce(StreamReadable) -> Result, BinstallError: From, { - let mut extracter = guard(AsyncExtracterInner::new(f), AsyncExtracterInner::abort); - - while let Some(res) = stream.next().await { - extracter.feed(res?).await?; - } - - ScopeGuard::into_inner(extracter).done().await + let readable = StreamReadable::new(stream).await; + block_in_place(move || f(readable)) } fn read_into_file( @@ -148,28 +143,25 @@ where { let path = output.to_owned(); - extract_impl( - stream, - Box::new(move |mut rx| { - fs::create_dir_all(path.parent().unwrap())?; + extract_impl(stream, move |mut reader| { + fs::create_dir_all(path.parent().unwrap())?; - let mut file = fs::File::create(&path)?; + let mut file = fs::File::create(&path)?; - // remove it unless the operation isn't aborted and no write - // fails. - let remove_guard = guard(&path, |path| { - fs::remove_file(path).ok(); - }); + // remove it unless the operation isn't aborted and no write + // fails. + let remove_guard = guard(&path, |path| { + fs::remove_file(path).ok(); + }); - read_into_file(&mut file, &mut rx)?; + copy(&mut reader, &mut file)?; - // Operation isn't aborted and all writes succeed, - // disarm the remove_guard. - ScopeGuard::into_inner(remove_guard); + // Operation isn't aborted and all writes succeed, + // disarm the remove_guard. + ScopeGuard::into_inner(remove_guard); - Ok(()) - }), - ) + Ok(()) + }) .await } @@ -182,26 +174,23 @@ where { let path = output.to_owned(); - extract_impl( - stream, - Box::new(move |mut rx| { - fs::create_dir_all(path.parent().unwrap())?; + extract_impl(stream, move |mut reader| { + fs::create_dir_all(path.parent().unwrap())?; - let mut file = tempfile()?; + let mut file = tempfile()?; - read_into_file(&mut file, &mut rx)?; + copy(&mut reader, &mut file)?; - // rewind it so that we can pass it to unzip - file.rewind()?; + // rewind it so that we can pass it to unzip + file.rewind()?; - unzip(file, &path) - }), - ) + unzip(file, &path) + }) .await } pub async fn extract_tar_based_stream( - stream: impl Stream> + Unpin, + stream: impl Stream> + Unpin + 'static, output: &Path, fmt: TarBasedFmt, ) -> Result<(), BinstallError> @@ -210,17 +199,15 @@ where { let path = output.to_owned(); - extract_impl( - stream, - Box::new(move |rx| { - fs::create_dir_all(path.parent().unwrap())?; + extract_impl(stream, move |reader| { + fs::create_dir_all(path.parent().unwrap())?; - debug!("Extracting from {fmt} archive to {path:#?}"); - create_tar_decoder(ReadableRx::new(rx), fmt)?.unpack(path)?; + debug!("Extracting from {fmt} archive to {path:#?}"); - Ok(()) - }), - ) + create_tar_decoder(reader, fmt)?.unpack(path)?; + + Ok(()) + }) .await } @@ -237,22 +224,19 @@ impl TarEntriesVisitor for &mut V { } pub async fn extract_tar_based_stream_and_visit( - stream: impl Stream> + Unpin, + stream: impl Stream> + Unpin + 'static, fmt: TarBasedFmt, mut visitor: V, ) -> Result where BinstallError: From, { - extract_impl( - stream, - Box::new(move |rx| { - debug!("Extracting from {fmt} archive to process it in memory"); + extract_impl(stream, move |reader| { + debug!("Extracting from {fmt} archive to process it in memory"); - let mut tar = create_tar_decoder(ReadableRx::new(rx), fmt)?; - visitor.visit(tar.entries()?)?; - Ok(visitor) - }), - ) + let mut tar = create_tar_decoder(reader, fmt)?; + visitor.visit(tar.entries()?)?; + Ok(visitor) + }) .await } From 621a64152971061e39eadb9832d9bfd43e5b0043 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 18 Jun 2022 18:10:22 +1000 Subject: [PATCH 50/65] Rm unused items in mod `helpers` Signed-off-by: Jiahao XU --- src/helpers.rs | 2 - src/helpers/async_extracter.rs | 101 ++------------------------------- src/helpers/readable_rx.rs | 64 --------------------- 3 files changed, 4 insertions(+), 163 deletions(-) delete mode 100644 src/helpers/readable_rx.rs diff --git a/src/helpers.rs b/src/helpers.rs index 4a30d986..b2d252e9 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -24,8 +24,6 @@ pub use ui_thread::UIThread; mod extracter; mod stream_readable; -mod readable_rx; - mod path_ext; pub use path_ext::*; diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index d37d1db0..feec5d06 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -16,97 +16,20 @@ use std::fmt::Debug; use std::fs; -use std::io::{self, copy, Read, Seek, Write}; +use std::io::{copy, Read, Seek}; use std::path::Path; use bytes::Bytes; -use futures_util::stream::{Stream, StreamExt}; +use futures_util::stream::Stream; use log::debug; use scopeguard::{guard, ScopeGuard}; use tar::Entries; use tempfile::tempfile; -use tokio::{ - sync::mpsc, - task::{block_in_place, spawn_blocking, JoinHandle}, -}; +use tokio::task::block_in_place; -use super::{extracter::*, readable_rx::*, stream_readable::StreamReadable}; +use super::{extracter::*, stream_readable::StreamReadable}; use crate::{BinstallError, TarBasedFmt}; -pub(crate) enum Content { - /// Data to write to file - Data(Bytes), - - /// Abort the writing and remove the file. - Abort, -} - -/// AsyncExtracter will pass the `Bytes` you give to another thread via -/// a `mpsc` and decompress and unpack it if needed. -/// -/// After all write is done, you must call `AsyncExtracter::done`, -/// otherwise the extracted content will be removed on drop. -#[derive(Debug)] -struct AsyncExtracterInner { - /// Use AutoAbortJoinHandle so that the task - /// will be cancelled on failure. - handle: JoinHandle>, - tx: mpsc::Sender, -} - -impl AsyncExtracterInner { - fn new) -> Result + Send + 'static>( - f: F, - ) -> Self { - let (tx, rx) = mpsc::channel::(100); - - let handle = spawn_blocking(move || f(rx)); - - Self { handle, tx } - } - - /// Upon error, this extracter shall not be reused. - /// Otherwise, `Self::done` would panic. - async fn feed(&mut self, bytes: Bytes) -> Result<(), BinstallError> { - if self.tx.send(Content::Data(bytes)).await.is_err() { - // task failed - Err(Self::wait(&mut self.handle).await.expect_err( - "Implementation bug: write task finished successfully before all writes are done", - )) - } else { - Ok(()) - } - } - - async fn done(mut self) -> Result { - // Drop tx as soon as possible so that the task would wrap up what it - // was doing and flush out all the pending data. - drop(self.tx); - - Self::wait(&mut self.handle).await - } - - async fn wait(handle: &mut JoinHandle>) -> Result { - match handle.await { - Ok(res) => res, - Err(join_err) => Err(io::Error::new(io::ErrorKind::Other, join_err).into()), - } - } - - fn abort(self) { - let tx = self.tx; - // If Self::write fail, then the task is already tear down, - // tx closed and no need to abort. - if !tx.is_closed() { - // Use send here because blocking_send would panic if used - // in async threads. - tokio::spawn(async move { - tx.send(Content::Abort).await.ok(); - }); - } - } -} - async fn extract_impl(stream: S, f: F) -> Result where T: Debug + Send + 'static, @@ -118,22 +41,6 @@ where block_in_place(move || f(readable)) } -fn read_into_file( - file: &mut fs::File, - rx: &mut mpsc::Receiver, -) -> Result<(), BinstallError> { - while let Some(content) = rx.blocking_recv() { - match content { - Content::Data(bytes) => file.write_all(&*bytes)?, - Content::Abort => return Err(io::Error::new(io::ErrorKind::Other, "Aborted").into()), - } - } - - file.flush()?; - - Ok(()) -} - pub async fn extract_bin( stream: impl Stream> + Unpin, output: &Path, diff --git a/src/helpers/readable_rx.rs b/src/helpers/readable_rx.rs deleted file mode 100644 index e2597aae..00000000 --- a/src/helpers/readable_rx.rs +++ /dev/null @@ -1,64 +0,0 @@ -use std::cmp::min; -use std::io::{self, BufRead, Read}; - -use bytes::{Buf, Bytes}; -use tokio::sync::mpsc::Receiver; - -use super::async_extracter::Content; - -#[derive(Debug)] -pub(crate) struct ReadableRx { - rx: Receiver, - bytes: Bytes, -} - -impl ReadableRx { - pub(crate) fn new(rx: Receiver) -> Self { - Self { - rx, - bytes: Bytes::new(), - } - } -} - -impl Read for ReadableRx { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - if buf.is_empty() { - return Ok(0); - } - - if self.fill_buf()?.is_empty() { - return Ok(0); - } - - let bytes = &mut self.bytes; - - // copy_to_slice requires the bytes to have enough remaining bytes - // to fill buf. - let n = min(buf.len(), bytes.remaining()); - - bytes.copy_to_slice(&mut buf[..n]); - - Ok(n) - } -} - -impl BufRead for ReadableRx { - fn fill_buf(&mut self) -> io::Result<&[u8]> { - let bytes = &mut self.bytes; - if !bytes.has_remaining() { - match self.rx.blocking_recv() { - Some(Content::Data(new_bytes)) => *bytes = new_bytes, - Some(Content::Abort) => { - return Err(io::Error::new(io::ErrorKind::Other, "Aborted")) - } - None => (), - } - } - Ok(&*bytes) - } - - fn consume(&mut self, amt: usize) { - self.bytes.advance(amt); - } -} From b1523581751adf676bc01814f92507423c401d02 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 18 Jun 2022 18:12:13 +1000 Subject: [PATCH 51/65] Rm unused trait bound in `extract_impl` Signed-off-by: Jiahao XU --- src/helpers/async_extracter.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index feec5d06..c2ff7800 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -32,7 +32,6 @@ use crate::{BinstallError, TarBasedFmt}; async fn extract_impl(stream: S, f: F) -> Result where - T: Debug + Send + 'static, S: Stream> + Unpin, F: FnOnce(StreamReadable) -> Result, BinstallError: From, From 9de8a4841f92352c23bf6dda247753808a65e6de Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 18 Jun 2022 18:12:48 +1000 Subject: [PATCH 52/65] Update doc of mod `async_extracter` Signed-off-by: Jiahao XU --- src/helpers/async_extracter.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index c2ff7800..634df7e0 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -1,14 +1,12 @@ //! # Advantages //! //! Using this mod has the following advantages over downloading -//! and extracting in on the async thread: +//! to file then extracting: //! //! - The code is pipelined instead of storing the downloaded file in memory //! and extract it, except for `PkgFmt::Zip`, since `ZipArchiver::new` //! requires `std::io::Seek`, so it fallbacks to writing the a file then //! unzip it. -//! - The async part (downloading) and the extracting part runs in parallel -//! using `tokio::spawn_nonblocking`. //! - Compressing/writing which takes a lot of CPU time will not block //! the runtime anymore. //! - For all `tar` based formats, it can extract only specified files and From a5879e3d6537edaa4439fdfe951003ec5d321201 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 18 Jun 2022 18:24:01 +1000 Subject: [PATCH 53/65] Rm unnecessary `to_owned` call in `extract_*` It was called before because `spawn_blocking` requires that, but we now switches to `block_in_place` which no longer needs this. Signed-off-by: Jiahao XU --- src/helpers/async_extracter.rs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 634df7e0..e1448776 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -40,13 +40,11 @@ where pub async fn extract_bin( stream: impl Stream> + Unpin, - output: &Path, + path: &Path, ) -> Result<(), BinstallError> where BinstallError: From, { - let path = output.to_owned(); - extract_impl(stream, move |mut reader| { fs::create_dir_all(path.parent().unwrap())?; @@ -71,13 +69,11 @@ where pub async fn extract_zip( stream: impl Stream> + Unpin, - output: &Path, + path: &Path, ) -> Result<(), BinstallError> where BinstallError: From, { - let path = output.to_owned(); - extract_impl(stream, move |mut reader| { fs::create_dir_all(path.parent().unwrap())?; @@ -88,21 +84,19 @@ where // rewind it so that we can pass it to unzip file.rewind()?; - unzip(file, &path) + unzip(file, path) }) .await } pub async fn extract_tar_based_stream( stream: impl Stream> + Unpin + 'static, - output: &Path, + path: &Path, fmt: TarBasedFmt, ) -> Result<(), BinstallError> where BinstallError: From, { - let path = output.to_owned(); - extract_impl(stream, move |reader| { fs::create_dir_all(path.parent().unwrap())?; From b14b71135ec116bc50817cab469677da7ec9c8b5 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 18 Jun 2022 18:25:50 +1000 Subject: [PATCH 54/65] Revert refactor: Put `find_version` back to `drivers` Signed-off-by: Jiahao XU --- src/drivers.rs | 49 +++++++++++++++++++++++++++++++++++++++--- src/drivers/version.rs | 48 ----------------------------------------- 2 files changed, 46 insertions(+), 51 deletions(-) delete mode 100644 src/drivers/version.rs diff --git a/src/drivers.rs b/src/drivers.rs index 8b8f362f..cab8a6d6 100644 --- a/src/drivers.rs +++ b/src/drivers.rs @@ -1,17 +1,60 @@ +use std::collections::BTreeSet; use std::path::{Path, PathBuf}; +use log::debug; +use semver::{Version, VersionReq}; + use crate::BinstallError; mod cratesio; pub use cratesio::*; -mod version; -use version::find_version; - mod vfs; mod visitor; +fn find_version<'a, V: Iterator>( + requirement: &str, + version_iter: V, +) -> Result { + // Parse version requirement + let version_req = VersionReq::parse(requirement).map_err(|err| BinstallError::VersionReq { + req: requirement.into(), + err, + })?; + + // Filter for matching versions + let filtered: BTreeSet<_> = version_iter + .filter_map(|v| { + // Remove leading `v` for git tags + let ver_str = match v.strip_prefix('s') { + Some(v) => v, + None => v, + }; + + // Parse out version + let ver = Version::parse(ver_str).ok()?; + debug!("Version: {:?}", ver); + + // Filter by version match + if version_req.matches(&ver) { + Some(ver) + } else { + None + } + }) + .collect(); + + debug!("Filtered: {:?}", filtered); + + // Return highest version + filtered + .iter() + .max() + .cloned() + .ok_or(BinstallError::VersionMismatch { req: version_req }) +} + /// Fetch a crate by name and version from github /// TODO: implement this pub async fn fetch_crate_gh_releases( diff --git a/src/drivers/version.rs b/src/drivers/version.rs deleted file mode 100644 index 7d5f4a74..00000000 --- a/src/drivers/version.rs +++ /dev/null @@ -1,48 +0,0 @@ -use std::collections::BTreeSet; - -use log::debug; -use semver::{Version, VersionReq}; - -use crate::BinstallError; - -pub(super) fn find_version<'a, V: Iterator>( - requirement: &str, - version_iter: V, -) -> Result { - // Parse version requirement - let version_req = VersionReq::parse(requirement).map_err(|err| BinstallError::VersionReq { - req: requirement.into(), - err, - })?; - - // Filter for matching versions - let filtered: BTreeSet<_> = version_iter - .filter_map(|v| { - // Remove leading `v` for git tags - let ver_str = match v.strip_prefix('s') { - Some(v) => v, - None => v, - }; - - // Parse out version - let ver = Version::parse(ver_str).ok()?; - debug!("Version: {:?}", ver); - - // Filter by version match - if version_req.matches(&ver) { - Some(ver) - } else { - None - } - }) - .collect(); - - debug!("Filtered: {:?}", filtered); - - // Return highest version - filtered - .iter() - .max() - .cloned() - .ok_or(BinstallError::VersionMismatch { req: version_req }) -} From 53bf76104b7163ab7be7f139b5b047db36deb0b4 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 18 Jun 2022 18:27:08 +1000 Subject: [PATCH 55/65] Revert refactor: Put `fetch_crate_cratesio` back to `drivers` Signed-off-by: Jiahao XU --- src/drivers.rs | 71 ++++++++++++++++++++++++++++++++++++++--- src/drivers/cratesio.rs | 71 ----------------------------------------- 2 files changed, 67 insertions(+), 75 deletions(-) delete mode 100644 src/drivers/cratesio.rs diff --git a/src/drivers.rs b/src/drivers.rs index cab8a6d6..b5dff09a 100644 --- a/src/drivers.rs +++ b/src/drivers.rs @@ -1,17 +1,19 @@ use std::collections::BTreeSet; use std::path::{Path, PathBuf}; +use std::time::Duration; +use cargo_toml::Manifest; +use crates_io_api::AsyncClient; use log::debug; use semver::{Version, VersionReq}; +use url::Url; -use crate::BinstallError; - -mod cratesio; -pub use cratesio::*; +use crate::{helpers::*, BinstallError, Meta, TarBasedFmt}; mod vfs; mod visitor; +use visitor::ManifestVisitor; fn find_version<'a, V: Iterator>( requirement: &str, @@ -55,6 +57,67 @@ fn find_version<'a, V: Iterator>( .ok_or(BinstallError::VersionMismatch { req: version_req }) } +/// Fetch a crate Cargo.toml by name and version from crates.io +pub async fn fetch_crate_cratesio( + name: &str, + version_req: &str, +) -> Result, BinstallError> { + // Fetch / update index + debug!("Looking up crate information"); + + // Build crates.io api client + let api_client = AsyncClient::new( + "cargo-binstall (https://github.com/ryankurte/cargo-binstall)", + Duration::from_millis(100), + ) + .expect("bug: invalid user agent"); + + // Fetch online crate information + let base_info = + api_client + .get_crate(name.as_ref()) + .await + .map_err(|err| BinstallError::CratesIoApi { + crate_name: name.into(), + err, + })?; + + // Locate matching version + let version_iter = + base_info + .versions + .iter() + .filter_map(|v| if !v.yanked { Some(&v.num) } else { None }); + let version_name = find_version(version_req, version_iter)?; + + // Fetch information for the filtered version + let version = base_info + .versions + .iter() + .find(|v| v.num == version_name.to_string()) + .ok_or_else(|| BinstallError::VersionUnavailable { + crate_name: name.into(), + v: version_name.clone(), + })?; + + debug!("Found information for crate version: '{}'", version.num); + + // Download crate to temporary dir (crates.io or git?) + let crate_url = format!("https://crates.io/{}", version.dl_path); + + debug!("Fetching crate from: {crate_url} and extracting Cargo.toml from it"); + + let manifest_dir_path: PathBuf = format!("{name}-{version_name}").into(); + + download_tar_based_and_visit( + Url::parse(&crate_url)?, + TarBasedFmt::Tgz, + ManifestVisitor::new(manifest_dir_path), + ) + .await? + .load_manifest() +} + /// Fetch a crate by name and version from github /// TODO: implement this pub async fn fetch_crate_gh_releases( diff --git a/src/drivers/cratesio.rs b/src/drivers/cratesio.rs deleted file mode 100644 index 0bc6027b..00000000 --- a/src/drivers/cratesio.rs +++ /dev/null @@ -1,71 +0,0 @@ -use std::path::PathBuf; -use std::time::Duration; - -use cargo_toml::Manifest; -use crates_io_api::AsyncClient; -use log::debug; -use url::Url; - -use super::{find_version, visitor::ManifestVisitor}; -use crate::{helpers::*, BinstallError, Meta, TarBasedFmt}; - -/// Fetch a crate Cargo.toml by name and version from crates.io -pub async fn fetch_crate_cratesio( - name: &str, - version_req: &str, -) -> Result, BinstallError> { - // Fetch / update index - debug!("Looking up crate information"); - - // Build crates.io api client - let api_client = AsyncClient::new( - "cargo-binstall (https://github.com/ryankurte/cargo-binstall)", - Duration::from_millis(100), - ) - .expect("bug: invalid user agent"); - - // Fetch online crate information - let base_info = - api_client - .get_crate(name.as_ref()) - .await - .map_err(|err| BinstallError::CratesIoApi { - crate_name: name.into(), - err, - })?; - - // Locate matching version - let version_iter = - base_info - .versions - .iter() - .filter_map(|v| if !v.yanked { Some(&v.num) } else { None }); - let version_name = find_version(version_req, version_iter)?; - - // Fetch information for the filtered version - let version = base_info - .versions - .iter() - .find(|v| v.num == version_name.to_string()) - .ok_or_else(|| BinstallError::VersionUnavailable { - crate_name: name.into(), - v: version_name.clone(), - })?; - - debug!("Found information for crate version: '{}'", version.num); - - // Download crate to temporary dir (crates.io or git?) - let crate_url = format!("https://crates.io/{}", version.dl_path); - - debug!("Fetching crate from: {crate_url} and extracting Cargo.toml from it"); - - let manifest_dir_path: PathBuf = format!("{name}-{version_name}").into(); - - download_tar_based_and_visit( - Url::parse(&crate_url)?, - TarBasedFmt::Tgz, - ManifestVisitor::new(manifest_dir_path), - ) - .await? - .load_manifest() -} From db22d7d0419f86d44942dcf1b89526429d868566 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 18 Jun 2022 18:35:01 +1000 Subject: [PATCH 56/65] Fix codestyle in `async_extracter` Use consistent codestyle for specifing trait bounds. Signed-off-by: Jiahao XU --- src/helpers/async_extracter.rs | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index e1448776..2a2a42dd 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -38,11 +38,9 @@ where block_in_place(move || f(readable)) } -pub async fn extract_bin( - stream: impl Stream> + Unpin, - path: &Path, -) -> Result<(), BinstallError> +pub async fn extract_bin(stream: S, path: &Path) -> Result<(), BinstallError> where + S: Stream> + Unpin + 'static, BinstallError: From, { extract_impl(stream, move |mut reader| { @@ -67,11 +65,9 @@ where .await } -pub async fn extract_zip( - stream: impl Stream> + Unpin, - path: &Path, -) -> Result<(), BinstallError> +pub async fn extract_zip(stream: S, path: &Path) -> Result<(), BinstallError> where + S: Stream> + Unpin + 'static, BinstallError: From, { extract_impl(stream, move |mut reader| { @@ -89,12 +85,13 @@ where .await } -pub async fn extract_tar_based_stream( - stream: impl Stream> + Unpin + 'static, +pub async fn extract_tar_based_stream( + stream: S, path: &Path, fmt: TarBasedFmt, ) -> Result<(), BinstallError> where + S: Stream> + Unpin + 'static, BinstallError: From, { extract_impl(stream, move |reader| { @@ -121,12 +118,14 @@ impl TarEntriesVisitor for &mut V { } } -pub async fn extract_tar_based_stream_and_visit( - stream: impl Stream> + Unpin + 'static, +pub async fn extract_tar_based_stream_and_visit( + stream: S, fmt: TarBasedFmt, mut visitor: V, ) -> Result where + S: Stream> + Unpin + 'static, + V: TarEntriesVisitor + Debug + Send + 'static, BinstallError: From, { extract_impl(stream, move |reader| { From 784a24577b349af01ac82717d253a620d1cddb8b Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Sat, 18 Jun 2022 18:37:50 +1000 Subject: [PATCH 57/65] Refactor: Rm `extract_impl` Signed-off-by: Jiahao XU --- src/helpers/async_extracter.rs | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/src/helpers/async_extracter.rs b/src/helpers/async_extracter.rs index 2a2a42dd..5a7ef9c1 100644 --- a/src/helpers/async_extracter.rs +++ b/src/helpers/async_extracter.rs @@ -28,22 +28,13 @@ use tokio::task::block_in_place; use super::{extracter::*, stream_readable::StreamReadable}; use crate::{BinstallError, TarBasedFmt}; -async fn extract_impl(stream: S, f: F) -> Result -where - S: Stream> + Unpin, - F: FnOnce(StreamReadable) -> Result, - BinstallError: From, -{ - let readable = StreamReadable::new(stream).await; - block_in_place(move || f(readable)) -} - pub async fn extract_bin(stream: S, path: &Path) -> Result<(), BinstallError> where S: Stream> + Unpin + 'static, BinstallError: From, { - extract_impl(stream, move |mut reader| { + let mut reader = StreamReadable::new(stream).await; + block_in_place(move || { fs::create_dir_all(path.parent().unwrap())?; let mut file = fs::File::create(&path)?; @@ -62,7 +53,6 @@ where Ok(()) }) - .await } pub async fn extract_zip(stream: S, path: &Path) -> Result<(), BinstallError> @@ -70,7 +60,8 @@ where S: Stream> + Unpin + 'static, BinstallError: From, { - extract_impl(stream, move |mut reader| { + let mut reader = StreamReadable::new(stream).await; + block_in_place(move || { fs::create_dir_all(path.parent().unwrap())?; let mut file = tempfile()?; @@ -82,7 +73,6 @@ where unzip(file, path) }) - .await } pub async fn extract_tar_based_stream( @@ -94,7 +84,8 @@ where S: Stream> + Unpin + 'static, BinstallError: From, { - extract_impl(stream, move |reader| { + let reader = StreamReadable::new(stream).await; + block_in_place(move || { fs::create_dir_all(path.parent().unwrap())?; debug!("Extracting from {fmt} archive to {path:#?}"); @@ -103,7 +94,6 @@ where Ok(()) }) - .await } /// Visitor must iterate over all entries. @@ -128,12 +118,12 @@ where V: TarEntriesVisitor + Debug + Send + 'static, BinstallError: From, { - extract_impl(stream, move |reader| { + let reader = StreamReadable::new(stream).await; + block_in_place(move || { debug!("Extracting from {fmt} archive to process it in memory"); let mut tar = create_tar_decoder(reader, fmt)?; visitor.visit(tar.entries()?)?; Ok(visitor) }) - .await } From 5d79af545bfe72f455ee05e23f1dccbac1559e45 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Tue, 21 Jun 2022 13:38:24 +1000 Subject: [PATCH 58/65] Add doc for `Vfs` Signed-off-by: Jiahao XU --- src/drivers/vfs.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/drivers/vfs.rs b/src/drivers/vfs.rs index 18c422de..66e4875e 100644 --- a/src/drivers/vfs.rs +++ b/src/drivers/vfs.rs @@ -6,6 +6,9 @@ use cargo_toml::AbstractFilesystem; use crate::helpers::PathExt; +/// This type stores the filesystem structure for the crate tarball +/// extracted in memory and can be passed to +/// `cargo_toml::Manifest::complete_from_abstract_filesystem`. #[derive(Debug)] pub(super) struct Vfs(HashMap, HashSet>>); From c916814e7ed8f2fa3e56ef0d2f0454a93f724186 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Tue, 21 Jun 2022 13:40:36 +1000 Subject: [PATCH 59/65] Refactor: Extract `find_version` into mod Signed-off-by: Jiahao XU --- src/drivers.rs | 45 ++------------------------------------- src/drivers/version.rs | 48 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 43 deletions(-) create mode 100644 src/drivers/version.rs diff --git a/src/drivers.rs b/src/drivers.rs index b5dff09a..9a56a7d3 100644 --- a/src/drivers.rs +++ b/src/drivers.rs @@ -1,11 +1,9 @@ -use std::collections::BTreeSet; use std::path::{Path, PathBuf}; use std::time::Duration; use cargo_toml::Manifest; use crates_io_api::AsyncClient; use log::debug; -use semver::{Version, VersionReq}; use url::Url; use crate::{helpers::*, BinstallError, Meta, TarBasedFmt}; @@ -15,47 +13,8 @@ mod vfs; mod visitor; use visitor::ManifestVisitor; -fn find_version<'a, V: Iterator>( - requirement: &str, - version_iter: V, -) -> Result { - // Parse version requirement - let version_req = VersionReq::parse(requirement).map_err(|err| BinstallError::VersionReq { - req: requirement.into(), - err, - })?; - - // Filter for matching versions - let filtered: BTreeSet<_> = version_iter - .filter_map(|v| { - // Remove leading `v` for git tags - let ver_str = match v.strip_prefix('s') { - Some(v) => v, - None => v, - }; - - // Parse out version - let ver = Version::parse(ver_str).ok()?; - debug!("Version: {:?}", ver); - - // Filter by version match - if version_req.matches(&ver) { - Some(ver) - } else { - None - } - }) - .collect(); - - debug!("Filtered: {:?}", filtered); - - // Return highest version - filtered - .iter() - .max() - .cloned() - .ok_or(BinstallError::VersionMismatch { req: version_req }) -} +mod version; +use version::find_version; /// Fetch a crate Cargo.toml by name and version from crates.io pub async fn fetch_crate_cratesio( diff --git a/src/drivers/version.rs b/src/drivers/version.rs new file mode 100644 index 00000000..7d5f4a74 --- /dev/null +++ b/src/drivers/version.rs @@ -0,0 +1,48 @@ +use std::collections::BTreeSet; + +use log::debug; +use semver::{Version, VersionReq}; + +use crate::BinstallError; + +pub(super) fn find_version<'a, V: Iterator>( + requirement: &str, + version_iter: V, +) -> Result { + // Parse version requirement + let version_req = VersionReq::parse(requirement).map_err(|err| BinstallError::VersionReq { + req: requirement.into(), + err, + })?; + + // Filter for matching versions + let filtered: BTreeSet<_> = version_iter + .filter_map(|v| { + // Remove leading `v` for git tags + let ver_str = match v.strip_prefix('s') { + Some(v) => v, + None => v, + }; + + // Parse out version + let ver = Version::parse(ver_str).ok()?; + debug!("Version: {:?}", ver); + + // Filter by version match + if version_req.matches(&ver) { + Some(ver) + } else { + None + } + }) + .collect(); + + debug!("Filtered: {:?}", filtered); + + // Return highest version + filtered + .iter() + .max() + .cloned() + .ok_or(BinstallError::VersionMismatch { req: version_req }) +} From b6f15f2e5e4045c2d8f82f2a6cb7a30701d0a0c1 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Tue, 21 Jun 2022 13:42:39 +1000 Subject: [PATCH 60/65] Refactor: Extract `fetch_crate_cratesio` out into mod Signed-off-by: Jiahao XU --- src/drivers.rs | 70 ++------------------------------------- src/drivers/crates_io.rs | 71 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 67 deletions(-) create mode 100644 src/drivers/crates_io.rs diff --git a/src/drivers.rs b/src/drivers.rs index 9a56a7d3..18752554 100644 --- a/src/drivers.rs +++ b/src/drivers.rs @@ -1,12 +1,6 @@ use std::path::{Path, PathBuf}; -use std::time::Duration; -use cargo_toml::Manifest; -use crates_io_api::AsyncClient; -use log::debug; -use url::Url; - -use crate::{helpers::*, BinstallError, Meta, TarBasedFmt}; +use crate::BinstallError; mod vfs; @@ -16,66 +10,8 @@ use visitor::ManifestVisitor; mod version; use version::find_version; -/// Fetch a crate Cargo.toml by name and version from crates.io -pub async fn fetch_crate_cratesio( - name: &str, - version_req: &str, -) -> Result, BinstallError> { - // Fetch / update index - debug!("Looking up crate information"); - - // Build crates.io api client - let api_client = AsyncClient::new( - "cargo-binstall (https://github.com/ryankurte/cargo-binstall)", - Duration::from_millis(100), - ) - .expect("bug: invalid user agent"); - - // Fetch online crate information - let base_info = - api_client - .get_crate(name.as_ref()) - .await - .map_err(|err| BinstallError::CratesIoApi { - crate_name: name.into(), - err, - })?; - - // Locate matching version - let version_iter = - base_info - .versions - .iter() - .filter_map(|v| if !v.yanked { Some(&v.num) } else { None }); - let version_name = find_version(version_req, version_iter)?; - - // Fetch information for the filtered version - let version = base_info - .versions - .iter() - .find(|v| v.num == version_name.to_string()) - .ok_or_else(|| BinstallError::VersionUnavailable { - crate_name: name.into(), - v: version_name.clone(), - })?; - - debug!("Found information for crate version: '{}'", version.num); - - // Download crate to temporary dir (crates.io or git?) - let crate_url = format!("https://crates.io/{}", version.dl_path); - - debug!("Fetching crate from: {crate_url} and extracting Cargo.toml from it"); - - let manifest_dir_path: PathBuf = format!("{name}-{version_name}").into(); - - download_tar_based_and_visit( - Url::parse(&crate_url)?, - TarBasedFmt::Tgz, - ManifestVisitor::new(manifest_dir_path), - ) - .await? - .load_manifest() -} +mod crates_io; +pub use crates_io::fetch_crate_cratesio; /// Fetch a crate by name and version from github /// TODO: implement this diff --git a/src/drivers/crates_io.rs b/src/drivers/crates_io.rs new file mode 100644 index 00000000..94cdccce --- /dev/null +++ b/src/drivers/crates_io.rs @@ -0,0 +1,71 @@ +use std::path::PathBuf; +use std::time::Duration; + +use cargo_toml::Manifest; +use crates_io_api::AsyncClient; +use log::debug; +use url::Url; + +use super::{find_version, ManifestVisitor}; +use crate::{helpers::*, BinstallError, Meta, TarBasedFmt}; + +/// Fetch a crate Cargo.toml by name and version from crates.io +pub async fn fetch_crate_cratesio( + name: &str, + version_req: &str, +) -> Result, BinstallError> { + // Fetch / update index + debug!("Looking up crate information"); + + // Build crates.io api client + let api_client = AsyncClient::new( + "cargo-binstall (https://github.com/ryankurte/cargo-binstall)", + Duration::from_millis(100), + ) + .expect("bug: invalid user agent"); + + // Fetch online crate information + let base_info = + api_client + .get_crate(name.as_ref()) + .await + .map_err(|err| BinstallError::CratesIoApi { + crate_name: name.into(), + err, + })?; + + // Locate matching version + let version_iter = + base_info + .versions + .iter() + .filter_map(|v| if !v.yanked { Some(&v.num) } else { None }); + let version_name = find_version(version_req, version_iter)?; + + // Fetch information for the filtered version + let version = base_info + .versions + .iter() + .find(|v| v.num == version_name.to_string()) + .ok_or_else(|| BinstallError::VersionUnavailable { + crate_name: name.into(), + v: version_name.clone(), + })?; + + debug!("Found information for crate version: '{}'", version.num); + + // Download crate to temporary dir (crates.io or git?) + let crate_url = format!("https://crates.io/{}", version.dl_path); + + debug!("Fetching crate from: {crate_url} and extracting Cargo.toml from it"); + + let manifest_dir_path: PathBuf = format!("{name}-{version_name}").into(); + + download_tar_based_and_visit( + Url::parse(&crate_url)?, + TarBasedFmt::Tgz, + ManifestVisitor::new(manifest_dir_path), + ) + .await? + .load_manifest() +} From 23bad39ba8901c80fe230256dd973d3bfbfc56ab Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Tue, 21 Jun 2022 13:44:12 +1000 Subject: [PATCH 61/65] Refactor:Mv mod `visitor` `vfs` under `crates_io` Signed-off-by: Jiahao XU --- src/drivers.rs | 5 ----- src/drivers/crates_io.rs | 7 ++++++- src/drivers/{ => crates_io}/vfs.rs | 0 src/drivers/{ => crates_io}/visitor.rs | 0 4 files changed, 6 insertions(+), 6 deletions(-) rename src/drivers/{ => crates_io}/vfs.rs (100%) rename src/drivers/{ => crates_io}/visitor.rs (100%) diff --git a/src/drivers.rs b/src/drivers.rs index 18752554..933a6d2f 100644 --- a/src/drivers.rs +++ b/src/drivers.rs @@ -2,11 +2,6 @@ use std::path::{Path, PathBuf}; use crate::BinstallError; -mod vfs; - -mod visitor; -use visitor::ManifestVisitor; - mod version; use version::find_version; diff --git a/src/drivers/crates_io.rs b/src/drivers/crates_io.rs index 94cdccce..68f04e82 100644 --- a/src/drivers/crates_io.rs +++ b/src/drivers/crates_io.rs @@ -6,9 +6,14 @@ use crates_io_api::AsyncClient; use log::debug; use url::Url; -use super::{find_version, ManifestVisitor}; +use super::find_version; use crate::{helpers::*, BinstallError, Meta, TarBasedFmt}; +mod vfs; + +mod visitor; +use visitor::ManifestVisitor; + /// Fetch a crate Cargo.toml by name and version from crates.io pub async fn fetch_crate_cratesio( name: &str, diff --git a/src/drivers/vfs.rs b/src/drivers/crates_io/vfs.rs similarity index 100% rename from src/drivers/vfs.rs rename to src/drivers/crates_io/vfs.rs diff --git a/src/drivers/visitor.rs b/src/drivers/crates_io/visitor.rs similarity index 100% rename from src/drivers/visitor.rs rename to src/drivers/crates_io/visitor.rs From ad41756daab0120e9ad38ead7b6ac10bd0688a00 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Tue, 21 Jun 2022 13:45:56 +1000 Subject: [PATCH 62/65] Rename `fmt.rs` to `format.rs` Signed-off-by: Jiahao XU --- src/{fmt.rs => format.rs} | 0 src/lib.rs | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename src/{fmt.rs => format.rs} (100%) diff --git a/src/fmt.rs b/src/format.rs similarity index 100% rename from src/fmt.rs rename to src/format.rs diff --git a/src/lib.rs b/src/lib.rs index ca2b019a..5725995d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,8 +17,8 @@ pub mod fetchers; mod target; pub use target::*; -mod fmt; -pub use fmt::*; +mod format; +pub use format::*; /// Default package path template (may be overridden in package Cargo.toml) pub const DEFAULT_PKG_URL: &str = From 74a6e137bed7f566ee5afef82be584a187f31680 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Tue, 21 Jun 2022 13:47:00 +1000 Subject: [PATCH 63/65] Refactor: Mv `debug!` into `create_request` Signed-off-by: Jiahao XU --- src/helpers.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/helpers.rs b/src/helpers.rs index b2d252e9..55aee08c 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -52,6 +52,8 @@ pub async fn remote_exists(url: Url, method: Method) -> Result Result>, BinstallError> { + debug!("Downloading from: '{url}'"); + reqwest::get(url.clone()) .await .and_then(|r| r.error_for_status()) @@ -69,8 +71,6 @@ pub async fn download_and_extract>( fmt: PkgFmt, path: P, ) -> Result<(), BinstallError> { - debug!("Downloading from: '{url}'"); - let stream = create_request(url).await?; let path = path.as_ref(); @@ -97,8 +97,6 @@ pub async fn download_tar_based_and_visit Result { - debug!("Downloading from: '{url}'"); - let stream = create_request(url).await?; debug!("Downloading and extracting then in-memory processing"); From c5a2a8936133ddfff72a4c263e0431f86ef342ef Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Tue, 21 Jun 2022 13:52:27 +1000 Subject: [PATCH 64/65] Rm the duplicate `debug!` in `main.rs:214` Signed-off-by: Jiahao XU --- src/main.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index f9136157..0d8748cc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -211,10 +211,7 @@ async fn entry() -> Result<()> { // TODO: work out which of these to do based on `opts.name` // TODO: support git-based fetches (whole repo name rather than just crate name) let manifest = match opts.manifest_path.clone() { - Some(manifest_path) => { - debug!("Reading manifest: {}", manifest_path.display()); - load_manifest_path(manifest_path.join("Cargo.toml"))? - } + Some(manifest_path) => load_manifest_path(manifest_path.join("Cargo.toml"))?, None => fetch_crate_cratesio(&opts.name, &opts.version).await?, }; From 2f38925ee43b945a93f76066e9473b9fd9c816e4 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Tue, 21 Jun 2022 14:10:27 +1000 Subject: [PATCH 65/65] Refactor `From for BinstallError` Avoid one `expect`. Signed-off-by: Jiahao XU --- src/errors.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/errors.rs b/src/errors.rs index 7ffc20e3..4c7e5cad 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -234,18 +234,17 @@ impl Termination for BinstallError { impl From for BinstallError { fn from(err: std::io::Error) -> Self { - let is_inner_binstall_err = err - .get_ref() - .map(|e| e.is::()) - .unwrap_or_default(); + if err.get_ref().is_some() { + let kind = err.kind(); - if is_inner_binstall_err { let inner = err .into_inner() - .expect("err.get_ref() returns Some, so err.into_inner() should als return Some"); - *inner + .expect("err.get_ref() returns Some, so err.into_inner() should also return Some"); + + inner .downcast() - .expect("The inner err is tested to be BinstallError") + .map(|b| *b) + .unwrap_or_else(|err| BinstallError::Io(std::io::Error::new(kind, err))) } else { BinstallError::Io(err) }