Refactor binstalk-downloader APIs: Remove cancellation_future plus optimizations (#591)

- Refactor: Mv fn `utils::asyncify` into mod `utils`
 - Improve err msg for task failure in `utils::asyncify`
 - Make sure `asyncify` always returns the same annoymous type
   that implements `Future` if the `T` is same.
 - Rewrite `extract_bin` to avoid `block_in_place`
   support cancellation by dropping
 - Rm unused dep scopeguard from binstalk-downloader
 - Rewrite `extract_tar_based_stream` so that it is cancellable by dropping
 - Unbox `extract_future` in `async_extracter::extract_zip`
 - Refactor `Download` API: Remove `CancellationFuture` as param

   since all futures returned by `Download::and_*` does not call
   `block_in_place`, so they can be cancelled by drop instead of using this
   cumbersome hack.
 - Fix exports from mod `async_tar_visitor`
 - Make `signal::{ignore_signals, wait_on_cancellation_signal}` private
 - Rm the global variable `CANCELLED` in `wait_on_cancellation_signal`
   and rm fn `wait_on_cancellation_signal_inner`
 - Optimize `wait_on_cancellation_signal`: Avoid `tokio::select!` on `not(unix)`
 - Rm unnecessary `tokio::select!` in `wait_on_cancellation_signal` on unix
   Since `unix::wait_on_cancellation_signal_unix` already waits for ctrl + c signal.
 - Optimize `extract_bin`: Send `Bytes` to blocking thread for zero-copy
 - Optimize `extract_with_blocking_decoder`: Avoid dup monomorphization
 - Box fut of `fetch_crate_cratesio` in `PackageInfo::resolve`
 - Optimize `extract_zip_entry`: Spawn only one blocking task per fn call

   by using a mspc queue for the data to be written to the `outfile`.

   This would improve efficiency as using `tokio::fs::File` is expensive:
   It spawns a new blocking task, which needs one heap allocation and then
   pushed to a mpmc queue, and then wait for it to be done on every loop.

   This also fix a race condition where the unix permission is set before
   the whole file is written, which might be used by attackers.
 - Optimize `extract_zip`: Use one `BytesMut` for entire extraction process
   To avoid frequent allocation and deallocation.
 - Optimize `extract_zip_entry`: Inc prob of reusing alloc in `BytesMut`

   Performs the reserve before sending the buf over mpsc queue to
   increase the possibility of reusing the previous allocation.

   NOTE: `BytesMut` only reuses the previous allocation if it is the
   only one holds the reference to it, which is either on the first
   allocation or all the `Bytes` in the mpsc queue has been consumed,
   written to the file and dropped.

   Since reading from entry would have to wait for external file I/O,
   this would give the blocking thread some time to flush `Bytes`
   out.
 - Disable unused feature fs of dep tokio

Signed-off-by: Jiahao XU <Jiahao_XU@outlook.com>
This commit is contained in:
Jiahao XU 2022-12-12 14:15:30 +11:00 committed by GitHub
parent 058208bae9
commit db45f2fb7f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 234 additions and 263 deletions

View file

@ -1,4 +1,4 @@
use std::{fmt::Debug, future::Future, io, marker::PhantomData, path::Path, pin::Pin};
use std::{fmt::Debug, io, marker::PhantomData, path::Path};
use binstalk_types::cargo_toml_binstall::{PkgFmtDecomposed, TarBasedFmt};
use digest::{Digest, FixedOutput, HashMarker, Output, OutputSizeUser, Update};
@ -14,7 +14,8 @@ mod async_extracter;
use async_extracter::*;
mod async_tar_visitor;
pub use async_tar_visitor::*;
use async_tar_visitor::extract_tar_based_stream_and_visit;
pub use async_tar_visitor::{TarEntriesVisitor, TarEntry, TarEntryType};
mod extracter;
mod stream_readable;
@ -23,9 +24,6 @@ mod zip_extraction;
pub use zip_extraction::ZipError;
mod utils;
use utils::await_on_option;
pub type CancellationFuture = Option<Pin<Box<dyn Future<Output = Result<(), io::Error>> + Send>>>;
#[derive(Debug, ThisError)]
pub enum DownloadError {
@ -102,12 +100,11 @@ impl Download {
///
/// NOTE that this API does not support gnu extension sparse file unlike
/// [`Download::and_extract`].
#[instrument(skip(visitor, cancellation_future))]
#[instrument(skip(visitor))]
pub async fn and_visit_tar<V: TarEntriesVisitor + Debug + Send + 'static>(
self,
fmt: TarBasedFmt,
visitor: V,
cancellation_future: CancellationFuture,
) -> Result<V::Target, DownloadError> {
let stream = self
.client
@ -117,14 +114,7 @@ impl Download {
debug!("Downloading and extracting then in-memory processing");
let ret = tokio::select! {
biased;
res = await_on_option(cancellation_future) => {
Err(res.err().unwrap_or_else(|| io::Error::from(DownloadError::UserAbort)))?
}
res = extract_tar_based_stream_and_visit(stream, fmt, visitor) => res?,
};
let ret = extract_tar_based_stream_and_visit(stream, fmt, visitor).await?;
debug!("Download, extraction and in-memory procession OK");
@ -135,19 +125,13 @@ impl Download {
///
/// `cancellation_future` can be used to cancel the extraction and return
/// [`DownloadError::UserAbort`] error.
#[instrument(skip(path, cancellation_future))]
#[instrument(skip(path))]
pub async fn and_extract(
self,
fmt: PkgFmt,
path: impl AsRef<Path>,
cancellation_future: CancellationFuture,
) -> Result<(), DownloadError> {
async fn inner(
this: Download,
fmt: PkgFmt,
path: &Path,
cancellation_future: CancellationFuture,
) -> Result<(), DownloadError> {
async fn inner(this: Download, fmt: PkgFmt, path: &Path) -> Result<(), DownloadError> {
let stream = this
.client
.get_stream(this.url)
@ -157,11 +141,9 @@ impl Download {
debug!("Downloading and extracting to: '{}'", path.display());
match fmt.decompose() {
PkgFmtDecomposed::Tar(fmt) => {
extract_tar_based_stream(stream, path, fmt, cancellation_future).await?
}
PkgFmtDecomposed::Bin => extract_bin(stream, path, cancellation_future).await?,
PkgFmtDecomposed::Zip => extract_zip(stream, path, cancellation_future).await?,
PkgFmtDecomposed::Tar(fmt) => extract_tar_based_stream(stream, path, fmt).await?,
PkgFmtDecomposed::Bin => extract_bin(stream, path).await?,
PkgFmtDecomposed::Zip => extract_zip(stream, path).await?,
}
debug!("Download OK, extracted to: '{}'", path.display());
@ -169,7 +151,7 @@ impl Download {
Ok(())
}
inner(self, fmt, path.as_ref(), cancellation_future).await
inner(self, fmt, path.as_ref()).await
}
}