Optimize extract_zip: Use async_zip::read::stream::ZipFileReader to avoid a temporary file

* Add new dep async_zip v0.0.9 to binstalk-downloader
   with features "gzip", "zstd", "xz", "bzip2", "tokio".
* Refactor: Simplify `async_extracter::extract_*` API
* Refactor: Create a newtype wrapper of `ZipError`
   so that the zip dependency can be upgraded without affecting the API of this crate.
* Enable feature fs of dep tokio in binstalk-downloader
* Rewrite `extract_zip` to use `async_zip::read::stream::ZipFileReader`,
   which avoids writing the zip file to a temporary file and then reading it
   back into memory.
* Refactor: Impl new fn `await_on_option` and use it
* Optimize `tokio::select!`: Make them biased and check for cancellation first
  so that cancellation takes effect ASAP.
* Rm unused dep zip from binstalk-downloader

Signed-off-by: Jiahao XU <Jiahao_XU@outlook.com>
This commit is contained in:
Jiahao XU 2022-12-12 11:44:34 +11:00 committed by GitHub
parent e6f969245a
commit 3b1a7f2c78
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 372 additions and 94 deletions
crates/binstalk-downloader/src/download

View file

@ -1,24 +1,25 @@
use std::{fs, io::Seek, path::Path};
use std::{fs, path::Path};
use async_zip::read::stream::ZipFileReader;
use bytes::Bytes;
use futures_util::stream::Stream;
use scopeguard::{guard, ScopeGuard};
use tempfile::tempfile;
use tokio::task::block_in_place;
use tokio_util::io::StreamReader;
use tracing::debug;
use super::{
extracter::*, stream_readable::StreamReadable, CancellationFuture, DownloadError, TarBasedFmt,
await_on_option, extracter::*, stream_readable::StreamReadable,
zip_extraction::extract_zip_entry, CancellationFuture, DownloadError, TarBasedFmt, ZipError,
};
pub async fn extract_bin<S, E>(
pub async fn extract_bin<S>(
stream: S,
path: &Path,
cancellation_future: CancellationFuture,
) -> Result<(), DownloadError>
where
S: Stream<Item = Result<Bytes, E>> + Unpin + 'static,
DownloadError: From<E>,
S: Stream<Item = Result<Bytes, DownloadError>> + Unpin + 'static,
{
let mut reader = StreamReadable::new(stream, cancellation_future).await;
block_in_place(move || {
@ -42,39 +43,45 @@ where
})
}
pub async fn extract_zip<S, E>(
pub async fn extract_zip<S>(
stream: S,
path: &Path,
cancellation_future: CancellationFuture,
) -> Result<(), DownloadError>
where
S: Stream<Item = Result<Bytes, E>> + Unpin + 'static,
DownloadError: From<E>,
S: Stream<Item = Result<Bytes, DownloadError>> + Unpin + Send + Sync + 'static,
{
let mut reader = StreamReadable::new(stream, cancellation_future).await;
block_in_place(move || {
fs::create_dir_all(path.parent().unwrap())?;
debug!("Decompressing from zip archive to `{}`", path.display());
let mut file = tempfile()?;
let extract_future = Box::pin(async move {
let reader = StreamReader::new(stream);
let mut zip = ZipFileReader::new(reader);
reader.copy(&mut file)?;
while let Some(entry) = zip.entry_reader().await.map_err(ZipError::from_inner)? {
extract_zip_entry(entry, path).await?;
}
// rewind it so that we can pass it to unzip
file.rewind()?;
Ok(())
});
unzip(file, path)
})
tokio::select! {
biased;
res = await_on_option(cancellation_future) => {
Err(res.err().map(DownloadError::from).unwrap_or(DownloadError::UserAbort))
}
res = extract_future => res,
}
}
pub async fn extract_tar_based_stream<S, E>(
pub async fn extract_tar_based_stream<S>(
stream: S,
path: &Path,
fmt: TarBasedFmt,
cancellation_future: CancellationFuture,
) -> Result<(), DownloadError>
where
S: Stream<Item = Result<Bytes, E>> + Unpin + 'static,
DownloadError: From<E>,
S: Stream<Item = Result<Bytes, DownloadError>> + Unpin + 'static,
{
let reader = StreamReadable::new(stream, cancellation_future).await;
block_in_place(move || {