Return a list of files written to disk in binstalk_downloader::download::Download::and_extract (#856)

to avoid collecting extracted files from disk again in resolution stage.

Signed-off-by: Jiahao XU <Jiahao_XU@outlook.com>
This commit is contained in:
Jiahao XU 2023-03-03 23:31:27 +11:00 committed by GitHub
parent 44ac63ce0d
commit 9c7da6a179
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 366 additions and 84 deletions

View file

@ -1,4 +1,5 @@
use std::{
borrow::Cow,
fs,
future::Future,
io::{self, Write},
@ -13,11 +14,12 @@ use tokio_util::io::StreamReader;
use tracing::debug;
use super::{
extracter::*, zip_extraction::extract_zip_entry, DownloadError, TarBasedFmt, ZipError,
extracter::*, zip_extraction::extract_zip_entry, DownloadError, ExtractedFiles, TarBasedFmt,
ZipError,
};
use crate::utils::{extract_with_blocking_task, StreamReadable};
pub async fn extract_bin<S>(stream: S, path: &Path) -> Result<(), DownloadError>
pub async fn extract_bin<S>(stream: S, path: &Path) -> Result<ExtractedFiles, DownloadError>
where
S: Stream<Item = Result<Bytes, DownloadError>> + Send + Sync + Unpin + 'static,
{
@ -32,10 +34,16 @@ where
file.flush()
})
.await
.await?;
let mut extracted_files = ExtractedFiles::new();
extracted_files.add_file(Path::new(path.file_name().unwrap()));
Ok(extracted_files)
}
pub async fn extract_zip<S>(stream: S, path: &Path) -> Result<(), DownloadError>
pub async fn extract_zip<S>(stream: S, path: &Path) -> Result<ExtractedFiles, DownloadError>
where
S: Stream<Item = Result<Bytes, DownloadError>> + Unpin + Send + Sync + 'static,
{
@ -44,9 +52,10 @@ where
let reader = StreamReader::new(stream);
let mut zip = ZipFileReader::new(reader);
let mut buf = BytesMut::with_capacity(4 * 4096);
let mut extracted_files = ExtractedFiles::new();
while let Some(mut zip_reader) = zip.next_entry().await.map_err(ZipError::from_inner)? {
extract_zip_entry(&mut zip_reader, path, &mut buf).await?;
extract_zip_entry(&mut zip_reader, path, &mut buf, &mut extracted_files).await?;
// extract_zip_entry would read the zip_reader until read the file until
// eof unless extract_zip itself is cancelled or an error is raised.
@ -55,33 +64,82 @@ where
zip = zip_reader.done().await.map_err(ZipError::from_inner)?;
}
Ok(())
Ok(extracted_files)
}
pub async fn extract_tar_based_stream<S>(
stream: S,
path: &Path,
dst: &Path,
fmt: TarBasedFmt,
) -> Result<(), DownloadError>
) -> Result<ExtractedFiles, DownloadError>
where
S: Stream<Item = Result<Bytes, DownloadError>> + Send + Sync + Unpin + 'static,
{
debug!("Extracting from {fmt} archive to {path:#?}");
debug!("Extracting from {fmt} archive to {}", dst.display());
extract_with_blocking_decoder(stream, path, move |rx, path| {
create_tar_decoder(StreamReadable::new(rx), fmt)?.unpack(path)
extract_with_blocking_decoder(stream, dst, move |rx, dst| {
// Adapted from https://docs.rs/tar/latest/src/tar/archive.rs.html#189-219
if dst.symlink_metadata().is_err() {
fs::create_dir_all(dst)?;
}
// Canonicalizing the dst directory will prepend the path with '\\?\'
// on windows which will allow windows APIs to treat the path as an
// extended-length path with a 32,767 character limit. Otherwise all
// unpacked paths over 260 characters will fail on creation with a
// NotFound exception.
let dst = &dst
.canonicalize()
.map(Cow::Owned)
.unwrap_or(Cow::Borrowed(dst));
let mut tar = create_tar_decoder(StreamReadable::new(rx), fmt)?;
let mut entries = tar.entries()?;
let mut extracted_files = ExtractedFiles::new();
// Delay any directory entries until the end (they will be created if needed by
// descendants), to ensure that directory permissions do not interfer with descendant
// extraction.
let mut directories = Vec::new();
while let Some(mut entry) = entries.next().transpose()? {
match entry.header().entry_type() {
tar::EntryType::Regular => {
// unpack_in returns false if the path contains ".."
// and is skipped.
if entry.unpack_in(dst)? {
extracted_files.add_file(&entry.path()?);
}
}
tar::EntryType::Directory => {
directories.push(entry);
}
_ => (),
}
}
for mut dir in directories {
if dir.unpack_in(dst)? {
extracted_files.add_dir(&dir.path()?);
}
}
Ok(extracted_files)
})
.await
}
fn extract_with_blocking_decoder<S, F>(
fn extract_with_blocking_decoder<S, F, T>(
stream: S,
path: &Path,
f: F,
) -> impl Future<Output = Result<(), DownloadError>>
) -> impl Future<Output = Result<T, DownloadError>>
where
S: Stream<Item = Result<Bytes, DownloadError>> + Send + Sync + Unpin + 'static,
F: FnOnce(mpsc::Receiver<Bytes>, &Path) -> io::Result<()> + Send + Sync + 'static,
F: FnOnce(mpsc::Receiver<Bytes>, &Path) -> io::Result<T> + Send + Sync + 'static,
T: Send + 'static,
{
let path = path.to_owned();