cargo-binstall/crates/binstalk-downloader/src/download.rs
Jiahao XU db45f2fb7f
Refactor binstalk-downloader APIs: Remove cancellation_future plus optimizations (#591)
- Refactor: Mv fn `utils::asyncify` into mod `utils`
 - Improve err msg for task failure in `utils::asyncify`
 - Make sure `asyncify` always returns the same annoymous type
   that implements `Future` if the `T` is same.
 - Rewrite `extract_bin` to avoid `block_in_place`
   support cancellation by dropping
 - Rm unused dep scopeguard from binstalk-downloader
 - Rewrite `extract_tar_based_stream` so that it is cancellable by dropping
 - Unbox `extract_future` in `async_extracter::extract_zip`
 - Refactor `Download` API: Remove `CancellationFuture` as param

   since all futures returned by `Download::and_*` does not call
   `block_in_place`, so they can be cancelled by drop instead of using this
   cumbersome hack.
 - Fix exports from mod `async_tar_visitor`
 - Make `signal::{ignore_signals, wait_on_cancellation_signal}` private
 - Rm the global variable `CANCELLED` in `wait_on_cancellation_signal`
   and rm fn `wait_on_cancellation_signal_inner`
 - Optimize `wait_on_cancellation_signal`: Avoid `tokio::select!` on `not(unix)`
 - Rm unnecessary `tokio::select!` in `wait_on_cancellation_signal` on unix
   Since `unix::wait_on_cancellation_signal_unix` already waits for ctrl + c signal.
 - Optimize `extract_bin`: Send `Bytes` to blocking thread for zero-copy
 - Optimize `extract_with_blocking_decoder`: Avoid dup monomorphization
 - Box fut of `fetch_crate_cratesio` in `PackageInfo::resolve`
 - Optimize `extract_zip_entry`: Spawn only one blocking task per fn call

   by using a mspc queue for the data to be written to the `outfile`.

   This would improve efficiency as using `tokio::fs::File` is expensive:
   It spawns a new blocking task, which needs one heap allocation and then
   pushed to a mpmc queue, and then wait for it to be done on every loop.

   This also fix a race condition where the unix permission is set before
   the whole file is written, which might be used by attackers.
 - Optimize `extract_zip`: Use one `BytesMut` for entire extraction process
   To avoid frequent allocation and deallocation.
 - Optimize `extract_zip_entry`: Inc prob of reusing alloc in `BytesMut`

   Performs the reserve before sending the buf over mpsc queue to
   increase the possibility of reusing the previous allocation.

   NOTE: `BytesMut` only reuses the previous allocation if it is the
   only one holds the reference to it, which is either on the first
   allocation or all the `Bytes` in the mpsc queue has been consumed,
   written to the file and dropped.

   Since reading from entry would have to wait for external file I/O,
   this would give the blocking thread some time to flush `Bytes`
   out.
 - Disable unused feature fs of dep tokio

Signed-off-by: Jiahao XU <Jiahao_XU@outlook.com>
2022-12-12 03:15:30 +00:00

186 lines
5.1 KiB
Rust

use std::{fmt::Debug, io, marker::PhantomData, path::Path};
use binstalk_types::cargo_toml_binstall::{PkgFmtDecomposed, TarBasedFmt};
use digest::{Digest, FixedOutput, HashMarker, Output, OutputSizeUser, Update};
use futures_util::stream::StreamExt;
use thiserror::Error as ThisError;
use tracing::{debug, instrument};
pub use binstalk_types::cargo_toml_binstall::PkgFmt;
use crate::remote::{Client, Error as RemoteError, Url};
mod async_extracter;
use async_extracter::*;
mod async_tar_visitor;
use async_tar_visitor::extract_tar_based_stream_and_visit;
pub use async_tar_visitor::{TarEntriesVisitor, TarEntry, TarEntryType};
mod extracter;
mod stream_readable;
mod zip_extraction;
pub use zip_extraction::ZipError;
mod utils;
#[derive(Debug, ThisError)]
pub enum DownloadError {
#[error(transparent)]
Unzip(#[from] ZipError),
#[error(transparent)]
Remote(#[from] RemoteError),
/// A generic I/O error.
///
/// - Code: `binstall::io`
/// - Exit: 74
#[error(transparent)]
Io(io::Error),
#[error("installation cancelled by user")]
UserAbort,
}
impl From<io::Error> for DownloadError {
fn from(err: io::Error) -> Self {
if err.get_ref().is_some() {
let kind = err.kind();
let inner = err
.into_inner()
.expect("err.get_ref() returns Some, so err.into_inner() should also return Some");
inner
.downcast()
.map(|b| *b)
.unwrap_or_else(|err| DownloadError::Io(io::Error::new(kind, err)))
} else {
DownloadError::Io(err)
}
}
}
impl From<DownloadError> for io::Error {
fn from(e: DownloadError) -> io::Error {
match e {
DownloadError::Io(io_error) => io_error,
e => io::Error::new(io::ErrorKind::Other, e),
}
}
}
#[derive(Debug)]
pub struct Download<D: Digest = NoDigest> {
client: Client,
url: Url,
_digest: PhantomData<D>,
_checksum: Vec<u8>,
}
impl Download {
pub fn new(client: Client, url: Url) -> Self {
Self {
client,
url,
_digest: PhantomData::default(),
_checksum: Vec::new(),
}
}
/// Download a file from the provided URL and process them in memory.
///
/// This does not support verifying a checksum due to the partial extraction
/// and will ignore one if specified.
///
/// `cancellation_future` can be used to cancel the extraction and return
/// [`DownloadError::UserAbort`] error.
///
/// NOTE that this API does not support gnu extension sparse file unlike
/// [`Download::and_extract`].
#[instrument(skip(visitor))]
pub async fn and_visit_tar<V: TarEntriesVisitor + Debug + Send + 'static>(
self,
fmt: TarBasedFmt,
visitor: V,
) -> Result<V::Target, DownloadError> {
let stream = self
.client
.get_stream(self.url)
.await?
.map(|res| res.map_err(DownloadError::from));
debug!("Downloading and extracting then in-memory processing");
let ret = extract_tar_based_stream_and_visit(stream, fmt, visitor).await?;
debug!("Download, extraction and in-memory procession OK");
Ok(ret)
}
/// Download a file from the provided URL and extract it to the provided path.
///
/// `cancellation_future` can be used to cancel the extraction and return
/// [`DownloadError::UserAbort`] error.
#[instrument(skip(path))]
pub async fn and_extract(
self,
fmt: PkgFmt,
path: impl AsRef<Path>,
) -> Result<(), DownloadError> {
async fn inner(this: Download, fmt: PkgFmt, path: &Path) -> Result<(), DownloadError> {
let stream = this
.client
.get_stream(this.url)
.await?
.map(|res| res.map_err(DownloadError::from));
debug!("Downloading and extracting to: '{}'", path.display());
match fmt.decompose() {
PkgFmtDecomposed::Tar(fmt) => extract_tar_based_stream(stream, path, fmt).await?,
PkgFmtDecomposed::Bin => extract_bin(stream, path).await?,
PkgFmtDecomposed::Zip => extract_zip(stream, path).await?,
}
debug!("Download OK, extracted to: '{}'", path.display());
Ok(())
}
inner(self, fmt, path.as_ref()).await
}
}
impl<D: Digest> Download<D> {
pub fn new_with_checksum(client: Client, url: Url, checksum: Vec<u8>) -> Self {
Self {
client,
url,
_digest: PhantomData::default(),
_checksum: checksum,
}
}
// TODO: implement checking the sum, may involve bringing (parts of) and_extract() back in here
}
#[derive(Clone, Copy, Debug, Default)]
pub struct NoDigest;
impl FixedOutput for NoDigest {
fn finalize_into(self, _out: &mut Output<Self>) {}
}
impl OutputSizeUser for NoDigest {
type OutputSize = generic_array::typenum::U0;
}
impl Update for NoDigest {
fn update(&mut self, _data: &[u8]) {}
}
impl HashMarker for NoDigest {}