From e704abe7ac3bd292101729ed57cdfe5bb1f7295a Mon Sep 17 00:00:00 2001 From: Jiahao XU <30436523+NobodyXu@users.noreply.github.com> Date: Wed, 30 Oct 2024 10:16:54 +1000 Subject: [PATCH] Use rc-zip-sync for zip extraction (#1942) * Use rc-zip-sync for zip extraction Fixed #1080 In this commit, binstalk-downloader is updated to - first download the zip into a temporary file, since there is no correct way to extract zip from a stream. - then use rc-zip-sync to read from the zip and extract it to filesystem. Signed-off-by: Jiahao XU * Fix returned `ExtractedFiles` in `do_extract_zip` Signed-off-by: Jiahao XU <30436523+NobodyXu@users.noreply.github.com> * Fix clippy in zip_extraction.rs Signed-off-by: Jiahao XU <30436523+NobodyXu@users.noreply.github.com> --------- Signed-off-by: Jiahao XU Signed-off-by: Jiahao XU <30436523+NobodyXu@users.noreply.github.com> --- Cargo.lock | 383 +++++++++++++++--- crates/binstalk-downloader/Cargo.toml | 17 +- crates/binstalk-downloader/src/download.rs | 2 +- .../src/download/async_extracter.rs | 73 ++-- .../src/download/zip_extraction.rs | 258 +++--------- 5 files changed, 406 insertions(+), 327 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a6f6e2ff..b2948b45 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,12 @@ dependencies = [ "gimli", ] +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + [[package]] name = "adler2" version = "2.0.0" @@ -59,6 +65,21 @@ version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anstream" version = "0.6.17" @@ -134,10 +155,8 @@ checksum = "0cb8f1d480b0ea3783ab015936d2a55c87e219676f0c0b7dec61494043f21857" dependencies = [ "brotli", "bzip2", - "deflate64", "flate2", "futures-core", - "futures-io", "memchr", "pin-project-lite", "tokio", @@ -168,21 +187,6 @@ dependencies = [ "syn", ] -[[package]] -name = "async_zip" -version = "0.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b9f7252833d5ed4b00aa9604b563529dd5e11de9c23615de2dcdf91eb87b52" -dependencies = [ - "async-compression", - "crc32fast", - "futures-lite", - "pin-project", - "thiserror", - "tokio", - "tokio-util", -] - [[package]] name = "atomic-file-install" version = "1.0.5" @@ -214,7 +218,7 @@ dependencies = [ "addr2line", "cfg-if", "libc", - "miniz_oxide", + "miniz_oxide 0.8.0", "object", "rustc-demangle", "windows-targets 0.52.6", @@ -302,11 +306,11 @@ version = "0.13.1" dependencies = [ "async-compression", "async-trait", - "async_zip", "binstalk-types", "binstall-tar", "bytes", "bzip2", + "cfg-if", "compact_str", "default-net", "flate2", @@ -317,6 +321,7 @@ dependencies = [ "ipconfig", "native-tls", "once_cell", + "rc-zip-sync", "reqwest", "serde", "serde_json", @@ -658,6 +663,31 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chardetng" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14b8f0b65b7b08ae3c8187e8d77174de20cb6777864c6b832d8ad365999cf1ea" +dependencies = [ + "cfg-if", + "encoding_rs", + "memchr", +] + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets 0.52.6", +] + [[package]] name = "clap" version = "4.5.20" @@ -771,6 +801,21 @@ dependencies = [ "libc", ] +[[package]] +name = "crc" +version = "3.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + [[package]] name = "crc32fast" version = "1.4.2" @@ -1040,7 +1085,7 @@ checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" dependencies = [ "crc32fast", "libz-ng-sys", - "miniz_oxide", + "miniz_oxide 0.8.0", ] [[package]] @@ -1138,19 +1183,6 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" -[[package]] -name = "futures-lite" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52527eb5074e35e9339c6b4e8d12600c7128b68fb25dcb9fa9dec18f7c25f3a5" -dependencies = [ - "fastrand", - "futures-core", - "futures-io", - "parking", - "pin-project-lite", -] - [[package]] name = "futures-macro" version = "0.3.31" @@ -1282,7 +1314,7 @@ dependencies = [ "gix-utils", "itoa", "thiserror", - "winnow", + "winnow 0.6.20", ] [[package]] @@ -1364,7 +1396,7 @@ dependencies = [ "smallvec", "thiserror", "unicode-bom", - "winnow", + "winnow 0.6.20", ] [[package]] @@ -1611,7 +1643,7 @@ dependencies = [ "itoa", "smallvec", "thiserror", - "winnow", + "winnow 0.6.20", ] [[package]] @@ -1736,7 +1768,7 @@ dependencies = [ "gix-utils", "maybe-async", "thiserror", - "winnow", + "winnow 0.6.20", ] [[package]] @@ -1768,7 +1800,7 @@ dependencies = [ "gix-validate", "memmap2", "thiserror", - "winnow", + "winnow 0.6.20", ] [[package]] @@ -2302,6 +2334,29 @@ dependencies = [ "tracing", ] +[[package]] +name = "iana-time-zone" +version = "0.1.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core 0.52.0", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "idna" version = "0.5.0" @@ -2546,6 +2601,16 @@ dependencies = [ "linked-hash-map", ] +[[package]] +name = "lzma-rs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "297e814c836ae64db86b36cf2a557ba54368d03f6afcd7d947c266692f71115e" +dependencies = [ + "byteorder", + "crc", +] + [[package]] name = "lzma-sys" version = "0.1.20" @@ -2656,6 +2721,15 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a05b5d0594e0cb1ad8cee3373018d2b84e25905dc75b2468114cc9a8e86cfc20" +[[package]] +name = "miniz_oxide" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +dependencies = [ + "adler", +] + [[package]] name = "miniz_oxide" version = "0.8.0" @@ -2773,6 +2847,36 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_enum" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e613fc340b2220f734a8595782c551f1250e969d87d3be1ae0579e8d4065179" +dependencies = [ + "num_enum_derive", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "num_threads" version = "0.1.7" @@ -2791,6 +2895,18 @@ dependencies = [ "memchr", ] +[[package]] +name = "oem_cp" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "330138902ab4dab09a86e6b7ab7ddeffb5f8435d52fe0df1bce8b06a17b10ee4" +dependencies = [ + "phf", + "phf_codegen", + "serde", + "serde_json", +] + [[package]] name = "once_cell" version = "1.20.2" @@ -2857,24 +2973,45 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "oval" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135cef32720c6746450d910890b0b69bcba2bbf6f85c9f4583df13fe415de828" + [[package]] name = "overload" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "ownable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dcba94d1536fcc470287d96fd26356c38da8215fdb9a74285b09621f35d9350" +dependencies = [ + "ownable-macro", +] + +[[package]] +name = "ownable-macro" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2c91d2781624dec1234581a1a01e63638f36546ad72ee82873ac1b84f41117b" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "owo-colors" version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb37767f6569cd834a413442455e0f066d0d522de8630436e2a1761d9726ba56" -[[package]] -name = "parking" -version = "2.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" - [[package]] name = "parking_lot" version = "0.12.3" @@ -2911,23 +3048,41 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] -name = "pin-project" -version = "1.1.7" +name = "phf" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" dependencies = [ - "pin-project-internal", + "phf_shared", ] [[package]] -name = "pin-project-internal" -version = "1.1.7" +name = "phf_codegen" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" dependencies = [ - "proc-macro2", - "quote", - "syn", + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", ] [[package]] @@ -2948,6 +3103,17 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +[[package]] +name = "positioned-io" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccabfeeb89c73adf4081f0dca7f8e28dbda90981a222ceea37f619e93ea6afe9" +dependencies = [ + "byteorder", + "libc", + "winapi", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -2963,6 +3129,38 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "proc-macro-crate" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + [[package]] name = "proc-macro2" version = "1.0.89" @@ -3078,6 +3276,42 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rc-zip" +version = "5.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14e24471baa77d6e0e8be04c7d62d5e3c3454e7f1863ed7d74faddd6e4786007" +dependencies = [ + "bzip2", + "chardetng", + "chrono", + "crc32fast", + "deflate64", + "encoding_rs", + "lzma-rs", + "miniz_oxide 0.7.4", + "num_enum", + "oem_cp", + "oval", + "ownable", + "thiserror", + "tracing", + "winnow 0.5.40", + "zstd", +] + +[[package]] +name = "rc-zip-sync" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdebaff0e109a140208ab5ffbbe4862aada391d94456cbf7887757f3078a328b" +dependencies = [ + "oval", + "positioned-io", + "rc-zip", + "tracing", +] + [[package]] name = "redox_syscall" version = "0.3.5" @@ -3251,9 +3485,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "aa260229e6538e52293eeb577aabd09945a09d6d9cc0fc550ed7529056c2e32a" dependencies = [ "bitflags 2.6.0", "errno 0.3.9", @@ -3264,9 +3498,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.15" +version = "0.23.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fbb44d7acc4e873d613422379f69f237a1b141928c02f6bc6ccfddddc2d7993" +checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e" dependencies = [ "log", "once_cell", @@ -3528,6 +3762,12 @@ dependencies = [ "tracing", ] +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "slab" version = "0.4.9" @@ -3886,7 +4126,6 @@ checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" dependencies = [ "bytes", "futures-core", - "futures-io", "futures-sink", "pin-project-lite", "tokio", @@ -3923,7 +4162,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "winnow", + "winnow 0.6.20", ] [[package]] @@ -4234,9 +4473,9 @@ checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "wasm-streams" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e072d4e72f700fb3443d8fe94a39315df013eef1104903cdb0a2abd322bbecd" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" dependencies = [ "futures-util", "js-sys", @@ -4316,7 +4555,16 @@ version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" dependencies = [ - "windows-core", + "windows-core 0.58.0", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ "windows-targets 0.52.6", ] @@ -4533,6 +4781,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] + [[package]] name = "winnow" version = "0.6.20" diff --git a/crates/binstalk-downloader/Cargo.toml b/crates/binstalk-downloader/Cargo.toml index 05a8e52d..73e58948 100644 --- a/crates/binstalk-downloader/Cargo.toml +++ b/crates/binstalk-downloader/Cargo.toml @@ -18,23 +18,22 @@ async-compression = { version = "0.4.4", features = [ "bzip2", "tokio", ] } -async_zip = { version = "0.0.17", features = [ - "deflate", - "deflate64", - "bzip2", - "lzma", - "zstd", - "xz", - "tokio", -] } binstalk-types = { version = "0.9.0", path = "../binstalk-types" } bytes = "1.4.0" bzip2 = "0.4.4" +cfg-if = "1" compact_str = "0.8.0" flate2 = { version = "1.0.28", default-features = false } futures-util = "0.3.30" futures-io = "0.3.30" httpdate = "1.0.2" +rc-zip-sync = { version = "4.2.3", features = [ + "deflate", + "bzip2", + "deflate64", + "lzma", + "zstd", +] } reqwest = { version = "0.12.5", features = [ "http2", "stream", diff --git a/crates/binstalk-downloader/src/download.rs b/crates/binstalk-downloader/src/download.rs index bfbd21f0..78943730 100644 --- a/crates/binstalk-downloader/src/download.rs +++ b/crates/binstalk-downloader/src/download.rs @@ -7,6 +7,7 @@ use thiserror::Error as ThisError; use tracing::{debug, error, instrument}; pub use binstalk_types::cargo_toml_binstall::{PkgFmt, TarBasedFmt}; +pub use rc_zip_sync::rc_zip::error::Error as ZipError; use crate::remote::{Client, Error as RemoteError, Response, Url}; @@ -23,7 +24,6 @@ mod extracted_files; pub use extracted_files::{ExtractedFiles, ExtractedFilesEntry}; mod zip_extraction; -pub use zip_extraction::ZipError; #[derive(Debug, ThisError)] #[non_exhaustive] diff --git a/crates/binstalk-downloader/src/download/async_extracter.rs b/crates/binstalk-downloader/src/download/async_extracter.rs index 6add5830..ca2f0710 100644 --- a/crates/binstalk-downloader/src/download/async_extracter.rs +++ b/crates/binstalk-downloader/src/download/async_extracter.rs @@ -6,18 +6,17 @@ use std::{ path::{Component, Path, PathBuf}, }; -use async_zip::base::read::stream::ZipFileReader; -use bytes::{Bytes, BytesMut}; +use bytes::Bytes; use futures_util::Stream; +use tempfile::tempfile as create_tmpfile; use tokio::sync::mpsc; -use tokio_util::io::StreamReader; use tracing::debug; -use super::{ - extracter::*, zip_extraction::extract_zip_entry, DownloadError, ExtractedFiles, TarBasedFmt, - ZipError, +use super::{extracter::*, DownloadError, ExtractedFiles, TarBasedFmt}; +use crate::{ + download::zip_extraction::do_extract_zip, + utils::{extract_with_blocking_task, StreamReadable}, }; -use crate::utils::{extract_with_blocking_task, StreamReadable}; pub async fn extract_bin(stream: S, path: &Path) -> Result where @@ -25,52 +24,30 @@ where { debug!("Writing to `{}`", path.display()); - extract_with_blocking_decoder(stream, path, |mut rx, path| { - let mut file = fs::File::create(path)?; + extract_with_blocking_decoder(stream, path, |rx, path| { + let mut extracted_files = ExtractedFiles::new(); - while let Some(bytes) = rx.blocking_recv() { - file.write_all(&bytes)?; - } + extracted_files.add_file(Path::new(path.file_name().unwrap())); - file.flush() + write_stream_to_file(rx, fs::File::create(path)?)?; + + Ok(extracted_files) }) - .await?; - - let mut extracted_files = ExtractedFiles::new(); - - extracted_files.add_file(Path::new(path.file_name().unwrap())); - - Ok(extracted_files) + .await } pub async fn extract_zip(stream: S, path: &Path) -> Result where S: Stream> + Unpin + Send + Sync, { - debug!("Decompressing from zip archive to `{}`", path.display()); + debug!("Downloading from zip archive to tempfile"); - let reader = StreamReader::new(stream); - let mut zip = ZipFileReader::with_tokio(reader); - let mut buf = BytesMut::with_capacity(4 * 4096); - let mut extracted_files = ExtractedFiles::new(); + extract_with_blocking_decoder(stream, path, |rx, path| { + debug!("Decompressing from zip archive to `{}`", path.display()); - while let Some(mut zip_reader) = zip.next_with_entry().await.map_err(ZipError::from_inner)? { - extract_zip_entry( - zip_reader.reader_mut(), - path, - &mut buf, - &mut extracted_files, - ) - .await?; - - // extract_zip_entry would read the zip_reader until read the file until - // eof unless extract_zip itself is cancelled or an error is raised. - // - // So calling done here should not raise any error. - zip = zip_reader.done().await.map_err(ZipError::from_inner)?; - } - - Ok(extracted_files) + do_extract_zip(write_stream_to_file(rx, create_tmpfile()?)?, path).map_err(io::Error::from) + }) + .await } pub async fn extract_tar_based_stream( @@ -176,3 +153,15 @@ where f(rx, &path) }) } + +fn write_stream_to_file(mut rx: mpsc::Receiver, f: fs::File) -> io::Result { + let mut f = io::BufWriter::new(f); + + while let Some(bytes) = rx.blocking_recv() { + f.write_all(&bytes)?; + } + + f.flush()?; + + f.into_inner().map_err(io::IntoInnerError::into_error) +} diff --git a/crates/binstalk-downloader/src/download/zip_extraction.rs b/crates/binstalk-downloader/src/download/zip_extraction.rs index 1cf028ad..0bdfc425 100644 --- a/crates/binstalk-downloader/src/download/zip_extraction.rs +++ b/crates/binstalk-downloader/src/download/zip_extraction.rs @@ -1,232 +1,66 @@ use std::{ - borrow::Cow, - io::Write, - path::{Component, Path, PathBuf}, + fs::{self, create_dir_all, File}, + io::{self, Read}, + path::Path, }; -use async_zip::{ - base::{read::WithEntry, read::ZipEntryReader}, - ZipString, -}; -use bytes::{Bytes, BytesMut}; -use futures_util::future::try_join; -use thiserror::Error as ThisError; -use tokio::{ - io::{AsyncRead, AsyncReadExt}, - sync::mpsc, -}; -use tokio_util::compat::FuturesAsyncReadCompatExt; +use cfg_if::cfg_if; +use rc_zip_sync::{rc_zip::parse::EntryKind, ReadZip}; use super::{DownloadError, ExtractedFiles}; -use crate::utils::asyncify; -#[derive(Debug, ThisError)] -enum ZipErrorInner { - #[error(transparent)] - Inner(#[from] async_zip::error::ZipError), +pub(super) fn do_extract_zip(f: File, dir: &Path) -> Result { + let mut extracted_files = ExtractedFiles::new(); - #[error("Invalid file path: {0}")] - InvalidFilePath(Box), -} + for entry in f.read_zip()?.entries() { + let Some(name) = entry.sanitized_name().map(Path::new) else { + continue; + }; + let path = dir.join(name); -#[derive(Debug, ThisError)] -#[error(transparent)] -pub struct ZipError(#[from] ZipErrorInner); + let do_extract_file = || { + let mut entry_writer = File::create(&path)?; + let mut entry_reader = entry.reader(); + io::copy(&mut entry_reader, &mut entry_writer)?; -impl ZipError { - pub(super) fn from_inner(err: async_zip::error::ZipError) -> Self { - Self(ZipErrorInner::Inner(err)) - } -} - -pub(super) async fn extract_zip_entry( - zip_reader: &mut ZipEntryReader<'_, R, WithEntry<'_>>, - path: &Path, - buf: &mut BytesMut, - extracted_files: &mut ExtractedFiles, -) -> Result<(), DownloadError> -where - R: futures_io::AsyncBufRead + Unpin + Send + Sync, -{ - // Sanitize filename - let raw_filename = zip_reader.entry().filename(); - let (filename, is_dir) = check_filename_and_normalize(raw_filename)?; - - // Calculates the outpath - let outpath = path.join(&filename); - - // Get permissions - #[cfg_attr(not(unix), allow(unused_mut))] - let mut perms = None; - - #[cfg(unix)] - { - use std::{fs::Permissions, os::unix::fs::PermissionsExt}; - - if let Some(mode) = zip_reader.entry().unix_permissions() { - // If it is a dir, then it needs to be at least rwx for the current - // user so that we can create new files, search for existing files - // and list its contents. - // - // If it is a file, then it needs to be at least readable for the - // current user. - let mode: u16 = mode | if is_dir { 0o700 } else { 0o400 }; - perms = Some(Permissions::from_mode(mode as u32)); - } - } - - if is_dir { - extracted_files.add_dir(&filename); - - // This entry is a dir. - asyncify(move || { - std::fs::create_dir_all(&outpath)?; - if let Some(perms) = perms { - std::fs::set_permissions(&outpath, perms)?; - } - - Ok(()) - }) - .await?; - } else { - extracted_files.add_file(&filename); - - // Use channel size = 5 to minimize the waiting time in the extraction task - let (tx, mut rx) = mpsc::channel::(5); - - // This entry is a file. - - let write_task = asyncify(move || { - if let Some(p) = outpath.parent() { - std::fs::create_dir_all(p)?; - } - let mut outfile = std::fs::File::create(&outpath)?; - - while let Some(bytes) = rx.blocking_recv() { - outfile.write_all(&bytes)?; - } - - outfile.flush()?; - - if let Some(perms) = perms { - outfile.set_permissions(perms)?; - } - - Ok(()) - }); - - let read_task = async move { - // Read everything into `tx` - copy_file_to_mpsc(zip_reader.compat(), tx, buf).await?; - // Check crc32 checksum. - // - // NOTE that since everything is alread read into the channel, - // this function should not read any byte into the `Vec` and - // should return `0`. - assert_eq!(zip_reader.read_to_end_checked(&mut Vec::new()).await?, 0); - Ok(()) + Ok::<_, io::Error>(()) }; - try_join( - async move { write_task.await.map_err(From::from) }, - async move { - read_task - .await - .map_err(ZipError::from_inner) - .map_err(DownloadError::from) - }, - ) - .await?; - } + let parent = path + .parent() + .expect("all full entry paths should have parent paths"); + create_dir_all(parent)?; - Ok(()) -} + match entry.kind() { + EntryKind::Symlink => { + extracted_files.add_file(name); + cfg_if! { + if #[cfg(windows)] { + do_extract_file()?; + } else { + match fs::symlink_metadata(&path) { + Ok(metadata) if metadata.is_file() => fs::remove_file(&path)?, + _ => (), + } -async fn copy_file_to_mpsc( - mut entry_reader: R, - tx: mpsc::Sender, - buf: &mut BytesMut, -) -> Result<(), async_zip::error::ZipError> -where - R: AsyncRead + Unpin + Send + Sync, -{ - // Since BytesMut does not have a max cap, if AsyncReadExt::read_buf returns - // 0 then it means Eof. - while entry_reader.read_buf(buf).await? != 0 { - // Ensure AsyncReadExt::read_buf can read at least 4096B to avoid - // frequent expensive read syscalls. - // - // Performs this reserve before sending the buf over mpsc queue to - // increase the possibility of reusing the previous allocation. - // - // NOTE: `BytesMut` only reuses the previous allocation if it is the - // only one holds the reference to it, which is either on the first - // iteration or all the `Bytes` in the mpsc queue has been consumed, - // written to the file and dropped. - // - // Since reading from entry would have to wait for external file I/O, - // this would give the blocking thread some time to flush `Bytes` - // out. - // - // If all `Bytes` are flushed out, then we can reuse the allocation here. - buf.reserve(4096); + let mut src = String::new(); + entry.reader().read_to_string(&mut src)?; - if tx.send(buf.split().freeze()).await.is_err() { - // Same reason as extract_with_blocking_decoder - break; - } - } - - Ok(()) -} - -/// Ensure the file path is safe to use as a [`Path`]. -/// -/// - It can't contain NULL bytes -/// - It can't resolve to a path outside the current directory -/// > `foo/../bar` is fine, `foo/../../bar` is not. -/// - It can't be an absolute path -/// -/// It will then return a normalized path. -/// -/// This will read well-formed ZIP files correctly, and is resistant -/// to path-based exploits. -/// -/// This function is adapted from `zip::ZipFile::enclosed_name`. -fn check_filename_and_normalize(filename: &ZipString) -> Result<(PathBuf, bool), DownloadError> { - let filename = filename - .as_str() - .map(Cow::Borrowed) - .unwrap_or_else(|_| String::from_utf8_lossy(filename.as_bytes())); - - let bail = |filename: Cow<'_, str>| { - Err(DownloadError::from(ZipError( - ZipErrorInner::InvalidFilePath(filename.into_owned().into()), - ))) - }; - - if filename.contains('\0') { - return bail(filename); - } - - let mut path = PathBuf::new(); - - // The following loop is adapted from - // `normalize_path::NormalizePath::normalize`. - for component in Path::new(&*filename).components() { - match component { - Component::Prefix(_) | Component::RootDir => return bail(filename), - Component::CurDir => (), - Component::ParentDir => { - if !path.pop() { - // `PathBuf::pop` returns false if there is no parent. - // which means the path is invalid. - return bail(filename); + // validate pointing path before creating a symbolic link + if src.contains("..") { + continue; + } + std::os::unix::fs::symlink(src, &path)?; + } } } - Component::Normal(c) => path.push(c), + EntryKind::Directory => (), + EntryKind::File => { + extracted_files.add_file(name); + do_extract_file()?; + } } } - Ok((path, filename.ends_with('/'))) + Ok(extracted_files) }