feat: Verify cksum of crate tarball from cargo registry (#1260)

Fixed #1183

Since the crate tarball may be downloaded from a different set of
servers than the ones hosting the cargo registry, its checksum must be
checked against the one recorded in the registry to ensure its integrity.

Signed-off-by: Jiahao XU <Jiahao_XU@outlook.com>
This commit is contained in:
Jiahao XU 2023-08-09 20:57:47 +10:00 committed by GitHub
parent 1c886d8897
commit 3e80b12748
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 112 additions and 45 deletions

20
Cargo.lock generated
View file

@ -203,6 +203,12 @@ dependencies = [
"backtrace", "backtrace",
] ]
[[package]]
name = "base16"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d27c3610c36aee21ce8ac510e6224498de4228ad772a171ed65643a24693a5a8"
[[package]] [[package]]
name = "base64" name = "base64"
version = "0.13.1" version = "0.13.1"
@ -229,6 +235,7 @@ name = "binstalk"
version = "0.14.1" version = "0.14.1"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"base16",
"binstalk-downloader", "binstalk-downloader",
"binstalk-types", "binstalk-types",
"cargo_toml", "cargo_toml",
@ -251,6 +258,7 @@ dependencies = [
"semver", "semver",
"serde", "serde",
"serde_json", "serde_json",
"sha2",
"strum", "strum",
"target-lexicon", "target-lexicon",
"tempfile", "tempfile",
@ -280,6 +288,7 @@ dependencies = [
"generic-array", "generic-array",
"httpdate", "httpdate",
"percent-encoding", "percent-encoding",
"quinn 0.10.2",
"reqwest", "reqwest",
"serde", "serde",
"serde-tuple-vec-map", "serde-tuple-vec-map",
@ -3258,6 +3267,17 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012" checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012"
[[package]]
name = "sha2"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]] [[package]]
name = "sharded-slab" name = "sharded-slab"
version = "0.1.4" version = "0.1.4"

View file

@ -1,5 +1,4 @@
use std::{ use std::{
borrow::Cow,
env, fs, env, fs,
future::Future, future::Future,
path::{Path, PathBuf}, path::{Path, PathBuf},

View file

@ -1,5 +1,7 @@
#![cfg_attr(docsrs, feature(doc_auto_cfg))] #![cfg_attr(docsrs, feature(doc_auto_cfg))]
pub use bytes;
pub mod download; pub mod download;
/// Github API client. /// Github API client.

View file

@ -11,6 +11,7 @@ license = "GPL-3.0-only"
[dependencies] [dependencies]
async-trait = "0.1.68" async-trait = "0.1.68"
base16 = "0.2.1"
binstalk-downloader = { version = "0.6.1", path = "../binstalk-downloader", default-features = false, features = ["gh-api-client"] } binstalk-downloader = { version = "0.6.1", path = "../binstalk-downloader", default-features = false, features = ["gh-api-client"] }
binstalk-types = { version = "0.5.0", path = "../binstalk-types" } binstalk-types = { version = "0.5.0", path = "../binstalk-types" }
cargo_toml = "0.15.3" cargo_toml = "0.15.3"
@ -33,6 +34,7 @@ reflink-copy = "0.1.5"
semver = { version = "1.0.17", features = ["serde"] } semver = { version = "1.0.17", features = ["serde"] }
serde = { version = "1.0.163", features = ["derive"] } serde = { version = "1.0.163", features = ["derive"] }
serde_json = "1.0.99" serde_json = "1.0.99"
sha2 = "0.10.7"
strum = "0.25.0" strum = "0.25.0"
target-lexicon = { version = "0.12.11", features = ["std"] } target-lexicon = { version = "0.12.11", features = ["std"] }
tempfile = "3.5.0" tempfile = "3.5.0"

View file

@ -1,5 +1,6 @@
use std::{str::FromStr, sync::Arc}; use std::{str::FromStr, sync::Arc};
use base16::DecodeError as Base16DecodeError;
use cargo_toml::Manifest; use cargo_toml::Manifest;
use compact_str::CompactString; use compact_str::CompactString;
use leon::{ParseError, RenderError}; use leon::{ParseError, RenderError};
@ -56,6 +57,12 @@ pub enum RegistryError {
#[error("Failed to render dl config: {0}")] #[error("Failed to render dl config: {0}")]
RenderDlConfig(#[from] RenderError), RenderDlConfig(#[from] RenderError),
#[error("Failed to parse checksum encoded in hex: {0}")]
InvalidHex(#[from] Base16DecodeError),
#[error("Expected checksum `{expected}`, actual checksum `{actual}`")]
UnmatchedChecksum { expected: String, actual: String },
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]

View file

@ -1,18 +1,21 @@
use std::{borrow::Cow, path::PathBuf}; use std::borrow::Cow;
use base16::{decode as decode_base16, encode_lower as encode_base16};
use cargo_toml::Manifest; use cargo_toml::Manifest;
use compact_str::{format_compact, CompactString, ToCompactString}; use compact_str::{format_compact, CompactString, ToCompactString};
use leon::{Template, Values}; use leon::{Template, Values};
use semver::{Version, VersionReq}; use semver::{Version, VersionReq};
use serde::Deserialize; use serde::Deserialize;
use serde_json::Error as JsonError; use serde_json::Error as JsonError;
use sha2::{Digest, Sha256};
use tracing::debug; use tracing::debug;
use crate::{ use crate::{
drivers::registry::{visitor::ManifestVisitor, RegistryError}, drivers::registry::{visitor::ManifestVisitor, RegistryError},
errors::BinstallError, errors::BinstallError,
helpers::{ helpers::{
download::Download, bytes::Bytes,
download::{DataVerifier, Download},
remote::{Client, Url}, remote::{Client, Url},
}, },
manifests::cargo_toml_binstall::{Meta, TarBasedFmt}, manifests::cargo_toml_binstall::{Meta, TarBasedFmt},
@ -23,23 +26,48 @@ pub(super) struct RegistryConfig {
pub(super) dl: CompactString, pub(super) dl: CompactString,
} }
/// Newtype around a [`Sha256`] hasher.
///
/// Wrapping the hasher in a local type lets this module implement
/// `DataVerifier` for it, so the hash can be computed while the crate
/// tarball is being downloaded.
struct Sha256Digest(Sha256);

impl Default for Sha256Digest {
    /// Create a digest wrapping a freshly constructed [`Sha256`] hasher.
    fn default() -> Self {
        Self(Sha256::new())
    }
}
impl DataVerifier for Sha256Digest {
fn update(&mut self, data: &Bytes) {
self.0.update(data);
}
}
pub(super) async fn parse_manifest( pub(super) async fn parse_manifest(
client: Client, client: Client,
crate_name: &str, crate_name: &str,
version: &str,
crate_url: Url, crate_url: Url,
MatchedVersion { version, cksum }: MatchedVersion,
) -> Result<Manifest<Meta>, BinstallError> { ) -> Result<Manifest<Meta>, BinstallError> {
debug!("Fetching crate from: {crate_url} and extracting Cargo.toml from it"); debug!("Fetching crate from: {crate_url} and extracting Cargo.toml from it");
let manifest_dir_path: PathBuf = format!("{crate_name}-{version}").into(); let mut manifest_visitor = ManifestVisitor::new(format!("{crate_name}-{version}").into());
let mut manifest_visitor = ManifestVisitor::new(manifest_dir_path); let checksum = decode_base16(cksum.as_bytes()).map_err(RegistryError::from)?;
let mut sha256_digest = Sha256Digest::default();
Download::new(client, crate_url) Download::new_with_data_verifier(client, crate_url, &mut sha256_digest)
.and_visit_tar(TarBasedFmt::Tgz, &mut manifest_visitor) .and_visit_tar(TarBasedFmt::Tgz, &mut manifest_visitor)
.await?; .await?;
manifest_visitor.load_manifest() let digest_checksum = sha256_digest.0.finalize();
if digest_checksum.as_slice() != checksum.as_slice() {
Err(RegistryError::UnmatchedChecksum {
expected: cksum,
actual: encode_base16(digest_checksum.as_slice()),
}
.into())
} else {
manifest_visitor.load_manifest()
}
} }
/// Return components of crate prefix /// Return components of crate prefix
@ -68,8 +96,7 @@ pub(super) fn render_dl_template(
dl_template: &str, dl_template: &str,
crate_name: &str, crate_name: &str,
(c1, c2): &(CompactString, Option<CompactString>), (c1, c2): &(CompactString, Option<CompactString>),
version: &str, MatchedVersion { version, cksum }: &MatchedVersion,
cksum: &str,
) -> Result<String, RegistryError> { ) -> Result<String, RegistryError> {
let template = Template::parse(dl_template)?; let template = Template::parse(dl_template)?;
if template.keys().next().is_some() { if template.keys().next().is_some() {
@ -114,12 +141,13 @@ pub(super) fn render_dl_template(
pub(super) struct RegistryIndexEntry { pub(super) struct RegistryIndexEntry {
vers: CompactString, vers: CompactString,
yanked: bool, yanked: bool,
cksum: CompactString, cksum: String,
} }
pub(super) struct MatchedVersion { pub(super) struct MatchedVersion {
pub(super) version: CompactString, pub(super) version: CompactString,
pub(super) cksum: CompactString, /// sha256 checksum encoded in base16
pub(super) cksum: String,
} }
impl MatchedVersion { impl MatchedVersion {

View file

@ -10,7 +10,7 @@ use tokio::{
use tracing::debug; use tracing::debug;
use crate::{ use crate::{
drivers::registry::{parse_manifest, RegistryError}, drivers::registry::{parse_manifest, MatchedVersion, RegistryError},
errors::BinstallError, errors::BinstallError,
helpers::remote::{Client, Url}, helpers::remote::{Client, Url},
manifests::cargo_toml_binstall::Meta, manifests::cargo_toml_binstall::Meta,
@ -43,7 +43,9 @@ impl CratesIoRateLimit {
self.0.lock().await.tick().await; self.0.lock().await.tick().await;
} }
} }
async fn is_crate_yanked(client: &Client, url: Url) -> Result<bool, RemoteError> {
/// Return `Some(checksum)` if the version is not yanked, otherwise `None`.
async fn is_crate_yanked(client: &Client, url: Url) -> Result<Option<String>, RemoteError> {
#[derive(Deserialize)] #[derive(Deserialize)]
struct CrateInfo { struct CrateInfo {
version: Inner, version: Inner,
@ -52,25 +54,29 @@ async fn is_crate_yanked(client: &Client, url: Url) -> Result<bool, RemoteError>
#[derive(Deserialize)] #[derive(Deserialize)]
struct Inner { struct Inner {
yanked: bool, yanked: bool,
checksum: String,
} }
// Fetch / update index // Fetch / update index
debug!("Looking up crate information"); debug!("Looking up crate information");
let info: CrateInfo = client.get(url).send(true).await?.json().await?; let info: CrateInfo = client.get(url).send(true).await?.json().await?;
let version = info.version;
Ok(info.version.yanked) Ok((!version.yanked).then_some(version.checksum))
} }
async fn fetch_crate_cratesio_version_matched( async fn fetch_crate_cratesio_version_matched(
client: &Client, client: &Client,
url: Url, url: Url,
version_req: &VersionReq, version_req: &VersionReq,
) -> Result<Option<CompactString>, RemoteError> { ) -> Result<Option<(CompactString, String)>, RemoteError> {
#[derive(Deserialize)] #[derive(Deserialize)]
struct CrateInfo { struct CrateInfo {
#[serde(rename = "crate")] #[serde(rename = "crate")]
inner: CrateInfoInner, inner: CrateInfoInner,
versions: Vec<Version>,
} }
#[derive(Deserialize)] #[derive(Deserialize)]
@ -78,28 +84,27 @@ async fn fetch_crate_cratesio_version_matched(
max_stable_version: CompactString, max_stable_version: CompactString,
} }
#[derive(Deserialize)]
struct Versions {
versions: Vec<Version>,
}
#[derive(Deserialize)] #[derive(Deserialize)]
struct Version { struct Version {
num: CompactString, num: CompactString,
yanked: bool, yanked: bool,
checksum: String,
} }
// Fetch / update index // Fetch / update index
debug!("Looking up crate information"); debug!("Looking up crate information");
let response = client.get(url).send(true).await?; let crate_info: CrateInfo = client.get(url).send(true).await?.json().await?;
let version = if version_req == &VersionReq::STAR { let version_with_checksum = if version_req == &VersionReq::STAR {
let crate_info: CrateInfo = response.json().await?; let version = crate_info.inner.max_stable_version;
Some(crate_info.inner.max_stable_version) crate_info
.versions
.into_iter()
.find_map(|v| (v.num.as_str() == version.as_str()).then_some(v.checksum))
.map(|checksum| (version, checksum))
} else { } else {
let response: Versions = response.json().await?; crate_info
response
.versions .versions
.into_iter() .into_iter()
.filter_map(|item| { .filter_map(|item| {
@ -115,17 +120,23 @@ async fn fetch_crate_cratesio_version_matched(
let ver = semver::Version::parse(&num).ok()?; let ver = semver::Version::parse(&num).ok()?;
// Filter by version match // Filter by version match
version_req.matches(&ver).then_some((num, ver)) version_req
.matches(&ver)
.then_some((num, ver, item.checksum))
} else { } else {
None None
} }
}) })
// Return highest version // Return highest version
.max_by(|(_ver_str_x, ver_x), (_ver_str_y, ver_y)| ver_x.cmp(ver_y)) .max_by(
.map(|(ver_str, _)| ver_str) |(_ver_str_x, ver_x, _checksum_x), (_ver_str_y, ver_y, _checksum_y)| {
ver_x.cmp(ver_y)
},
)
.map(|(ver_str, _, checksum)| (ver_str, checksum))
}; };
Ok(version) Ok(version_with_checksum)
} }
/// Find the crate by name, get its latest stable version matches `version_req`, /// Find the crate by name, get its latest stable version matches `version_req`,
@ -141,7 +152,7 @@ pub async fn fetch_crate_cratesio(
let url = Url::parse(&format!("https://crates.io/api/v1/crates/{name}"))?; let url = Url::parse(&format!("https://crates.io/api/v1/crates/{name}"))?;
let version = match version_req.comparators.as_slice() { let (version, cksum) = match version_req.comparators.as_slice() {
[Comparator { [Comparator {
op: ComparatorOp::Exact, op: ComparatorOp::Exact,
major, major,
@ -163,7 +174,7 @@ pub async fn fetch_crate_cratesio(
is_crate_yanked(&client, url) is_crate_yanked(&client, url)
.await .await
.map(|yanked| (!yanked).then_some(version)) .map(|ret| ret.map(|checksum| (version, checksum)))
} }
_ => fetch_crate_cratesio_version_matched(&client, url.clone(), version_req).await, _ => fetch_crate_cratesio_version_matched(&client, url.clone(), version_req).await,
} }
@ -185,5 +196,5 @@ pub async fn fetch_crate_cratesio(
.push(&version) .push(&version)
.push("download"); .push("download");
parse_manifest(client, name, &version, crate_url).await parse_manifest(client, name, crate_url, MatchedVersion { version, cksum }).await
} }

View file

@ -108,7 +108,7 @@ impl GitRegistry {
let version_req = version_req.clone(); let version_req = version_req.clone();
let this = self.clone(); let this = self.clone();
let (version, dl_url) = spawn_blocking(move || { let (matched_version, dl_url) = spawn_blocking(move || {
let GitIndex { let GitIndex {
_tempdir: _, _tempdir: _,
repo, repo,
@ -118,21 +118,20 @@ impl GitRegistry {
.git_index .git_index
.get_or_try_init(|| GitIndex::new(this.0.url.clone()))?; .get_or_try_init(|| GitIndex::new(this.0.url.clone()))?;
let MatchedVersion { version, cksum } = let matched_version =
Self::find_crate_matched_ver(repo, &crate_name, &crate_prefix, &version_req)?; Self::find_crate_matched_ver(repo, &crate_name, &crate_prefix, &version_req)?;
let url = Url::parse(&render_dl_template( let url = Url::parse(&render_dl_template(
dl_template, dl_template,
&crate_name, &crate_name,
&crate_prefix, &crate_prefix,
&version, &matched_version,
&cksum,
)?)?; )?)?;
Ok::<_, BinstallError>((version, url)) Ok::<_, BinstallError>((matched_version, url))
}) })
.await??; .await??;
parse_manifest(client, name, &version, dl_url).await parse_manifest(client, name, dl_url, matched_version).await
} }
} }

View file

@ -88,7 +88,7 @@ impl SparseRegistry {
) -> Result<Manifest<Meta>, BinstallError> { ) -> Result<Manifest<Meta>, BinstallError> {
let crate_prefix = crate_prefix_components(crate_name)?; let crate_prefix = crate_prefix_components(crate_name)?;
let dl_template = self.get_dl_template(&client).await?; let dl_template = self.get_dl_template(&client).await?;
let MatchedVersion { version, cksum } = Self::find_crate_matched_ver( let matched_version = Self::find_crate_matched_ver(
&client, &client,
self.url.clone(), self.url.clone(),
crate_name, crate_name,
@ -100,10 +100,9 @@ impl SparseRegistry {
dl_template, dl_template,
crate_name, crate_name,
&crate_prefix, &crate_prefix,
&version, &matched_version,
&cksum,
)?)?; )?)?;
parse_manifest(client, crate_name, &version, dl_url).await parse_manifest(client, crate_name, dl_url, matched_version).await
} }
} }

View file

@ -8,8 +8,8 @@ pub mod signal;
pub(crate) mod target_triple; pub(crate) mod target_triple;
pub mod tasks; pub mod tasks;
pub(crate) use binstalk_downloader::download;
pub use binstalk_downloader::gh_api_client; pub use binstalk_downloader::gh_api_client;
pub(crate) use binstalk_downloader::{bytes, download};
pub(crate) fn is_universal_macos(target: &str) -> bool { pub(crate) fn is_universal_macos(target: &str) -> bool {
["universal-apple-darwin", "universal2-apple-darwin"].contains(&target) ["universal-apple-darwin", "universal2-apple-darwin"].contains(&target)