integration_tests/
seed.rs

1// Copyright (c) The nextest Contributors
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use crate::{env::set_env_vars_for_script, nextest_cli::CargoNextestCli};
5use camino::{Utf8Path, Utf8PathBuf};
6use color_eyre::eyre::Context;
7use fs_err as fs;
8use sha2::{Digest, Sha256};
9use std::{collections::BTreeMap, time::SystemTime};
10
11pub fn fixture_project_dir(workspace_root: &Utf8Path) -> Utf8PathBuf {
12    workspace_root.join("fixtures/fixture-project")
13}
14
/// A 32-byte digest value.
///
/// We use SHA-256 because other parts of nextest do the same -- this can easily
/// be changed to another hash function if needed.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct Sha256Hash([u8; 32]);
19
20impl std::fmt::Display for Sha256Hash {
21    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
22        hex::encode(self.0).fmt(f)
23    }
24}
25
26/// Computes the hash of a directory and its contents, in a way that hopefully
27/// represents what Cargo does somewhat.
28///
29/// With any cache, invalidation is an issue -- specifically, Cargo has its own
30/// notion of cache invalidation. Ideally, we could ask Cargo to give us a hash
31/// for a particular command that deterministically says "a rebuild will happen
32/// if and only if this hash changes". But that doesn't exist with stable Rust
33/// as of this writing (Rust 1.83), so we must guess at what Cargo does.
34///
35/// We take some basic precautions:
36///
37/// * preserving mtimes while copying the source directory
38/// * using both mtimes and hashes while computing the overall hash below.
39///
40/// Beyond that, it's possible for this implementation to have issues in three
41/// different ways:
42///
43/// ## 1. Cargo invalidates cache, but we don't
44///
45/// In this case, the cache becomes useless -- Cargo will rebuild the project
46/// anyway. This can cause flaky tests (see `__NEXTEST_ALT_TARGET_DIR` for a fix
47/// to a flake that was caught because of this divergence).
48///
49/// To be clear, any divergence merely due to the cached seed not being used is
50/// a bug. That was the case with the issue which `__NEXTEST_ALT_TARGET_DIR`
51/// works around.
52///
53/// ## 2. We invalidate our cache, but Cargo doesn't
54///
55/// In this case, we'll regenerate a new seed but Cargo will reuse it. This
56/// isn't too bad since generating the seed is a one-time cost.
57///
58/// ## 3. Something about the way nextest generates archives changes
59///
60/// This is the most difficult case to handle, because a brute hash (just hash
61/// all of the files in the nextest repo) would invalidate far too often. So if
62/// you're altering this code, you have to be careful to remove the cache as
63/// well. Hopefully CI (which doesn't cache the seed archive) will catch issues.
64///
65/// ---
66///
67/// In general, this implementation appears to be pretty reliable, though
68/// occasionally the cache has not worked (case 1 above) in Windows CI.
69pub fn compute_dir_hash(dir: impl AsRef<Utf8Path>) -> color_eyre::Result<Sha256Hash> {
70    let files = collect_all_files(dir.as_ref(), true)?;
71    let mut hasher = Sha256::new();
72
73    // Hash the path to `cargo` to ensure that the hash is different for
74    // different Rust versions.
75    hasher.update(b"nextest:cargo-path\0");
76    hasher.update(
77        std::env::var("CARGO")
78            .expect("this should be run under cargo")
79            .as_bytes(),
80    );
81    hasher.update([0, 0]);
82    for (file_name, metadata) in files {
83        hasher.update(file_name.as_str());
84        hasher.update([0]);
85        // Convert the system time to a number to hash.
86        let timestamp = metadata
87            .mtime
88            .duration_since(SystemTime::UNIX_EPOCH)
89            .expect("file's mtime after 1970-01-01");
90        hasher.update(timestamp.as_nanos().to_le_bytes());
91        hasher.update(metadata.hash.0);
92        hasher.update([0]);
93    }
94    Ok(Sha256Hash(hasher.finalize().into()))
95}
96
97// Hash and collect metadata about all the files in a directory.
98//
99// Using a `BTreeMap` ensures a deterministic order of files above.
100fn collect_all_files(
101    dir: &Utf8Path,
102    root: bool,
103) -> color_eyre::Result<BTreeMap<Utf8PathBuf, FileMetadata>> {
104    let mut stack = vec![dir.to_path_buf()];
105    let mut hashes = BTreeMap::new();
106
107    // TODO: parallelize this?
108    while let Some(dir) = stack.pop() {
109        for entry in dir.read_dir_utf8()? {
110            let entry =
111                entry.wrap_err_with(|| format!("failed to read entry from directory {dir}"))?;
112            let ty = entry
113                .file_type()
114                .wrap_err_with(|| format!("failed to get file type for entry {}", entry.path()))?;
115
116            // Ignore a pre-existing `target` directory at the root.
117            if root && entry.path().file_name() == Some("target") {
118                continue;
119            }
120
121            if ty.is_dir() {
122                stack.push(entry.into_path());
123            } else if ty.is_file() {
124                let metadata = entry.metadata().wrap_err_with(|| {
125                    format!("failed to get metadata for file {}", entry.path())
126                })?;
127
128                // Also include the mtime, because Cargo uses the mtime to
129                // determine if a local file has changed. If there were a way to
130                // tell Cargo to ignore mtimes, we could remove this.
131                let mtime = metadata.modified().wrap_err_with(|| {
132                    format!("failed to get modified time for file {}", entry.path())
133                })?;
134                let path = entry.into_path();
135                let contents = fs::read(&path)?;
136                let hash = Sha256Hash(Sha256::digest(&contents).into());
137                hashes.insert(path, FileMetadata { mtime, hash });
138            }
139        }
140    }
141
142    Ok(hashes)
143}
144
145#[derive(Clone, Debug)]
146struct FileMetadata {
147    mtime: SystemTime,
148    hash: Sha256Hash,
149}
150
151pub fn get_seed_archive_name(hash: Sha256Hash) -> Utf8PathBuf {
152    // Check in the std temp directory for the seed file.
153    let temp_dir = Utf8PathBuf::try_from(std::env::temp_dir()).expect("temp dir is utf-8");
154    let username = whoami::username().expect("obtained username");
155    let user_dir = temp_dir.join(format!("fixture-project-seed-{username}"));
156    user_dir.join(format!("seed-{hash}.tar.zst"))
157}
158
159pub fn make_seed_archive(workspace_dir: &Utf8Path, file_name: &Utf8Path) -> color_eyre::Result<()> {
160    // Make the directory containing the file name.
161    fs::create_dir_all(file_name.parent().unwrap())?;
162
163    // First, run a build in a temporary directory.
164    let temp_dir = camino_tempfile::Builder::new()
165        .prefix("nextest-seed-build-")
166        .tempdir()
167        .wrap_err("failed to create temporary directory")?;
168    let target_dir = temp_dir.path().join("target");
169    fs::create_dir_all(&target_dir)?;
170
171    // Now build a nextest archive, using the temporary directory as the target dir.
172    let mut cli = CargoNextestCli::for_script()?;
173
174    // Set the environment variables after getting the CLI -- this avoids
175    // rebuilds due to the variables changing.
176    //
177    // TODO: We shouldn't alter the global state of this process -- instead,
178    // set_env_vars_for_script should be part of nextest_cli.rs.
179    set_env_vars_for_script();
180
181    let output = cli
182        .args([
183            "--manifest-path",
184            workspace_dir.join("Cargo.toml").as_str(),
185            "archive",
186            "--archive-file",
187            file_name.as_str(),
188            "--workspace",
189            "--all-targets",
190            "--target-dir",
191            target_dir.as_str(),
192            // Use this profile to ensure that the entire target dir is included.
193            "--profile",
194            "archive-all",
195        ])
196        .output();
197
198    if std::env::var("INTEGRATION_TESTS_DEBUG").as_deref() == Ok("1") {
199        eprintln!("make_seed_archive output: {output}");
200    }
201
202    Ok(())
203}