integration_tests/
seed.rs

1// Copyright (c) The nextest Contributors
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use crate::{env::set_env_vars, nextest_cli::CargoNextestCli};
5use camino::{Utf8Path, Utf8PathBuf};
6use color_eyre::eyre::Context;
7use fs_err as fs;
8use sha2::{Digest, Sha256};
9use std::{collections::BTreeMap, time::SystemTime};
10
11pub fn nextest_tests_dir() -> Utf8PathBuf {
12    Utf8Path::new(env!("CARGO_MANIFEST_DIR"))
13        .parent()
14        .unwrap()
15        .join("fixtures/nextest-tests")
16}
17
18// We use SHA-256 because other parts of nextest do the same -- this can easily
19// be changed to another hash function if needed.
20#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
21pub struct Sha256Hash([u8; 32]);
22
23impl std::fmt::Display for Sha256Hash {
24    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
25        hex::encode(self.0).fmt(f)
26    }
27}
28
29/// Computes the hash of a directory and its contents, in a way that hopefully
30/// represents what Cargo does somewhat.
31///
32/// With any cache, invalidation is an issue -- specifically, Cargo has its own
33/// notion of cache invalidation. Ideally, we could ask Cargo to give us a hash
34/// for a particular command that deterministically says "a rebuild will happen
35/// if and only if this hash changes". But that doesn't exist with stable Rust
36/// as of this writing (Rust 1.83), so we must guess at what Cargo does.
37///
38/// We take some basic precautions:
39///
40/// * preserving mtimes while copying the source directory
41/// * using both mtimes and hashes while computing the overall hash below.
42///
43/// Beyond that, it's possible for this implementation to have issues in three
44/// different ways:
45///
46/// ## 1. Cargo invalidates cache, but we don't
47///
48/// In this case, the cache becomes useless -- Cargo will rebuild the project
49/// anyway. This can cause flaky tests (see `__NEXTEST_ALT_TARGET_DIR` for a fix
50/// to a flake that was caught because of this divergence).
51///
52/// To be clear, any divergence merely due to the cached seed not being used is
53/// a bug. That was the case with the issue which `__NEXTEST_ALT_TARGET_DIR`
54/// works around.
55///
56/// ## 2. We invalidate our cache, but Cargo doesn't
57///
58/// In this case, we'll regenerate a new seed but Cargo will reuse it. This
59/// isn't too bad since generating the seed is a one-time cost.
60///
61/// ## 3. Something about the way nextest generates archives changes
62///
63/// This is the most difficult case to handle, because a brute hash (just hash
64/// all of the files in the nextest repo) would invalidate far too often. So if
65/// you're altering this code, you have to be careful to remove the cache as
66/// well. Hopefully CI (which doesn't cache the seed archive) will catch issues.
67///
68/// ---
69///
70/// In general, this implementation appears to be pretty reliable, though
71/// occasionally the cache has not worked (case 1 above) in Windows CI.
72pub fn compute_dir_hash(dir: impl AsRef<Utf8Path>) -> color_eyre::Result<Sha256Hash> {
73    let files = collect_all_files(dir.as_ref(), true)?;
74    let mut hasher = Sha256::new();
75
76    // Hash the path to `cargo` to ensure that the hash is different for
77    // different Rust versions.
78    hasher.update(b"nextest:cargo-path\0");
79    hasher.update(
80        std::env::var("CARGO")
81            .expect("this should be run under cargo")
82            .as_bytes(),
83    );
84    hasher.update([0, 0]);
85    for (file_name, metadata) in files {
86        hasher.update(file_name.as_str());
87        hasher.update([0]);
88        // Convert the system time to a number to hash.
89        let timestamp = metadata
90            .mtime
91            .duration_since(SystemTime::UNIX_EPOCH)
92            .expect("file's mtime after 1970-01-01");
93        hasher.update(timestamp.as_nanos().to_le_bytes());
94        hasher.update(metadata.hash.0);
95        hasher.update([0]);
96    }
97    Ok(Sha256Hash(hasher.finalize().into()))
98}
99
100// Hash and collect metadata about all the files in a directory.
101//
102// Using a `BTreeMap` ensures a deterministic order of files above.
103fn collect_all_files(
104    dir: &Utf8Path,
105    root: bool,
106) -> color_eyre::Result<BTreeMap<Utf8PathBuf, FileMetadata>> {
107    let mut stack = vec![dir.to_path_buf()];
108    let mut hashes = BTreeMap::new();
109
110    // TODO: parallelize this?
111    while let Some(dir) = stack.pop() {
112        for entry in dir.read_dir_utf8()? {
113            let entry =
114                entry.wrap_err_with(|| format!("failed to read entry from directory {dir}"))?;
115            let ty = entry
116                .file_type()
117                .wrap_err_with(|| format!("failed to get file type for entry {}", entry.path()))?;
118
119            // Ignore a pre-existing `target` directory at the root.
120            if root && entry.path().file_name() == Some("target") {
121                continue;
122            }
123
124            if ty.is_dir() {
125                stack.push(entry.into_path());
126            } else if ty.is_file() {
127                let metadata = entry.metadata().wrap_err_with(|| {
128                    format!("failed to get metadata for file {}", entry.path())
129                })?;
130
131                // Also include the mtime, because Cargo uses the mtime to
132                // determine if a local file has changed. If there were a way to
133                // tell Cargo to ignore mtimes, we could remove this.
134                let mtime = metadata.modified().wrap_err_with(|| {
135                    format!("failed to get modified time for file {}", entry.path())
136                })?;
137                let path = entry.into_path();
138                let contents = fs::read(&path)?;
139                let hash = Sha256Hash(Sha256::digest(&contents).into());
140                hashes.insert(path, FileMetadata { mtime, hash });
141            }
142        }
143    }
144
145    Ok(hashes)
146}
147
148#[derive(Clone, Debug)]
149struct FileMetadata {
150    mtime: SystemTime,
151    hash: Sha256Hash,
152}
153
154pub fn get_seed_archive_name(hash: Sha256Hash) -> Utf8PathBuf {
155    // Check in the std temp directory for the seed file.
156    let temp_dir = Utf8PathBuf::try_from(std::env::temp_dir()).expect("temp dir is utf-8");
157    let username = whoami::username();
158    let user_dir = temp_dir.join(format!("nextest-tests-seed-{username}"));
159    user_dir.join(format!("seed-{hash}.tar.zst"))
160}
161
162pub fn make_seed_archive(workspace_dir: &Utf8Path, file_name: &Utf8Path) -> color_eyre::Result<()> {
163    // Make the directory containing the file name.
164    fs::create_dir_all(file_name.parent().unwrap())?;
165
166    // First, run a build in a temporary directory.
167    let temp_dir = camino_tempfile::Builder::new()
168        .prefix("nextest-seed-build-")
169        .tempdir()
170        .wrap_err("failed to create temporary directory")?;
171    let target_dir = temp_dir.path().join("target");
172    fs::create_dir_all(&target_dir)?;
173
174    // Now build a nextest archive, using the temporary directory as the target dir.
175    let mut cli = CargoNextestCli::for_script()?;
176
177    // Set the environment variables after getting the CLI -- this avoids
178    // rebuilds due to the variables changing.
179    //
180    // TODO: We shouldn't alter the global state of this process -- instead,
181    // set_env_vars should be part of nextest_cli.rs.
182    set_env_vars();
183
184    let output = cli
185        .args([
186            "--manifest-path",
187            workspace_dir.join("Cargo.toml").as_str(),
188            "archive",
189            "--archive-file",
190            file_name.as_str(),
191            "--workspace",
192            "--all-targets",
193            "--target-dir",
194            target_dir.as_str(),
195            // Use this profile to ensure that the entire target dir is included.
196            "--profile",
197            "archive-all",
198        ])
199        .output();
200
201    if std::env::var("INTEGRATION_TESTS_DEBUG") == Ok("1".to_string()) {
202        eprintln!("make_seed_archive output: {output}");
203    }
204
205    Ok(())
206}