integration_tests/seed.rs
1// Copyright (c) The nextest Contributors
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use crate::{env::set_env_vars, nextest_cli::CargoNextestCli};
5use camino::{Utf8Path, Utf8PathBuf};
6use color_eyre::eyre::Context;
7use fs_err as fs;
8use sha2::{Digest, Sha256};
9use std::{collections::BTreeMap, time::SystemTime};
10
11pub fn nextest_tests_dir() -> Utf8PathBuf {
12 Utf8Path::new(env!("CARGO_MANIFEST_DIR"))
13 .parent()
14 .unwrap()
15 .join("fixtures/nextest-tests")
16}
17
/// A raw 32-byte SHA-256 digest.
///
/// SHA-256 is used because other parts of nextest do the same -- this can
/// easily be changed to another hash function if needed.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Sha256Hash([u8; 32]);
22
23impl std::fmt::Display for Sha256Hash {
24 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
25 hex::encode(self.0).fmt(f)
26 }
27}
28
29/// Computes the hash of a directory and its contents, in a way that hopefully
30/// represents what Cargo does somewhat.
31///
32/// With any cache, invalidation is an issue -- specifically, Cargo has its own
33/// notion of cache invalidation. Ideally, we could ask Cargo to give us a hash
34/// for a particular command that deterministically says "a rebuild will happen
35/// if and only if this hash changes". But that doesn't exist with stable Rust
36/// as of this writing (Rust 1.83), so we must guess at what Cargo does.
37///
38/// We take some basic precautions:
39///
40/// * preserving mtimes while copying the source directory
41/// * using both mtimes and hashes while computing the overall hash below.
42///
43/// Beyond that, it's possible for this implementation to have issues in three
44/// different ways:
45///
46/// ## 1. Cargo invalidates cache, but we don't
47///
48/// In this case, the cache becomes useless -- Cargo will rebuild the project
49/// anyway. This can cause flaky tests (see `__NEXTEST_ALT_TARGET_DIR` for a fix
50/// to a flake that was caught because of this divergence).
51///
52/// To be clear, any divergence merely due to the cached seed not being used is
53/// a bug. That was the case with the issue which `__NEXTEST_ALT_TARGET_DIR`
54/// works around.
55///
56/// ## 2. We invalidate our cache, but Cargo doesn't
57///
58/// In this case, we'll regenerate a new seed but Cargo will reuse it. This
59/// isn't too bad since generating the seed is a one-time cost.
60///
61/// ## 3. Something about the way nextest generates archives changes
62///
63/// This is the most difficult case to handle, because a brute hash (just hash
64/// all of the files in the nextest repo) would invalidate far too often. So if
65/// you're altering this code, you have to be careful to remove the cache as
66/// well. Hopefully CI (which doesn't cache the seed archive) will catch issues.
67///
68/// ---
69///
70/// In general, this implementation appears to be pretty reliable, though
71/// occasionally the cache has not worked (case 1 above) in Windows CI.
72pub fn compute_dir_hash(dir: impl AsRef<Utf8Path>) -> color_eyre::Result<Sha256Hash> {
73 let files = collect_all_files(dir.as_ref(), true)?;
74 let mut hasher = Sha256::new();
75
76 // Hash the path to `cargo` to ensure that the hash is different for
77 // different Rust versions.
78 hasher.update(b"nextest:cargo-path\0");
79 hasher.update(
80 std::env::var("CARGO")
81 .expect("this should be run under cargo")
82 .as_bytes(),
83 );
84 hasher.update([0, 0]);
85 for (file_name, metadata) in files {
86 hasher.update(file_name.as_str());
87 hasher.update([0]);
88 // Convert the system time to a number to hash.
89 let timestamp = metadata
90 .mtime
91 .duration_since(SystemTime::UNIX_EPOCH)
92 .expect("file's mtime after 1970-01-01");
93 hasher.update(timestamp.as_nanos().to_le_bytes());
94 hasher.update(metadata.hash.0);
95 hasher.update([0]);
96 }
97 Ok(Sha256Hash(hasher.finalize().into()))
98}
99
100// Hash and collect metadata about all the files in a directory.
101//
102// Using a `BTreeMap` ensures a deterministic order of files above.
103fn collect_all_files(
104 dir: &Utf8Path,
105 root: bool,
106) -> color_eyre::Result<BTreeMap<Utf8PathBuf, FileMetadata>> {
107 let mut stack = vec![dir.to_path_buf()];
108 let mut hashes = BTreeMap::new();
109
110 // TODO: parallelize this?
111 while let Some(dir) = stack.pop() {
112 for entry in dir.read_dir_utf8()? {
113 let entry =
114 entry.wrap_err_with(|| format!("failed to read entry from directory {dir}"))?;
115 let ty = entry
116 .file_type()
117 .wrap_err_with(|| format!("failed to get file type for entry {}", entry.path()))?;
118
119 // Ignore a pre-existing `target` directory at the root.
120 if root && entry.path().file_name() == Some("target") {
121 continue;
122 }
123
124 if ty.is_dir() {
125 stack.push(entry.into_path());
126 } else if ty.is_file() {
127 let metadata = entry.metadata().wrap_err_with(|| {
128 format!("failed to get metadata for file {}", entry.path())
129 })?;
130
131 // Also include the mtime, because Cargo uses the mtime to
132 // determine if a local file has changed. If there were a way to
133 // tell Cargo to ignore mtimes, we could remove this.
134 let mtime = metadata.modified().wrap_err_with(|| {
135 format!("failed to get modified time for file {}", entry.path())
136 })?;
137 let path = entry.into_path();
138 let contents = fs::read(&path)?;
139 let hash = Sha256Hash(Sha256::digest(&contents).into());
140 hashes.insert(path, FileMetadata { mtime, hash });
141 }
142 }
143 }
144
145 Ok(hashes)
146}
147
148#[derive(Clone, Debug)]
149struct FileMetadata {
150 mtime: SystemTime,
151 hash: Sha256Hash,
152}
153
154pub fn get_seed_archive_name(hash: Sha256Hash) -> Utf8PathBuf {
155 // Check in the std temp directory for the seed file.
156 let temp_dir = Utf8PathBuf::try_from(std::env::temp_dir()).expect("temp dir is utf-8");
157 let username = whoami::username();
158 let user_dir = temp_dir.join(format!("nextest-tests-seed-{username}"));
159 user_dir.join(format!("seed-{hash}.tar.zst"))
160}
161
162pub fn make_seed_archive(workspace_dir: &Utf8Path, file_name: &Utf8Path) -> color_eyre::Result<()> {
163 // Make the directory containing the file name.
164 fs::create_dir_all(file_name.parent().unwrap())?;
165
166 // First, run a build in a temporary directory.
167 let temp_dir = camino_tempfile::Builder::new()
168 .prefix("nextest-seed-build-")
169 .tempdir()
170 .wrap_err("failed to create temporary directory")?;
171 let target_dir = temp_dir.path().join("target");
172 fs::create_dir_all(&target_dir)?;
173
174 // Now build a nextest archive, using the temporary directory as the target dir.
175 let mut cli = CargoNextestCli::for_script()?;
176
177 // Set the environment variables after getting the CLI -- this avoids
178 // rebuilds due to the variables changing.
179 //
180 // TODO: We shouldn't alter the global state of this process -- instead,
181 // set_env_vars should be part of nextest_cli.rs.
182 set_env_vars();
183
184 let output = cli
185 .args([
186 "--manifest-path",
187 workspace_dir.join("Cargo.toml").as_str(),
188 "archive",
189 "--archive-file",
190 file_name.as_str(),
191 "--workspace",
192 "--all-targets",
193 "--target-dir",
194 target_dir.as_str(),
195 // Use this profile to ensure that the entire target dir is included.
196 "--profile",
197 "archive-all",
198 ])
199 .output();
200
201 if std::env::var("INTEGRATION_TESTS_DEBUG") == Ok("1".to_string()) {
202 eprintln!("make_seed_archive output: {output}");
203 }
204
205 Ok(())
206}