integration_tests/seed.rs
1// Copyright (c) The nextest Contributors
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use crate::{env::set_env_vars_for_script, nextest_cli::CargoNextestCli};
5use camino::{Utf8Path, Utf8PathBuf};
6use color_eyre::eyre::Context;
7use fs_err as fs;
8use sha2::{Digest, Sha256};
9use std::{collections::BTreeMap, time::SystemTime};
10
11pub fn fixture_project_dir(workspace_root: &Utf8Path) -> Utf8PathBuf {
12 workspace_root.join("fixtures/fixture-project")
13}
14
15// We use SHA-256 because other parts of nextest do the same -- this can easily
16// be changed to another hash function if needed.
17#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
18pub struct Sha256Hash([u8; 32]);
19
20impl std::fmt::Display for Sha256Hash {
21 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
22 hex::encode(self.0).fmt(f)
23 }
24}
25
26/// Computes the hash of a directory and its contents, in a way that hopefully
27/// represents what Cargo does somewhat.
28///
29/// With any cache, invalidation is an issue -- specifically, Cargo has its own
30/// notion of cache invalidation. Ideally, we could ask Cargo to give us a hash
31/// for a particular command that deterministically says "a rebuild will happen
32/// if and only if this hash changes". But that doesn't exist with stable Rust
33/// as of this writing (Rust 1.83), so we must guess at what Cargo does.
34///
35/// We take some basic precautions:
36///
37/// * preserving mtimes while copying the source directory
38/// * using both mtimes and hashes while computing the overall hash below.
39///
40/// Beyond that, it's possible for this implementation to have issues in three
41/// different ways:
42///
43/// ## 1. Cargo invalidates cache, but we don't
44///
45/// In this case, the cache becomes useless -- Cargo will rebuild the project
46/// anyway. This can cause flaky tests (see `__NEXTEST_ALT_TARGET_DIR` for a fix
47/// to a flake that was caught because of this divergence).
48///
49/// To be clear, any divergence merely due to the cached seed not being used is
50/// a bug. That was the case with the issue which `__NEXTEST_ALT_TARGET_DIR`
51/// works around.
52///
53/// ## 2. We invalidate our cache, but Cargo doesn't
54///
55/// In this case, we'll regenerate a new seed but Cargo will reuse it. This
56/// isn't too bad since generating the seed is a one-time cost.
57///
58/// ## 3. Something about the way nextest generates archives changes
59///
60/// This is the most difficult case to handle, because a brute hash (just hash
61/// all of the files in the nextest repo) would invalidate far too often. So if
62/// you're altering this code, you have to be careful to remove the cache as
63/// well. Hopefully CI (which doesn't cache the seed archive) will catch issues.
64///
65/// ---
66///
67/// In general, this implementation appears to be pretty reliable, though
68/// occasionally the cache has not worked (case 1 above) in Windows CI.
69pub fn compute_dir_hash(dir: impl AsRef<Utf8Path>) -> color_eyre::Result<Sha256Hash> {
70 let files = collect_all_files(dir.as_ref(), true)?;
71 let mut hasher = Sha256::new();
72
73 // Hash the path to `cargo` to ensure that the hash is different for
74 // different Rust versions.
75 hasher.update(b"nextest:cargo-path\0");
76 hasher.update(
77 std::env::var("CARGO")
78 .expect("this should be run under cargo")
79 .as_bytes(),
80 );
81 hasher.update([0, 0]);
82 for (file_name, metadata) in files {
83 hasher.update(file_name.as_str());
84 hasher.update([0]);
85 // Convert the system time to a number to hash.
86 let timestamp = metadata
87 .mtime
88 .duration_since(SystemTime::UNIX_EPOCH)
89 .expect("file's mtime after 1970-01-01");
90 hasher.update(timestamp.as_nanos().to_le_bytes());
91 hasher.update(metadata.hash.0);
92 hasher.update([0]);
93 }
94 Ok(Sha256Hash(hasher.finalize().into()))
95}
96
97// Hash and collect metadata about all the files in a directory.
98//
99// Using a `BTreeMap` ensures a deterministic order of files above.
100fn collect_all_files(
101 dir: &Utf8Path,
102 root: bool,
103) -> color_eyre::Result<BTreeMap<Utf8PathBuf, FileMetadata>> {
104 let mut stack = vec![dir.to_path_buf()];
105 let mut hashes = BTreeMap::new();
106
107 // TODO: parallelize this?
108 while let Some(dir) = stack.pop() {
109 for entry in dir.read_dir_utf8()? {
110 let entry =
111 entry.wrap_err_with(|| format!("failed to read entry from directory {dir}"))?;
112 let ty = entry
113 .file_type()
114 .wrap_err_with(|| format!("failed to get file type for entry {}", entry.path()))?;
115
116 // Ignore a pre-existing `target` directory at the root.
117 if root && entry.path().file_name() == Some("target") {
118 continue;
119 }
120
121 if ty.is_dir() {
122 stack.push(entry.into_path());
123 } else if ty.is_file() {
124 let metadata = entry.metadata().wrap_err_with(|| {
125 format!("failed to get metadata for file {}", entry.path())
126 })?;
127
128 // Also include the mtime, because Cargo uses the mtime to
129 // determine if a local file has changed. If there were a way to
130 // tell Cargo to ignore mtimes, we could remove this.
131 let mtime = metadata.modified().wrap_err_with(|| {
132 format!("failed to get modified time for file {}", entry.path())
133 })?;
134 let path = entry.into_path();
135 let contents = fs::read(&path)?;
136 let hash = Sha256Hash(Sha256::digest(&contents).into());
137 hashes.insert(path, FileMetadata { mtime, hash });
138 }
139 }
140 }
141
142 Ok(hashes)
143}
144
/// Per-file information recorded while walking a directory.
#[derive(Clone, Debug)]
struct FileMetadata {
    /// Last-modified time of the file, taken from its filesystem metadata.
    mtime: SystemTime,
    /// SHA-256 digest of the file's contents.
    hash: Sha256Hash,
}
150
151pub fn get_seed_archive_name(hash: Sha256Hash) -> Utf8PathBuf {
152 // Check in the std temp directory for the seed file.
153 let temp_dir = Utf8PathBuf::try_from(std::env::temp_dir()).expect("temp dir is utf-8");
154 let username = whoami::username().expect("obtained username");
155 let user_dir = temp_dir.join(format!("fixture-project-seed-{username}"));
156 user_dir.join(format!("seed-{hash}.tar.zst"))
157}
158
159pub fn make_seed_archive(workspace_dir: &Utf8Path, file_name: &Utf8Path) -> color_eyre::Result<()> {
160 // Make the directory containing the file name.
161 fs::create_dir_all(file_name.parent().unwrap())?;
162
163 // First, run a build in a temporary directory.
164 let temp_dir = camino_tempfile::Builder::new()
165 .prefix("nextest-seed-build-")
166 .tempdir()
167 .wrap_err("failed to create temporary directory")?;
168 let target_dir = temp_dir.path().join("target");
169 fs::create_dir_all(&target_dir)?;
170
171 // Now build a nextest archive, using the temporary directory as the target dir.
172 let mut cli = CargoNextestCli::for_script()?;
173
174 // Set the environment variables after getting the CLI -- this avoids
175 // rebuilds due to the variables changing.
176 //
177 // TODO: We shouldn't alter the global state of this process -- instead,
178 // set_env_vars_for_script should be part of nextest_cli.rs.
179 set_env_vars_for_script();
180
181 let output = cli
182 .args([
183 "--manifest-path",
184 workspace_dir.join("Cargo.toml").as_str(),
185 "archive",
186 "--archive-file",
187 file_name.as_str(),
188 "--workspace",
189 "--all-targets",
190 "--target-dir",
191 target_dir.as_str(),
192 // Use this profile to ensure that the entire target dir is included.
193 "--profile",
194 "archive-all",
195 ])
196 .output();
197
198 if std::env::var("INTEGRATION_TESTS_DEBUG").as_deref() == Ok("1") {
199 eprintln!("make_seed_archive output: {output}");
200 }
201
202 Ok(())
203}