integration_tests/
seed.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
// Copyright (c) The nextest Contributors
// SPDX-License-Identifier: MIT OR Apache-2.0

use crate::{env::set_env_vars, nextest_cli::CargoNextestCli};
use camino::{Utf8Path, Utf8PathBuf};
use color_eyre::eyre::Context;
use fs_err as fs;
use sha2::{Digest, Sha256};
use std::{collections::BTreeMap, time::SystemTime};

/// Returns the path to the `fixtures/nextest-tests` directory.
///
/// The path is derived from this crate's `CARGO_MANIFEST_DIR` (captured at
/// compile time): the fixtures live in a sibling of the crate directory, under
/// the manifest directory's parent.
///
/// # Panics
///
/// Panics if the manifest directory has no parent.
pub fn nextest_tests_dir() -> Utf8PathBuf {
    let manifest_dir = Utf8Path::new(env!("CARGO_MANIFEST_DIR"));
    let parent_dir = manifest_dir.parent().unwrap();
    parent_dir.join("fixtures/nextest-tests")
}

/// A SHA-256 digest, stored as its 32 raw bytes.
///
/// We use SHA-256 because other parts of nextest do the same -- this can easily
/// be changed to another hash function if needed.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Sha256Hash([u8; 32]);

/// Displays the hash as a hex string, as produced by `hex::encode`.
impl std::fmt::Display for Sha256Hash {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` is what `String`'s own `Display` impl uses, so width, fill and
        // alignment flags are honored exactly as before.
        let hex_digest = hex::encode(self.0);
        f.pad(&hex_digest)
    }
}

/// Computes the hash of a directory and its contents, in a way that hopefully
/// represents what Cargo does somewhat.
///
/// With any cache, invalidation is an issue -- specifically, Cargo has its own
/// notion of cache invalidation. Ideally, we could ask Cargo to give us a hash
/// for a particular command that deterministically says "a rebuild will happen
/// if and only if this hash changes". But that doesn't exist with stable Rust
/// as of this writing (Rust 1.83), so we must guess at what Cargo does.
///
/// We take some basic precautions:
///
/// * preserving mtimes while copying the source directory
/// * using both mtimes and hashes while computing the overall hash below.
///
/// Beyond that, it's possible for this implementation to have issues in three
/// different ways:
///
/// ## 1. Cargo invalidates cache, but we don't
///
/// In this case, the cache becomes useless -- Cargo will rebuild the project
/// anyway. This can cause flaky tests (see `__NEXTEST_ALT_TARGET_DIR` for a fix
/// to a flake that was caught because of this divergence).
///
/// To be clear, any divergence merely due to the cached seed not being used is
/// a bug. That was the case with the issue which `__NEXTEST_ALT_TARGET_DIR`
/// works around.
///
/// ## 2. We invalidate our cache, but Cargo doesn't
///
/// In this case, we'll regenerate a new seed but Cargo will reuse it. This
/// isn't too bad since generating the seed is a one-time cost.
///
/// ## 3. Something about the way nextest generates archives changes
///
/// This is the most difficult case to handle, because a brute hash (just hash
/// all of the files in the nextest repo) would invalidate far too often. So if
/// you're altering this code, you have to be careful to remove the cache as
/// well. Hopefully CI (which doesn't cache the seed archive) will catch issues.
///
/// ---
///
/// In general, this implementation appears to be pretty reliable, though
/// occasionally the cache has not worked (case 1 above) in Windows CI.
pub fn compute_dir_hash(dir: impl AsRef<Utf8Path>) -> color_eyre::Result<Sha256Hash> {
    let files = collect_all_files(dir.as_ref(), true)?;
    let mut hasher = Sha256::new();

    // Hash the path to `cargo` to ensure that the hash is different for
    // different Rust versions.
    hasher.update(b"nextest:cargo-path\0");
    hasher.update(
        std::env::var("CARGO")
            .expect("this should be run under cargo")
            .as_bytes(),
    );
    hasher.update([0, 0]);
    for (file_name, metadata) in files {
        hasher.update(file_name.as_str());
        hasher.update([0]);
        // Convert the system time to a number to hash.
        let timestamp = metadata
            .mtime
            .duration_since(SystemTime::UNIX_EPOCH)
            .expect("file's mtime after 1970-01-01");
        hasher.update(timestamp.as_nanos().to_le_bytes());
        hasher.update(metadata.hash.0);
        hasher.update([0]);
    }
    Ok(Sha256Hash(hasher.finalize().into()))
}

// Hash and collect metadata about all the files in a directory.
//
// Walks `dir` recursively (using an explicit stack) and records, for every
// regular file, its mtime and the SHA-256 hash of its contents, keyed by the
// file's path. Entries that are neither directories nor regular files are
// skipped.
//
// If `root` is true, an entry named `target` that sits directly inside `dir` is
// ignored. Entries named `target` deeper in the tree are still visited, so that
// changes to them are reflected in the result.
//
// Using a `BTreeMap` ensures a deterministic order of files above.
fn collect_all_files(
    dir: &Utf8Path,
    root: bool,
) -> color_eyre::Result<BTreeMap<Utf8PathBuf, FileMetadata>> {
    let root_dir = dir;
    let mut stack = vec![root_dir.to_path_buf()];
    let mut hashes = BTreeMap::new();

    // TODO: parallelize this?
    while let Some(current) = stack.pop() {
        // The `target` exclusion only applies to direct children of the
        // starting directory, not to every level of the walk.
        let skip_target = root && current.as_path() == root_dir;

        let entries = current
            .read_dir_utf8()
            .wrap_err_with(|| format!("failed to read directory {current}"))?;
        for entry in entries {
            let entry =
                entry.wrap_err_with(|| format!("failed to read entry from directory {current}"))?;
            let ty = entry
                .file_type()
                .wrap_err_with(|| format!("failed to get file type for entry {}", entry.path()))?;

            // Ignore a pre-existing `target` directory at the root.
            if skip_target && entry.file_name() == "target" {
                continue;
            }

            if ty.is_dir() {
                stack.push(entry.into_path());
            } else if ty.is_file() {
                let metadata = entry.metadata().wrap_err_with(|| {
                    format!("failed to get metadata for file {}", entry.path())
                })?;

                // Also include the mtime, because Cargo uses the mtime to
                // determine if a local file has changed. If there were a way to
                // tell Cargo to ignore mtimes, we could remove this.
                let mtime = metadata.modified().wrap_err_with(|| {
                    format!("failed to get modified time for file {}", entry.path())
                })?;
                let path = entry.into_path();
                let contents = fs::read(&path)?;
                let hash = Sha256Hash(Sha256::digest(&contents).into());
                hashes.insert(path, FileMetadata { mtime, hash });
            }
        }
    }

    Ok(hashes)
}

/// Per-file information gathered while walking a directory.
#[derive(Debug, Clone)]
struct FileMetadata {
    /// Last-modified time of the file, as reported by its metadata.
    mtime: SystemTime,
    /// SHA-256 hash of the file's contents.
    hash: Sha256Hash,
}

/// Returns the path at which the seed archive for `hash` is stored.
///
/// The archive lives in a per-user directory under the standard temp
/// directory: `<temp>/nextest-tests-seed-<username>/seed-<hash>.tar.zst`.
///
/// # Panics
///
/// Panics if the standard temp directory is not valid UTF-8.
pub fn get_seed_archive_name(hash: Sha256Hash) -> Utf8PathBuf {
    // Start from the std temp directory and descend into it.
    let system_temp = std::env::temp_dir();
    let mut archive_path = Utf8PathBuf::try_from(system_temp).expect("temp dir is utf-8");

    // One directory per user, then one archive per hash.
    archive_path.push(format!("nextest-tests-seed-{}", whoami::username()));
    archive_path.push(format!("seed-{hash}.tar.zst"));
    archive_path
}

/// Builds a nextest archive of the workspace at `workspace_dir` and writes it
/// to `file_name`.
///
/// The build uses a `target` directory inside a freshly created temporary
/// directory, and the `archive-all` profile so that the entire target dir ends
/// up in the archive. The parent directory of `file_name` is created if needed.
///
/// If the `INTEGRATION_TESTS_DEBUG` environment variable is set to `1`, the
/// output of the archive command is printed to stderr. This function does not
/// otherwise inspect that output.
///
/// # Errors
///
/// Returns an error if a directory cannot be created or if the nextest CLI
/// cannot be set up.
///
/// # Panics
///
/// Panics if `file_name` has no parent.
pub fn make_seed_archive(workspace_dir: &Utf8Path, file_name: &Utf8Path) -> color_eyre::Result<()> {
    // The directory that will hold the archive may not exist yet.
    let archive_parent = file_name.parent().unwrap();
    fs::create_dir_all(archive_parent)?;

    // The build happens in a scratch directory with its own target dir.
    let build_root = camino_tempfile::Builder::new()
        .prefix("nextest-seed-build-")
        .tempdir()
        .wrap_err("failed to create temporary directory")?;
    let target_dir = build_root.path().join("target");
    fs::create_dir_all(&target_dir)?;

    // Obtain the CLI *before* setting the environment variables -- this avoids
    // rebuilds due to the variables changing.
    let mut cli = CargoNextestCli::for_script()?;

    // TODO: We shouldn't alter the global state of this process -- instead,
    // set_env_vars should be part of nextest_cli.rs.
    set_env_vars();

    // Build a nextest archive, using the temporary directory as the target dir.
    let manifest_path = workspace_dir.join("Cargo.toml");
    let archive_args = [
        "--manifest-path",
        manifest_path.as_str(),
        "archive",
        "--archive-file",
        file_name.as_str(),
        "--workspace",
        "--all-targets",
        "--target-dir",
        target_dir.as_str(),
        // Use this profile to ensure that the entire target dir is included.
        "--profile",
        "archive-all",
    ];
    let output = cli.args(archive_args).output();

    let debug_enabled = matches!(
        std::env::var("INTEGRATION_TESTS_DEBUG").as_deref(),
        Ok("1")
    );
    if debug_enabled {
        eprintln!("make_seed_archive output: {output}");
    }

    Ok(())
}