nextest_runner/reuse_build/unarchiver.rs

// Copyright (c) The nextest Contributors
// SPDX-License-Identifier: MIT OR Apache-2.0

use super::{
    ArchiveEvent, ArchiveFormat, BINARIES_METADATA_FILE_NAME, CARGO_METADATA_FILE_NAME,
    LIBDIRS_BASE_DIR, LibdirMapper, PlatformLibdirMapper,
};
use crate::{
    errors::{ArchiveExtractError, ArchiveReadError},
    helpers::convert_rel_path_to_main_sep,
    list::BinaryList,
};
use camino::{Utf8Component, Utf8Path, Utf8PathBuf};
use camino_tempfile::Utf8TempDir;
use guppy::{CargoMetadata, graph::PackageGraph};
use nextest_metadata::BinaryListSummary;
use std::{
    fs,
    io::{self, Seek},
    time::Instant,
};

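/// Unpacks a nextest archive from an open file into a destination directory.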
#[derive(Debug)]
pub(crate) struct Unarchiver<'a> {
    file: &'a mut fs::File,
    format: ArchiveFormat,
}

impl<'a> Unarchiver<'a> {
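    /// Creates a new `Unarchiver` from an open archive file and its format.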
    pub(crate) fn new(file: &'a mut fs::File, format: ArchiveFormat) -> Self {
        Self { file, format }
    }

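    /// Extracts the archive to `dest`, reporting progress through `callback`.
    ///
    /// A minimal usage sketch, assuming an archive file opened by the caller (the path and
    /// the no-op callback below are illustrative only):
    ///
    /// ```ignore
    /// let mut file = std::fs::File::open("my-archive.tar.zst")?;
    /// let mut unarchiver = Unarchiver::new(&mut file, ArchiveFormat::TarZst);
    /// let info = unarchiver.extract(
    ///     ExtractDestination::TempDir { persist: false },
    ///     |_event| Ok(()),
    /// )?;
    /// println!("extracted to {}", info.dest_dir);
    /// ```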
    pub(crate) fn extract<F>(
        &mut self,
        dest: ExtractDestination,
        mut callback: F,
    ) -> Result<ExtractInfo, ArchiveExtractError>
    where
        F: for<'e> FnMut(ArchiveEvent<'e>) -> io::Result<()>,
    {
        let (dest_dir, temp_dir) = match dest {
            ExtractDestination::TempDir { persist } => {
                // Create a new temporary directory and extract contents to it.
                let temp_dir = camino_tempfile::Builder::new()
                    .prefix("nextest-archive-")
                    .tempdir()
                    .map_err(ArchiveExtractError::TempDirCreate)?;

                let dest_dir: Utf8PathBuf = temp_dir.path().to_path_buf();
                let dest_dir = temp_dir.path().canonicalize_utf8().map_err(|error| {
                    ArchiveExtractError::DestDirCanonicalization {
                        dir: dest_dir,
                        error,
                    }
                })?;

                let temp_dir = if persist {
                    // Persist the temporary directory.
                    let _ = temp_dir.into_path();
                    None
                } else {
                    Some(temp_dir)
                };

                (dest_dir, temp_dir)
            }
            ExtractDestination::Destination { dir, overwrite } => {
                // Extract contents to the destination directory.
                let dest_dir = dir
                    .canonicalize_utf8()
                    .map_err(|error| ArchiveExtractError::DestDirCanonicalization { dir, error })?;

                let dest_target = dest_dir.join("target");
                if dest_target.exists() && !overwrite {
                    return Err(ArchiveExtractError::DestinationExists(dest_target));
                }

                (dest_dir, None)
            }
        };

        let start_time = Instant::now();

        // Extract the archive.
        self.file
            .rewind()
            .map_err(|error| ArchiveExtractError::Read(ArchiveReadError::Io(error)))?;
        let mut archive_reader =
            ArchiveReader::new(self.file, self.format).map_err(ArchiveExtractError::Read)?;

        // Will be filled out by the for loop below.
        let mut binary_list = None;
        let mut graph_data = None;
        let mut host_libdir = PlatformLibdirMapper::Unavailable;
        let mut target_libdir = PlatformLibdirMapper::Unavailable;
        let binaries_metadata_path = Utf8Path::new(BINARIES_METADATA_FILE_NAME);
        let cargo_metadata_path = Utf8Path::new(CARGO_METADATA_FILE_NAME);

        let mut file_count = 0;

        for entry in archive_reader
            .entries()
            .map_err(ArchiveExtractError::Read)?
        {
            file_count += 1;
            let (mut entry, path) = entry.map_err(ArchiveExtractError::Read)?;

            entry
                .unpack_in(&dest_dir)
                .map_err(|error| ArchiveExtractError::WriteFile {
                    path: path.clone(),
                    error,
                })?;

            // For archives created by nextest, binaries_metadata_path should be towards the beginning
            // so this should report the ExtractStarted event instantly.
            if path == binaries_metadata_path {
                // Try reading the binary list from the file on disk.
                let mut file = fs::File::open(dest_dir.join(binaries_metadata_path))
                    .map_err(|error| ArchiveExtractError::WriteFile { path, error })?;

                let summary: BinaryListSummary =
                    serde_json::from_reader(&mut file).map_err(|error| {
                        ArchiveExtractError::Read(ArchiveReadError::MetadataDeserializeError {
                            path: binaries_metadata_path,
                            error,
                        })
                    })?;

                let this_binary_list = BinaryList::from_summary(summary)?;
                let test_binary_count = this_binary_list.rust_binaries.len();
                let non_test_binary_count =
                    this_binary_list.rust_build_meta.non_test_binaries.len();
                let build_script_out_dir_count =
                    this_binary_list.rust_build_meta.build_script_out_dirs.len();
                let linked_path_count = this_binary_list.rust_build_meta.linked_paths.len();

                // TODO: also store a manifest of extra paths, and report them here.

                // Report begin extraction.
                callback(ArchiveEvent::ExtractStarted {
                    test_binary_count,
                    non_test_binary_count,
                    build_script_out_dir_count,
                    linked_path_count,
                    dest_dir: &dest_dir,
                })
                .map_err(ArchiveExtractError::ReporterIo)?;

                binary_list = Some(this_binary_list);
            } else if path == cargo_metadata_path {
                // Parse the input Cargo metadata as a `PackageGraph`.
                let json = fs::read_to_string(dest_dir.join(cargo_metadata_path))
                    .map_err(|error| ArchiveExtractError::WriteFile { path, error })?;

                // Doing this in multiple steps results in better error messages.
                let cargo_metadata: CargoMetadata =
                    serde_json::from_str(&json).map_err(|error| {
                        ArchiveExtractError::Read(ArchiveReadError::MetadataDeserializeError {
                            path: cargo_metadata_path,
                            error,
                        })
                    })?;

                let package_graph = cargo_metadata.build_graph().map_err(|error| {
                    ArchiveExtractError::Read(ArchiveReadError::PackageGraphConstructError {
                        path: cargo_metadata_path,
                        error,
                    })
                })?;
                graph_data = Some((json, package_graph));
                continue;
            } else if let Ok(suffix) = path.strip_prefix(LIBDIRS_BASE_DIR) {
                if suffix.starts_with("host") {
                    host_libdir = PlatformLibdirMapper::Path(dest_dir.join(
                        convert_rel_path_to_main_sep(&Utf8Path::new(LIBDIRS_BASE_DIR).join("host")),
                    ));
                } else if suffix.starts_with("target/0") {
                    // Currently we only support one target, so just check explicitly for target/0.
                    target_libdir =
                        PlatformLibdirMapper::Path(dest_dir.join(convert_rel_path_to_main_sep(
                            &Utf8Path::new(LIBDIRS_BASE_DIR).join("target/0"),
                        )));
                }
            }
        }

        let binary_list = match binary_list {
            Some(binary_list) => binary_list,
            None => {
                return Err(ArchiveExtractError::Read(
                    ArchiveReadError::MetadataFileNotFound(binaries_metadata_path),
                ));
            }
        };

        let (cargo_metadata_json, graph) = match graph_data {
            Some(x) => x,
            None => {
                return Err(ArchiveExtractError::Read(
                    ArchiveReadError::MetadataFileNotFound(cargo_metadata_path),
                ));
            }
        };

        let elapsed = start_time.elapsed();
        // Report end extraction.
        callback(ArchiveEvent::Extracted {
            file_count,
            dest_dir: &dest_dir,
            elapsed,
        })
        .map_err(ArchiveExtractError::ReporterIo)?;

        Ok(ExtractInfo {
            dest_dir,
            temp_dir,
            binary_list,
            cargo_metadata_json,
            graph,
            libdir_mapper: LibdirMapper {
                host: host_libdir,
                target: target_libdir,
            },
        })
    }
}

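/// Information about the contents of an extracted archive, returned by [`Unarchiver::extract`].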
#[derive(Debug)]
pub(crate) struct ExtractInfo {
    /// The destination directory.
    pub dest_dir: Utf8PathBuf,

    /// An optional [`Utf8TempDir`], used for cleanup.
    pub temp_dir: Option<Utf8TempDir>,

    /// The [`BinaryList`] read from the archive.
    pub binary_list: BinaryList,

    /// The Cargo metadata JSON.
    pub cargo_metadata_json: String,

    /// The [`PackageGraph`] read from the archive.
    pub graph: PackageGraph,

    /// A remapper for the Rust libdir.
    pub libdir_mapper: LibdirMapper,
}

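/// A reader over the entries of a zstd-compressed tar archive.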
struct ArchiveReader<'a> {
    archive: tar::Archive<zstd::Decoder<'static, io::BufReader<&'a mut fs::File>>>,
}

impl<'a> ArchiveReader<'a> {
    fn new(file: &'a mut fs::File, format: ArchiveFormat) -> Result<Self, ArchiveReadError> {
        let archive = match format {
            ArchiveFormat::TarZst => {
                let decoder = zstd::Decoder::new(file).map_err(ArchiveReadError::Io)?;
                tar::Archive::new(decoder)
            }
        };
        Ok(Self { archive })
    }

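    /// Returns an iterator over archive entries and their paths, validating each entry's
    /// UTF-8 path, "target" prefix, path components, and header checksum as it is read.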
    fn entries<'r>(
        &'r mut self,
    ) -> Result<
        impl Iterator<Item = Result<(ArchiveEntry<'r, 'a>, Utf8PathBuf), ArchiveReadError>>,
        ArchiveReadError,
    > {
        let entries = self.archive.entries().map_err(ArchiveReadError::Io)?;
        Ok(entries.map(|entry| {
            let entry = entry.map_err(ArchiveReadError::Io)?;

            // Validation: entry paths must be valid UTF-8.
            let path = entry_path(&entry)?;

            // Validation: paths start with "target".
            if !path.starts_with("target") {
                return Err(ArchiveReadError::NoTargetPrefix(path));
            }

            // Validation: paths only contain normal components.
            for component in path.components() {
                match component {
                    Utf8Component::Normal(_) => {}
                    other => {
                        return Err(ArchiveReadError::InvalidComponent {
                            path: path.clone(),
                            component: other.as_str().to_owned(),
                        });
                    }
                }
            }

            // Validation: checksum matches.
            let mut header = entry.header().clone();
            let actual_cksum = header
                .cksum()
                .map_err(|error| ArchiveReadError::ChecksumRead {
                    path: path.clone(),
                    error,
                })?;

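            // `set_cksum` recomputes the checksum from the cloned header's contents, so
            // reading it back yields the value the entry should carry; comparing it against
            // the stored checksum below detects header corruption.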
            header.set_cksum();
            let expected_cksum = header
                .cksum()
                .expect("checksum that was just set can't be invalid");

            if expected_cksum != actual_cksum {
                return Err(ArchiveReadError::InvalidChecksum {
                    path,
                    expected: expected_cksum,
                    actual: actual_cksum,
                });
            }

            Ok((entry, path))
        }))
    }
}

/// Given an entry, returns its path as a `Utf8Path`.
fn entry_path(entry: &ArchiveEntry<'_, '_>) -> Result<Utf8PathBuf, ArchiveReadError> {
    let path_bytes = entry.path_bytes();
    let path_str = std::str::from_utf8(&path_bytes)
        .map_err(|_| ArchiveReadError::NonUtf8Path(path_bytes.to_vec()))?;
    let utf8_path = Utf8Path::new(path_str);
    Ok(utf8_path.to_owned())
}

/// Where to extract a nextest archive to.
#[derive(Clone, Debug)]
pub enum ExtractDestination {
    /// Extract the archive to a new temporary directory.
    TempDir {
        /// Whether to persist the temporary directory at the end of execution.
        persist: bool,
    },
    /// Extract the archive to a custom destination.
    Destination {
        /// The directory to extract to.
        dir: Utf8PathBuf,
        /// Whether to overwrite existing contents.
        overwrite: bool,
    },
}

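/// A single entry within a zstd-compressed tar archive.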
type ArchiveEntry<'r, 'a> = tar::Entry<'r, zstd::Decoder<'static, io::BufReader<&'a mut fs::File>>>;