nextest_runner/reuse_build/
unarchiver.rs

1// Copyright (c) The nextest Contributors
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use super::{
5    ArchiveEvent, ArchiveFormat, BINARIES_METADATA_FILE_NAME, CARGO_METADATA_FILE_NAME,
6    LIBDIRS_BASE_DIR, LibdirMapper, PlatformLibdirMapper,
7};
8use crate::{
9    errors::{ArchiveExtractError, ArchiveReadError},
10    helpers::convert_rel_path_to_main_sep,
11    list::BinaryList,
12};
13use camino::{Utf8Component, Utf8Path, Utf8PathBuf};
14use camino_tempfile::Utf8TempDir;
15use guppy::{CargoMetadata, graph::PackageGraph};
16use nextest_metadata::BinaryListSummary;
17use std::{
18    fs,
19    io::{self, Seek},
20    time::Instant,
21};
22
23#[derive(Debug)]
24pub(crate) struct Unarchiver<'a> {
25    file: &'a mut fs::File,
26    format: ArchiveFormat,
27}
28
29impl<'a> Unarchiver<'a> {
30    pub(crate) fn new(file: &'a mut fs::File, format: ArchiveFormat) -> Self {
31        Self { file, format }
32    }
33
34    pub(crate) fn extract<F>(
35        &mut self,
36        dest: ExtractDestination,
37        mut callback: F,
38    ) -> Result<ExtractInfo, ArchiveExtractError>
39    where
40        F: for<'e> FnMut(ArchiveEvent<'e>) -> io::Result<()>,
41    {
42        let (dest_dir, temp_dir) = match dest {
43            ExtractDestination::TempDir { persist } => {
44                // Create a new temporary directory and extract contents to it.
45                let temp_dir = camino_tempfile::Builder::new()
46                    .prefix("nextest-archive-")
47                    .tempdir()
48                    .map_err(ArchiveExtractError::TempDirCreate)?;
49
50                let dest_dir: Utf8PathBuf = temp_dir.path().to_path_buf();
51                let dest_dir = temp_dir.path().canonicalize_utf8().map_err(|error| {
52                    ArchiveExtractError::DestDirCanonicalization {
53                        dir: dest_dir,
54                        error,
55                    }
56                })?;
57
58                let temp_dir = if persist {
59                    // Persist the temporary directory.
60                    let _ = temp_dir.keep();
61                    None
62                } else {
63                    Some(temp_dir)
64                };
65
66                (dest_dir, temp_dir)
67            }
68            ExtractDestination::Destination { dir, overwrite } => {
69                // Extract contents to the destination directory.
70                let dest_dir = dir
71                    .canonicalize_utf8()
72                    .map_err(|error| ArchiveExtractError::DestDirCanonicalization { dir, error })?;
73
74                let dest_target = dest_dir.join("target");
75                if dest_target.exists() && !overwrite {
76                    return Err(ArchiveExtractError::DestinationExists(dest_target));
77                }
78
79                (dest_dir, None)
80            }
81        };
82
83        let start_time = Instant::now();
84
85        // Extract the archive.
86        self.file
87            .rewind()
88            .map_err(|error| ArchiveExtractError::Read(ArchiveReadError::Io(error)))?;
89        let mut archive_reader =
90            ArchiveReader::new(self.file, self.format).map_err(ArchiveExtractError::Read)?;
91
92        // Will be filled out by the for loop below.
93        let mut binary_list = None;
94        let mut graph_data = None;
95        let mut host_libdir = PlatformLibdirMapper::Unavailable;
96        let mut target_libdir = PlatformLibdirMapper::Unavailable;
97        let binaries_metadata_path = Utf8Path::new(BINARIES_METADATA_FILE_NAME);
98        let cargo_metadata_path = Utf8Path::new(CARGO_METADATA_FILE_NAME);
99
100        let mut file_count = 0;
101
102        for entry in archive_reader
103            .entries()
104            .map_err(ArchiveExtractError::Read)?
105        {
106            file_count += 1;
107            let (mut entry, path) = entry.map_err(ArchiveExtractError::Read)?;
108
109            entry
110                .unpack_in(&dest_dir)
111                .map_err(|error| ArchiveExtractError::WriteFile {
112                    path: path.clone(),
113                    error,
114                })?;
115
116            // For archives created by nextest, binaries_metadata_path should be towards the beginning
117            // so this should report the ExtractStarted event instantly.
118            if path == binaries_metadata_path {
119                // Try reading the binary list from the file on disk.
120                let mut file = fs::File::open(dest_dir.join(binaries_metadata_path))
121                    .map_err(|error| ArchiveExtractError::WriteFile { path, error })?;
122
123                let summary: BinaryListSummary =
124                    serde_json::from_reader(&mut file).map_err(|error| {
125                        ArchiveExtractError::Read(ArchiveReadError::MetadataDeserializeError {
126                            path: binaries_metadata_path,
127                            error,
128                        })
129                    })?;
130
131                let this_binary_list = BinaryList::from_summary(summary)?;
132                let test_binary_count = this_binary_list.rust_binaries.len();
133                let non_test_binary_count =
134                    this_binary_list.rust_build_meta.non_test_binaries.len();
135                let build_script_out_dir_count =
136                    this_binary_list.rust_build_meta.build_script_out_dirs.len();
137                let linked_path_count = this_binary_list.rust_build_meta.linked_paths.len();
138
139                // TODO: also store a manifest of extra paths, and report them here.
140
141                // Report begin extraction.
142                callback(ArchiveEvent::ExtractStarted {
143                    test_binary_count,
144                    non_test_binary_count,
145                    build_script_out_dir_count,
146                    linked_path_count,
147                    dest_dir: &dest_dir,
148                })
149                .map_err(ArchiveExtractError::ReporterIo)?;
150
151                binary_list = Some(this_binary_list);
152            } else if path == cargo_metadata_path {
153                // Parse the input Cargo metadata as a `PackageGraph`.
154                let json = fs::read_to_string(dest_dir.join(cargo_metadata_path))
155                    .map_err(|error| ArchiveExtractError::WriteFile { path, error })?;
156
157                // Doing this in multiple steps results in better error messages.
158                let cargo_metadata: CargoMetadata =
159                    serde_json::from_str(&json).map_err(|error| {
160                        ArchiveExtractError::Read(ArchiveReadError::MetadataDeserializeError {
161                            path: binaries_metadata_path,
162                            error,
163                        })
164                    })?;
165
166                let package_graph = cargo_metadata.build_graph().map_err(|error| {
167                    ArchiveExtractError::Read(ArchiveReadError::PackageGraphConstructError {
168                        path: cargo_metadata_path,
169                        error,
170                    })
171                })?;
172                graph_data = Some((json, package_graph));
173                continue;
174            } else if let Ok(suffix) = path.strip_prefix(LIBDIRS_BASE_DIR) {
175                if suffix.starts_with("host") {
176                    host_libdir = PlatformLibdirMapper::Path(dest_dir.join(
177                        convert_rel_path_to_main_sep(&Utf8Path::new(LIBDIRS_BASE_DIR).join("host")),
178                    ));
179                } else if suffix.starts_with("target/0") {
180                    // Currently we only support one target, so just check explicitly for target/0.
181                    target_libdir =
182                        PlatformLibdirMapper::Path(dest_dir.join(convert_rel_path_to_main_sep(
183                            &Utf8Path::new(LIBDIRS_BASE_DIR).join("target/0"),
184                        )));
185                }
186            }
187        }
188
189        let binary_list = binary_list.ok_or_else(|| {
190            ArchiveExtractError::Read(ArchiveReadError::MetadataFileNotFound(
191                binaries_metadata_path,
192            ))
193        })?;
194
195        let (cargo_metadata_json, graph) = graph_data.ok_or_else(|| {
196            ArchiveExtractError::Read(ArchiveReadError::MetadataFileNotFound(cargo_metadata_path))
197        })?;
198
199        let elapsed = start_time.elapsed();
200        // Report end extraction.
201        callback(ArchiveEvent::Extracted {
202            file_count,
203            dest_dir: &dest_dir,
204            elapsed,
205        })
206        .map_err(ArchiveExtractError::ReporterIo)?;
207
208        Ok(ExtractInfo {
209            dest_dir,
210            temp_dir,
211            binary_list,
212            cargo_metadata_json,
213            graph,
214            libdir_mapper: LibdirMapper {
215                host: host_libdir,
216                target: target_libdir,
217            },
218        })
219    }
220}
221
222#[derive(Debug)]
223pub(crate) struct ExtractInfo {
224    /// The destination directory.
225    pub dest_dir: Utf8PathBuf,
226
227    /// An optional [`Utf8TempDir`], used for cleanup.
228    pub temp_dir: Option<Utf8TempDir>,
229
230    /// The [`BinaryList`] read from the archive.
231    pub binary_list: BinaryList,
232
233    /// The Cargo metadata JSON.
234    pub cargo_metadata_json: String,
235
236    /// The [`PackageGraph`] read from the archive.
237    pub graph: PackageGraph,
238
239    /// A remapper for the Rust libdir.
240    pub libdir_mapper: LibdirMapper,
241}
242
243struct ArchiveReader<'a> {
244    archive: tar::Archive<zstd::Decoder<'static, io::BufReader<&'a mut fs::File>>>,
245}
246
247impl<'a> ArchiveReader<'a> {
248    fn new(file: &'a mut fs::File, format: ArchiveFormat) -> Result<Self, ArchiveReadError> {
249        let archive = match format {
250            ArchiveFormat::TarZst => {
251                let decoder = zstd::Decoder::new(file).map_err(ArchiveReadError::Io)?;
252                tar::Archive::new(decoder)
253            }
254        };
255        Ok(Self { archive })
256    }
257
258    fn entries<'r>(
259        &'r mut self,
260    ) -> Result<
261        impl Iterator<Item = Result<(ArchiveEntry<'r, 'a>, Utf8PathBuf), ArchiveReadError>>,
262        ArchiveReadError,
263    > {
264        let entries = self.archive.entries().map_err(ArchiveReadError::Io)?;
265        Ok(entries.map(|entry| {
266            let entry = entry.map_err(ArchiveReadError::Io)?;
267
268            // Validation: entry paths must be valid UTF-8.
269            let path = entry_path(&entry)?;
270
271            // Validation: paths start with "target".
272            if !path.starts_with("target") {
273                return Err(ArchiveReadError::NoTargetPrefix(path));
274            }
275
276            // Validation: paths only contain normal components.
277            for component in path.components() {
278                match component {
279                    Utf8Component::Normal(_) => {}
280                    other => {
281                        return Err(ArchiveReadError::InvalidComponent {
282                            path: path.clone(),
283                            component: other.as_str().to_owned(),
284                        });
285                    }
286                }
287            }
288
289            // Validation: checksum matches.
290            let mut header = entry.header().clone();
291            let actual_cksum = header
292                .cksum()
293                .map_err(|error| ArchiveReadError::ChecksumRead {
294                    path: path.clone(),
295                    error,
296                })?;
297
298            header.set_cksum();
299            let expected_cksum = header
300                .cksum()
301                .expect("checksum that was just set can't be invalid");
302
303            if expected_cksum != actual_cksum {
304                return Err(ArchiveReadError::InvalidChecksum {
305                    path,
306                    expected: expected_cksum,
307                    actual: actual_cksum,
308                });
309            }
310
311            Ok((entry, path))
312        }))
313    }
314}
315
316/// Given an entry, returns its path as a `Utf8Path`.
317fn entry_path(entry: &ArchiveEntry<'_, '_>) -> Result<Utf8PathBuf, ArchiveReadError> {
318    let path_bytes = entry.path_bytes();
319    let path_str = std::str::from_utf8(&path_bytes)
320        .map_err(|_| ArchiveReadError::NonUtf8Path(path_bytes.to_vec()))?;
321    let utf8_path = Utf8Path::new(path_str);
322    Ok(utf8_path.to_owned())
323}
324
325/// Where to extract a nextest archive to.
326#[derive(Clone, Debug)]
327pub enum ExtractDestination {
328    /// Extract the archive to a new temporary directory.
329    TempDir {
330        /// Whether to persist the temporary directory at the end of execution.
331        persist: bool,
332    },
333    /// Extract the archive to a custom destination.
334    Destination {
335        /// The directory to extract to.
336        dir: Utf8PathBuf,
337        /// Whether to overwrite existing contents.
338        overwrite: bool,
339    },
340}
341
342type ArchiveEntry<'r, 'a> = tar::Entry<'r, zstd::Decoder<'static, io::BufReader<&'a mut fs::File>>>;