nextest_runner/record/
cache_dir.rs

1// Copyright (c) The nextest Contributors
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Platform-specific cache directory discovery for nextest records.
5
6use crate::errors::CacheDirError;
7use camino::{Utf8Path, Utf8PathBuf};
8use etcetera::{BaseStrategy, choose_base_strategy};
9use xxhash_rust::xxh3::xxh3_64;
10
/// Maximum length of the encoded workspace path in bytes.
///
/// Encoded names longer than this are truncated (and disambiguated with a
/// hash suffix) by `truncate_with_hash`, keeping directory names a portable
/// size on all platforms.
const MAX_ENCODED_LEN: usize = 96;

/// Length of the hash suffix appended to truncated paths.
///
/// Between the retained prefix bytes and this 8-hex-character suffix, we
/// should ideally have more than enough entropy to disambiguate repos.
const HASH_SUFFIX_LEN: usize = 8;

/// Environment variable to override the nextest cache directory.
///
/// When set, this overrides the platform-specific cache directory. The records
/// directory will be `$NEXTEST_CACHE_DIR/projects/<encoded-workspace>/records/`.
pub const NEXTEST_CACHE_DIR_ENV: &str = "NEXTEST_CACHE_DIR";
25
26/// Returns the platform-specific cache directory for nextest records for a workspace.
27///
28/// If the `NEXTEST_CACHE_DIR` environment variable is set, uses that as the base
29/// cache directory. Otherwise, uses the platform-specific default:
30///
31/// - Linux: `$XDG_CACHE_HOME/nextest/projects/<encoded-workspace>/records/`
32///   or `~/.cache/nextest/projects/<encoded-workspace>/records/`
33/// - macOS: `~/Library/Caches/nextest/projects/<encoded-workspace>/records/`
34/// - Windows: `%LOCALAPPDATA%\nextest\cache\projects\<encoded-workspace>\records\`
35///
36/// The workspace root is canonicalized (symlinks resolved) before being encoded
37/// using `encode_workspace_path` to produce a directory-safe, bijective
38/// representation. This ensures that accessing a workspace via a symlink
39/// produces the same cache directory as accessing it via the real path.
40///
41/// Returns an error if:
42/// - The platform cache directory cannot be determined
43/// - The workspace path cannot be canonicalized (e.g., doesn't exist)
44/// - Any path is not valid UTF-8
45pub fn records_cache_dir(workspace_root: &Utf8Path) -> Result<Utf8PathBuf, CacheDirError> {
46    let base_cache_dir = if let Ok(cache_dir) = std::env::var(NEXTEST_CACHE_DIR_ENV) {
47        Utf8PathBuf::from(cache_dir)
48    } else {
49        let strategy = choose_base_strategy().map_err(|_| CacheDirError::BaseDirStrategy)?;
50        let cache_dir = strategy.cache_dir();
51        let nextest_cache = cache_dir.join("nextest");
52        Utf8PathBuf::from_path_buf(nextest_cache.clone()).map_err(|_| {
53            CacheDirError::CacheDirNotUtf8 {
54                path: nextest_cache,
55            }
56        })?
57    };
58
59    // Canonicalize the workspace root to resolve symlinks. This ensures that
60    // accessing a workspace via a symlink produces the same cache directory.
61    let canonical_workspace =
62        workspace_root
63            .canonicalize_utf8()
64            .map_err(|error| CacheDirError::Canonicalize {
65                workspace_root: workspace_root.to_owned(),
66                error,
67            })?;
68
69    let encoded_workspace = encode_workspace_path(&canonical_workspace);
70    Ok(base_cache_dir
71        .join("projects")
72        .join(&encoded_workspace)
73        .join("records"))
74}
75
76/// Encodes a workspace path into a directory-safe string.
77///
78/// The encoding is bijective (reversible) and produces valid directory names on all
79/// platforms. The encoding scheme uses underscore as an escape character:
80///
81/// - `_` → `__` (escape underscore first)
82/// - `/` → `_s` (Unix path separator)
83/// - `\` → `_b` (Windows path separator)
84/// - `:` → `_c` (Windows drive letter separator)
85/// - `*` → `_a` (asterisk, invalid on Windows)
86/// - `"` → `_q` (double quote, invalid on Windows)
87/// - `<` → `_l` (less than, invalid on Windows)
88/// - `>` → `_g` (greater than, invalid on Windows)
89/// - `|` → `_p` (pipe, invalid on Windows)
90/// - `?` → `_m` (question mark, invalid on Windows)
91///
92/// If the encoded path exceeds 96 bytes, it is truncated at a valid UTF-8 boundary
93/// and an 8-character hash suffix is appended to maintain uniqueness.
94///
95/// # Examples
96///
97/// - `/home/rain/dev/nextest` → `_shome_srain_sdev_snextest`
98/// - `C:\Users\rain\dev` → `C_c_bUsers_brain_bdev`
99/// - `/path_with_underscore` → `_spath__with__underscore`
100/// - `/weird*path?` → `_sweird_apath_m`
101pub fn encode_workspace_path(path: &Utf8Path) -> String {
102    let mut encoded = String::with_capacity(path.as_str().len() * 2);
103
104    for ch in path.as_str().chars() {
105        match ch {
106            '_' => encoded.push_str("__"),
107            '/' => encoded.push_str("_s"),
108            '\\' => encoded.push_str("_b"),
109            ':' => encoded.push_str("_c"),
110            '*' => encoded.push_str("_a"),
111            '"' => encoded.push_str("_q"),
112            '<' => encoded.push_str("_l"),
113            '>' => encoded.push_str("_g"),
114            '|' => encoded.push_str("_p"),
115            '?' => encoded.push_str("_m"),
116            _ => encoded.push(ch),
117        }
118    }
119
120    truncate_with_hash(encoded)
121}
122
123/// Truncates an encoded string to fit within [`MAX_ENCODED_LEN`] bytes.
124///
125/// If the string is already short enough, returns it unchanged. Otherwise,
126/// truncates at a valid UTF-8 boundary and appends an 8-character hash suffix
127/// derived from the full string.
128fn truncate_with_hash(encoded: String) -> String {
129    if encoded.len() <= MAX_ENCODED_LEN {
130        return encoded;
131    }
132
133    // Compute hash of full string before truncation.
134    let hash = xxh3_64(encoded.as_bytes());
135    let hash_suffix = format!("{:08x}", hash & 0xFFFFFFFF);
136
137    // Find the longest valid UTF-8 prefix that fits.
138    let max_prefix_len = MAX_ENCODED_LEN - HASH_SUFFIX_LEN;
139    let bytes = encoded.as_bytes();
140    let truncated_bytes = &bytes[..max_prefix_len.min(bytes.len())];
141
142    // Use utf8_chunks to find the valid UTF-8 portion.
143    let mut valid_len = 0;
144    for chunk in truncated_bytes.utf8_chunks() {
145        valid_len += chunk.valid().len();
146        // Stop at first invalid sequence (which would be an incomplete multi-byte char).
147        if !chunk.invalid().is_empty() {
148            break;
149        }
150    }
151
152    let mut result = encoded[..valid_len].to_string();
153    result.push_str(&hash_suffix);
154    result
155}
156
157/// Decodes a workspace path that was encoded with [`encode_workspace_path`].
158///
159/// Returns `None` if the encoded string is malformed (contains an invalid escape
160/// sequence like `_x` where `x` is not a recognized escape character).
161#[cfg_attr(not(test), expect(dead_code))] // Will be used in replay phase.
162pub fn decode_workspace_path(encoded: &str) -> Option<Utf8PathBuf> {
163    let mut decoded = String::with_capacity(encoded.len());
164    let mut chars = encoded.chars().peekable();
165
166    while let Some(ch) = chars.next() {
167        if ch == '_' {
168            match chars.next() {
169                Some('_') => decoded.push('_'),
170                Some('s') => decoded.push('/'),
171                Some('b') => decoded.push('\\'),
172                Some('c') => decoded.push(':'),
173                Some('a') => decoded.push('*'),
174                Some('q') => decoded.push('"'),
175                Some('l') => decoded.push('<'),
176                Some('g') => decoded.push('>'),
177                Some('p') => decoded.push('|'),
178                Some('m') => decoded.push('?'),
179                // Malformed: `_` at end of string or followed by unknown char.
180                _ => return None,
181            }
182        } else {
183            decoded.push(ch);
184        }
185    }
186
187    Some(Utf8PathBuf::from(decoded))
188}
189
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_records_cache_dir() {
        // Use a real existing path (the temp dir always exists).
        let temp_dir =
            Utf8PathBuf::try_from(std::env::temp_dir()).expect("temp dir should be valid UTF-8");
        let cache_dir = records_cache_dir(&temp_dir).expect("cache directory should be available");

        assert!(
            cache_dir.as_str().contains("nextest"),
            "cache dir should contain 'nextest': {cache_dir}"
        );
        assert!(
            cache_dir.as_str().contains("projects"),
            "cache dir should contain 'projects': {cache_dir}"
        );
        assert!(
            cache_dir.as_str().contains("records"),
            "cache dir should contain 'records': {cache_dir}"
        );
    }

    #[test]
    fn test_records_cache_dir_canonicalizes_symlinks() {
        // Create a temp directory and a symlink pointing to it.
        let temp_dir = camino_tempfile::tempdir().expect("tempdir should be created");
        let real_path = temp_dir.path().to_path_buf();

        // Create a subdirectory to serve as the "workspace".
        let workspace = real_path.join("workspace");
        std::fs::create_dir(&workspace).expect("workspace dir should be created");

        // Create a symlink pointing to the workspace.
        let symlink_path = real_path.join("symlink-to-workspace");

        #[cfg(unix)]
        std::os::unix::fs::symlink(&workspace, &symlink_path)
            .expect("symlink should be created on Unix");

        #[cfg(windows)]
        std::os::windows::fs::symlink_dir(&workspace, &symlink_path)
            .expect("symlink should be created on Windows");

        // Get cache dir via the real path.
        let cache_via_real =
            records_cache_dir(&workspace).expect("cache dir via real path should be available");

        // Get cache dir via the symlink.
        let cache_via_symlink =
            records_cache_dir(&symlink_path).expect("cache dir via symlink should be available");

        // They should be the same because canonicalization resolves the symlink.
        assert_eq!(
            cache_via_real, cache_via_symlink,
            "cache dir should be the same whether accessed via real path or symlink"
        );
    }

    // Basic encoding tests.
    #[test]
    fn test_encode_workspace_path() {
        let cases = [
            ("", ""),
            ("simple", "simple"),
            ("/home/user", "_shome_suser"),
            ("/home/user/project", "_shome_suser_sproject"),
            ("C:\\Users\\name", "C_c_bUsers_bname"),
            ("D:\\dev\\project", "D_c_bdev_bproject"),
            ("/path_with_underscore", "_spath__with__underscore"),
            ("C:\\path_name", "C_c_bpath__name"),
            ("/a/b/c", "_sa_sb_sc"),
            // Windows-invalid characters.
            ("/weird*path", "_sweird_apath"),
            ("/path?query", "_spath_mquery"),
            ("/file<name>", "_sfile_lname_g"),
            ("/path|pipe", "_spath_ppipe"),
            ("/\"quoted\"", "_s_qquoted_q"),
            // All Windows-invalid characters combined.
            ("*\"<>|?", "_a_q_l_g_p_m"),
        ];

        for (input, expected) in cases {
            let encoded = encode_workspace_path(Utf8Path::new(input));
            assert_eq!(
                encoded, expected,
                "encoding failed for {input:?}: expected {expected:?}, got {encoded:?}"
            );
        }
    }

    // Roundtrip tests: encode then decode should return original.
    #[test]
    fn test_encode_decode_roundtrip() {
        let cases = [
            "/home/user/project",
            "C:\\Users\\name\\dev",
            "/path_with_underscore",
            "/_",
            "_/",
            "__",
            "/a_b/c_d",
            "",
            "no_special_chars",
            "/mixed\\path:style",
            // Windows-invalid characters (valid on Unix).
            "/path*with*asterisks",
            "/file?query",
            "/path<with>angles",
            "/pipe|char",
            "/\"quoted\"",
            // All special chars in one path.
            "/all*special?chars<in>one|path\"here\"_end",
        ];

        for original in cases {
            let encoded = encode_workspace_path(Utf8Path::new(original));
            let decoded = decode_workspace_path(&encoded);
            assert_eq!(
                decoded.as_deref(),
                Some(Utf8Path::new(original)),
                "roundtrip failed for {original:?}: encoded={encoded:?}, decoded={decoded:?}"
            );
        }
    }

    // Bijectivity tests: different inputs must produce different outputs.
    #[test]
    fn test_encoding_is_bijective() {
        // These pairs were problematic with the simple dash-based encoding.
        let pairs = [
            ("/-", "-/"),
            ("/a", "_a"),
            ("_s", "/"),
            ("a_", "a/"),
            ("__", "_"),
            ("/", "\\"),
            // New escape sequences for Windows-invalid characters.
            ("_a", "*"),
            ("_q", "\""),
            ("_l", "<"),
            ("_g", ">"),
            ("_p", "|"),
            ("_m", "?"),
            // Ensure Windows-invalid chars don't collide with each other.
            ("*", "?"),
            ("<", ">"),
            ("|", "\""),
        ];

        for (a, b) in pairs {
            let encoded_a = encode_workspace_path(Utf8Path::new(a));
            let encoded_b = encode_workspace_path(Utf8Path::new(b));
            assert_ne!(
                encoded_a, encoded_b,
                "bijectivity violated: {a:?} and {b:?} both encode to {encoded_a:?}"
            );
        }
    }

    // Decode should reject malformed inputs.
    #[test]
    fn test_decode_rejects_malformed() {
        let malformed_inputs = [
            "_",     // underscore at end
            "_x",    // unknown escape sequence
            "foo_",  // underscore at end after content
            "foo_x", // unknown escape in middle
            "_S",    // uppercase S not valid
        ];

        for input in malformed_inputs {
            assert!(
                decode_workspace_path(input).is_none(),
                "should reject malformed input: {input:?}"
            );
        }
    }

    // Valid escape sequences should decode.
    #[test]
    fn test_decode_valid_escapes() {
        let cases = [
            ("__", "_"),
            ("_s", "/"),
            ("_b", "\\"),
            ("_c", ":"),
            ("a__b", "a_b"),
            ("_shome", "/home"),
            // Windows-invalid character escapes.
            ("_a", "*"),
            ("_q", "\""),
            ("_l", "<"),
            ("_g", ">"),
            ("_p", "|"),
            ("_m", "?"),
            // Combined.
            ("_spath_astar_mquery", "/path*star?query"),
        ];

        for (input, expected) in cases {
            let decoded = decode_workspace_path(input);
            assert_eq!(
                decoded.as_deref(),
                Some(Utf8Path::new(expected)),
                "decode failed for {input:?}: expected {expected:?}, got {decoded:?}"
            );
        }
    }

    // Truncation tests.
    #[test]
    fn test_short_paths_not_truncated() {
        // A short path encodes to well under MAX_ENCODED_LEN bytes and must
        // pass through unchanged, with no hash suffix.
        let short_path = "/a/b/c/d";
        let encoded = encode_workspace_path(Utf8Path::new(short_path));
        assert!(
            encoded.len() <= MAX_ENCODED_LEN,
            "short path should not be truncated: {encoded:?} (len={})",
            encoded.len()
        );
        // Should not contain a hash suffix (no truncation occurred).
        assert_eq!(encoded, "_sa_sb_sc_sd");
    }

    #[test]
    fn test_long_paths_truncated_with_hash() {
        // Create a path that will definitely exceed 96 bytes when encoded.
        // Each `/x` becomes `_sx` (3 bytes), so we need > 32 components.
        let long_path = "/a".repeat(50); // 100 bytes raw, 150 bytes encoded
        let encoded = encode_workspace_path(Utf8Path::new(&long_path));

        assert_eq!(
            encoded.len(),
            MAX_ENCODED_LEN,
            "truncated path should be exactly {MAX_ENCODED_LEN} bytes: {encoded:?} (len={})",
            encoded.len()
        );

        // Should end with an 8-character hex hash.
        let hash_suffix = &encoded[encoded.len() - HASH_SUFFIX_LEN..];
        assert!(
            hash_suffix.chars().all(|c| c.is_ascii_hexdigit()),
            "hash suffix should be hex digits: {hash_suffix:?}"
        );
    }

    #[test]
    fn test_truncation_preserves_uniqueness() {
        // Two different long paths should produce different truncated results.
        let path_a = "/a".repeat(50);
        let path_b = "/b".repeat(50);

        let encoded_a = encode_workspace_path(Utf8Path::new(&path_a));
        let encoded_b = encode_workspace_path(Utf8Path::new(&path_b));

        assert_ne!(
            encoded_a, encoded_b,
            "different paths should produce different encodings even when truncated"
        );
    }

    #[test]
    fn test_truncation_with_unicode() {
        // Create a path with multi-byte UTF-8 characters that would be split.
        // '日' is 3 bytes in UTF-8.
        let unicode_path = "/日本語".repeat(20); // Each repeat is 10 bytes raw.
        let encoded = encode_workspace_path(Utf8Path::new(&unicode_path));

        assert!(
            encoded.len() <= MAX_ENCODED_LEN,
            "encoded path should not exceed {MAX_ENCODED_LEN} bytes: len={}",
            encoded.len()
        );

        // Verify the result is valid UTF-8 (this would panic if not).
        let _ = encoded.as_str();

        // Verify the hash suffix is present and valid hex.
        let hash_suffix = &encoded[encoded.len() - HASH_SUFFIX_LEN..];
        assert!(
            hash_suffix.chars().all(|c| c.is_ascii_hexdigit()),
            "hash suffix should be hex digits: {hash_suffix:?}"
        );
    }

    #[test]
    fn test_truncation_boundary_at_96_bytes() {
        // Create paths of varying lengths around the 96-byte boundary.
        // The encoding doubles some characters, so we need to be careful.

        // A path that encodes to exactly 96 bytes should not be truncated.
        // 'a' stays as 'a', so we can use a string of 96 'a's.
        let exactly_96 = "a".repeat(96);
        let encoded = encode_workspace_path(Utf8Path::new(&exactly_96));
        assert_eq!(encoded.len(), 96);
        assert_eq!(encoded, exactly_96); // No hash suffix.

        // A path that encodes to 97 bytes should be truncated.
        let just_over = "a".repeat(97);
        let encoded = encode_workspace_path(Utf8Path::new(&just_over));
        assert_eq!(encoded.len(), 96);
        // Should have a hash suffix. The suffix starts at byte
        // MAX_ENCODED_LEN - HASH_SUFFIX_LEN (88), not at an ad-hoc offset.
        let hash_suffix = &encoded[MAX_ENCODED_LEN - HASH_SUFFIX_LEN..];
        assert!(hash_suffix.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn test_truncation_different_suffixes_same_prefix() {
        // Two paths with the same prefix but different endings should get different hashes.
        let base = "a".repeat(90);
        let path_a = format!("{base}XXXXXXX");
        let path_b = format!("{base}YYYYYYY");

        let encoded_a = encode_workspace_path(Utf8Path::new(&path_a));
        let encoded_b = encode_workspace_path(Utf8Path::new(&path_b));

        // Both should be truncated (97 chars each).
        assert_eq!(encoded_a.len(), 96);
        assert_eq!(encoded_b.len(), 96);

        // The full hash suffixes should be different. Compare from the actual
        // suffix start (byte 88) so that hashes differing only in their first
        // two hex digits are not mistaken for equal.
        assert_ne!(
            &encoded_a[MAX_ENCODED_LEN - HASH_SUFFIX_LEN..],
            &encoded_b[MAX_ENCODED_LEN - HASH_SUFFIX_LEN..],
            "different paths should have different hash suffixes"
        );
    }
}