nextest_runner/record/
state_dir.rs

1// Copyright (c) The nextest Contributors
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Platform-specific state directory discovery for nextest records.
5//!
6//! Test run recordings are stored in `XDG_STATE_HOME` (on Linux/macOS) rather
7//! than `XDG_CACHE_HOME` because they are accumulated state, not regenerable
8//! cache data. The XDG spec defines cache as "non-essential data files [that]
9//! the application must be able to regenerate," but recordings capture a
10//! specific execution at a specific point in time and cannot be regenerated.
11
12use crate::errors::StateDirError;
13use camino::{Utf8Path, Utf8PathBuf};
14use etcetera::{BaseStrategy, choose_base_strategy};
15use xxhash_rust::xxh3::xxh3_64;
16
/// Maximum length of the encoded workspace path in bytes.
///
/// Encoded names longer than this are truncated at a UTF-8 boundary and
/// suffixed with a hash; see `truncate_with_hash`.
const MAX_ENCODED_LEN: usize = 96;

/// Length of the hash suffix appended to truncated paths.
///
/// Between the first `MAX_ENCODED_LEN - HASH_SUFFIX_LEN` bytes of the encoded
/// path and this suffix, we should ideally have more than enough entropy to
/// disambiguate repos.
const HASH_SUFFIX_LEN: usize = 8;

/// Environment variable to override the nextest state directory.
///
/// When set, this overrides the platform-specific state directory. The records
/// directory will be `$NEXTEST_STATE_DIR/projects/<encoded-workspace>/records/`.
pub const NEXTEST_STATE_DIR_ENV: &str = "NEXTEST_STATE_DIR";
31
32/// Returns the platform-specific state directory for nextest records for a workspace.
33///
34/// If the `NEXTEST_STATE_DIR` environment variable is set, uses that as the base
35/// directory. Otherwise, uses the platform-specific default:
36///
37/// - Linux, macOS, and other Unix: `$XDG_STATE_HOME/nextest/projects/<encoded-workspace>/records/`
38///   or `~/.local/state/nextest/projects/<encoded-workspace>/records/`
39/// - Windows: `%LOCALAPPDATA%\nextest\projects\<encoded-workspace>\records\`
40///   (Windows has no state directory concept, so falls back to cache directory.)
41///
42/// The workspace root is canonicalized (symlinks resolved) before being encoded
43/// using `encode_workspace_path` to produce a directory-safe, bijective
44/// representation. This ensures that accessing a workspace via a symlink
45/// produces the same state directory as accessing it via the real path.
46///
47/// Returns an error if:
48///
49/// - The platform state directory cannot be determined.
50/// - The workspace path cannot be canonicalized (e.g., doesn't exist).
51/// - Any path is not valid UTF-8.
52pub fn records_state_dir(workspace_root: &Utf8Path) -> Result<Utf8PathBuf, StateDirError> {
53    // If NEXTEST_STATE_DIR is set, use it directly.
54    if let Ok(state_dir) = std::env::var(NEXTEST_STATE_DIR_ENV) {
55        let base_dir = Utf8PathBuf::from(state_dir);
56        let canonical_workspace =
57            workspace_root
58                .canonicalize_utf8()
59                .map_err(|error| StateDirError::Canonicalize {
60                    workspace_root: workspace_root.to_owned(),
61                    error,
62                })?;
63        let encoded_workspace = encode_workspace_path(&canonical_workspace);
64        return Ok(base_dir
65            .join("projects")
66            .join(&encoded_workspace)
67            .join("records"));
68    }
69
70    let strategy = choose_base_strategy().map_err(StateDirError::BaseDirStrategy)?;
71
72    // Canonicalize the workspace root to resolve symlinks. This ensures that
73    // accessing a workspace via a symlink produces the same state directory.
74    let canonical_workspace =
75        workspace_root
76            .canonicalize_utf8()
77            .map_err(|error| StateDirError::Canonicalize {
78                workspace_root: workspace_root.to_owned(),
79                error,
80            })?;
81    let encoded_workspace = encode_workspace_path(&canonical_workspace);
82
83    // Compute the state directory path. Use state_dir() if available, otherwise
84    // fall back to cache_dir() (Windows has no state directory concept).
85    let nextest_dir = if let Some(base_state_dir) = strategy.state_dir() {
86        base_state_dir.join("nextest")
87    } else {
88        strategy.cache_dir().join("nextest")
89    };
90
91    let nextest_dir_utf8 = Utf8PathBuf::from_path_buf(nextest_dir.clone())
92        .map_err(|_| StateDirError::StateDirNotUtf8 { path: nextest_dir })?;
93
94    Ok(nextest_dir_utf8
95        .join("projects")
96        .join(&encoded_workspace)
97        .join("records"))
98}
99
100/// Encodes a workspace path into a directory-safe string.
101///
102/// The encoding is bijective (reversible) and produces valid directory names on all
103/// platforms. The encoding scheme uses underscore as an escape character:
104///
105/// - `_` → `__` (escape underscore first)
106/// - `/` → `_s` (Unix path separator)
107/// - `\` → `_b` (Windows path separator)
108/// - `:` → `_c` (Windows drive letter separator)
109/// - `*` → `_a` (asterisk, invalid on Windows)
110/// - `"` → `_q` (double quote, invalid on Windows)
111/// - `<` → `_l` (less than, invalid on Windows)
112/// - `>` → `_g` (greater than, invalid on Windows)
113/// - `|` → `_p` (pipe, invalid on Windows)
114/// - `?` → `_m` (question mark, invalid on Windows)
115///
116/// If the encoded path exceeds 96 bytes, it is truncated at a valid UTF-8 boundary
117/// and an 8-character hash suffix is appended to maintain uniqueness.
118///
119/// # Examples
120///
121/// - `/home/rain/dev/nextest` → `_shome_srain_sdev_snextest`
122/// - `C:\Users\rain\dev` → `C_c_bUsers_brain_bdev`
123/// - `/path_with_underscore` → `_spath__with__underscore`
124/// - `/weird*path?` → `_sweird_apath_m`
125pub fn encode_workspace_path(path: &Utf8Path) -> String {
126    let mut encoded = String::with_capacity(path.as_str().len() * 2);
127
128    for ch in path.as_str().chars() {
129        match ch {
130            '_' => encoded.push_str("__"),
131            '/' => encoded.push_str("_s"),
132            '\\' => encoded.push_str("_b"),
133            ':' => encoded.push_str("_c"),
134            '*' => encoded.push_str("_a"),
135            '"' => encoded.push_str("_q"),
136            '<' => encoded.push_str("_l"),
137            '>' => encoded.push_str("_g"),
138            '|' => encoded.push_str("_p"),
139            '?' => encoded.push_str("_m"),
140            _ => encoded.push(ch),
141        }
142    }
143
144    truncate_with_hash(encoded)
145}
146
147/// Truncates an encoded string to fit within [`MAX_ENCODED_LEN`] bytes.
148///
149/// If the string is already short enough, returns it unchanged. Otherwise,
150/// truncates at a valid UTF-8 boundary and appends an 8-character hash suffix
151/// derived from the full string.
152fn truncate_with_hash(encoded: String) -> String {
153    if encoded.len() <= MAX_ENCODED_LEN {
154        return encoded;
155    }
156
157    // Compute hash of full string before truncation.
158    let hash = xxh3_64(encoded.as_bytes());
159    let hash_suffix = format!("{:08x}", hash & 0xFFFFFFFF);
160
161    // Find the longest valid UTF-8 prefix that fits.
162    let max_prefix_len = MAX_ENCODED_LEN - HASH_SUFFIX_LEN;
163    let bytes = encoded.as_bytes();
164    let truncated_bytes = &bytes[..max_prefix_len.min(bytes.len())];
165
166    // Use utf8_chunks to find the valid UTF-8 portion.
167    let mut valid_len = 0;
168    for chunk in truncated_bytes.utf8_chunks() {
169        valid_len += chunk.valid().len();
170        // Stop at first invalid sequence (which would be an incomplete multi-byte char).
171        if !chunk.invalid().is_empty() {
172            break;
173        }
174    }
175
176    let mut result = encoded[..valid_len].to_string();
177    result.push_str(&hash_suffix);
178    result
179}
180
181/// Decodes a workspace path that was encoded with [`encode_workspace_path`].
182///
183/// Returns `None` if the encoded string is malformed (contains an invalid escape
184/// sequence like `_x` where `x` is not a recognized escape character).
185#[cfg_attr(not(test), expect(dead_code))] // Will be used in replay phase.
186pub fn decode_workspace_path(encoded: &str) -> Option<Utf8PathBuf> {
187    let mut decoded = String::with_capacity(encoded.len());
188    let mut chars = encoded.chars().peekable();
189
190    while let Some(ch) = chars.next() {
191        if ch == '_' {
192            match chars.next() {
193                Some('_') => decoded.push('_'),
194                Some('s') => decoded.push('/'),
195                Some('b') => decoded.push('\\'),
196                Some('c') => decoded.push(':'),
197                Some('a') => decoded.push('*'),
198                Some('q') => decoded.push('"'),
199                Some('l') => decoded.push('<'),
200                Some('g') => decoded.push('>'),
201                Some('p') => decoded.push('|'),
202                Some('m') => decoded.push('?'),
203                // Malformed: `_` at end of string or followed by unknown char.
204                _ => return None,
205            }
206        } else {
207            decoded.push(ch);
208        }
209    }
210
211    Some(Utf8PathBuf::from(decoded))
212}
213
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    #[test]
    fn test_records_state_dir() {
        // Use a real existing path (the temp dir always exists).
        let temp_dir =
            Utf8PathBuf::try_from(std::env::temp_dir()).expect("temp dir should be valid UTF-8");
        let state_dir = records_state_dir(&temp_dir).expect("state directory should be available");

        assert!(
            state_dir.as_str().contains("nextest"),
            "state dir should contain 'nextest': {state_dir}"
        );
        assert!(
            state_dir.as_str().contains("projects"),
            "state dir should contain 'projects': {state_dir}"
        );
        assert!(
            state_dir.as_str().contains("records"),
            "state dir should contain 'records': {state_dir}"
        );
    }

    #[test]
    fn test_records_state_dir_canonicalizes_symlinks() {
        // Create a temp directory and a symlink pointing to it.
        let temp_dir = camino_tempfile::tempdir().expect("tempdir should be created");
        let real_path = temp_dir.path().to_path_buf();

        // Create a subdirectory to serve as the "workspace".
        let workspace = real_path.join("workspace");
        fs::create_dir(&workspace).expect("workspace dir should be created");

        // Create a symlink pointing to the workspace.
        let symlink_path = real_path.join("symlink-to-workspace");

        #[cfg(unix)]
        std::os::unix::fs::symlink(&workspace, &symlink_path)
            .expect("symlink should be created on Unix");

        #[cfg(windows)]
        std::os::windows::fs::symlink_dir(&workspace, &symlink_path)
            .expect("symlink should be created on Windows");

        // Get state dir via the real path.
        let state_via_real =
            records_state_dir(&workspace).expect("state dir via real path should be available");

        // Get state dir via the symlink.
        let state_via_symlink =
            records_state_dir(&symlink_path).expect("state dir via symlink should be available");

        // They should be the same because canonicalization resolves the symlink.
        assert_eq!(
            state_via_real, state_via_symlink,
            "state dir should be the same whether accessed via real path or symlink"
        );
    }

    // Basic encoding tests.
    #[test]
    fn test_encode_workspace_path() {
        let cases = [
            ("", ""),
            ("simple", "simple"),
            ("/home/user", "_shome_suser"),
            ("/home/user/project", "_shome_suser_sproject"),
            ("C:\\Users\\name", "C_c_bUsers_bname"),
            ("D:\\dev\\project", "D_c_bdev_bproject"),
            ("/path_with_underscore", "_spath__with__underscore"),
            ("C:\\path_name", "C_c_bpath__name"),
            ("/a/b/c", "_sa_sb_sc"),
            // Windows-invalid characters.
            ("/weird*path", "_sweird_apath"),
            ("/path?query", "_spath_mquery"),
            ("/file<name>", "_sfile_lname_g"),
            ("/path|pipe", "_spath_ppipe"),
            ("/\"quoted\"", "_s_qquoted_q"),
            // All Windows-invalid characters combined.
            ("*\"<>|?", "_a_q_l_g_p_m"),
        ];

        for (input, expected) in cases {
            let encoded = encode_workspace_path(Utf8Path::new(input));
            assert_eq!(
                encoded, expected,
                "encoding failed for {input:?}: expected {expected:?}, got {encoded:?}"
            );
        }
    }

    // Roundtrip tests: encode then decode should return original.
    #[test]
    fn test_encode_decode_roundtrip() {
        let cases = [
            "/home/user/project",
            "C:\\Users\\name\\dev",
            "/path_with_underscore",
            "/_",
            "_/",
            "__",
            "/a_b/c_d",
            "",
            "no_special_chars",
            "/mixed\\path:style",
            // Windows-invalid characters (valid on Unix).
            "/path*with*asterisks",
            "/file?query",
            "/path<with>angles",
            "/pipe|char",
            "/\"quoted\"",
            // All special chars in one path.
            "/all*special?chars<in>one|path\"here\"_end",
        ];

        for original in cases {
            let encoded = encode_workspace_path(Utf8Path::new(original));
            let decoded = decode_workspace_path(&encoded);
            assert_eq!(
                decoded.as_deref(),
                Some(Utf8Path::new(original)),
                "roundtrip failed for {original:?}: encoded={encoded:?}, decoded={decoded:?}"
            );
        }
    }

    // Bijectivity tests: different inputs must produce different outputs.
    #[test]
    fn test_encoding_is_bijective() {
        // These pairs were problematic with the simple dash-based encoding.
        let pairs = [
            ("/-", "-/"),
            ("/a", "_a"),
            ("_s", "/"),
            ("a_", "a/"),
            ("__", "_"),
            ("/", "\\"),
            // New escape sequences for Windows-invalid characters.
            ("_a", "*"),
            ("_q", "\""),
            ("_l", "<"),
            ("_g", ">"),
            ("_p", "|"),
            ("_m", "?"),
            // Ensure Windows-invalid chars don't collide with each other.
            ("*", "?"),
            ("<", ">"),
            ("|", "\""),
        ];

        for (a, b) in pairs {
            let encoded_a = encode_workspace_path(Utf8Path::new(a));
            let encoded_b = encode_workspace_path(Utf8Path::new(b));
            assert_ne!(
                encoded_a, encoded_b,
                "bijectivity violated: {a:?} and {b:?} both encode to {encoded_a:?}"
            );
        }
    }

    // Decode should reject malformed inputs.
    #[test]
    fn test_decode_rejects_malformed() {
        let malformed_inputs = [
            "_",     // underscore at end
            "_x",    // unknown escape sequence
            "foo_",  // underscore at end after content
            "foo_x", // unknown escape in middle
            "_S",    // uppercase S not valid
        ];

        for input in malformed_inputs {
            assert!(
                decode_workspace_path(input).is_none(),
                "should reject malformed input: {input:?}"
            );
        }
    }

    // Valid escape sequences should decode.
    #[test]
    fn test_decode_valid_escapes() {
        let cases = [
            ("__", "_"),
            ("_s", "/"),
            ("_b", "\\"),
            ("_c", ":"),
            ("a__b", "a_b"),
            ("_shome", "/home"),
            // Windows-invalid character escapes.
            ("_a", "*"),
            ("_q", "\""),
            ("_l", "<"),
            ("_g", ">"),
            ("_p", "|"),
            ("_m", "?"),
            // Combined.
            ("_spath_astar_mquery", "/path*star?query"),
        ];

        for (input, expected) in cases {
            let decoded = decode_workspace_path(input);
            assert_eq!(
                decoded.as_deref(),
                Some(Utf8Path::new(expected)),
                "decode failed for {input:?}: expected {expected:?}, got {decoded:?}"
            );
        }
    }

    // Truncation tests.
    #[test]
    fn test_short_paths_not_truncated() {
        // A path that encodes to at most MAX_ENCODED_LEN bytes should not be
        // truncated.
        let short_path = "/a/b/c/d";
        let encoded = encode_workspace_path(Utf8Path::new(short_path));
        assert!(
            encoded.len() <= MAX_ENCODED_LEN,
            "short path should not be truncated: {encoded:?} (len={})",
            encoded.len()
        );
        // Should not contain a hash suffix (no truncation occurred).
        assert_eq!(encoded, "_sa_sb_sc_sd");
    }

    #[test]
    fn test_long_paths_truncated_with_hash() {
        // Create a path that will definitely exceed 96 bytes when encoded.
        // Each `/x` becomes `_sx` (3 bytes), so we need > 32 components.
        let long_path = "/a".repeat(50); // 100 bytes raw, 150 bytes encoded
        let encoded = encode_workspace_path(Utf8Path::new(&long_path));

        assert_eq!(
            encoded.len(),
            MAX_ENCODED_LEN,
            "truncated path should be exactly {MAX_ENCODED_LEN} bytes: {encoded:?} (len={})",
            encoded.len()
        );

        // Should end with an 8-character hex hash.
        let hash_suffix = &encoded[encoded.len() - HASH_SUFFIX_LEN..];
        assert!(
            hash_suffix.chars().all(|c| c.is_ascii_hexdigit()),
            "hash suffix should be hex digits: {hash_suffix:?}"
        );
    }

    #[test]
    fn test_truncation_preserves_uniqueness() {
        // Two different long paths should produce different truncated results.
        let path_a = "/a".repeat(50);
        let path_b = "/b".repeat(50);

        let encoded_a = encode_workspace_path(Utf8Path::new(&path_a));
        let encoded_b = encode_workspace_path(Utf8Path::new(&path_b));

        assert_ne!(
            encoded_a, encoded_b,
            "different paths should produce different encodings even when truncated"
        );
    }

    #[test]
    fn test_truncation_with_unicode() {
        // Create a path with multi-byte UTF-8 characters that would be split.
        // '日' is 3 bytes in UTF-8.
        let unicode_path = "/日本語".repeat(20); // Each repeat is 10 bytes raw.
        let encoded = encode_workspace_path(Utf8Path::new(&unicode_path));

        assert!(
            encoded.len() <= MAX_ENCODED_LEN,
            "encoded path should not exceed {MAX_ENCODED_LEN} bytes: len={}",
            encoded.len()
        );

        // Verify the result is valid UTF-8 (this would panic if not).
        let _ = encoded.as_str();

        // Verify the hash suffix is present and valid hex.
        let hash_suffix = &encoded[encoded.len() - HASH_SUFFIX_LEN..];
        assert!(
            hash_suffix.chars().all(|c| c.is_ascii_hexdigit()),
            "hash suffix should be hex digits: {hash_suffix:?}"
        );
    }

    #[test]
    fn test_truncation_boundary_at_96_bytes() {
        // Create paths of varying lengths around the 96-byte boundary.
        // The encoding doubles some characters, so we need to be careful.

        // A path that encodes to exactly 96 bytes should not be truncated.
        // 'a' stays as 'a', so we can use a string of 96 'a's.
        let exactly_96 = "a".repeat(96);
        let encoded = encode_workspace_path(Utf8Path::new(&exactly_96));
        assert_eq!(encoded.len(), 96);
        assert_eq!(encoded, exactly_96); // No hash suffix.

        // A path that encodes to 97 bytes should be truncated.
        let just_over = "a".repeat(97);
        let encoded = encode_workspace_path(Utf8Path::new(&just_over));
        assert_eq!(encoded.len(), 96);
        // Should have a hash suffix. The suffix starts at
        // MAX_ENCODED_LEN - HASH_SUFFIX_LEN (= 88); the previous hard-coded
        // `[90..]` only checked the last 6 of the 8 hash digits.
        let hash_suffix = &encoded[MAX_ENCODED_LEN - HASH_SUFFIX_LEN..];
        assert!(hash_suffix.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn test_truncation_different_suffixes_same_prefix() {
        // Two paths with the same prefix but different endings should get different hashes.
        let base = "a".repeat(90);
        let path_a = format!("{base}XXXXXXX");
        let path_b = format!("{base}YYYYYYY");

        let encoded_a = encode_workspace_path(Utf8Path::new(&path_a));
        let encoded_b = encode_workspace_path(Utf8Path::new(&path_b));

        // Both should be truncated (97 chars each).
        assert_eq!(encoded_a.len(), MAX_ENCODED_LEN);
        assert_eq!(encoded_b.len(), MAX_ENCODED_LEN);

        // The hash suffixes should be different. Compare the full 8-character
        // suffix starting at MAX_ENCODED_LEN - HASH_SUFFIX_LEN (= 88); the
        // previous hard-coded `[90..]` compared only the last 6 digits.
        let suffix_start = MAX_ENCODED_LEN - HASH_SUFFIX_LEN;
        assert_ne!(
            &encoded_a[suffix_start..],
            &encoded_b[suffix_start..],
            "different paths should have different hash suffixes"
        );
    }
}