zip/read/
stream.rs

1use super::{
2    central_header_to_zip_file_inner, make_symlink, read_zipfile_from_stream, ZipCentralEntryBlock,
3    ZipFile, ZipFileData, ZipResult,
4};
5use crate::spec::FixedSizeBlock;
6use indexmap::IndexMap;
7use std::io::{self, Read};
8use std::path::{Path, PathBuf};
9
/// Streaming decoder for zip archives.
///
/// Wraps a reader `R`; the archive is consumed through
/// [`ZipStreamReader::visit`] or [`ZipStreamReader::extract`].
#[derive(Debug)]
pub struct ZipStreamReader<R>(R);
13
14impl<R> ZipStreamReader<R> {
15    /// Create a new ZipStreamReader
16    pub const fn new(reader: R) -> Self {
17        Self(reader)
18    }
19}
20
21impl<R: Read> ZipStreamReader<R> {
22    fn parse_central_directory(&mut self) -> ZipResult<ZipStreamFileMetadata> {
23        // Give archive_offset and central_header_start dummy value 0, since
24        // they are not used in the output.
25        let archive_offset = 0;
26        let central_header_start = 0;
27
28        // Parse central header
29        let block = ZipCentralEntryBlock::parse(&mut self.0)?;
30        let file = central_header_to_zip_file_inner(
31            &mut self.0,
32            archive_offset,
33            central_header_start,
34            block,
35        )?;
36        Ok(ZipStreamFileMetadata(file))
37    }
38
39    /// Iterate over the stream and extract all file and their
40    /// metadata.
41    pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> {
42        while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? {
43            visitor.visit_file(&mut file)?;
44        }
45
46        while let Ok(metadata) = self.parse_central_directory() {
47            visitor.visit_additional_metadata(&metadata)?;
48        }
49
50        Ok(())
51    }
52
53    /// Extract a Zip archive into a directory, overwriting files if they
54    /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
55    ///
56    /// Extraction is not atomic; If an error is encountered, some of the files
57    /// may be left on disk.
58    pub fn extract<P: AsRef<Path>>(self, directory: P) -> ZipResult<()> {
59        use std::fs;
60        fs::create_dir_all(&directory)?;
61        let directory = directory.as_ref().canonicalize()?;
62        struct Extractor(PathBuf, IndexMap<Box<str>, ()>);
63        impl ZipStreamVisitor for Extractor {
64            fn visit_file<R: Read>(&mut self, file: &mut ZipFile<'_, R>) -> ZipResult<()> {
65                self.1.insert(file.name().into(), ());
66                let mut outpath = self.0.clone();
67                file.safe_prepare_path(&self.0, &mut outpath, None::<&(_, fn(&Path) -> bool)>)?;
68
69                if file.is_symlink() {
70                    let mut target = Vec::with_capacity(file.size() as usize);
71                    file.read_to_end(&mut target)?;
72                    make_symlink(&outpath, &target, &self.1)?;
73                    return Ok(());
74                }
75
76                if file.is_dir() {
77                    fs::create_dir_all(&outpath)?;
78                } else {
79                    let mut outfile = fs::File::create(&outpath)?;
80                    io::copy(file, &mut outfile)?;
81                }
82
83                Ok(())
84            }
85
86            #[allow(unused)]
87            fn visit_additional_metadata(
88                &mut self,
89                metadata: &ZipStreamFileMetadata,
90            ) -> ZipResult<()> {
91                #[cfg(unix)]
92                {
93                    use super::ZipError;
94                    let filepath = metadata
95                        .enclosed_name()
96                        .ok_or(crate::result::invalid!("Invalid file path"))?;
97
98                    let outpath = self.0.join(filepath);
99
100                    use std::os::unix::fs::PermissionsExt;
101                    if let Some(mode) = metadata.unix_mode() {
102                        fs::set_permissions(outpath, fs::Permissions::from_mode(mode))?;
103                    }
104                }
105
106                Ok(())
107            }
108        }
109
110        self.visit(&mut Extractor(directory, IndexMap::new()))
111    }
112}
113
114/// Visitor for ZipStreamReader
115pub trait ZipStreamVisitor {
116    ///  * `file` - contains the content of the file and most of the metadata,
117    ///    except:
118    ///     - `comment`: set to an empty string
119    ///     - `data_start`: set to 0
120    ///     - `external_attributes`: `unix_mode()`: will return None
121    fn visit_file<R: Read>(&mut self, file: &mut ZipFile<'_, R>) -> ZipResult<()>;
122
123    /// This function is guaranteed to be called after all `visit_file`s.
124    ///
125    ///  * `metadata` - Provides missing metadata in `visit_file`.
126    fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>;
127}
128
129/// Additional metadata for the file.
130#[derive(Debug)]
131pub struct ZipStreamFileMetadata(ZipFileData);
132
133impl ZipStreamFileMetadata {
134    /// Get the name of the file
135    ///
136    /// # Warnings
137    ///
138    /// It is dangerous to use this name directly when extracting an archive.
139    /// It may contain an absolute path (`/etc/shadow`), or break out of the
140    /// current directory (`../runtime`). Carelessly writing to these paths
141    /// allows an attacker to craft a ZIP archive that will overwrite critical
142    /// files.
143    ///
144    /// You can use the [`ZipFile::enclosed_name`] method to validate the name
145    /// as a safe path.
146    pub fn name(&self) -> &str {
147        &self.0.file_name
148    }
149
150    /// Get the name of the file, in the raw (internal) byte representation.
151    ///
152    /// The encoding of this data is currently undefined.
153    pub fn name_raw(&self) -> &[u8] {
154        &self.0.file_name_raw
155    }
156
157    /// Rewrite the path, ignoring any path components with special meaning.
158    ///
159    /// - Absolute paths are made relative
160    /// - [std::path::Component::ParentDir]s are ignored
161    /// - Truncates the filename at a NULL byte
162    ///
163    /// This is appropriate if you need to be able to extract *something* from
164    /// any archive, but will easily misrepresent trivial paths like
165    /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
166    /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
167    pub fn mangled_name(&self) -> PathBuf {
168        self.0.file_name_sanitized()
169    }
170
171    /// Ensure the file path is safe to use as a [`Path`].
172    ///
173    /// - It can't contain NULL bytes
174    /// - It can't resolve to a path outside the current directory
175    ///   > `foo/../bar` is fine, `foo/../../bar` is not.
176    /// - It can't be an absolute path
177    ///
178    /// This will read well-formed ZIP files correctly, and is resistant
179    /// to path-based exploits. It is recommended over
180    /// [`ZipFile::mangled_name`].
181    pub fn enclosed_name(&self) -> Option<PathBuf> {
182        self.0.enclosed_name()
183    }
184
185    /// Returns whether the file is actually a directory
186    pub fn is_dir(&self) -> bool {
187        self.name()
188            .chars()
189            .next_back()
190            .is_some_and(|c| c == '/' || c == '\\')
191    }
192
193    /// Returns whether the file is a regular file
194    pub fn is_file(&self) -> bool {
195        !self.is_dir()
196    }
197
198    /// Get the comment of the file
199    pub fn comment(&self) -> &str {
200        &self.0.file_comment
201    }
202
203    /// Get unix mode for the file
204    pub const fn unix_mode(&self) -> Option<u32> {
205        self.0.unix_mode()
206    }
207}
208
209#[cfg(test)]
210mod test {
211    use tempfile::TempDir;
212
213    use crate::read::stream::{ZipStreamFileMetadata, ZipStreamReader, ZipStreamVisitor};
214    use crate::read::ZipFile;
215    use crate::result::ZipResult;
216    use crate::write::SimpleFileOptions;
217    use crate::ZipWriter;
218    use std::collections::BTreeSet;
219    use std::io::{Cursor, Read};
220
221    struct DummyVisitor;
222    impl ZipStreamVisitor for DummyVisitor {
223        fn visit_file<R: Read>(&mut self, _file: &mut ZipFile<'_, R>) -> ZipResult<()> {
224            Ok(())
225        }
226
227        fn visit_additional_metadata(
228            &mut self,
229            _metadata: &ZipStreamFileMetadata,
230        ) -> ZipResult<()> {
231            Ok(())
232        }
233    }
234
235    #[allow(dead_code)]
236    #[derive(Default, Debug, Eq, PartialEq)]
237    struct CounterVisitor(u64, u64);
238    impl ZipStreamVisitor for CounterVisitor {
239        fn visit_file<R: Read>(&mut self, _file: &mut ZipFile<'_, R>) -> ZipResult<()> {
240            self.0 += 1;
241            Ok(())
242        }
243
244        fn visit_additional_metadata(
245            &mut self,
246            _metadata: &ZipStreamFileMetadata,
247        ) -> ZipResult<()> {
248            self.1 += 1;
249            Ok(())
250        }
251    }
252
253    #[test]
254    fn invalid_offset() {
255        ZipStreamReader::new(Cursor::new(include_bytes!(
256            "../../tests/data/invalid_offset.zip"
257        )))
258        .visit(&mut DummyVisitor)
259        .unwrap_err();
260    }
261
262    #[test]
263    fn invalid_offset2() {
264        ZipStreamReader::new(Cursor::new(include_bytes!(
265            "../../tests/data/invalid_offset2.zip"
266        )))
267        .visit(&mut DummyVisitor)
268        .unwrap_err();
269    }
270
271    #[test]
272    fn zip_read_streaming() {
273        let reader =
274            ZipStreamReader::new(Cursor::new(include_bytes!("../../tests/data/mimetype.zip")));
275
276        #[derive(Default)]
277        struct V {
278            filenames: BTreeSet<Box<str>>,
279        }
280        impl ZipStreamVisitor for V {
281            fn visit_file<R: Read>(&mut self, file: &mut ZipFile<'_, R>) -> ZipResult<()> {
282                if file.is_file() {
283                    self.filenames.insert(file.name().into());
284                }
285
286                Ok(())
287            }
288            fn visit_additional_metadata(
289                &mut self,
290                metadata: &ZipStreamFileMetadata,
291            ) -> ZipResult<()> {
292                if metadata.is_file() {
293                    assert!(
294                        self.filenames.contains(metadata.name()),
295                        "{} is missing its file content",
296                        metadata.name()
297                    );
298                }
299
300                Ok(())
301            }
302        }
303
304        reader.visit(&mut V::default()).unwrap();
305    }
306
307    #[test]
308    fn file_and_dir_predicates() {
309        let reader = ZipStreamReader::new(Cursor::new(include_bytes!(
310            "../../tests/data/files_and_dirs.zip"
311        )));
312
313        #[derive(Default)]
314        struct V {
315            filenames: BTreeSet<Box<str>>,
316        }
317        impl ZipStreamVisitor for V {
318            fn visit_file<R: Read>(&mut self, file: &mut ZipFile<'_, R>) -> ZipResult<()> {
319                let full_name = file.enclosed_name().unwrap();
320                let file_name = full_name.file_name().unwrap().to_str().unwrap();
321                assert!(
322                    (file_name.starts_with("dir") && file.is_dir())
323                        || (file_name.starts_with("file") && file.is_file())
324                );
325
326                if file.is_file() {
327                    self.filenames.insert(file.name().into());
328                }
329
330                Ok(())
331            }
332            fn visit_additional_metadata(
333                &mut self,
334                metadata: &ZipStreamFileMetadata,
335            ) -> ZipResult<()> {
336                if metadata.is_file() {
337                    assert!(
338                        self.filenames.contains(metadata.name()),
339                        "{} is missing its file content",
340                        metadata.name()
341                    );
342                }
343
344                Ok(())
345            }
346        }
347
348        reader.visit(&mut V::default()).unwrap();
349    }
350
351    /// test case to ensure we don't preemptively over allocate based on the
352    /// declared number of files in the CDE of an invalid zip when the number of
353    /// files declared is more than the alleged offset in the CDE
354    #[test]
355    fn invalid_cde_number_of_files_allocation_smaller_offset() {
356        ZipStreamReader::new(Cursor::new(include_bytes!(
357            "../../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
358        )))
359        .visit(&mut DummyVisitor)
360        .unwrap_err();
361    }
362
363    /// test case to ensure we don't preemptively over allocate based on the
364    /// declared number of files in the CDE of an invalid zip when the number of
365    /// files declared is less than the alleged offset in the CDE
366    #[test]
367    fn invalid_cde_number_of_files_allocation_greater_offset() {
368        ZipStreamReader::new(Cursor::new(include_bytes!(
369            "../../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
370        )))
371        .visit(&mut DummyVisitor)
372        .unwrap_err();
373    }
374
375    /// Symlinks being extracted shouldn't be followed out of the destination directory.
376    #[test]
377    fn test_cannot_symlink_outside_destination() -> ZipResult<()> {
378        use std::fs::create_dir;
379
380        let mut writer = ZipWriter::new(Cursor::new(Vec::new()));
381        writer.add_symlink("symlink/", "../dest-sibling/", SimpleFileOptions::default())?;
382        writer.start_file("symlink/dest-file", SimpleFileOptions::default())?;
383        let reader = ZipStreamReader::new(writer.finish()?);
384        let dest_parent = TempDir::with_prefix("stream__cannot_symlink_outside_destination")?;
385        let dest_sibling = dest_parent.path().join("dest-sibling");
386        create_dir(&dest_sibling)?;
387        let dest = dest_parent.path().join("dest");
388        create_dir(&dest)?;
389        assert!(reader.extract(dest).is_err());
390        assert!(!dest_sibling.join("dest-file").exists());
391        Ok(())
392    }
393
394    #[test]
395    fn test_can_create_destination() -> ZipResult<()> {
396        let mut v = Vec::new();
397        v.extend_from_slice(include_bytes!("../../tests/data/mimetype.zip"));
398        let reader = ZipStreamReader::new(v.as_slice());
399        let dest = TempDir::with_prefix("stream_test_can_create_destination").unwrap();
400        reader.extract(&dest)?;
401        assert!(dest.path().join("mimetype").exists());
402        Ok(())
403    }
404}