zip/read.rs

1//! Types for reading ZIP archives
2
3#[cfg(feature = "aes-crypto")]
4use crate::aes::{AesReader, AesReaderValid};
5use crate::compression::{CompressionMethod, Decompressor};
6use crate::cp437::FromCp437;
7use crate::crc32::Crc32Reader;
8use crate::extra_fields::{ExtendedTimestamp, ExtraField, Ntfs};
9use crate::result::{invalid, ZipError, ZipResult};
10use crate::spec::{
11    self, CentralDirectoryEndInfo, DataAndPosition, FixedSizeBlock, Pod, ZIP64_BYTES_THR,
12};
13use crate::types::{
14    AesMode, AesVendorVersion, DateTime, SimpleFileOptions, System, ZipCentralEntryBlock,
15    ZipFileData, ZipLocalEntryBlock,
16};
17use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
18use core::mem::{replace, size_of};
19use core::ops::{Deref, Range};
20use indexmap::IndexMap;
21use std::borrow::Cow;
22use std::ffi::OsStr;
23use std::io::{self, copy, sink, Read, Seek, SeekFrom, Write};
24use std::path::{Component, Path, PathBuf};
25use std::sync::{Arc, OnceLock};
26
27mod config;
28
29pub use config::{ArchiveOffset, Config};
30
31/// Provides a high-level API for reading from a stream.
32pub(crate) mod stream;
33
34pub(crate) mod magic_finder;
35
36/// Immutable metadata about a `ZipArchive`.
37#[derive(Debug)]
38pub struct ZipArchiveMetadata {
39    pub(crate) files: IndexMap<Box<str>, ZipFileData>,
40    pub(crate) offset: u64,
41    pub(crate) dir_start: u64,
42    // This isn't used anywhere yet, but it is kept for future use cases.
43    #[allow(dead_code)]
44    pub(crate) config: Config,
45    pub(crate) comment: Box<[u8]>,
46    pub(crate) zip64_comment: Option<Box<[u8]>>,
47}
48
49pub(crate) mod zip_archive {
50    use crate::read::ZipArchiveMetadata;
51    use indexmap::IndexMap;
52    use std::sync::Arc;
53
54    #[derive(Debug)]
55    pub(crate) struct SharedBuilder {
56        pub(crate) files: Vec<super::ZipFileData>,
57        pub(super) offset: u64,
58        pub(super) dir_start: u64,
59        // This isn't used anywhere yet, but it is kept for future use cases.
60        #[allow(dead_code)]
61        pub(super) config: super::Config,
62    }
63
64    impl SharedBuilder {
65        pub fn build(
66            self,
67            comment: Box<[u8]>,
68            zip64_comment: Option<Box<[u8]>>,
69        ) -> ZipArchiveMetadata {
70            let mut index_map = IndexMap::with_capacity(self.files.len());
71            self.files.into_iter().for_each(|file| {
72                index_map.insert(file.file_name.clone(), file);
73            });
74            ZipArchiveMetadata {
75                files: index_map,
76                offset: self.offset,
77                dir_start: self.dir_start,
78                config: self.config,
79                comment,
80                zip64_comment,
81            }
82        }
83    }
84
85    /// ZIP archive reader
86    ///
87    /// At the moment, cloning this type is cheap if cloning the underlying
88    /// reader is cheap. However, this is not guaranteed by this crate and may
89    /// change in the future.
90    ///
91    /// ```no_run
92    /// use std::io::{Read, Seek};
93    /// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
94    ///     use zip::HasZipMetadata;
95    ///     let mut zip = zip::ZipArchive::new(reader)?;
96    ///
97    ///     for i in 0..zip.len() {
98    ///         let mut file = zip.by_index(i)?;
99    ///         println!("Filename: {}", file.name());
100    ///         std::io::copy(&mut file, &mut std::io::stdout())?;
101    ///     }
102    ///
103    ///     Ok(())
104    /// }
105    /// ```
106    #[derive(Clone, Debug)]
107    pub struct ZipArchive<R> {
108        pub(super) reader: R,
109        pub(super) shared: Arc<ZipArchiveMetadata>,
110    }
111}
112
113#[cfg(feature = "aes-crypto")]
114use crate::aes::PWD_VERIFY_LENGTH;
115use crate::extra_fields::UnicodeExtraField;
116use crate::result::ZipError::InvalidPassword;
117use crate::spec::is_dir;
118use crate::types::ffi::{S_IFLNK, S_IFREG};
119use crate::unstable::{path_to_string, LittleEndianReadExt};
120pub use zip_archive::ZipArchive;
121
122#[allow(clippy::large_enum_variant)]
123pub(crate) enum CryptoReader<'a, R: Read> {
124    Plaintext(io::Take<&'a mut R>),
125    ZipCrypto(ZipCryptoReaderValid<io::Take<&'a mut R>>),
126    #[cfg(feature = "aes-crypto")]
127    Aes {
128        reader: AesReaderValid<io::Take<&'a mut R>>,
129        vendor_version: AesVendorVersion,
130    },
131}
132
133impl<R: Read> Read for CryptoReader<'_, R> {
134    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
135        match self {
136            CryptoReader::Plaintext(r) => r.read(buf),
137            CryptoReader::ZipCrypto(r) => r.read(buf),
138            #[cfg(feature = "aes-crypto")]
139            CryptoReader::Aes { reader: r, .. } => r.read(buf),
140        }
141    }
142
143    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
144        match self {
145            CryptoReader::Plaintext(r) => r.read_to_end(buf),
146            CryptoReader::ZipCrypto(r) => r.read_to_end(buf),
147            #[cfg(feature = "aes-crypto")]
148            CryptoReader::Aes { reader: r, .. } => r.read_to_end(buf),
149        }
150    }
151
152    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
153        match self {
154            CryptoReader::Plaintext(r) => r.read_to_string(buf),
155            CryptoReader::ZipCrypto(r) => r.read_to_string(buf),
156            #[cfg(feature = "aes-crypto")]
157            CryptoReader::Aes { reader: r, .. } => r.read_to_string(buf),
158        }
159    }
160}
161
162impl<'a, R: Read> CryptoReader<'a, R> {
163    /// Consumes this decoder, returning the underlying reader.
164    pub fn into_inner(self) -> io::Take<&'a mut R> {
165        match self {
166            CryptoReader::Plaintext(r) => r,
167            CryptoReader::ZipCrypto(r) => r.into_inner(),
168            #[cfg(feature = "aes-crypto")]
169            CryptoReader::Aes { reader: r, .. } => r.into_inner(),
170        }
171    }
172
173    /// Returns `true` if the data is encrypted using AE2.
174    pub const fn is_ae2_encrypted(&self) -> bool {
175        #[cfg(feature = "aes-crypto")]
176        return matches!(
177            self,
178            CryptoReader::Aes {
179                vendor_version: AesVendorVersion::Ae2,
180                ..
181            }
182        );
183        #[cfg(not(feature = "aes-crypto"))]
184        false
185    }
186}
187
188#[cold]
189fn invalid_state<T>() -> io::Result<T> {
190    Err(io::Error::other("ZipFileReader was in an invalid state"))
191}
192
193pub(crate) enum ZipFileReader<'a, R: Read> {
194    NoReader,
195    Raw(io::Take<&'a mut R>),
196    Compressed(Box<Crc32Reader<Decompressor<io::BufReader<CryptoReader<'a, R>>>>>),
197}
198
199impl<R: Read> Read for ZipFileReader<'_, R> {
200    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
201        match self {
202            ZipFileReader::NoReader => invalid_state(),
203            ZipFileReader::Raw(r) => r.read(buf),
204            ZipFileReader::Compressed(r) => r.read(buf),
205        }
206    }
207
208    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
209        match self {
210            ZipFileReader::NoReader => invalid_state(),
211            ZipFileReader::Raw(r) => r.read_exact(buf),
212            ZipFileReader::Compressed(r) => r.read_exact(buf),
213        }
214    }
215
216    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
217        match self {
218            ZipFileReader::NoReader => invalid_state(),
219            ZipFileReader::Raw(r) => r.read_to_end(buf),
220            ZipFileReader::Compressed(r) => r.read_to_end(buf),
221        }
222    }
223
224    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
225        match self {
226            ZipFileReader::NoReader => invalid_state(),
227            ZipFileReader::Raw(r) => r.read_to_string(buf),
228            ZipFileReader::Compressed(r) => r.read_to_string(buf),
229        }
230    }
231}
232
233impl<'a, R: Read> ZipFileReader<'a, R> {
234    fn into_inner(self) -> io::Result<io::Take<&'a mut R>> {
235        match self {
236            ZipFileReader::NoReader => invalid_state(),
237            ZipFileReader::Raw(r) => Ok(r),
238            ZipFileReader::Compressed(r) => {
239                Ok(r.into_inner().into_inner()?.into_inner().into_inner())
240            }
241        }
242    }
243}
244
245/// A struct for reading a single file within a ZIP archive
246pub struct ZipFile<'a, R: Read> {
247    pub(crate) data: Cow<'a, ZipFileData>,
248    pub(crate) reader: ZipFileReader<'a, R>,
249}
250
251/// A struct for reading and seeking within a single file of a ZIP archive
252pub struct ZipFileSeek<'a, R> {
253    data: Cow<'a, ZipFileData>,
254    reader: ZipFileSeekReader<'a, R>,
255}
256
257enum ZipFileSeekReader<'a, R> {
258    Raw(SeekableTake<'a, R>),
259}
260
261struct SeekableTake<'a, R> {
262    inner: &'a mut R,
263    inner_starting_offset: u64,
264    length: u64,
265    current_offset: u64,
266}
267
268impl<'a, R: Seek> SeekableTake<'a, R> {
269    pub fn new(inner: &'a mut R, length: u64) -> io::Result<Self> {
270        let inner_starting_offset = inner.stream_position()?;
271        Ok(Self {
272            inner,
273            inner_starting_offset,
274            length,
275            current_offset: 0,
276        })
277    }
278}
279
280impl<R: Seek> Seek for SeekableTake<'_, R> {
281    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
282        let offset = match pos {
283            SeekFrom::Start(offset) => Some(offset),
284            SeekFrom::End(offset) => self.length.checked_add_signed(offset),
285            SeekFrom::Current(offset) => self.current_offset.checked_add_signed(offset),
286        };
287        match offset {
288            None => Err(io::Error::new(
289                io::ErrorKind::InvalidInput,
290                "invalid seek to a negative or overflowing position",
291            )),
292            Some(offset) => {
293                let clamped_offset = std::cmp::min(self.length, offset);
294                let new_inner_offset = self
295                    .inner
296                    .seek(SeekFrom::Start(self.inner_starting_offset + clamped_offset))?;
297                self.current_offset = new_inner_offset - self.inner_starting_offset;
298                Ok(self.current_offset)
299            }
300        }
301    }
302}
303
304impl<R: Read> Read for SeekableTake<'_, R> {
305    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
306        let written = self
307            .inner
308            .take(self.length - self.current_offset)
309            .read(buf)?;
310        self.current_offset += written as u64;
311        Ok(written)
312    }
313}
314
315pub(crate) fn make_writable_dir_all<T: AsRef<Path>>(outpath: T) -> Result<(), ZipError> {
316    use std::fs;
317    fs::create_dir_all(outpath.as_ref())?;
318    #[cfg(unix)]
319    {
320        // Dirs must be writable until all normal files are extracted
321        use std::os::unix::fs::PermissionsExt;
322        std::fs::set_permissions(
323            outpath.as_ref(),
324            std::fs::Permissions::from_mode(
325                0o700 | std::fs::metadata(outpath.as_ref())?.permissions().mode(),
326            ),
327        )?;
328    }
329    Ok(())
330}
331
332pub(crate) fn find_content<'a, R: Read + Seek>(
333    data: &ZipFileData,
334    reader: &'a mut R,
335) -> ZipResult<io::Take<&'a mut R>> {
336    // TODO: use .get_or_try_init() once stabilized to provide a closure returning a Result!
337    let data_start = data.data_start(reader)?;
338
339    reader.seek(SeekFrom::Start(data_start))?;
340    Ok(reader.take(data.compressed_size))
341}
342
343fn find_content_seek<'a, R: Read + Seek>(
344    data: &ZipFileData,
345    reader: &'a mut R,
346) -> ZipResult<SeekableTake<'a, R>> {
347    // Parse local header
348    let data_start = data.data_start(reader)?;
349    reader.seek(SeekFrom::Start(data_start))?;
350
351    // Explicit Ok and ? are needed to convert io::Error to ZipError
352    Ok(SeekableTake::new(reader, data.compressed_size)?)
353}
354
355pub(crate) fn find_data_start(
356    data: &ZipFileData,
357    reader: &mut (impl Read + Seek + Sized),
358) -> Result<u64, ZipError> {
359    // Go to start of data.
360    reader.seek(SeekFrom::Start(data.header_start))?;
361
362    // Parse static-sized fields and check the magic value.
363    let block = ZipLocalEntryBlock::parse(reader)?;
364
365    // Calculate the end of the local header from the fields we just parsed.
366    let variable_fields_len =
367        // Each of these fields must be converted to u64 before adding, as the result may
368        // easily overflow a u16.
369        block.file_name_length as u64 + block.extra_field_length as u64;
370    let data_start =
371        data.header_start + size_of::<ZipLocalEntryBlock>() as u64 + variable_fields_len;
372
373    // Set the value so we don't have to read it again.
374    match data.data_start.set(data_start) {
375        Ok(()) => (),
376        // If the value was already set in the meantime, ensure it matches (this is probably
377        // unnecessary).
378        Err(_) => {
379            debug_assert_eq!(*data.data_start.get().unwrap(), data_start);
380        }
381    }
382
383    Ok(data_start)
384}
385
386#[allow(clippy::too_many_arguments)]
387pub(crate) fn make_crypto_reader<'a, R: Read>(
388    data: &ZipFileData,
389    reader: io::Take<&'a mut R>,
390    password: Option<&[u8]>,
391    aes_info: Option<(AesMode, AesVendorVersion, CompressionMethod)>,
392) -> ZipResult<CryptoReader<'a, R>> {
393    #[allow(deprecated)]
394    {
395        if let CompressionMethod::Unsupported(_) = data.compression_method {
396            return unsupported_zip_error("Compression method not supported");
397        }
398    }
399
400    let reader = match (password, aes_info) {
401        #[cfg(not(feature = "aes-crypto"))]
402        (Some(_), Some(_)) => {
403            return Err(ZipError::UnsupportedArchive(
404                "AES encrypted files cannot be decrypted without the aes-crypto feature.",
405            ))
406        }
407        #[cfg(feature = "aes-crypto")]
408        (Some(password), Some((aes_mode, vendor_version, _))) => CryptoReader::Aes {
409            reader: AesReader::new(reader, aes_mode, data.compressed_size).validate(password)?,
410            vendor_version,
411        },
412        (Some(password), None) => {
413            let validator = if data.using_data_descriptor {
414                ZipCryptoValidator::InfoZipMsdosTime(
415                    data.last_modified_time.map_or(0, |x| x.timepart()),
416                )
417            } else {
418                ZipCryptoValidator::PkzipCrc32(data.crc32)
419            };
420            CryptoReader::ZipCrypto(ZipCryptoReader::new(reader, password).validate(validator)?)
421        }
422        (None, Some(_)) => return Err(InvalidPassword),
423        (None, None) => CryptoReader::Plaintext(reader),
424    };
425    Ok(reader)
426}
427
428pub(crate) fn make_reader<R: Read>(
429    compression_method: CompressionMethod,
430    uncompressed_size: u64,
431    crc32: u32,
432    reader: CryptoReader<R>,
433    flags: u16,
434) -> ZipResult<ZipFileReader<R>> {
435    let ae2_encrypted = reader.is_ae2_encrypted();
436
437    Ok(ZipFileReader::Compressed(Box::new(Crc32Reader::new(
438        Decompressor::new(
439            io::BufReader::new(reader),
440            compression_method,
441            uncompressed_size,
442            flags,
443        )?,
444        crc32,
445        ae2_encrypted,
446    ))))
447}
448
449pub(crate) fn make_symlink<T>(
450    outpath: &Path,
451    target: &[u8],
452    #[allow(unused)] existing_files: &IndexMap<Box<str>, T>,
453) -> ZipResult<()> {
454    let Ok(target_str) = std::str::from_utf8(target) else {
455        return Err(invalid!("Invalid UTF-8 as symlink target"));
456    };
457
458    #[cfg(not(any(unix, windows)))]
459    {
460        use std::fs::File;
461        let output = File::create(outpath);
462        output?.write_all(target)?;
463    }
464    #[cfg(unix)]
465    {
466        std::os::unix::fs::symlink(Path::new(&target_str), outpath)?;
467    }
468    #[cfg(windows)]
469    {
470        let target = Path::new(OsStr::new(&target_str));
471        let target_is_dir_from_archive =
472            existing_files.contains_key(target_str) && is_dir(target_str);
473        let target_is_dir = if target_is_dir_from_archive {
474            true
475        } else if let Ok(meta) = std::fs::metadata(target) {
476            meta.is_dir()
477        } else {
478            false
479        };
480        if target_is_dir {
481            std::os::windows::fs::symlink_dir(target, outpath)?;
482        } else {
483            std::os::windows::fs::symlink_file(target, outpath)?;
484        }
485    }
486    Ok(())
487}
488
489#[derive(Debug)]
490pub(crate) struct CentralDirectoryInfo {
491    pub(crate) archive_offset: u64,
492    pub(crate) directory_start: u64,
493    pub(crate) number_of_files: usize,
494    pub(crate) disk_number: u32,
495    pub(crate) disk_with_central_directory: u32,
496}
497
498impl<'a> TryFrom<&'a CentralDirectoryEndInfo> for CentralDirectoryInfo {
499    type Error = ZipError;
500
501    fn try_from(value: &'a CentralDirectoryEndInfo) -> Result<Self, Self::Error> {
502        let (relative_cd_offset, number_of_files, disk_number, disk_with_central_directory) =
503            match &value.eocd64 {
504                Some(DataAndPosition { data: eocd64, .. }) => {
505                    if eocd64.number_of_files_on_this_disk > eocd64.number_of_files {
506                        return Err(invalid!("ZIP64 footer indicates more files on this disk than in the whole archive"));
507                    }
508                    (
509                        eocd64.central_directory_offset,
510                        eocd64.number_of_files as usize,
511                        eocd64.disk_number,
512                        eocd64.disk_with_central_directory,
513                    )
514                }
515                _ => (
516                    value.eocd.data.central_directory_offset as u64,
517                    value.eocd.data.number_of_files_on_this_disk as usize,
518                    value.eocd.data.disk_number as u32,
519                    value.eocd.data.disk_with_central_directory as u32,
520                ),
521            };
522
523        let directory_start = relative_cd_offset
524            .checked_add(value.archive_offset)
525            .ok_or(invalid!("Invalid central directory size or offset"))?;
526
527        Ok(Self {
528            archive_offset: value.archive_offset,
529            directory_start,
530            number_of_files,
531            disk_number,
532            disk_with_central_directory,
533        })
534    }
535}
536
537impl<R> ZipArchive<R> {
538    pub(crate) fn from_finalized_writer(
539        files: IndexMap<Box<str>, ZipFileData>,
540        comment: Box<[u8]>,
541        zip64_comment: Option<Box<[u8]>>,
542        reader: R,
543        central_start: u64,
544    ) -> ZipResult<Self> {
545        let initial_offset = match files.first() {
546            Some((_, file)) => file.header_start,
547            None => central_start,
548        };
549        let shared = Arc::new(ZipArchiveMetadata {
550            files,
551            offset: initial_offset,
552            dir_start: central_start,
553            config: Config {
554                archive_offset: ArchiveOffset::Known(initial_offset),
555            },
556            comment,
557            zip64_comment,
558        });
559        Ok(Self { reader, shared })
560    }
561
562    /// Total size of the files in the archive, if it can be known. Doesn't include directories or
563    /// metadata.
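    ///
    /// A minimal sketch of using this as a pre-extraction size check; the archive path and the
    /// 1 GiB threshold below are purely illustrative:
    ///
    /// ```no_run
    /// # fn run() -> zip::result::ZipResult<()> {
    /// let file = std::fs::File::open("archive.zip")?;
    /// let archive = zip::ZipArchive::new(file)?;
    /// match archive.decompressed_size() {
    ///     // `None` means at least one entry uses a data descriptor, so the total is unknown.
    ///     None => println!("total extracted size cannot be determined up front"),
    ///     Some(size) if size > 1024 * 1024 * 1024 => println!("refusing: would need {size} bytes"),
    ///     Some(size) => println!("archive expands to {size} bytes"),
    /// }
    /// # Ok(())
    /// # }
    /// ```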
564    pub fn decompressed_size(&self) -> Option<u128> {
565        let mut total = 0u128;
566        for file in self.shared.files.values() {
567            if file.using_data_descriptor {
568                return None;
569            }
570            total = total.checked_add(file.uncompressed_size as u128)?;
571        }
572        Some(total)
573    }
574}
575
576impl<R: Read + Seek> ZipArchive<R> {
577    pub(crate) fn merge_contents<W: Write + Seek>(
578        &mut self,
579        mut w: W,
580    ) -> ZipResult<IndexMap<Box<str>, ZipFileData>> {
581        if self.shared.files.is_empty() {
582            return Ok(IndexMap::new());
583        }
584        let mut new_files = self.shared.files.clone();
585        /* The first file header will probably start at the beginning of the file, but zip doesn't
586         * enforce that; executable zips like PEX files have a shebang line, so their first header
587         * definitely starts at an offset greater than 0.
588         *
589         * assert_eq!(0, new_files[0].header_start); // Avoid this.
590         */
591
592        let first_new_file_header_start = w.stream_position()?;
593
594        /* Push back file header starts for all entries in the covered files. */
595        new_files.values_mut().try_for_each(|f| {
596            /* This is probably the only really important thing to change. */
597            f.header_start = f
598                .header_start
599                .checked_add(first_new_file_header_start)
600                .ok_or(invalid!(
601                    "new header start from merge would have been too large"
602                ))?;
603            /* This is only ever used internally to cache metadata lookups (it's not part of the
604             * zip spec), and 0 is the sentinel value. */
605            f.central_header_start = 0;
606            /* This is an atomic variable so it can be updated from another thread in the
607             * implementation (which is good!). */
608            if let Some(old_data_start) = f.data_start.take() {
609                let new_data_start = old_data_start
610                    .checked_add(first_new_file_header_start)
611                    .ok_or(invalid!(
612                        "new data start from merge would have been too large"
613                    ))?;
614                f.data_start.get_or_init(|| new_data_start);
615            }
616            Ok::<_, ZipError>(())
617        })?;
618
619        /* Rewind to the beginning of the file.
620         *
621         * NB: we *could* decide to start copying from new_files[0].header_start instead, which
622         * would avoid copying over e.g. any pex shebangs or other file contents that start before
623         * the first zip file entry. However, zip files actually shouldn't care about garbage data
624         * in *between* real entries, since the central directory header records the correct start
625         * location of each, and keeping track of that math is more complicated logic that will only
626         * rarely be used, since most zips that get merged together are likely to be produced
627         * specifically for that purpose (and therefore are unlikely to have a shebang or other
628         * preface). Finally, this preserves any data that might actually be useful.
629         */
630        self.reader.rewind()?;
631        /* Find the end of the file data. */
632        let length_to_read = self.shared.dir_start;
633        /* Produce a Read that reads bytes up until the start of the central directory header.
634         * This "as &mut dyn Read" trick is used elsewhere to avoid having to clone the underlying
635         * handle, which shouldn't be necessary anyway. */
636        let mut limited_raw = (&mut self.reader as &mut dyn Read).take(length_to_read);
637        /* Copy over file data from source archive directly. */
638        io::copy(&mut limited_raw, &mut w)?;
639
640        /* Return the files we've just written to the data stream. */
641        Ok(new_files)
642    }
643
644    /// Get the directory start offset and number of files. This is done in a
645    /// separate function to keep the control flow simple.
646    pub(crate) fn get_metadata(config: Config, reader: &mut R) -> ZipResult<ZipArchiveMetadata> {
647        // End of the probed region, initially set to the end of the file
648        let file_len = reader.seek(io::SeekFrom::End(0))?;
649        let mut end_exclusive = file_len;
650        let mut last_err = None;
651
652        loop {
653            // Find the EOCD and possibly EOCD64 entries and determine the archive offset.
654            let cde = match spec::find_central_directory(
655                reader,
656                config.archive_offset,
657                end_exclusive,
658                file_len,
659            ) {
660                Ok(cde) => cde,
661                Err(e) => {
662                    // Return the previous error first, if there is one.
663                    return Err(last_err.unwrap_or(e));
664                }
665            };
666
667            // Turn EOCD into internal representation.
668            match CentralDirectoryInfo::try_from(&cde)
669                .and_then(|info| Self::read_central_header(info, config, reader))
670            {
671                Ok(shared) => {
672                    return Ok(shared.build(
673                        cde.eocd.data.zip_file_comment,
674                        cde.eocd64.map(|v| v.data.extensible_data_sector),
675                    ));
676                }
677                Err(e) => {
678                    last_err = Some(e);
679                }
680            };
681            // Something went wrong while decoding the cde, try to find a new one
682            end_exclusive = cde.eocd.position;
683            continue;
684        }
685    }
686
687    fn read_central_header(
688        dir_info: CentralDirectoryInfo,
689        config: Config,
690        reader: &mut R,
691    ) -> Result<zip_archive::SharedBuilder, ZipError> {
692        // If the parsed number of files is greater than the offset then
693        // something fishy is going on and we shouldn't trust number_of_files.
694        let file_capacity = if dir_info.number_of_files > dir_info.directory_start as usize {
695            0
696        } else {
697            dir_info.number_of_files
698        };
699
700        if dir_info.disk_number != dir_info.disk_with_central_directory {
701            return unsupported_zip_error("Support for multi-disk files is not implemented");
702        }
703
704        if file_capacity.saturating_mul(size_of::<ZipFileData>()) > isize::MAX as usize {
705            return unsupported_zip_error("Oversized central directory");
706        }
707
708        let mut files = Vec::with_capacity(file_capacity);
709        reader.seek(SeekFrom::Start(dir_info.directory_start))?;
710        for _ in 0..dir_info.number_of_files {
711            let file = central_header_to_zip_file(reader, &dir_info)?;
712            files.push(file);
713        }
714
715        Ok(zip_archive::SharedBuilder {
716            files,
717            offset: dir_info.archive_offset,
718            dir_start: dir_info.directory_start,
719            config,
720        })
721    }
722
723    /// Returns the verification value and salt for the AES encryption of the file
724    ///
725    /// It fails if the file number is invalid.
726    ///
727    /// # Returns
728    ///
729    /// - None if the file is not encrypted with AES
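    ///
    /// A small usage sketch (the archive path and the entry index `0` are placeholders):
    ///
    /// ```no_run
    /// # fn run() -> zip::result::ZipResult<()> {
    /// let file = std::fs::File::open("archive.zip")?;
    /// let mut archive = zip::ZipArchive::new(file)?;
    /// if let Some(aes_info) = archive.get_aes_verification_key_and_salt(0)? {
    ///     println!("mode: {:?}, salt is {} bytes long", aes_info.aes_mode, aes_info.salt.len());
    /// }
    /// # Ok(())
    /// # }
    /// ```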
730    #[cfg(feature = "aes-crypto")]
731    pub fn get_aes_verification_key_and_salt(
732        &mut self,
733        file_number: usize,
734    ) -> ZipResult<Option<AesInfo>> {
735        let (_, data) = self
736            .shared
737            .files
738            .get_index(file_number)
739            .ok_or(ZipError::FileNotFound)?;
740
741        let limit_reader = find_content(data, &mut self.reader)?;
742        match data.aes_mode {
743            None => Ok(None),
744            Some((aes_mode, _, _)) => {
745                let (verification_value, salt) =
746                    AesReader::new(limit_reader, aes_mode, data.compressed_size)
747                        .get_verification_value_and_salt()?;
748                let aes_info = AesInfo {
749                    aes_mode,
750                    verification_value,
751                    salt,
752                };
753                Ok(Some(aes_info))
754            }
755        }
756    }
757
758    /// Read a ZIP archive, collecting the files it contains.
759    ///
760    /// This uses the central directory record of the ZIP file, and ignores local file headers.
761    ///
762    /// A default [`Config`] is used.
763    pub fn new(reader: R) -> ZipResult<ZipArchive<R>> {
764        Self::with_config(Default::default(), reader)
765    }
766
767    /// Get the metadata associated with the ZIP archive.
768    ///
769    /// This can be used with [`Self::unsafe_new_with_metadata`] to create a new reader over the
770    /// same file without needing to reparse the metadata.
771    pub fn metadata(&self) -> Arc<ZipArchiveMetadata> {
772        self.shared.clone()
773    }
774
775    /// Read a ZIP archive using the given `metadata`.
776    ///
777    /// This is useful for creating multiple readers over the same file without
778    /// needing to reparse the metadata.
779    ///
780    /// # Safety
781    /// `unsafe` is used here to indicate that `reader` and `metadata` could
782    /// potentially be incompatible, and it is left to the caller to ensure that they are compatible.
783    ///
784    /// # Example
785    ///
786    /// ```no_run
787    /// # use std::fs;
788    /// use rayon::prelude::*;
789    ///
790    /// const FILE_NAME: &str = "my_data.zip";
791    ///
792    /// let file = fs::File::open(FILE_NAME).unwrap();
793    /// let mut archive = zip::ZipArchive::new(file).unwrap();
794    ///
795    /// let file_names = (0..archive.len())
796    ///     .into_par_iter()
797    ///     .map_init({
798    ///         let metadata = archive.metadata().clone();
799    ///         move || {
800    ///             let file = fs::File::open(FILE_NAME).unwrap();
801    ///             unsafe { zip::ZipArchive::unsafe_new_with_metadata(file, metadata.clone()) }
802    ///         }},
803    ///         |archive, i| {
804    ///             let mut file = archive.by_index(i).unwrap();
805    ///             file.enclosed_name()
806    ///         }
807    ///     )
808    ///     .filter_map(|name| name)
809    ///     .collect::<Vec<_>>();
810    /// ```
811    pub unsafe fn unsafe_new_with_metadata(reader: R, metadata: Arc<ZipArchiveMetadata>) -> Self {
812        Self {
813            reader,
814            shared: metadata,
815        }
816    }
817
818    /// Read a ZIP archive providing a read configuration, collecting the files it contains.
819    ///
820    /// This uses the central directory record of the ZIP file, and ignores local file headers.
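    ///
    /// A sketch using the default configuration, which is equivalent to calling
    /// [`ZipArchive::new`]; a non-default [`Config`] can be used to override archive
    /// offset detection:
    ///
    /// ```no_run
    /// # fn run() -> zip::result::ZipResult<()> {
    /// let file = std::fs::File::open("archive.zip")?;
    /// let archive = zip::ZipArchive::with_config(zip::read::Config::default(), file)?;
    /// println!("{} entries", archive.len());
    /// # Ok(())
    /// # }
    /// ```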
821    pub fn with_config(config: Config, mut reader: R) -> ZipResult<ZipArchive<R>> {
822        let shared = Self::get_metadata(config, &mut reader)?;
823
824        Ok(ZipArchive {
825            reader,
826            shared: shared.into(),
827        })
828    }
829
830    /// Extract a Zip archive into a directory, overwriting files if they
831    /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. Symbolic links are only
832    /// created and followed if the target is within the destination directory (this is checked
833    /// conservatively using [`std::fs::canonicalize`]).
834    ///
835    /// Extraction is not atomic. If an error is encountered, some of the files
836    /// may be left on disk. However, on Unix targets, any newly-created directory whose contents
837    /// have only been partially extracted will not be readable, writable, or usable as a process
838    /// working directory by any non-root user other than you.
839    ///
840    /// On Unix and Windows, symbolic links are extracted correctly. On other platforms such as
841    /// WebAssembly, symbolic links aren't supported, so they're extracted as normal files
842    /// containing the target path in UTF-8.
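    ///
    /// A minimal sketch (the archive and output paths are placeholders):
    ///
    /// ```no_run
    /// # fn run() -> zip::result::ZipResult<()> {
    /// let file = std::fs::File::open("archive.zip")?;
    /// let mut archive = zip::ZipArchive::new(file)?;
    /// archive.extract("extracted/")?;
    /// # Ok(())
    /// # }
    /// ```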
843    pub fn extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()> {
844        self.extract_internal(directory, None::<fn(&Path) -> bool>)
845    }
846
847    /// Extracts a Zip archive into a directory in the same fashion as
848    /// [`ZipArchive::extract`], but detects a "root" directory in the archive
849    /// (a single top-level directory that contains the rest of the archive's
850    /// entries) and extracts its contents directly.
851    ///
852    /// For a sensible default `filter`, you can use [`root_dir_common_filter`].
853    /// For a custom `filter`, see [`RootDirFilter`].
854    ///
855    /// See [`ZipArchive::root_dir`] for more information on how the root
856    /// directory is detected and the meaning of the `filter` parameter.
857    ///
858    /// ## Example
859    ///
860    /// Imagine a Zip archive with the following structure:
861    ///
862    /// ```text
863    /// root/file1.txt
864    /// root/file2.txt
865    /// root/sub/file3.txt
866    /// root/sub/subsub/file4.txt
867    /// ```
868    ///
869    /// If the archive is extracted to `foo` using [`ZipArchive::extract`],
870    /// the resulting directory structure will be:
871    ///
872    /// ```text
873    /// foo/root/file1.txt
874    /// foo/root/file2.txt
875    /// foo/root/sub/file3.txt
876    /// foo/root/sub/subsub/file4.txt
877    /// ```
878    ///
879    /// If the archive is extracted to `foo` using
880    /// [`ZipArchive::extract_unwrapped_root_dir`], the resulting directory
881    /// structure will be:
882    ///
883    /// ```text
884    /// foo/file1.txt
885    /// foo/file2.txt
886    /// foo/sub/file3.txt
887    /// foo/sub/subsub/file4.txt
888    /// ```
889    ///
890    /// ## Example - No Root Directory
891    ///
892    /// Imagine a Zip archive with the following structure:
893    ///
894    /// ```text
895    /// root/file1.txt
896    /// root/file2.txt
897    /// root/sub/file3.txt
898    /// root/sub/subsub/file4.txt
899    /// other/file5.txt
900    /// ```
901    ///
902    /// Due to the presence of the `other` directory,
903    /// [`ZipArchive::extract_unwrapped_root_dir`] will extract this in the same
904    /// fashion as [`ZipArchive::extract`] as there is now no "root directory."
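    ///
    /// A usage sketch matching the layout above; [`root_dir_common_filter`] is the sensible
    /// default filter mentioned earlier, and the paths are placeholders:
    ///
    /// ```no_run
    /// # fn run() -> zip::result::ZipResult<()> {
    /// let file = std::fs::File::open("archive.zip")?;
    /// let mut archive = zip::ZipArchive::new(file)?;
    /// archive.extract_unwrapped_root_dir("foo", zip::read::root_dir_common_filter)?;
    /// # Ok(())
    /// # }
    /// ```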
905    pub fn extract_unwrapped_root_dir<P: AsRef<Path>>(
906        &mut self,
907        directory: P,
908        root_dir_filter: impl RootDirFilter,
909    ) -> ZipResult<()> {
910        self.extract_internal(directory, Some(root_dir_filter))
911    }
912
913    fn extract_internal<P: AsRef<Path>>(
914        &mut self,
915        directory: P,
916        root_dir_filter: Option<impl RootDirFilter>,
917    ) -> ZipResult<()> {
918        use std::fs;
919
920        fs::create_dir_all(&directory)?;
921        let directory = directory.as_ref().canonicalize()?;
922
923        let root_dir = root_dir_filter
924            .and_then(|filter| {
925                self.root_dir(&filter)
926                    .transpose()
927                    .map(|root_dir| root_dir.map(|root_dir| (root_dir, filter)))
928            })
929            .transpose()?;
930
931        // If we have a root dir, simplify the path components to be more
932        // appropriate for passing to `safe_prepare_path`
933        let root_dir = root_dir
934            .as_ref()
935            .map(|(root_dir, filter)| {
936                crate::path::simplified_components(root_dir)
937                    .ok_or_else(|| {
938                        // Should be unreachable
939                        debug_assert!(false, "Invalid root dir path");
940
941                        invalid!("Invalid root dir path")
942                    })
943                    .map(|root_dir| (root_dir, filter))
944            })
945            .transpose()?;
946
947        #[cfg(unix)]
948        let mut files_by_unix_mode = Vec::new();
949
950        for i in 0..self.len() {
951            let mut file = self.by_index(i)?;
952
953            let mut outpath = directory.clone();
954            file.safe_prepare_path(directory.as_ref(), &mut outpath, root_dir.as_ref())?;
955
956            let symlink_target = if file.is_symlink() && (cfg!(unix) || cfg!(windows)) {
957                let mut target = Vec::with_capacity(file.size() as usize);
958                file.read_to_end(&mut target)?;
959                Some(target)
960            } else {
961                if file.is_dir() {
962                    crate::read::make_writable_dir_all(&outpath)?;
963                    continue;
964                }
965                None
966            };
967
968            drop(file);
969
970            if let Some(target) = symlink_target {
971                make_symlink(&outpath, &target, &self.shared.files)?;
972                continue;
973            }
974            let mut file = self.by_index(i)?;
975            let mut outfile = fs::File::create(&outpath)?;
976
977            io::copy(&mut file, &mut outfile)?;
978            #[cfg(unix)]
979            {
980                // Check for real permissions, which we'll set in a second pass
981                if let Some(mode) = file.unix_mode() {
982                    files_by_unix_mode.push((outpath.clone(), mode));
983                }
984            }
985            #[cfg(feature = "chrono")]
986            {
987                // Set original timestamp.
988                if let Some(last_modified) = file.last_modified() {
989                    if let Some(t) = datetime_to_systemtime(&last_modified) {
990                        outfile.set_modified(t)?;
991                    }
992                }
993            }
994        }
995        #[cfg(unix)]
996        {
997            use std::cmp::Reverse;
998            use std::os::unix::fs::PermissionsExt;
999
1000            if files_by_unix_mode.len() > 1 {
1001                // Ensure we update children's permissions before making a parent unwritable
1002                files_by_unix_mode.sort_by_key(|(path, _)| Reverse(path.clone()));
1003            }
1004            for (path, mode) in files_by_unix_mode.into_iter() {
1005                fs::set_permissions(&path, fs::Permissions::from_mode(mode))?;
1006            }
1007        }
1008        Ok(())
1009    }
1010
1011    /// Number of files contained in this zip.
1012    pub fn len(&self) -> usize {
1013        self.shared.files.len()
1014    }
1015
1016    /// Get the starting offset of the zip central directory.
1017    pub fn central_directory_start(&self) -> u64 {
1018        self.shared.dir_start
1019    }
1020
1021    /// Whether this zip archive contains no files
1022    pub fn is_empty(&self) -> bool {
1023        self.len() == 0
1024    }
1025
1026    /// Get the byte offset within the underlying reader at which this zip archive begins.
1027    ///
1028    /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
1029    /// of that prepended data.
1030    pub fn offset(&self) -> u64 {
1031        self.shared.offset
1032    }
1033
1034    /// Get the comment of the zip archive.
1035    pub fn comment(&self) -> &[u8] {
1036        &self.shared.comment
1037    }
1038
1039    /// Get the ZIP64 comment of the zip archive, if it is ZIP64.
1040    pub fn zip64_comment(&self) -> Option<&[u8]> {
1041        self.shared.zip64_comment.as_deref()
1042    }
1043
1044    /// Returns an iterator over all the file and directory names in this archive.
1045    pub fn file_names(&self) -> impl Iterator<Item = &str> {
1046        self.shared.files.keys().map(|s| s.as_ref())
1047    }
1048
1049    /// Returns Ok(true) if any compressed data in this archive belongs to more than one file. This
1050    /// doesn't make the archive invalid, but some programs will refuse to decompress it because the
1051    /// copies would take up space independently in the destination.
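    ///
    /// A quick check before extracting, as a sketch:
    ///
    /// ```no_run
    /// # fn run() -> zip::result::ZipResult<()> {
    /// let file = std::fs::File::open("archive.zip")?;
    /// let mut archive = zip::ZipArchive::new(file)?;
    /// if archive.has_overlapping_files()? {
    ///     eprintln!("warning: some compressed data is shared between multiple entries");
    /// }
    /// # Ok(())
    /// # }
    /// ```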
1052    pub fn has_overlapping_files(&mut self) -> ZipResult<bool> {
1053        let mut ranges = Vec::<Range<u64>>::with_capacity(self.shared.files.len());
1054        for file in self.shared.files.values() {
1055            if file.compressed_size == 0 {
1056                continue;
1057            }
1058            let start = file.data_start(&mut self.reader)?;
1059            let end = start + file.compressed_size;
1060            if ranges
1061                .iter()
1062                .any(|range| range.start <= end && start <= range.end)
1063            {
1064                return Ok(true);
1065            }
1066            ranges.push(start..end);
1067        }
1068        Ok(false)
1069    }
1070
1071    /// Search for a file entry by name, decrypt with given password
1072    ///
1073    /// # Warning
1074    ///
1075    /// The implementation of the cryptographic algorithms has not
1076    /// gone through a correctness review, and you should assume it is insecure:
1077    /// passwords used with this API may be compromised.
1078    ///
1079    /// This function sometimes accepts a wrong password. This is because the ZIP spec only allows us
1080    /// to check for a 1/256 chance that the password is correct.
1081    /// There are many passwords out there that will also pass the validity checks
1082    /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
1083    /// due to its fairly primitive approach to cryptography.
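    ///
    /// A small usage sketch (the entry name and password are placeholders):
    ///
    /// ```no_run
    /// # fn run() -> zip::result::ZipResult<()> {
    /// use std::io::Read;
    /// let file = std::fs::File::open("archive.zip")?;
    /// let mut archive = zip::ZipArchive::new(file)?;
    /// let mut entry = archive.by_name_decrypt("secret.txt", b"password")?;
    /// let mut contents = String::new();
    /// entry.read_to_string(&mut contents)?;
    /// # Ok(())
    /// # }
    /// ```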
1084    pub fn by_name_decrypt(&mut self, name: &str, password: &[u8]) -> ZipResult<ZipFile<'_, R>> {
1085        self.by_name_with_optional_password(name, Some(password))
1086    }
1087
1088    /// Search for a file entry by name
1089    pub fn by_name(&mut self, name: &str) -> ZipResult<ZipFile<'_, R>> {
1090        self.by_name_with_optional_password(name, None)
1091    }
1092
1093    /// Get the index of a file entry by name, if it's present.
1094    #[inline(always)]
1095    pub fn index_for_name(&self, name: &str) -> Option<usize> {
1096        self.shared.files.get_index_of(name)
1097    }
1098
1099    /// Search for a file entry by path, decrypt with given password
1100    ///
1101    /// # Warning
1102    ///
1103    /// The implementation of the cryptographic algorithms has not
1104    /// gone through a correctness review, and you should assume it is insecure:
1105    /// passwords used with this API may be compromised.
1106    ///
1107    /// This function sometimes accepts a wrong password. This is because the ZIP spec only allows us
1108    /// to check for a 1/256 chance that the password is correct.
1109    /// There are many passwords out there that will also pass the validity checks
1110    /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
1111    /// due to its fairly primitive approach to cryptography.
1112    pub fn by_path_decrypt<T: AsRef<Path>>(
1113        &mut self,
1114        path: T,
1115        password: &[u8],
1116    ) -> ZipResult<ZipFile<'_, R>> {
1117        self.index_for_path(path)
1118            .ok_or(ZipError::FileNotFound)
1119            .and_then(|index| {
1120                self.by_index_with_options(index, ZipReadOptions::new().password(Some(password)))
1121            })
1122    }
1123
1124    /// Search for a file entry by path
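    ///
    /// Accepts anything convertible to a [`Path`]; a sketch with placeholder paths:
    ///
    /// ```no_run
    /// # fn run() -> zip::result::ZipResult<()> {
    /// use std::io::Read;
    /// let file = std::fs::File::open("archive.zip")?;
    /// let mut archive = zip::ZipArchive::new(file)?;
    /// let mut entry = archive.by_path(std::path::Path::new("dir/nested.txt"))?;
    /// let mut buf = Vec::new();
    /// entry.read_to_end(&mut buf)?;
    /// # Ok(())
    /// # }
    /// ```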
1125    pub fn by_path<T: AsRef<Path>>(&mut self, path: T) -> ZipResult<ZipFile<'_, R>> {
1126        self.index_for_path(path)
1127            .ok_or(ZipError::FileNotFound)
1128            .and_then(|index| self.by_index_with_options(index, ZipReadOptions::new()))
1129    }
1130
1131    /// Get the index of a file entry by path, if it's present.
1132    #[inline(always)]
1133    pub fn index_for_path<T: AsRef<Path>>(&self, path: T) -> Option<usize> {
1134        self.index_for_name(&path_to_string(path))
1135    }
1136
1137    /// Get the name of a file entry, if it's present.
1138    #[inline(always)]
1139    pub fn name_for_index(&self, index: usize) -> Option<&str> {
1140        self.shared
1141            .files
1142            .get_index(index)
1143            .map(|(name, _)| name.as_ref())
1144    }
1145
1146    /// Search for a file entry by name and return a seekable object.
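    ///
    /// Only `Stored` (uncompressed) entries can currently be read this way; a sketch with
    /// placeholder names and offsets:
    ///
    /// ```no_run
    /// # fn run() -> zip::result::ZipResult<()> {
    /// use std::io::{Read, Seek, SeekFrom};
    /// let file = std::fs::File::open("archive.zip")?;
    /// let mut archive = zip::ZipArchive::new(file)?;
    /// let mut entry = archive.by_name_seek("data.bin")?;
    /// entry.seek(SeekFrom::Start(128))?;
    /// let mut buf = [0u8; 16];
    /// entry.read_exact(&mut buf)?;
    /// # Ok(())
    /// # }
    /// ```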
1147    pub fn by_name_seek(&mut self, name: &str) -> ZipResult<ZipFileSeek<'_, R>> {
1148        self.by_index_seek(self.index_for_name(name).ok_or(ZipError::FileNotFound)?)
1149    }
1150
1151    /// Search for a file entry by index and return a seekable object.
1152    pub fn by_index_seek(&mut self, index: usize) -> ZipResult<ZipFileSeek<'_, R>> {
1153        let reader = &mut self.reader;
1154        self.shared
1155            .files
1156            .get_index(index)
1157            .ok_or(ZipError::FileNotFound)
1158            .and_then(move |(_, data)| {
1159                let seek_reader = match data.compression_method {
1160                    CompressionMethod::Stored => {
1161                        ZipFileSeekReader::Raw(find_content_seek(data, reader)?)
1162                    }
1163                    _ => {
1164                        return Err(ZipError::UnsupportedArchive(
1165                            "Seekable compressed files are not yet supported",
1166                        ))
1167                    }
1168                };
1169                Ok(ZipFileSeek {
1170                    reader: seek_reader,
1171                    data: Cow::Borrowed(data),
1172                })
1173            })
1174    }
1175
1176    fn by_name_with_optional_password<'a>(
1177        &'a mut self,
1178        name: &str,
1179        password: Option<&[u8]>,
1180    ) -> ZipResult<ZipFile<'a, R>> {
1181        let Some(index) = self.shared.files.get_index_of(name) else {
1182            return Err(ZipError::FileNotFound);
1183        };
1184        self.by_index_with_options(index, ZipReadOptions::new().password(password))
1185    }
1186
1187    /// Get a contained file by index, decrypt with given password
1188    ///
1189    /// # Warning
1190    ///
1191    /// The implementation of the cryptographic algorithms has not
1192    /// gone through a correctness review, and you should assume it is insecure:
1193    /// passwords used with this API may be compromised.
1194    ///
1195    /// This function sometimes accepts a wrong password. This is because the ZIP spec only allows us
1196    /// to check for a 1/256 chance that the password is correct.
1197    /// There are many passwords out there that will also pass the validity checks
1198    /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
1199    /// due to its fairly primitive approach to cryptography.
1200    pub fn by_index_decrypt(
1201        &mut self,
1202        file_number: usize,
1203        password: &[u8],
1204    ) -> ZipResult<ZipFile<'_, R>> {
1205        self.by_index_with_options(file_number, ZipReadOptions::new().password(Some(password)))
1206    }
1207
1208    /// Get a contained file by index
1209    pub fn by_index(&mut self, file_number: usize) -> ZipResult<ZipFile<'_, R>> {
1210        self.by_index_with_options(file_number, ZipReadOptions::new())
1211    }
1212
1213    /// Get a contained file by index without decompressing it
1214    pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult<ZipFile<'_, R>> {
1215        let reader = &mut self.reader;
1216        let (_, data) = self
1217            .shared
1218            .files
1219            .get_index(file_number)
1220            .ok_or(ZipError::FileNotFound)?;
1221        Ok(ZipFile {
1222            reader: ZipFileReader::Raw(find_content(data, reader)?),
1223            data: Cow::Borrowed(data),
1224        })
1225    }
1226
1227    /// Get a contained file by index with options.
1228    pub fn by_index_with_options(
1229        &mut self,
1230        file_number: usize,
1231        mut options: ZipReadOptions<'_>,
1232    ) -> ZipResult<ZipFile<'_, R>> {
1233        let (_, data) = self
1234            .shared
1235            .files
1236            .get_index(file_number)
1237            .ok_or(ZipError::FileNotFound)?;
1238
1239        if options.ignore_encryption_flag {
1240            // Always use no password when we're ignoring the encryption flag.
1241            options.password = None;
1242        } else {
1243            // Require and use the password only if the file is encrypted.
1244            match (options.password, data.encrypted) {
1245                (None, true) => {
1246                    return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED))
1247                }
1248                // Password supplied, but none needed! Discard.
1249                (Some(_), false) => options.password = None,
1250                _ => {}
1251            }
1252        }
1253        let limit_reader = find_content(data, &mut self.reader)?;
1254
1255        let crypto_reader =
1256            make_crypto_reader(data, limit_reader, options.password, data.aes_mode)?;
1257
1258        Ok(ZipFile {
1259            data: Cow::Borrowed(data),
1260            reader: make_reader(
1261                data.compression_method,
1262                data.uncompressed_size,
1263                data.crc32,
1264                crypto_reader,
1265                data.flags,
1266            )?,
1267        })
1268    }
1269
1270    /// Find the "root directory" of an archive if it exists, filtering out
1271    /// irrelevant entries when searching.
1272    ///
1273    /// Our definition of a "root directory" is a single top-level directory
1274    /// that contains the rest of the archive's entries. This is useful for
1275    /// extracting archives that contain a single top-level directory that
1276    /// you want to "unwrap" and extract directly.
1277    ///
1278    /// For a sensible default filter, you can use [`root_dir_common_filter`].
1279    /// For a custom filter, see [`RootDirFilter`].
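    ///
    /// A short sketch using the default filter:
    ///
    /// ```no_run
    /// # fn run() -> zip::result::ZipResult<()> {
    /// let file = std::fs::File::open("archive.zip")?;
    /// let archive = zip::ZipArchive::new(file)?;
    /// if let Some(root) = archive.root_dir(zip::read::root_dir_common_filter)? {
    ///     println!("single top-level directory: {}", root.display());
    /// }
    /// # Ok(())
    /// # }
    /// ```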
1280    pub fn root_dir(&self, filter: impl RootDirFilter) -> ZipResult<Option<PathBuf>> {
1281        let mut root_dir: Option<PathBuf> = None;
1282
1283        for i in 0..self.len() {
1284            let (_, file) = self
1285                .shared
1286                .files
1287                .get_index(i)
1288                .ok_or(ZipError::FileNotFound)?;
1289
1290            let path = match file.enclosed_name() {
1291                Some(path) => path,
1292                None => return Ok(None),
1293            };
1294
1295            if !filter(&path) {
1296                continue;
1297            }
1298
1299            macro_rules! replace_root_dir {
1300                ($path:ident) => {
1301                    match &mut root_dir {
1302                        Some(root_dir) => {
1303                            if *root_dir != $path {
1304                                // We've found multiple root directories,
1305                                // abort.
1306                                return Ok(None);
1307                            } else {
1308                                continue;
1309                            }
1310                        }
1311
1312                        None => {
1313                            root_dir = Some($path.into());
1314                            continue;
1315                        }
1316                    }
1317                };
1318            }
1319
1320            // If this entry is located at the root of the archive...
1321            if path.components().count() == 1 {
1322                if file.is_dir() {
1323                    // If it's a directory, it could be the root directory.
1324                    replace_root_dir!(path);
1325                } else {
1326                    // If it's anything else, this archive does not have a
1327                    // root directory.
1328                    return Ok(None);
1329                }
1330            }
1331
1332            // Find the root directory for this entry.
1333            let mut path = path.as_path();
1334            while let Some(parent) = path.parent().filter(|path| *path != Path::new("")) {
1335                path = parent;
1336            }
1337
1338            replace_root_dir!(path);
1339        }
1340
1341        Ok(root_dir)
1342    }
1343
1344    /// Unwrap and return the inner reader object
1345    ///
1346    /// The position of the reader is undefined.
1347    pub fn into_inner(self) -> R {
1348        self.reader
1349    }
1350}
1351
1352/// Holds the AES information of a file in the zip archive
1353#[derive(Debug)]
1354#[cfg(feature = "aes-crypto")]
1355pub struct AesInfo {
1356    /// The AES encryption mode
1357    pub aes_mode: AesMode,
1358    /// The verification key
1359    pub verification_value: [u8; PWD_VERIFY_LENGTH],
1360    /// The salt
1361    pub salt: Vec<u8>,
1362}
1363
1364const fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
1365    Err(ZipError::UnsupportedArchive(detail))
1366}
1367
1368/// Parse a central directory entry to collect the information for the file.
1369pub(crate) fn central_header_to_zip_file<R: Read + Seek>(
1370    reader: &mut R,
1371    central_directory: &CentralDirectoryInfo,
1372) -> ZipResult<ZipFileData> {
1373    let central_header_start = reader.stream_position()?;
1374
1375    // Parse central header
1376    let block = ZipCentralEntryBlock::parse(reader)?;
1377
1378    let file = central_header_to_zip_file_inner(
1379        reader,
1380        central_directory.archive_offset,
1381        central_header_start,
1382        block,
1383    )?;
1384
1385    let central_header_end = reader.stream_position()?;
1386
1387    reader.seek(SeekFrom::Start(central_header_end))?;
1388    Ok(file)
1389}
1390
1391#[inline]
1392fn read_variable_length_byte_field<R: Read>(reader: &mut R, len: usize) -> io::Result<Box<[u8]>> {
1393    let mut data = vec![0; len].into_boxed_slice();
1394    reader.read_exact(&mut data)?;
1395    Ok(data)
1396}
1397
1398/// Parse a central directory entry to collect the information for the file.
1399fn central_header_to_zip_file_inner<R: Read>(
1400    reader: &mut R,
1401    archive_offset: u64,
1402    central_header_start: u64,
1403    block: ZipCentralEntryBlock,
1404) -> ZipResult<ZipFileData> {
1405    let ZipCentralEntryBlock {
1406        // magic,
1407        version_made_by,
1408        // version_to_extract,
1409        flags,
1410        compression_method,
1411        last_mod_time,
1412        last_mod_date,
1413        crc32,
1414        compressed_size,
1415        uncompressed_size,
1416        file_name_length,
1417        extra_field_length,
1418        file_comment_length,
1419        // disk_number,
1420        // internal_file_attributes,
1421        external_file_attributes,
1422        offset,
1423        ..
1424    } = block;
1425
1426    let encrypted = flags & 1 == 1;
1427    let is_utf8 = flags & (1 << 11) != 0;
1428    let using_data_descriptor = flags & (1 << 3) != 0;
1429
1430    let file_name_raw = read_variable_length_byte_field(reader, file_name_length as usize)?;
1431    let extra_field = read_variable_length_byte_field(reader, extra_field_length as usize)?;
1432    let file_comment_raw = read_variable_length_byte_field(reader, file_comment_length as usize)?;
1433    let file_name: Box<str> = match is_utf8 {
1434        true => String::from_utf8_lossy(&file_name_raw).into(),
1435        false => file_name_raw.clone().from_cp437(),
1436    };
1437    let file_comment: Box<str> = match is_utf8 {
1438        true => String::from_utf8_lossy(&file_comment_raw).into(),
1439        false => file_comment_raw.from_cp437(),
1440    };
1441
1442    // Construct the result
1443    let mut result = ZipFileData {
1444        system: System::from((version_made_by >> 8) as u8),
1445        /* NB: this strips the top 8 bits! */
1446        version_made_by: version_made_by as u8,
1447        encrypted,
1448        using_data_descriptor,
1449        is_utf8,
1450        compression_method: CompressionMethod::parse_from_u16(compression_method),
1451        compression_level: None,
1452        last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
1453        crc32,
1454        compressed_size: compressed_size.into(),
1455        uncompressed_size: uncompressed_size.into(),
1456        flags,
1457        file_name,
1458        file_name_raw,
1459        extra_field: Some(Arc::new(extra_field.to_vec())),
1460        central_extra_field: None,
1461        file_comment,
1462        header_start: offset.into(),
1463        extra_data_start: None,
1464        central_header_start,
1465        data_start: OnceLock::new(),
1466        external_attributes: external_file_attributes,
1467        large_file: false,
1468        aes_mode: None,
1469        aes_extra_data_start: 0,
1470        extra_fields: Vec::new(),
1471    };
1472    match parse_extra_field(&mut result) {
1473        Ok(stripped_extra_field) => {
1474            result.extra_field = stripped_extra_field;
1475        }
1476        Err(ZipError::Io(..)) => {}
1477        Err(e) => return Err(e),
1478    }
1479
1480    let aes_enabled = result.compression_method == CompressionMethod::AES;
1481    if aes_enabled && result.aes_mode.is_none() {
1482        return Err(invalid!("AES encryption without AES extra data field"));
1483    }
1484
1485    // Account for shifted zip offsets.
1486    result.header_start = result
1487        .header_start
1488        .checked_add(archive_offset)
1489        .ok_or(invalid!("Archive header is too large"))?;
1490
1491    Ok(result)
1492}
1493
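/// Walk the extra-data blob attached to `file`, folding recognized records into `file` itself.
///
/// Returns the blob with any ZIP64 extended-information record stripped out, or `None` if
/// nothing remains after stripping.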
1494pub(crate) fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<Option<Arc<Vec<u8>>>> {
1495    let Some(ref extra_field) = file.extra_field else {
1496        return Ok(None);
1497    };
1498    let extra_field = extra_field.clone();
1499    let mut processed_extra_field = extra_field.clone();
1500    let len = extra_field.len();
1501    let mut reader = io::Cursor::new(&**extra_field);
1502
1503    /* TODO: codify this structure into Zip64ExtraFieldBlock fields! */
1504    let mut position = reader.position() as usize;
    while position < len {
1506        let old_position = position;
1507        let remove = parse_single_extra_field(file, &mut reader, position as u64, false)?;
1508        position = reader.position() as usize;
1509        if remove {
1510            let remaining = len - (position - old_position);
1511            if remaining == 0 {
1512                return Ok(None);
1513            }
1514            let mut new_extra_field = Vec::with_capacity(remaining);
1515            new_extra_field.extend_from_slice(&extra_field[0..old_position]);
1516            new_extra_field.extend_from_slice(&extra_field[position..]);
1517            processed_extra_field = Arc::new(new_extra_field);
1518        }
1519    }
1520    Ok(Some(processed_extra_field))
1521}
1522
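/// Parse a single extra-data record from `reader`, folding its contents into `file`.
///
/// Each record is a little-endian header ID (`u16`) and data size (`u16`) followed by that many
/// bytes of payload; records with unrecognized IDs are skipped. Returns `true` when the record
/// should be stripped from the stored extra data (currently only the ZIP64 record).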
1523pub(crate) fn parse_single_extra_field<R: Read>(
1524    file: &mut ZipFileData,
1525    reader: &mut R,
1526    bytes_already_read: u64,
1527    disallow_zip64: bool,
1528) -> ZipResult<bool> {
1529    let kind = reader.read_u16_le()?;
1530    let len = reader.read_u16_le()?;
1531    match kind {
1532        // Zip64 extended information extra field
1533        0x0001 => {
1534            if disallow_zip64 {
1535                return Err(invalid!("Can't write a custom field using the ZIP64 ID"));
1536            }
1537            file.large_file = true;
1538            let mut consumed_len = 0;
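            // Each value below is only present when the corresponding 32-bit field hit the
            // ZIP64 threshold (`ZIP64_BYTES_THR`); a record of at least 24 bytes is treated
            // as carrying all three values.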
1539            if len >= 24 || file.uncompressed_size == spec::ZIP64_BYTES_THR {
1540                file.uncompressed_size = reader.read_u64_le()?;
1541                consumed_len += size_of::<u64>();
1542            }
1543            if len >= 24 || file.compressed_size == spec::ZIP64_BYTES_THR {
1544                file.compressed_size = reader.read_u64_le()?;
1545                consumed_len += size_of::<u64>();
1546            }
1547            if len >= 24 || file.header_start == spec::ZIP64_BYTES_THR {
1548                file.header_start = reader.read_u64_le()?;
1549                consumed_len += size_of::<u64>();
1550            }
1551            let Some(leftover_len) = (len as usize).checked_sub(consumed_len) else {
1552                return Err(invalid!("ZIP64 extra-data field is the wrong length"));
1553            };
1554            reader.read_exact(&mut vec![0u8; leftover_len])?;
1555            return Ok(true);
1556        }
1557        0x000a => {
1558            // NTFS extra field
1559            file.extra_fields
1560                .push(ExtraField::Ntfs(Ntfs::try_from_reader(reader, len)?));
1561        }
1562        0x9901 => {
1563            // AES
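            // The 7-byte AE-x record: vendor version (u16), vendor ID (u16, "AE"),
            // AES strength (u8), and the real compression method (u16).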
1564            if len != 7 {
1565                return Err(ZipError::UnsupportedArchive(
1566                    "AES extra data field has an unsupported length",
1567                ));
1568            }
1569            let vendor_version = reader.read_u16_le()?;
1570            let vendor_id = reader.read_u16_le()?;
1571            let mut out = [0u8];
1572            reader.read_exact(&mut out)?;
1573            let aes_mode = out[0];
1574            let compression_method = CompressionMethod::parse_from_u16(reader.read_u16_le()?);
1575
1576            if vendor_id != 0x4541 {
1577                return Err(invalid!("Invalid AES vendor"));
1578            }
1579            let vendor_version = match vendor_version {
1580                0x0001 => AesVendorVersion::Ae1,
1581                0x0002 => AesVendorVersion::Ae2,
1582                _ => return Err(invalid!("Invalid AES vendor version")),
1583            };
1584            match aes_mode {
1585                0x01 => file.aes_mode = Some((AesMode::Aes128, vendor_version, compression_method)),
1586                0x02 => file.aes_mode = Some((AesMode::Aes192, vendor_version, compression_method)),
1587                0x03 => file.aes_mode = Some((AesMode::Aes256, vendor_version, compression_method)),
1588                _ => return Err(invalid!("Invalid AES encryption strength")),
1589            };
1590            file.compression_method = compression_method;
1591            file.aes_extra_data_start = bytes_already_read;
1592        }
1593        0x5455 => {
1594            // extended timestamp
1595            // https://libzip.org/specifications/extrafld.txt
1596
1597            file.extra_fields.push(ExtraField::ExtendedTimestamp(
1598                ExtendedTimestamp::try_from_reader(reader, len)?,
1599            ));
1600        }
1601        0x6375 => {
1602            // Info-ZIP Unicode Comment Extra Field
1603            // APPNOTE 4.6.8 and https://libzip.org/specifications/extrafld.txt
1604            file.file_comment = String::from_utf8(
1605                UnicodeExtraField::try_from_reader(reader, len)?
1606                    .unwrap_valid(file.file_comment.as_bytes())?
1607                    .into_vec(),
1608            )?
1609            .into();
1610        }
1611        0x7075 => {
1612            // Info-ZIP Unicode Path Extra Field
1613            // APPNOTE 4.6.9 and https://libzip.org/specifications/extrafld.txt
1614            file.file_name_raw = UnicodeExtraField::try_from_reader(reader, len)?
1615                .unwrap_valid(&file.file_name_raw)?;
1616            file.file_name =
1617                String::from_utf8(file.file_name_raw.clone().into_vec())?.into_boxed_str();
1618            file.is_utf8 = true;
1619        }
1620        _ => {
1621            reader.read_exact(&mut vec![0u8; len as usize])?;
1622            // Other fields are ignored
1623        }
1624    }
1625    Ok(false)
1626}
1627
1628/// A trait for exposing file metadata inside the zip.
1629pub trait HasZipMetadata {
1630    /// Get the file metadata
1631    fn get_metadata(&self) -> &ZipFileData;
1632}
1633
1634/// Options for reading a file from an archive.
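///
/// # Example
///
/// A minimal sketch of supplying a password when opening an entry; it assumes the
/// `ZipArchive::by_index_with_options` accessor used in this crate's tests.
///
/// ```no_run
/// use std::io::{Read, Seek};
/// use zip::read::ZipReadOptions;
///
/// fn read_protected<R: Read + Seek>(archive: &mut zip::ZipArchive<R>) -> zip::result::ZipResult<()> {
///     let options = ZipReadOptions::new().password(Some("secret".as_bytes()));
///     let mut file = archive.by_index_with_options(0, options)?;
///     std::io::copy(&mut file, &mut std::io::sink())?;
///     Ok(())
/// }
/// ```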
1635#[derive(Default)]
1636pub struct ZipReadOptions<'a> {
1637    /// The password to use when decrypting the file.  This is ignored if not required.
1638    password: Option<&'a [u8]>,
1639
1640    /// Ignore the value of the encryption flag and proceed as if the file were plaintext.
1641    ignore_encryption_flag: bool,
1642}
1643
1644impl<'a> ZipReadOptions<'a> {
1645    /// Create a new set of options with the default values.
1646    #[must_use]
1647    pub fn new() -> Self {
1648        Self::default()
1649    }
1650
1651    /// Set the password, if any, to use.  Return for chaining.
1652    #[must_use]
1653    pub fn password(mut self, password: Option<&'a [u8]>) -> Self {
1654        self.password = password;
1655        self
1656    }
1657
1658    /// Set the ignore encryption flag.  Return for chaining.
1659    #[must_use]
1660    pub fn ignore_encryption_flag(mut self, ignore: bool) -> Self {
1661        self.ignore_encryption_flag = ignore;
1662        self
1663    }
1664}
1665
1666/// Methods for retrieving information on zip files
1667impl<'a, R: Read> ZipFile<'a, R> {
1668    pub(crate) fn take_raw_reader(&mut self) -> io::Result<io::Take<&'a mut R>> {
1669        replace(&mut self.reader, ZipFileReader::NoReader).into_inner()
1670    }
1671
    /// Get the ZIP specification version with which the file was created, as a `(major, minor)` pair
1673    pub fn version_made_by(&self) -> (u8, u8) {
1674        (
1675            self.get_metadata().version_made_by / 10,
1676            self.get_metadata().version_made_by % 10,
1677        )
1678    }
1679
1680    /// Get the name of the file
1681    ///
1682    /// # Warnings
1683    ///
1684    /// It is dangerous to use this name directly when extracting an archive.
1685    /// It may contain an absolute path (`/etc/shadow`), or break out of the
1686    /// current directory (`../runtime`). Carelessly writing to these paths
1687    /// allows an attacker to craft a ZIP archive that will overwrite critical
1688    /// files.
1689    ///
1690    /// You can use the [`ZipFile::enclosed_name`] method to validate the name
1691    /// as a safe path.
1692    pub fn name(&self) -> &str {
1693        &self.get_metadata().file_name
1694    }
1695
1696    /// Get the name of the file, in the raw (internal) byte representation.
1697    ///
1698    /// The encoding of this data is currently undefined.
1699    pub fn name_raw(&self) -> &[u8] {
1700        &self.get_metadata().file_name_raw
1701    }
1702
1703    /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte,
1704    /// removes a leading '/' and removes '..' parts.
1705    #[deprecated(
1706        since = "0.5.7",
1707        note = "by stripping `..`s from the path, the meaning of paths can change.
1708                `mangled_name` can be used if this behaviour is desirable"
1709    )]
1710    pub fn sanitized_name(&self) -> PathBuf {
1711        self.mangled_name()
1712    }
1713
1714    /// Rewrite the path, ignoring any path components with special meaning.
1715    ///
1716    /// - Absolute paths are made relative
1717    /// - [`ParentDir`]s are ignored
1718    /// - Truncates the filename at a NULL byte
1719    ///
1720    /// This is appropriate if you need to be able to extract *something* from
1721    /// any archive, but will easily misrepresent trivial paths like
1722    /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
1723    /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
1724    ///
1725    /// [`ParentDir`]: `Component::ParentDir`
1726    pub fn mangled_name(&self) -> PathBuf {
1727        self.get_metadata().file_name_sanitized()
1728    }
1729
1730    /// Ensure the file path is safe to use as a [`Path`].
1731    ///
1732    /// - It can't contain NULL bytes
1733    /// - It can't resolve to a path outside the current directory
1734    ///   > `foo/../bar` is fine, `foo/../../bar` is not.
1735    /// - It can't be an absolute path
1736    ///
1737    /// This will read well-formed ZIP files correctly, and is resistant
1738    /// to path-based exploits. It is recommended over
1739    /// [`ZipFile::mangled_name`].
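    ///
    /// # Example
    ///
    /// A minimal sketch of the usual extraction guard (any `Read + Seek` source works):
    ///
    /// ```no_run
    /// use std::io::{Read, Seek};
    ///
    /// fn check_paths<R: Read + Seek>(archive: &mut zip::ZipArchive<R>) -> zip::result::ZipResult<()> {
    ///     for i in 0..archive.len() {
    ///         let file = archive.by_index(i)?;
    ///         match file.enclosed_name() {
    ///             Some(path) => println!("safe to extract: {}", path.display()),
    ///             None => println!("skipping unsafe path: {:?}", file.name_raw()),
    ///         }
    ///     }
    ///     Ok(())
    /// }
    /// ```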
1740    pub fn enclosed_name(&self) -> Option<PathBuf> {
1741        self.get_metadata().enclosed_name()
1742    }
1743
1744    pub(crate) fn simplified_components(&self) -> Option<Vec<&OsStr>> {
1745        self.get_metadata().simplified_components()
1746    }
1747
    /// Prepare the path for extraction by creating any missing parent directories and checking
    /// that every symlink encountered stays contained within the base path.
    ///
    /// The `base_path` parameter is assumed to already be canonicalized.
1751    pub(crate) fn safe_prepare_path(
1752        &self,
1753        base_path: &Path,
1754        outpath: &mut PathBuf,
1755        root_dir: Option<&(Vec<&OsStr>, impl RootDirFilter)>,
1756    ) -> ZipResult<()> {
1757        let components = self
1758            .simplified_components()
1759            .ok_or(invalid!("Invalid file path"))?;
1760
1761        let components = match root_dir {
1762            Some((root_dir, filter)) => match components.strip_prefix(&**root_dir) {
1763                Some(components) => components,
1764
1765                // In this case, we expect that the file was not in the root
1766                // directory, but was filtered out when searching for the
1767                // root directory.
1768                None => {
1769                    // We could technically find ourselves at this code
1770                    // path if the user provides an unstable or
1771                    // non-deterministic `filter` function.
1772                    //
1773                    // If debug assertions are on, we should panic here.
1774                    // Otherwise, the safest thing to do here is to just
1775                    // extract as-is.
1776                    debug_assert!(
1777                        !filter(&PathBuf::from_iter(components.iter())),
1778                        "Root directory filter should not match at this point"
1779                    );
1780
1781                    // Extract as-is.
1782                    &components[..]
1783                }
1784            },
1785
1786            None => &components[..],
1787        };
1788
1789        let components_len = components.len();
1790
1791        for (is_last, component) in components
1792            .iter()
1793            .copied()
1794            .enumerate()
1795            .map(|(i, c)| (i == components_len - 1, c))
1796        {
            // We can skip checks on the target directory itself because the base path is assumed to be "trusted" (if the user asks to extract into a symlink, we follow it).
1798            outpath.push(component);
1799
            // Check whether the path is a symlink; its target must be _inherently_ within the directory.
1801            for limit in (0..5u8).rev() {
1802                let meta = match std::fs::symlink_metadata(&outpath) {
1803                    Ok(meta) => meta,
1804                    Err(e) if e.kind() == io::ErrorKind::NotFound => {
1805                        if !is_last {
1806                            crate::read::make_writable_dir_all(&outpath)?;
1807                        }
1808                        break;
1809                    }
1810                    Err(e) => return Err(e.into()),
1811                };
1812
1813                if !meta.is_symlink() {
1814                    break;
1815                }
1816
1817                if limit == 0 {
1818                    return Err(invalid!("Extraction followed a symlink too deep"));
1819                }
1820
                // Note that we cannot accept links that do not inherently resolve to a path
                // inside the directory. This prevents:
                // - disclosure of whether unrelated paths exist (probing that a path exists and then escaping via `../`)
                // - issues with file-system-specific path resolution (case sensitivity, etc.)
1824                let target = std::fs::read_link(&outpath)?;
1825
1826                if !crate::path::simplified_components(&target)
1827                    .ok_or(invalid!("Invalid symlink target path"))?
1828                    .starts_with(
1829                        &crate::path::simplified_components(base_path)
1830                            .ok_or(invalid!("Invalid base path"))?,
1831                    )
1832                {
1833                    let is_absolute_enclosed = base_path
1834                        .components()
1835                        .map(Some)
1836                        .chain(std::iter::once(None))
1837                        .zip(target.components().map(Some).chain(std::iter::repeat(None)))
1838                        .all(|(a, b)| match (a, b) {
1839                            // both components are normal
1840                            (Some(Component::Normal(a)), Some(Component::Normal(b))) => a == b,
1841                            // both components consumed fully
1842                            (None, None) => true,
1843                            // target consumed fully but base path is not
1844                            (Some(_), None) => false,
1845                            // base path consumed fully but target is not (and normal)
1846                            (None, Some(Component::CurDir | Component::Normal(_))) => true,
1847                            _ => false,
1848                        });
1849
1850                    if !is_absolute_enclosed {
1851                        return Err(invalid!("Symlink is not inherently safe"));
1852                    }
1853                }
1854
1855                outpath.push(target);
1856            }
1857        }
1858        Ok(())
1859    }
1860
1861    /// Get the comment of the file
1862    pub fn comment(&self) -> &str {
1863        &self.get_metadata().file_comment
1864    }
1865
1866    /// Get the compression method used to store the file
1867    pub fn compression(&self) -> CompressionMethod {
1868        self.get_metadata().compression_method
1869    }
1870
    /// Get whether the file is encrypted
1872    pub fn encrypted(&self) -> bool {
1873        self.data.encrypted
1874    }
1875
1876    /// Get the size of the file, in bytes, in the archive
1877    pub fn compressed_size(&self) -> u64 {
1878        self.get_metadata().compressed_size
1879    }
1880
1881    /// Get the size of the file, in bytes, when uncompressed
1882    pub fn size(&self) -> u64 {
1883        self.get_metadata().uncompressed_size
1884    }
1885
1886    /// Get the time the file was last modified
1887    pub fn last_modified(&self) -> Option<DateTime> {
1888        self.data.last_modified_time
1889    }
1890    /// Returns whether the file is actually a directory
1891    pub fn is_dir(&self) -> bool {
1892        is_dir(self.name())
1893    }
1894
1895    /// Returns whether the file is actually a symbolic link
1896    pub fn is_symlink(&self) -> bool {
1897        self.unix_mode()
1898            .is_some_and(|mode| mode & S_IFLNK == S_IFLNK)
1899    }
1900
1901    /// Returns whether the file is a normal file (i.e. not a directory or symlink)
1902    pub fn is_file(&self) -> bool {
1903        !self.is_dir() && !self.is_symlink()
1904    }
1905
1906    /// Get unix mode for the file
1907    pub fn unix_mode(&self) -> Option<u32> {
1908        self.get_metadata().unix_mode()
1909    }
1910
1911    /// Get the CRC32 hash of the original file
1912    pub fn crc32(&self) -> u32 {
1913        self.get_metadata().crc32
1914    }
1915
1916    /// Get the extra data of the zip header for this file
1917    pub fn extra_data(&self) -> Option<&[u8]> {
1918        self.get_metadata()
1919            .extra_field
1920            .as_ref()
1921            .map(|v| v.deref().deref())
1922    }
1923
1924    /// Get the starting offset of the data of the compressed file
1925    pub fn data_start(&self) -> u64 {
1926        *self.data.data_start.get().unwrap()
1927    }
1928
1929    /// Get the starting offset of the zip header for this file
1930    pub fn header_start(&self) -> u64 {
1931        self.get_metadata().header_start
1932    }
1933    /// Get the starting offset of the zip header in the central directory for this file
1934    pub fn central_header_start(&self) -> u64 {
1935        self.get_metadata().central_header_start
1936    }
1937
1938    /// Get the [`SimpleFileOptions`] that would be used to write this file to
1939    /// a new zip archive.
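    ///
    /// # Example
    ///
    /// A sketch of re-using an entry's options when copying it into a new archive
    /// (this re-compresses the data through `ZipWriter::start_file`, as used in this
    /// crate's tests):
    ///
    /// ```no_run
    /// use std::io::{copy, Read, Seek, Write};
    ///
    /// fn copy_first_entry<R: Read + Seek, W: Write + Seek>(
    ///     src: &mut zip::ZipArchive<R>,
    ///     dst: &mut zip::ZipWriter<W>,
    /// ) -> zip::result::ZipResult<()> {
    ///     let mut file = src.by_index(0)?;
    ///     let name = file.name().to_owned();
    ///     dst.start_file(name, file.options())?;
    ///     copy(&mut file, dst)?;
    ///     Ok(())
    /// }
    /// ```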
1940    pub fn options(&self) -> SimpleFileOptions {
1941        let mut options = SimpleFileOptions::default()
1942            .large_file(self.compressed_size().max(self.size()) > ZIP64_BYTES_THR)
1943            .compression_method(self.compression())
1944            .unix_permissions(self.unix_mode().unwrap_or(0o644) | S_IFREG)
1945            .last_modified_time(
1946                self.last_modified()
1947                    .filter(|m| m.is_valid())
1948                    .unwrap_or_else(DateTime::default_for_write),
1949            );
1950
1951        options.normalize();
1952        #[cfg(feature = "aes-crypto")]
1953        if let Some(aes) = self.get_metadata().aes_mode {
1954            // Preserve AES metadata in options for downstream writers.
1955            // This is metadata-only and does not trigger encryption.
1956            options.aes_mode = Some(aes);
1957        }
1958        options
1959    }
1960}
1961
1962/// Methods for retrieving information on zip files
1963impl<R: Read> ZipFile<'_, R> {
1964    /// iterate through all extra fields
1965    pub fn extra_data_fields(&self) -> impl Iterator<Item = &ExtraField> {
1966        self.data.extra_fields.iter()
1967    }
1968}
1969
1970impl<R: Read> HasZipMetadata for ZipFile<'_, R> {
1971    fn get_metadata(&self) -> &ZipFileData {
1972        self.data.as_ref()
1973    }
1974}
1975
1976impl<R: Read> Read for ZipFile<'_, R> {
1977    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1978        self.reader.read(buf)
1979    }
1980
1981    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
1982        self.reader.read_exact(buf)
1983    }
1984
1985    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
1986        self.reader.read_to_end(buf)
1987    }
1988
1989    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
1990        self.reader.read_to_string(buf)
1991    }
1992}
1993
1994impl<R: Read> Read for ZipFileSeek<'_, R> {
1995    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1996        match &mut self.reader {
1997            ZipFileSeekReader::Raw(r) => r.read(buf),
1998        }
1999    }
2000}
2001
2002impl<R: Seek> Seek for ZipFileSeek<'_, R> {
2003    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
2004        match &mut self.reader {
2005            ZipFileSeekReader::Raw(r) => r.seek(pos),
2006        }
2007    }
2008}
2009
2010impl<R> HasZipMetadata for ZipFileSeek<'_, R> {
2011    fn get_metadata(&self) -> &ZipFileData {
2012        self.data.as_ref()
2013    }
2014}
2015
2016impl<R: Read> Drop for ZipFile<'_, R> {
2017    fn drop(&mut self) {
        // If self.data is Cow::Owned, this ZipFile was constructed by a streaming reader.
2019        // In this case, we want to exhaust the reader so that the next file is accessible.
2020        if let Cow::Owned(_) = self.data {
2021            // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped.
2022            if let Ok(mut inner) = self.take_raw_reader() {
2023                let _ = copy(&mut inner, &mut sink());
2024            }
2025        }
2026    }
2027}
2028
2029/// Read ZipFile structures from a non-seekable reader.
2030///
/// This is an alternative way to read a zip file. If possible, prefer the `ZipArchive` functions,
/// as some information will be missing when reading in this manner.
2033///
2034/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is
2035/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory
2036/// is encountered. No more files should be read after this.
2037///
/// The `Drop` implementation of `ZipFile` ensures that the reader will be correctly positioned
/// after the returned structure is dropped.
2040///
2041/// Missing fields are:
2042/// * `comment`: set to an empty string
2043/// * `data_start`: set to 0
/// * `external_attributes`: `unix_mode()` will return `None`
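///
/// # Example
///
/// A minimal sketch of draining a stream entry by entry (assuming an `archive.zip` file on disk;
/// any non-seekable `Read` source works the same way):
///
/// ```no_run
/// fn main() -> zip::result::ZipResult<()> {
///     let mut source = std::fs::File::open("archive.zip")?;
///     while let Some(file) = zip::read::read_zipfile_from_stream(&mut source)? {
///         println!("{}: {} bytes", file.name(), file.size());
///         // Dropping `file` skips the remaining entry data so the next header can be read.
///     }
///     Ok(())
/// }
/// ```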
2045pub fn read_zipfile_from_stream<R: Read>(reader: &mut R) -> ZipResult<Option<ZipFile<'_, R>>> {
2046    // We can't use the typical ::parse() method, as we follow separate code paths depending on the
2047    // "magic" value (since the magic value will be from the central directory header if we've
2048    // finished iterating over all the actual files).
2049    /* TODO: smallvec? */
2050
2051    let mut block = ZipLocalEntryBlock::zeroed();
2052    reader.read_exact(block.as_bytes_mut())?;
2053
2054    match block.magic().from_le() {
2055        spec::Magic::LOCAL_FILE_HEADER_SIGNATURE => (),
2056        spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
2057        _ => return Err(ZipLocalEntryBlock::WRONG_MAGIC_ERROR),
2058    }
2059
2060    let block = block.from_le();
2061
2062    let mut result = ZipFileData::from_local_block(block, reader)?;
2063
2064    match parse_extra_field(&mut result) {
2065        Ok(..) | Err(ZipError::Io(..)) => {}
2066        Err(e) => return Err(e),
2067    }
2068
2069    let limit_reader = reader.take(result.compressed_size);
2070
2071    let result_flags = result.flags;
2072    let crypto_reader = make_crypto_reader(&result, limit_reader, None, None)?;
2073    let ZipFileData {
2074        crc32,
2075        uncompressed_size,
2076        compression_method,
2077        ..
2078    } = result;
2079
2080    Ok(Some(ZipFile {
2081        data: Cow::Owned(result),
2082        reader: make_reader(
2083            compression_method,
2084            uncompressed_size,
2085            crc32,
2086            crypto_reader,
2087            result_flags,
2088        )?,
2089    }))
2090}
2091
2092/// A filter that determines whether an entry should be ignored when searching
2093/// for the root directory of a Zip archive.
2094///
2095/// Returns `true` if the entry should be considered, and `false` if it should
2096/// be ignored.
2097///
2098/// See [`root_dir_common_filter`] for a sensible default filter.
2099pub trait RootDirFilter: Fn(&Path) -> bool {}
2100impl<F: Fn(&Path) -> bool> RootDirFilter for F {}
2101
2102/// Common filters when finding the root directory of a Zip archive.
2103///
2104/// This filter is a sensible default for most use cases and filters out common
2105/// system files that are usually irrelevant to the contents of the archive.
2106///
2107/// Currently, the filter ignores:
2108/// - `/__MACOSX/`
2109/// - `/.DS_Store`
2110/// - `/Thumbs.db`
2111///
2112/// **This function is not guaranteed to be stable and may change in future versions.**
2113///
2114/// # Example
2115///
2116/// ```rust
2117/// # use std::path::Path;
2118/// assert!(zip::read::root_dir_common_filter(Path::new("foo.txt")));
2119/// assert!(!zip::read::root_dir_common_filter(Path::new(".DS_Store")));
2120/// assert!(!zip::read::root_dir_common_filter(Path::new("Thumbs.db")));
2121/// assert!(!zip::read::root_dir_common_filter(Path::new("__MACOSX")));
2122/// assert!(!zip::read::root_dir_common_filter(Path::new("__MACOSX/foo.txt")));
2123/// ```
2124pub fn root_dir_common_filter(path: &Path) -> bool {
2125    const COMMON_FILTER_ROOT_FILES: &[&str] = &[".DS_Store", "Thumbs.db"];
2126
2127    if path.starts_with("__MACOSX") {
2128        return false;
2129    }
2130
2131    if path.components().count() == 1
2132        && path.file_name().is_some_and(|file_name| {
2133            COMMON_FILTER_ROOT_FILES
2134                .iter()
2135                .map(OsStr::new)
2136                .any(|cmp| cmp == file_name)
2137        })
2138    {
2139        return false;
2140    }
2141
2142    true
2143}
2144
2145#[cfg(feature = "chrono")]
2146/// Generate a `SystemTime` from a `DateTime`.
2147fn datetime_to_systemtime(datetime: &DateTime) -> Option<std::time::SystemTime> {
2148    if let Some(t) = generate_chrono_datetime(datetime) {
2149        let time = chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(t, chrono::Utc);
2150        return Some(time.into());
2151    }
2152    None
2153}
2154
2155#[cfg(feature = "chrono")]
2156/// Generate a `NaiveDateTime` from a `DateTime`.
fn generate_chrono_datetime(datetime: &DateTime) -> Option<chrono::NaiveDateTime> {
    chrono::NaiveDate::from_ymd_opt(
        datetime.year().into(),
        datetime.month().into(),
        datetime.day().into(),
    )?
    .and_hms_opt(
        datetime.hour().into(),
        datetime.minute().into(),
        datetime.second().into(),
    )
}
2173
2174#[cfg(test)]
2175mod test {
2176    use crate::read::ZipReadOptions;
2177    use crate::result::ZipResult;
2178    use crate::types::SimpleFileOptions;
2179    use crate::CompressionMethod::Stored;
2180    use crate::{ZipArchive, ZipWriter};
2181    use std::io::{Cursor, Read, Write};
2182    use tempfile::TempDir;
2183
2184    #[test]
2185    fn invalid_offset() {
2186        use super::ZipArchive;
2187
2188        let reader = ZipArchive::new(Cursor::new(include_bytes!(
2189            "../tests/data/invalid_offset.zip"
2190        )));
2191        assert!(reader.is_err());
2192    }
2193
2194    #[test]
2195    fn invalid_offset2() {
2196        use super::ZipArchive;
2197
2198        let reader = ZipArchive::new(Cursor::new(include_bytes!(
2199            "../tests/data/invalid_offset2.zip"
2200        )));
2201        assert!(reader.is_err());
2202    }
2203
2204    #[test]
2205    fn zip64_with_leading_junk() {
2206        use super::ZipArchive;
2207
2208        let reader =
2209            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/zip64_demo.zip"))).unwrap();
2210        assert_eq!(reader.len(), 1);
2211    }
2212
2213    #[test]
2214    fn zip_contents() {
2215        use super::ZipArchive;
2216
2217        let mut reader =
2218            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/mimetype.zip"))).unwrap();
2219        assert_eq!(reader.comment(), b"");
2220        assert_eq!(reader.by_index(0).unwrap().central_header_start(), 77);
2221    }
2222
2223    #[test]
2224    fn zip_read_streaming() {
2225        use super::read_zipfile_from_stream;
2226
2227        let mut reader = Cursor::new(include_bytes!("../tests/data/mimetype.zip"));
2228        loop {
2229            if read_zipfile_from_stream(&mut reader).unwrap().is_none() {
2230                break;
2231            }
2232        }
2233    }
2234
2235    #[test]
2236    fn zip_clone() {
2237        use super::ZipArchive;
2238        use std::io::Read;
2239
2240        let mut reader1 =
2241            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/mimetype.zip"))).unwrap();
2242        let mut reader2 = reader1.clone();
2243
2244        let mut file1 = reader1.by_index(0).unwrap();
2245        let mut file2 = reader2.by_index(0).unwrap();
2246
2247        let t = file1.last_modified().unwrap();
2248        assert_eq!(
2249            (
2250                t.year(),
2251                t.month(),
2252                t.day(),
2253                t.hour(),
2254                t.minute(),
2255                t.second()
2256            ),
2257            (1980, 1, 1, 0, 0, 0)
2258        );
2259
2260        let mut buf1 = [0; 5];
2261        let mut buf2 = [0; 5];
2262        let mut buf3 = [0; 5];
2263        let mut buf4 = [0; 5];
2264
2265        file1.read_exact(&mut buf1).unwrap();
2266        file2.read_exact(&mut buf2).unwrap();
2267        file1.read_exact(&mut buf3).unwrap();
2268        file2.read_exact(&mut buf4).unwrap();
2269
2270        assert_eq!(buf1, buf2);
2271        assert_eq!(buf3, buf4);
2272        assert_ne!(buf1, buf3);
2273    }
2274
2275    #[test]
2276    fn file_and_dir_predicates() {
2277        use super::ZipArchive;
2278
2279        let mut zip = ZipArchive::new(Cursor::new(include_bytes!(
2280            "../tests/data/files_and_dirs.zip"
2281        )))
2282        .unwrap();
2283
2284        for i in 0..zip.len() {
2285            let zip_file = zip.by_index(i).unwrap();
2286            let full_name = zip_file.enclosed_name().unwrap();
2287            let file_name = full_name.file_name().unwrap().to_str().unwrap();
2288            assert!(
2289                (file_name.starts_with("dir") && zip_file.is_dir())
2290                    || (file_name.starts_with("file") && zip_file.is_file())
2291            );
2292        }
2293    }
2294
2295    #[test]
2296    fn zip64_magic_in_filenames() {
2297        let files = vec![
2298            include_bytes!("../tests/data/zip64_magic_in_filename_1.zip").to_vec(),
2299            include_bytes!("../tests/data/zip64_magic_in_filename_2.zip").to_vec(),
2300            include_bytes!("../tests/data/zip64_magic_in_filename_3.zip").to_vec(),
2301            include_bytes!("../tests/data/zip64_magic_in_filename_4.zip").to_vec(),
2302            include_bytes!("../tests/data/zip64_magic_in_filename_5.zip").to_vec(),
2303        ];
2304        // Although we don't allow adding files whose names contain the ZIP64 CDB-end or
2305        // CDB-end-locator signatures, we still read them when they aren't genuinely ambiguous.
2306        for file in files {
2307            ZipArchive::new(Cursor::new(file)).unwrap();
2308        }
2309    }
2310
    /// Test case to ensure we don't preemptively over-allocate based on the declared number of
    /// files in the CDE of an invalid zip, when the number of files declared is greater than the
    /// alleged offset in the CDE.
2314    #[test]
2315    fn invalid_cde_number_of_files_allocation_smaller_offset() {
2316        use super::ZipArchive;
2317
2318        let reader = ZipArchive::new(Cursor::new(include_bytes!(
2319            "../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
2320        )));
2321        assert!(reader.is_err() || reader.unwrap().is_empty());
2322    }
2323
    /// Test case to ensure we don't preemptively over-allocate based on the declared number of
    /// files in the CDE of an invalid zip, when the number of files declared is less than the
    /// alleged offset in the CDE.
2327    #[test]
2328    fn invalid_cde_number_of_files_allocation_greater_offset() {
2329        use super::ZipArchive;
2330
2331        let reader = ZipArchive::new(Cursor::new(include_bytes!(
2332            "../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
2333        )));
2334        assert!(reader.is_err());
2335    }
2336
2337    #[cfg(feature = "deflate64")]
2338    #[test]
2339    fn deflate64_index_out_of_bounds() -> std::io::Result<()> {
2340        let mut reader = ZipArchive::new(Cursor::new(include_bytes!(
2341            "../tests/data/raw_deflate64_index_out_of_bounds.zip"
2342        )))?;
2343        std::io::copy(&mut reader.by_index(0)?, &mut std::io::sink()).expect_err("Invalid file");
2344        Ok(())
2345    }
2346
2347    #[cfg(feature = "deflate64")]
2348    #[test]
2349    fn deflate64_not_enough_space() {
2350        ZipArchive::new(Cursor::new(include_bytes!(
2351            "../tests/data/deflate64_issue_25.zip"
2352        )))
2353        .expect_err("Invalid file");
2354    }
2355
2356    #[cfg(feature = "deflate-flate2")]
2357    #[test]
2358    fn test_read_with_data_descriptor() {
2359        use std::io::Read;
2360
2361        let mut reader = ZipArchive::new(Cursor::new(include_bytes!(
2362            "../tests/data/data_descriptor.zip"
2363        )))
2364        .unwrap();
2365        let mut decompressed = [0u8; 16];
2366        let mut file = reader.by_index(0).unwrap();
2367        assert_eq!(file.read(&mut decompressed).unwrap(), 12);
2368    }
2369
2370    #[test]
2371    fn test_is_symlink() -> std::io::Result<()> {
2372        let mut reader = ZipArchive::new(Cursor::new(include_bytes!("../tests/data/symlink.zip")))?;
2373        assert!(reader.by_index(0)?.is_symlink());
2374        let tempdir = TempDir::with_prefix("test_is_symlink")?;
2375        reader.extract(&tempdir)?;
2376        assert!(tempdir.path().join("bar").is_symlink());
2377        Ok(())
2378    }
2379
2380    #[test]
2381    #[cfg(feature = "deflate-flate2")]
2382    fn test_utf8_extra_field() {
2383        let mut reader =
2384            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/chinese.zip"))).unwrap();
2385        reader.by_name("七个房间.txt").unwrap();
2386    }
2387
2388    #[test]
2389    fn test_utf8() {
2390        let mut reader =
2391            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/linux-7z.zip"))).unwrap();
2392        reader.by_name("你好.txt").unwrap();
2393    }
2394
2395    #[test]
2396    fn test_utf8_2() {
2397        let mut reader = ZipArchive::new(Cursor::new(include_bytes!(
2398            "../tests/data/windows-7zip.zip"
2399        )))
2400        .unwrap();
2401        reader.by_name("你好.txt").unwrap();
2402    }
2403
2404    #[test]
2405    fn test_64k_files() -> ZipResult<()> {
2406        let mut writer = ZipWriter::new(Cursor::new(Vec::new()));
2407        let options = SimpleFileOptions {
2408            compression_method: Stored,
2409            ..Default::default()
2410        };
2411        for i in 0..=u16::MAX {
2412            let file_name = format!("{i}.txt");
2413            writer.start_file(&*file_name, options)?;
2414            writer.write_all(i.to_string().as_bytes())?;
2415        }
2416
2417        let mut reader = ZipArchive::new(writer.finish()?)?;
2418        for i in 0..=u16::MAX {
2419            let expected_name = format!("{i}.txt");
2420            let expected_contents = i.to_string();
2421            let expected_contents = expected_contents.as_bytes();
2422            let mut file = reader.by_name(&expected_name)?;
2423            let mut contents = Vec::with_capacity(expected_contents.len());
2424            file.read_to_end(&mut contents)?;
2425            assert_eq!(contents, expected_contents);
2426            drop(file);
2427            contents.clear();
2428            let mut file = reader.by_index(i as usize)?;
2429            file.read_to_end(&mut contents)?;
2430            assert_eq!(contents, expected_contents);
2431        }
2432        Ok(())
2433    }
2434
2435    /// Symlinks being extracted shouldn't be followed out of the destination directory.
2436    #[test]
2437    fn test_cannot_symlink_outside_destination() -> ZipResult<()> {
2438        use std::fs::create_dir;
2439
2440        let mut writer = ZipWriter::new(Cursor::new(Vec::new()));
2441        writer.add_symlink("symlink/", "../dest-sibling/", SimpleFileOptions::default())?;
2442        writer.start_file("symlink/dest-file", SimpleFileOptions::default())?;
2443        let mut reader = writer.finish_into_readable()?;
2444        let dest_parent = TempDir::with_prefix("read__test_cannot_symlink_outside_destination")?;
2445        let dest_sibling = dest_parent.path().join("dest-sibling");
2446        create_dir(&dest_sibling)?;
2447        let dest = dest_parent.path().join("dest");
2448        create_dir(&dest)?;
2449        assert!(reader.extract(dest).is_err());
2450        assert!(!dest_sibling.join("dest-file").exists());
2451        Ok(())
2452    }
2453
2454    #[test]
2455    fn test_can_create_destination() -> ZipResult<()> {
2456        let mut reader =
2457            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/mimetype.zip")))?;
2458        let dest = TempDir::with_prefix("read__test_can_create_destination")?;
2459        reader.extract(&dest)?;
2460        assert!(dest.path().join("mimetype").exists());
2461        Ok(())
2462    }
2463
2464    #[test]
2465    fn test_central_directory_not_at_end() -> ZipResult<()> {
2466        let mut reader = ZipArchive::new(Cursor::new(include_bytes!("../tests/data/omni.ja")))?;
2467        let mut file = reader.by_name("chrome.manifest")?;
2468        let mut contents = String::new();
2469        file.read_to_string(&mut contents)?; // ensures valid UTF-8
2470        assert!(!contents.is_empty(), "chrome.manifest should not be empty");
2471        drop(file);
2472        for i in 0..reader.len() {
2473            let mut file = reader.by_index(i)?;
2474            // Attempt to read a small portion or all of each file to ensure it's accessible
2475            let mut buffer = Vec::new();
2476            file.read_to_end(&mut buffer)?;
2477            assert_eq!(
2478                buffer.len(),
2479                file.size() as usize,
2480                "File size mismatch for {}",
2481                file.name()
2482            );
2483        }
2484        Ok(())
2485    }
2486
2487    #[test]
2488    fn test_ignore_encryption_flag() -> ZipResult<()> {
2489        let mut reader = ZipArchive::new(Cursor::new(include_bytes!(
2490            "../tests/data/ignore_encryption_flag.zip"
2491        )))?;
2492
2493        // Get the file entry by ignoring its encryption flag.
2494        let mut file =
2495            reader.by_index_with_options(0, ZipReadOptions::new().ignore_encryption_flag(true))?;
2496        let mut contents = String::new();
2497        assert_eq!(file.name(), "plaintext.txt");
2498
2499        // The file claims it is encrypted, but it is not.
2500        assert!(file.encrypted());
2501        file.read_to_string(&mut contents)?; // ensures valid UTF-8
2502        assert_eq!(contents, "This file is not encrypted.\n");
2503        Ok(())
2504    }
2505}