1use std::{
4 borrow::Cow,
5 collections::HashMap,
6 io::{self, BufRead, Read, Seek},
7};
8
9use crate::{CompressionMethod, Decompressor, FileType, Timestamp, types, utils};
10
11mod extra_field;
12mod raw;
13
14use extra_field::{ExtraField, ExtraFields};
15
/// Builds an [`io::ErrorKind::InvalidData`] error carrying `msg`.
///
/// Marked `#[cold]` because it is only reached on error paths.
#[cold]
fn invalid(msg: &str) -> io::Error {
    let kind = io::ErrorKind::InvalidData;
    io::Error::new(kind, msg)
}
20
/// Builds the [`io::ErrorKind::Unsupported`] error returned when an encrypted
/// entry is read through an API that cannot decrypt it.
#[cold]
fn encrypted_file() -> io::Error {
    let kind = io::ErrorKind::Unsupported;
    io::Error::new(kind, "encrypted file")
}
25
/// Builds the [`io::ErrorKind::Unsupported`] error returned when a compressed
/// entry is requested through an API that only handles stored data.
#[cold]
fn compressed() -> io::Error {
    let kind = io::ErrorKind::Unsupported;
    io::Error::new(kind, "compressed file")
}
30
/// Object-safe shorthand for "anything that is both [`Read`] and [`Seek`]".
trait ReadSeek: Read + Seek {}

/// Blanket implementation: every sized `Read + Seek` type is a `ReadSeek`.
impl<R> ReadSeek for R where R: Read + Seek {}
33
/// Object-safe shorthand for "anything that is both [`BufRead`] and [`Seek`]".
///
/// Used as `&mut dyn BufReadSeek` so that extraction code is compiled once
/// instead of once per reader type.
trait BufReadSeek: BufRead + Seek {}

/// Blanket implementation: every sized `BufRead + Seek` type is a `BufReadSeek`.
impl<R> BufReadSeek for R where R: BufRead + Seek {}
36
/// Encryption scheme advertised by an entry's headers.
///
/// Entries carrying any of these cannot be read through [`Metadata::read`] or
/// [`Metadata::read_stored`]; both return an "encrypted file" error.
#[derive(Debug, Clone, Copy)]
#[non_exhaustive]
pub enum EncryptionMethod {
    /// The "encrypted" flag bit is set and no other scheme was identified.
    ZipCrypto,
    /// Both the "encrypted" and the "strong encryption" flag bits are set.
    ///
    /// NOTE(review): the variant name is misspelled ("Encrytion"); it is part of
    /// the public API, so renaming it would be a breaking change.
    StrongEncrytion,
    /// The entry carries an AES extra field.
    Aes {
        /// Key size as reported by the AES extra field.
        key_size: u16,
        /// Copied from the AES extra field; when `false` the header CRC-32 is
        /// required to be zero.
        check_crc32: bool,
    },
}
64
/// Parsed archive directory that does not own a reader.
///
/// The reader is passed explicitly to the methods that need to access entry data.
#[derive(Debug)]
pub struct RawArchive {
    /// Metadata of every entry, in the order returned by `raw::read_archive`.
    entries: Vec<Metadata>,
    /// Archive-level comment as raw bytes (no encoding is assumed).
    comment: Box<[u8]>,
}
71
72impl RawArchive {
73 #[inline]
77 pub fn new<R: Read + Seek>(reader: &mut R) -> io::Result<Self> {
78 let (entries, comment) = raw::read_archive(reader)?;
79 Ok(Self { entries, comment })
80 }
81
82 #[inline]
84 pub fn entries(&self) -> &[Metadata] {
85 &self.entries
86 }
87
88 #[inline]
90 pub fn comment(&self) -> &[u8] {
91 &self.comment
92 }
93
94 pub fn extract<R: BufRead + Seek>(
98 &self,
99 reader: &mut R,
100 at: &std::path::Path,
101 ) -> io::Result<()> {
102 match std::fs::create_dir(at) {
103 Ok(()) => (),
104 Err(err) if err.kind() == io::ErrorKind::AlreadyExists => (),
105 Err(err) => return Err(err),
106 };
107
108 for entry in &self.entries {
109 entry.extract(reader, at)?;
110 }
111
112 Ok(())
113 }
114
115 #[cfg(feature = "parallel")]
122 pub fn parallel_extract<R: sync_file::ReadAt + sync_file::Size + Sync>(
123 &self,
124 reader: &R,
125 at: &std::path::Path,
126 ) -> io::Result<()> {
127 use rayon::prelude::*;
128
129 match std::fs::create_dir(at) {
130 Ok(()) => (),
131 Err(err) if err.kind() == io::ErrorKind::AlreadyExists => (),
132 Err(err) => return Err(err),
133 };
134
135 self.entries.par_iter().try_for_each_init(
136 || io::BufReader::new(sync_file::Adapter::new(reader)),
137 |reader, entry| entry.extract(reader, at),
138 )?;
139
140 Ok(())
141 }
142}
143
144impl FileType {
145 fn test(attr: u32, name: &str) -> Option<Self> {
146 let dos_attr = attr as u16;
147 let unix_mode = (attr >> 16) as u16;
148 let unix_kind = unix_mode >> 12;
149
150 let is_file = (dos_attr & (1 << 5)) != 0 || unix_kind == 8;
151 let is_dir = (dos_attr & (1 << 4)) != 0 || unix_kind == 4;
152 let is_symlink = unix_kind == 10;
153 let trailing_slash = name.ends_with('/');
154
155 match (is_file, is_dir, trailing_slash, is_symlink) {
156 (_, false, false, false) => Some(FileType::File),
157 (false, _, true, false) => Some(FileType::Directory),
158 (false, false, false, true) => Some(FileType::Symlink),
159 _ => None,
160 }
161 }
162}
163
164fn convert_string(raw: &[u8], force_unicode: bool) -> Option<(Cow<'_, str>, Option<u32>)> {
165 if let Ok(name) = str::from_utf8(raw) {
169 return Some((Cow::Borrowed(name), None));
170 }
171
172 if force_unicode {
174 None
175 } else {
176 let name = utils::cp437::convert(raw);
177 Some((Cow::Owned(name), Some(crc32fast::hash(raw))))
178 }
179}
180
/// Everything known about a single archive entry.
#[derive(Debug)]
pub struct Metadata {
    /// Offset of the entry's local header. Taken from the central header (or its
    /// Zip64 extra field); entries parsed from a local header start at `0`.
    header_offset: u64,
    /// Absolute position [`Metadata::read_raw`] seeks to before reading.
    /// Both constructors in this file initialise it to `0`.
    pub data_offset: u64,

    /// Number of bytes [`Metadata::read_raw`] yields for this entry.
    pub compressed_size: u64,
    /// Expected size of the decompressed content.
    pub uncompressed_size: u64,
    /// Compression method; for AES entries this is the method named by the AES
    /// extra field rather than the AES marker itself.
    pub compression_method: CompressionMethod,
    /// CRC-32 recorded in the header.
    pub crc32: u32,
    /// Kind of entry. Entries parsed from a local header are always `File`.
    pub file_type: FileType,

    /// Modification time from an NTFS or extended-timestamp extra field, if any.
    pub modification_time: Option<Timestamp>,
    /// Access time from an NTFS or extended-timestamp extra field, if any.
    pub access_time: Option<Timestamp>,
    /// Creation time from an NTFS or extended-timestamp extra field, if any.
    pub creation_time: Option<Timestamp>,

    /// Encryption scheme, or `None` for unencrypted entries.
    pub encryption: Option<EncryptionMethod>,

    /// Validated entry name.
    name: Box<str>,
    /// Entry comment; empty for entries parsed from a local header.
    comment: Box<str>,

    /// General-purpose flag bit 3.
    is_streaming: bool,
    /// Set once a Zip64 extended information extra field has been parsed.
    is_zip64: bool,
    /// Raw general-purpose flags from the header.
    flags: u16,
}
206
207impl Metadata {
208 fn from_local_header(
209 header: types::LocalFileHeader,
210 file_name: &[u8],
211 extra_fields: &[u8],
212 ) -> Option<Self> {
213 let flags = header.flags.get();
214 let is_encrypted = flags & (1 << 0) != 0;
215 let is_streaming = flags & (1 << 3) != 0;
216 let strong_encryption = flags & (1 << 6) != 0;
217 let is_unicode = flags & (1 << 11) != 0;
218
219 if { header.signature } != types::LocalFileHeader::SIGNATURE {
220 return None;
221 }
222
223 let (name, name_crc) = convert_string(file_name, is_unicode)?;
224 let name = utils::validate_name(&name)?;
225
226 let encryption = match (is_encrypted, strong_encryption) {
227 (false, false) => None,
228 (false, true) => return None,
229 (true, false) => Some(EncryptionMethod::ZipCrypto),
230 (true, true) => Some(EncryptionMethod::StrongEncrytion),
231 };
232
233 let mut meta = Self {
234 crc32: header.crc32.get(),
235 encryption,
236 header_offset: 0,
237 data_offset: 0,
238
239 compressed_size: header.compressed_size.get() as u64,
240 uncompressed_size: header.uncompressed_size.get() as u64,
241 compression_method: CompressionMethod(header.compression_method.get()),
242 file_type: FileType::File,
243
244 modification_time: None,
245 access_time: None,
246 creation_time: None,
247
248 name,
249 comment: Box::default(),
250
251 is_streaming,
252 is_zip64: false,
253 flags,
254 };
255
256 meta.parse_extra_fields(ExtraFields(extra_fields), name_crc, None)?;
257
258 Some(meta)
259 }
260
261 fn from_central_header(
262 header: types::CentralFileHeader,
263 file_name: &[u8],
264 extra_fields: &[u8],
265 comment: &[u8],
266 ) -> Option<Self> {
267 let flags = header.flags.get();
268 let is_encrypted = flags & (1 << 0) != 0;
269 let is_streaming = flags & (1 << 3) != 0;
270 let strong_encryption = flags & (1 << 6) != 0;
271 let is_unicode = flags & (1 << 11) != 0;
272
273 if { header.signature } != types::CentralFileHeader::SIGNATURE
274 || header.disk_number.get() != 0
275 {
276 return None;
277 }
278
279 let (comment, comment_crc) = convert_string(comment, is_unicode)?;
280 let comment = comment.into_owned().into_boxed_str();
281 let (name, name_crc) = convert_string(file_name, is_unicode)?;
282 let name = utils::validate_name(&name)?;
283 let file_type = FileType::test(header.external_attributes.get(), &name)?;
284
285 let encryption = match (is_encrypted, strong_encryption) {
286 (false, false) => None,
287 (false, true) => return None,
288 (true, false) => Some(EncryptionMethod::ZipCrypto),
289 (true, true) => Some(EncryptionMethod::StrongEncrytion),
290 };
291
292 let mut meta = Self {
293 crc32: header.crc32.get(),
294 encryption,
295 header_offset: header.local_header_offset.get() as u64,
296 data_offset: 0,
297
298 compressed_size: header.compressed_size.get() as u64,
299 uncompressed_size: header.uncompressed_size.get() as u64,
300 compression_method: CompressionMethod(header.compression_method.get()),
301 file_type,
302
303 modification_time: None,
304 access_time: None,
305 creation_time: None,
306
307 name,
308 comment,
309
310 is_streaming,
311 is_zip64: false,
312 flags,
313 };
314
315 meta.parse_extra_fields(ExtraFields(extra_fields), name_crc, comment_crc)?;
316
317 Some(meta)
318 }
319
320 fn parse_extra_fields(
321 &mut self,
322 extra_fields: ExtraFields,
323 name_crc: Option<u32>,
324 comment_crc: Option<u32>,
325 ) -> Option<()> {
326 for field in extra_fields.iter() {
327 match field {
328 ExtraField::Zip64ExtendedInformation(mut info) => {
329 if self.uncompressed_size == 0xffff_ffff {
330 self.uncompressed_size = info.next()?;
331 }
332 if self.compressed_size == 0xffff_ffff {
333 self.compressed_size = info.next()?;
334 }
335 if self.header_offset == 0xffff_ffff {
336 self.header_offset = info.next()?;
337 }
338 info.end()?;
340 self.is_zip64 = true;
341 }
342 ExtraField::UnicodeComment(unicode) => {
343 if Some(unicode.header_comment_crc32) != comment_crc {
344 return None;
345 }
346 self.comment = unicode.comment.into();
347 }
348
349 ExtraField::UnicodeName(unicode) => {
350 if Some(unicode.header_name_crc32) != name_crc {
351 return None;
352 }
353 self.name = utils::validate_name(unicode.name)?;
354 }
355
356 ExtraField::Ntfs(ntfs) => {
357 self.modification_time = ntfs.times.mtime;
358 self.access_time = ntfs.times.atime;
359 self.creation_time = ntfs.times.ctime;
360 }
361
362 ExtraField::ExtendedTimestamp(ts) => {
363 self.modification_time = ts.modification_time;
364 self.access_time = ts.access_time;
365 self.creation_time = ts.creation_time;
366 }
367
368 ExtraField::Aes(aes) => {
369 if self.compression_method != CompressionMethod::AES
370 || (!aes.check_crc32 && self.crc32 != 0)
371 {
372 return None;
373 }
374 let Some(enc @ EncryptionMethod::ZipCrypto) = &mut self.encryption else {
375 return None;
376 };
377
378 *enc = EncryptionMethod::Aes {
379 key_size: aes.key_size,
380 check_crc32: aes.check_crc32,
381 };
382 self.compression_method = aes.compression;
383 }
384
385 ExtraField::Invalid => return None,
386
387 _ => (),
388 }
389 }
390
391 if self.compression_method == CompressionMethod::AES {
392 return None;
393 }
394
395 Some(())
396 }
397
398 #[inline]
400 pub fn is_encrypted(&self) -> bool {
401 self.encryption.is_some()
402 }
403
404 #[inline]
406 pub fn name(&self) -> &str {
407 &self.name
408 }
409
410 #[inline]
412 pub fn comment(&self) -> &str {
413 &self.comment
414 }
415
416 pub fn read<R: BufRead + Seek>(&self, reader: R) -> io::Result<impl Read + use<R>> {
420 if self.encryption.is_some() {
421 return Err(encrypted_file());
422 }
423
424 let reader = Decompressor::new(self.read_raw(reader)?, self.compression_method)?;
425 Ok(self.content_checker(reader))
426 }
427
428 pub fn read_stored<R: Read + Seek>(&self, mut reader: R) -> io::Result<io::Take<R>> {
436 if self.encryption.is_some() {
437 return Err(encrypted_file());
438 }
439 if self.compression_method != CompressionMethod::STORE {
440 return Err(compressed());
441 }
442
443 let mut checker = utils::Crc32Checker::new(self.read_raw(&mut reader)?, self.crc32);
445 std::io::copy(&mut checker, &mut io::sink())?;
446
447 self.read_raw(reader)
448 }
449
450 pub fn read_raw<R: Read + Seek>(&self, mut reader: R) -> io::Result<io::Take<R>> {
454 reader.seek(io::SeekFrom::Start(self.data_offset))?;
455 Ok(reader.take(self.compressed_size))
456 }
457
458 #[inline]
462 pub fn content_checker<R: Read>(&self, reader: R) -> impl Read + use<R> {
463 utils::Crc32Checker::new(
464 utils::LengthChecker::new(reader, self.uncompressed_size),
465 self.crc32,
466 )
467 }
468
469 #[inline]
471 pub fn extract<R: BufRead + Seek>(
472 &self,
473 reader: &mut R,
474 root: impl AsRef<std::path::Path>,
475 ) -> io::Result<()> {
476 self._extract(reader, root.as_ref())
477 }
478
479 fn _extract(&self, reader: &mut dyn BufReadSeek, at: &std::path::Path) -> io::Result<()> {
480 if !std::fs::metadata(at)?.is_dir() {
481 return Err(io::Error::from(io::ErrorKind::NotFound));
482 }
483
484 let path = at.join(&*self.name);
485 std::fs::create_dir_all(path.parent().unwrap())?;
486
487 match self.file_type {
488 FileType::File => {
489 let mut f = std::fs::File::create_new(&path)?;
490 io::copy(&mut self.read(reader)?, &mut f)?;
491
492 if let Some(mod_time) = self.modification_time {
493 f.set_times(std::fs::FileTimes::new().set_modified(mod_time.to_std()))?;
494 }
495 }
496 FileType::Directory => {
497 std::fs::create_dir(path)?;
498 }
499 FileType::Symlink => {
500 let target = io::read_to_string(self.read(reader)?)?;
501 if !utils::validate_symlink(&self.name, &target) {
502 return Err(invalid("invalid symlink target"));
503 }
504
505 #[cfg(unix)]
506 std::os::unix::fs::symlink(target, path)?;
507
508 #[cfg(windows)]
509 if target.ends_with('/') {
510 std::os::windows::fs::symlink_dir(target, path)?;
511 } else {
512 std::os::windows::fs::symlink_file(target, path)?;
513 }
514
515 #[cfg(not(any(unix, windows)))]
516 std::fs::write(path, target.as_bytes())?;
517 }
518 }
519
520 Ok(())
521 }
522}
523
/// An archive together with the reader it was parsed from.
///
/// Adds lookup by entry name on top of [`RawArchive`].
#[derive(Debug)]
pub struct Archive<R> {
    /// Parsed entries and archive comment.
    inner: RawArchive,
    /// Maps an entry name to its index in `inner.entries()`.
    names: HashMap<Box<str>, usize>,
    /// Reader used to access entry data.
    reader: R,
}
552
553impl Archive<io::BufReader<std::fs::File>> {
554 #[inline]
556 pub fn open(path: impl AsRef<std::path::Path>) -> io::Result<Self> {
557 Self::_open(path.as_ref())
558 }
559
560 fn _open(path: &std::path::Path) -> io::Result<Self> {
561 Self::new(io::BufReader::new(std::fs::File::open(path)?))
562 }
563}
564
#[cfg(feature = "parallel")]
impl Archive<io::BufReader<sync_file::SyncFile>> {
    /// Opens the archive stored in the file at `path` through a
    /// `sync_file::SyncFile`, wrapped in a [`io::BufReader`].
    ///
    /// # Errors
    ///
    /// Returns the error from opening the file or from parsing the archive.
    #[inline]
    pub fn open_parallel(path: impl AsRef<std::path::Path>) -> io::Result<Self> {
        let path = path.as_ref();
        Self::_open(path)
    }

    /// Non-generic implementation of [`Self::open_parallel`].
    fn _open(path: &std::path::Path) -> io::Result<Self> {
        let file = sync_file::SyncFile::open(path)?;
        let buffered = io::BufReader::new(file);
        Self::new(buffered)
    }
}
577
578impl<R: BufRead + Seek> Archive<R> {
579 pub fn new(mut reader: R) -> io::Result<Self> {
591 let inner = RawArchive::new(&mut reader)?;
592
593 let names = inner
594 .entries()
595 .iter()
596 .enumerate()
597 .map(|(i, meta)| (meta.name().into(), i))
598 .collect();
599
600 Ok(Self {
601 inner,
602 names,
603 reader,
604 })
605 }
606
607 #[inline]
609 pub fn entries(&self) -> &[Metadata] {
610 &self.inner.entries
611 }
612
613 #[inline]
615 pub fn get_by_index(&mut self, index: usize) -> Option<File<'_, R>> {
616 let metadata = self.inner.entries().get(index)?;
617 Some(File {
618 metadata,
619 reader: &mut self.reader,
620 })
621 }
622
623 pub fn get_by_name(&mut self, name: &str) -> Option<File<'_, R>> {
625 let index = *self.names.get(name)?;
626 self.get_by_index(index)
627 }
628
629 pub fn index_of(&self, name: &str) -> Option<usize> {
631 self.names.get(name).copied()
632 }
633
634 #[inline]
636 pub fn commment(&self) -> &[u8] {
637 &self.inner.comment
638 }
639
640 #[inline]
644 pub fn extract(&mut self, at: impl AsRef<std::path::Path>) -> io::Result<()> {
645 self.inner.extract(&mut self.reader, at.as_ref())
646 }
647
648 #[cfg(feature = "parallel")]
655 #[inline]
656 pub fn parallel_extract(&self, at: impl AsRef<std::path::Path>) -> io::Result<()>
657 where
658 R: sync_file::ReadAt + sync_file::Size + Sync,
659 {
660 self.inner.parallel_extract(&self.reader, at.as_ref())
661 }
662
663 #[inline]
665 pub fn get_ref(&self) -> &R {
666 &self.reader
667 }
668
669 #[inline]
671 pub fn get_mut(&mut self) -> &mut R {
672 &mut self.reader
673 }
674}
675
/// A single entry of an [`Archive`], borrowing the archive's reader.
#[derive(Debug)]
pub struct File<'a, R> {
    /// Metadata of the entry.
    metadata: &'a Metadata,
    /// The archive's reader, borrowed mutably for as long as this value lives.
    reader: &'a mut R,
}
682
683impl<'a, R: BufRead + Seek> File<'a, R> {
684 #[inline]
689 pub fn metadata(&self) -> &'a Metadata {
690 self.metadata
691 }
692
693 #[inline]
697 pub fn read(&mut self) -> io::Result<impl Read + '_> {
698 self.metadata.read(&mut *self.reader)
699 }
700
701 pub fn read_stored(self) -> io::Result<io::Take<&'a mut R>> {
709 self.metadata.read_stored(self.reader)
710 }
711
712 #[inline]
716 pub fn read_raw(&mut self) -> io::Result<io::Take<&mut R>> {
717 self.metadata.read_raw(self.reader)
718 }
719
720 pub fn into_reader(self) -> &'a mut R {
726 self.reader
727 }
728}