1use std::{
4 borrow::Cow,
5 collections::HashMap,
6 io::{self, BufRead, Read, Seek},
7};
8
9use crate::{
10 CompressionMethod, Decompressor, FileType, types,
11 utils::{Crc32Checker, LengthChecker, Timestamp, cp437},
12};
13
14mod extra_field;
15mod raw;
16
17use extra_field::{ExtraField, ExtraFields};
18
/// Builds an [`io::Error`] of kind [`io::ErrorKind::InvalidData`] whose
/// message is `msg`.
///
/// Marked `#[cold]` because it is only called on failure paths.
#[cold]
fn invalid(msg: &str) -> io::Error {
    let kind = io::ErrorKind::InvalidData;
    io::Error::new(kind, msg)
}
23
/// Error returned when an operation is attempted on an encrypted entry.
///
/// The error kind is [`io::ErrorKind::Unsupported`].
#[cold]
fn encrypted_file() -> io::Error {
    let kind = io::ErrorKind::Unsupported;
    io::Error::new(kind, "encrypted file")
}
28
/// Error returned when stored (uncompressed) data is requested from an entry
/// that uses a compression method.
///
/// The error kind is [`io::ErrorKind::Unsupported`].
#[cold]
fn compressed() -> io::Error {
    let kind = io::ErrorKind::Unsupported;
    io::Error::new(kind, "compressed file")
}
33
/// Lexically checks that a symlink stored at `name` with link text `target`
/// cannot resolve to a location above the extraction root.
///
/// * `name` – `/`-separated, relative entry name of the link itself. It is
///   expected to contain no `..` components (names produced by `check_name`
///   satisfy this).
/// * `target` – the link text read from the archive (untrusted).
///
/// Returns `false` for absolute targets, targets containing a backslash,
/// targets containing `:` on Windows, and targets whose `..` components would
/// climb above the root.
///
/// Empty components (from repeated or trailing slashes) are ignored exactly
/// like `.`: the operating system collapses them, so counting them as a
/// directory descent would let a target such as `x//../../../c` earn one more
/// `..` than it is entitled to.
fn validate_symlink(name: &str, target: &str) -> bool {
    if target.starts_with('/') || target.contains('\\') || (cfg!(windows) && target.contains(':')) {
        return false;
    }

    // Number of directories between the extraction root and the link itself
    // (the final component of `name` is the link, hence the `- 1`).
    let mut depth = name
        .split('/')
        .filter(|part| !matches!(*part, "" | "."))
        .count()
        .saturating_sub(1);

    for part in target.split('/') {
        match part {
            // No-ops for path resolution; they must not change the depth.
            "" | "." => (),
            ".." => match depth.checked_sub(1) {
                Some(d) => depth = d,
                // Would step above the extraction root.
                None => return false,
            },
            _ => depth += 1,
        }
    }

    true
}
53
/// Combination of [`Read`] and [`Seek`] under a single name, so the pair can
/// be used as one trait object (`dyn ReadSeek`).
trait ReadSeek: Read + Seek {}

/// Every sized type that is both `Read` and `Seek` is a `ReadSeek`.
impl<T> ReadSeek for T where T: Read + Seek {}
56
/// Combination of [`BufRead`] and [`Seek`] under a single name, so the pair
/// can be used as one trait object (`dyn BufReadSeek`).
trait BufReadSeek: BufRead + Seek {}

/// Every sized type that is both `BufRead` and `Seek` is a `BufReadSeek`.
impl<T> BufReadSeek for T where T: BufRead + Seek {}
59
/// Encryption scheme recorded for an archive entry.
///
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[derive(Debug, Clone, Copy)]
#[non_exhaustive]
pub enum EncryptionMethod {
    /// Assigned to every entry whose header has the "encrypted" flag (bit 0)
    /// set, unless an AES extra field replaces it.
    ZipCrypto,
    /// Assigned when an encrypted entry carries an AES extra field.
    Aes {
        /// Key size copied from the AES extra field.
        /// NOTE(review): the unit (bits vs. a strength code) is decided by the
        /// extra-field parser, which is not visible here — confirm.
        key_size: u16,
        /// Copied from the AES extra field. When `false`, header parsing
        /// rejects the entry unless its stored CRC-32 is zero.
        check_crc32: bool,
    },
}
85
/// Parsed archive index that does not own a reader; callers pass the reader
/// to each operation that needs one.
pub struct RawArchive {
    /// Entry metadata as returned by `raw::read_archive`.
    entries: Vec<Metadata>,
    /// Archive-level comment as raw bytes (no text decoding is applied here).
    comment: Box<[u8]>,
}
91
92impl RawArchive {
93 #[inline]
97 pub fn new<R: Read + Seek>(reader: &mut R) -> io::Result<Self> {
98 let (entries, comment) = raw::read_archive(reader)?;
99 Ok(Self { entries, comment })
100 }
101
102 #[inline]
104 pub fn entries(&self) -> &[Metadata] {
105 &self.entries
106 }
107
108 #[inline]
110 pub fn comment(&self) -> &[u8] {
111 &self.comment
112 }
113
114 pub fn extract<R: BufRead + Seek>(
118 &self,
119 reader: &mut R,
120 at: &std::path::Path,
121 ) -> io::Result<()> {
122 match std::fs::create_dir(at) {
123 Ok(()) => (),
124 Err(err) if err.kind() == io::ErrorKind::AlreadyExists => (),
125 Err(err) => return Err(err),
126 };
127
128 for entry in &self.entries {
129 entry.extract(reader, at)?;
130 }
131
132 Ok(())
133 }
134
135 #[cfg(feature = "parallel")]
142 pub fn parallel_extract<R: sync_file::ReadAt + sync_file::Size + Sync>(
143 &self,
144 reader: &R,
145 at: &std::path::Path,
146 ) -> io::Result<()> {
147 use rayon::prelude::*;
148
149 match std::fs::create_dir(at) {
150 Ok(()) => (),
151 Err(err) if err.kind() == io::ErrorKind::AlreadyExists => (),
152 Err(err) => return Err(err),
153 };
154
155 self.entries.par_iter().try_for_each_init(
156 || io::BufReader::new(sync_file::Adapter::new(reader)),
157 |reader, entry| entry.extract(reader, at),
158 )?;
159
160 Ok(())
161 }
162}
163
164impl FileType {
165 fn test(attr: u32, name: &str) -> Option<Self> {
166 let dos_attr = attr as u16;
167 let unix_mode = (attr >> 16) as u16;
168 let unix_kind = unix_mode >> 12;
169
170 let is_file = (dos_attr & (1 << 5)) != 0 || unix_kind == 8;
171 let is_dir = (dos_attr & (1 << 4)) != 0 || unix_kind == 4;
172 let is_symlink = unix_kind == 10;
173 let trailing_slash = name.ends_with('/');
174
175 match (is_file, is_dir, trailing_slash, is_symlink) {
176 (_, false, false, false) => Some(FileType::File),
177 (false, _, true, false) => Some(FileType::Directory),
178 (false, false, false, true) => Some(FileType::Symlink),
179 _ => None,
180 }
181 }
182}
183
184fn convert_string(raw: &[u8], force_unicode: bool) -> Option<(Cow<'_, str>, Option<u32>)> {
185 if let Ok(name) = str::from_utf8(raw) {
189 return Some((Cow::Borrowed(name), None));
190 }
191
192 if force_unicode {
194 None
195 } else {
196 let name = cp437::convert(raw);
197 Some((Cow::Owned(name), Some(crc32fast::hash(raw))))
198 }
199}
200
/// Validates and normalises an entry name.
///
/// Returns `None` when the name
/// * starts with `/`,
/// * contains a backslash or a NUL character,
/// * contains `:` (Windows builds only),
/// * has a `.` component in final position or a `..` component anywhere, or
/// * is empty after normalisation.
///
/// Normalisation drops empty components (`//`) and `./` components; all other
/// components, including a trailing `/`, are kept verbatim.
fn check_name(name: &str) -> Option<Box<str>> {
    let has_forbidden_char = ['\\', '\0'].iter().any(|&c| name.contains(c))
        || (cfg!(windows) && name.contains(':'));
    if name.starts_with('/') || has_forbidden_char {
        return None;
    }

    let mut cleaned = String::with_capacity(name.len());
    // `split_inclusive` keeps the separator attached to each component, so a
    // trailing slash survives and "./" can be told apart from a final ".".
    for segment in name.split_inclusive('/') {
        if matches!(segment, "." | ".." | "../") {
            return None;
        }
        if !matches!(segment, "/" | "./") {
            cleaned.push_str(segment);
        }
    }

    (!cleaned.is_empty()).then(|| cleaned.into_boxed_str())
}
226
/// Everything known about a single archive entry.
#[derive(Debug)]
pub struct Metadata {
    /// Offset of the entry's local header, taken from the central header (or
    /// its Zip64 extra field). Left at 0 when built from a local header.
    header_offset: u64,
    /// Position `read_raw` seeks to before reading the entry's data.
    /// NOTE(review): both constructors in this file initialise it to 0, so it
    /// is presumably filled in by the archive reader — confirm in `raw`.
    pub data_offset: u64,

    /// Number of bytes `read_raw` exposes starting at `data_offset`.
    pub compressed_size: u64,
    /// Length enforced on the decoded data by `content_checker`.
    pub uncompressed_size: u64,
    /// Compression method; for AES entries this is the method taken from the
    /// AES extra field rather than the AES marker in the header.
    pub compression_method: CompressionMethod,
    /// CRC-32 enforced on the decoded data by `content_checker`.
    pub crc32: u32,
    /// Kind of entry. Derived from external attributes and name for central
    /// headers; always `FileType::File` for local headers.
    pub file_type: FileType,

    /// Timestamps taken from the NTFS or extended-timestamp extra fields; if
    /// both are present the one parsed last wins.
    pub modification_time: Option<Timestamp>,
    pub access_time: Option<Timestamp>,
    pub creation_time: Option<Timestamp>,

    /// `None` when the header's "encrypted" flag (bit 0) is clear.
    pub encryption: Option<EncryptionMethod>,

    /// Entry name after validation by `check_name`.
    name: Box<str>,
    /// Entry comment; empty for metadata built from a local header unless a
    /// Unicode comment extra field supplies one.
    comment: Box<str>,

    /// Header flag bit 3.
    is_streaming: bool,
    /// Set once a Zip64 extended-information extra field has been parsed.
    is_zip64: bool,
    /// Raw general-purpose flags word from the header.
    flags: u16,
}
252
253impl Metadata {
254 fn from_local_header(
255 header: types::LocalFileHeader,
256 file_name: &[u8],
257 extra_fields: &[u8],
258 ) -> Option<Self> {
259 let flags = header.flags.get();
260 let is_encrypted = flags & (1 << 0) != 0;
261 let is_streaming = flags & (1 << 3) != 0;
262 let is_unicode = flags & (1 << 11) != 0;
263
264 if { header.signature } != types::LocalFileHeader::SIGNATURE {
265 return None;
266 }
267
268 let (name, name_crc) = convert_string(file_name, is_unicode)?;
269 let name = check_name(&name)?;
270
271 let mut meta = Self {
272 crc32: header.crc32.get(),
273 encryption: is_encrypted.then_some(EncryptionMethod::ZipCrypto),
274 header_offset: 0,
275 data_offset: 0,
276
277 compressed_size: header.compressed_size.get() as u64,
278 uncompressed_size: header.uncompressed_size.get() as u64,
279 compression_method: CompressionMethod(header.compression_method.get()),
280 file_type: FileType::File,
281
282 modification_time: None,
283 access_time: None,
284 creation_time: None,
285
286 name,
287 comment: Box::default(),
288
289 is_streaming,
290 is_zip64: false,
291 flags,
292 };
293
294 meta.parse_extra_fields(ExtraFields(extra_fields), name_crc, None)?;
295
296 Some(meta)
297 }
298
299 fn from_central_header(
300 header: types::CentralFileHeader,
301 file_name: &[u8],
302 extra_fields: &[u8],
303 comment: &[u8],
304 ) -> Option<Self> {
305 let flags = header.flags.get();
306 let is_encrypted = flags & (1 << 0) != 0;
307 let is_streaming = flags & (1 << 3) != 0;
308 let is_unicode = flags & (1 << 11) != 0;
309
310 if { header.signature } != types::CentralFileHeader::SIGNATURE
311 || header.disk_number.get() != 0
312 {
313 return None;
314 }
315
316 let (comment, comment_crc) = convert_string(comment, is_unicode)?;
317 let comment = comment.into_owned().into_boxed_str();
318 let (name, name_crc) = convert_string(file_name, is_unicode)?;
319 let name = check_name(&name)?;
320 let file_type = FileType::test(header.external_attributes.get(), &name)?;
321
322 let mut meta = Self {
323 crc32: header.crc32.get(),
324 encryption: is_encrypted.then_some(EncryptionMethod::ZipCrypto),
325 header_offset: header.local_header_offset.get() as u64,
326 data_offset: 0,
327
328 compressed_size: header.compressed_size.get() as u64,
329 uncompressed_size: header.uncompressed_size.get() as u64,
330 compression_method: CompressionMethod(header.compression_method.get()),
331 file_type,
332
333 modification_time: None,
334 access_time: None,
335 creation_time: None,
336
337 name,
338 comment,
339
340 is_streaming,
341 is_zip64: false,
342 flags,
343 };
344
345 meta.parse_extra_fields(ExtraFields(extra_fields), name_crc, comment_crc)?;
346
347 Some(meta)
348 }
349
350 fn parse_extra_fields(
351 &mut self,
352 extra_fields: ExtraFields,
353 name_crc: Option<u32>,
354 comment_crc: Option<u32>,
355 ) -> Option<()> {
356 for field in extra_fields.iter() {
357 match field {
358 ExtraField::Zip64ExtendedInformation(mut info) => {
359 if self.uncompressed_size == 0xffff_ffff {
360 self.uncompressed_size = info.next()?;
361 }
362 if self.compressed_size == 0xffff_ffff {
363 self.compressed_size = info.next()?;
364 }
365 if self.header_offset == 0xffff_ffff {
366 self.header_offset = info.next()?;
367 }
368 info.end()?;
370 self.is_zip64 = true;
371 }
372 ExtraField::UnicodeComment(unicode) => {
373 if Some(unicode.header_comment_crc32) != comment_crc {
374 return None;
375 }
376 self.comment = unicode.comment.into();
377 }
378
379 ExtraField::UnicodeName(unicode) => {
380 if Some(unicode.header_name_crc32) != name_crc {
381 return None;
382 }
383 self.name = check_name(unicode.name)?;
384 }
385
386 ExtraField::Ntfs(ntfs) => {
387 self.modification_time = ntfs.times.mtime;
388 self.access_time = ntfs.times.atime;
389 self.creation_time = ntfs.times.ctime;
390 }
391
392 ExtraField::ExtendedTimestamp(ts) => {
393 self.modification_time = ts.modification_time;
394 self.access_time = ts.access_time;
395 self.creation_time = ts.creation_time;
396 }
397
398 ExtraField::Aes(aes) => {
399 if self.compression_method != CompressionMethod::AES
400 || (!aes.check_crc32 && self.crc32 != 0)
401 {
402 return None;
403 }
404 let Some(enc @ EncryptionMethod::ZipCrypto) = &mut self.encryption else {
405 return None;
406 };
407
408 *enc = EncryptionMethod::Aes {
409 key_size: aes.key_size,
410 check_crc32: aes.check_crc32,
411 };
412 self.compression_method = aes.compression;
413 }
414
415 ExtraField::Invalid(_, _) => return None,
416
417 _ => (),
418 }
419 }
420
421 if self.compression_method == CompressionMethod::AES {
422 return None;
423 }
424
425 Some(())
426 }
427
428 #[inline]
430 pub fn is_encrypted(&self) -> bool {
431 self.encryption.is_some()
432 }
433
434 #[inline]
436 pub fn name(&self) -> &str {
437 &self.name
438 }
439
440 #[inline]
442 pub fn comment(&self) -> &str {
443 &self.comment
444 }
445
446 pub fn read<R: BufRead + Seek>(&self, reader: R) -> io::Result<impl Read + use<R>> {
450 if self.encryption.is_some() {
451 return Err(encrypted_file());
452 }
453
454 let reader = Decompressor::new(self.read_raw(reader)?, self.compression_method)?;
455 Ok(self.content_checker(reader))
456 }
457
458 pub fn read_stored<R: Read + Seek>(&self, mut reader: R) -> io::Result<io::Take<R>> {
466 if self.encryption.is_some() {
467 return Err(encrypted_file());
468 }
469 if self.compression_method != CompressionMethod::STORE {
470 return Err(compressed());
471 }
472
473 let mut checker = Crc32Checker::new(self.read_raw(&mut reader)?, self.crc32);
475 std::io::copy(&mut checker, &mut io::sink())?;
476
477 self.read_raw(reader)
478 }
479
480 pub fn read_raw<R: Read + Seek>(&self, mut reader: R) -> io::Result<io::Take<R>> {
484 reader.seek(io::SeekFrom::Start(self.data_offset))?;
485 Ok(reader.take(self.compressed_size))
486 }
487
488 #[inline]
492 pub fn content_checker<R: Read>(&self, reader: R) -> impl Read + use<R> {
493 Crc32Checker::new(
494 LengthChecker::new(reader, self.uncompressed_size),
495 self.crc32,
496 )
497 }
498
499 #[inline]
501 pub fn extract<R: BufRead + Seek>(
502 &self,
503 reader: &mut R,
504 root: impl AsRef<std::path::Path>,
505 ) -> io::Result<()> {
506 self._extract(reader, root.as_ref())
507 }
508
509 fn _extract(&self, reader: &mut dyn BufReadSeek, at: &std::path::Path) -> io::Result<()> {
510 if !std::fs::metadata(at)?.is_dir() {
511 return Err(io::Error::from(io::ErrorKind::NotFound));
512 }
513
514 let path = at.join(&*self.name);
515 std::fs::create_dir_all(path.parent().unwrap())?;
516
517 match self.file_type {
518 FileType::File => {
519 let mut f = std::fs::File::create_new(&path)?;
520 io::copy(&mut self.read(reader)?, &mut f)?;
521
522 if let Some(mod_time) = self.modification_time {
523 f.set_times(std::fs::FileTimes::new().set_modified(mod_time.to_std()))?;
524 }
525 }
526 FileType::Directory => {
527 std::fs::create_dir(path)?;
528 }
529 FileType::Symlink => {
530 let target = io::read_to_string(self.read(reader)?)?;
531 if !validate_symlink(&self.name, &target) {
532 return Err(invalid("invalid symlink target"));
533 }
534
535 #[cfg(unix)]
536 std::os::unix::fs::symlink(target, path)?;
537
538 #[cfg(windows)]
539 if target.ends_with('/') {
540 std::os::windows::fs::symlink_dir(target, path)?;
541 } else {
542 std::os::windows::fs::symlink_file(target, path)?;
543 }
544
545 #[cfg(not(any(unix, windows)))]
546 std::fs::write(path, target.as_bytes())?;
547 }
548 }
549
550 Ok(())
551 }
552}
553
/// An archive index bundled with the reader it was parsed from.
pub struct Archive<R> {
    /// Parsed entries and archive comment.
    inner: RawArchive,
    /// Maps each entry name to its index in `inner.entries()`.
    names: HashMap<Box<str>, usize>,
    /// The underlying reader; lent to `File` handles and used for extraction.
    reader: R,
}
581
582impl Archive<io::BufReader<std::fs::File>> {
583 #[inline]
585 pub fn open(path: impl AsRef<std::path::Path>) -> io::Result<Self> {
586 Self::_open(path.as_ref())
587 }
588
589 fn _open(path: &std::path::Path) -> io::Result<Self> {
590 Self::new(io::BufReader::new(std::fs::File::open(path)?))
591 }
592}
593
#[cfg(feature = "parallel")]
impl Archive<io::BufReader<sync_file::SyncFile>> {
    /// Opens the file at `path` as a `sync_file::SyncFile` and parses it as
    /// an archive.
    ///
    /// The file is wrapped in an [`io::BufReader`].
    ///
    /// # Errors
    /// Returns an error if the file cannot be opened or the archive cannot be
    /// parsed.
    #[inline]
    pub fn open_parallel(path: impl AsRef<std::path::Path>) -> io::Result<Self> {
        let path: &std::path::Path = path.as_ref();
        Self::_open(path)
    }

    /// Non-generic body of [`Archive::open_parallel`].
    fn _open(path: &std::path::Path) -> io::Result<Self> {
        let file = sync_file::SyncFile::open(path)?;
        let buffered = io::BufReader::new(file);
        Self::new(buffered)
    }
}
606
607impl<R: BufRead + Seek> Archive<R> {
608 pub fn new(mut reader: R) -> io::Result<Self> {
620 let inner = RawArchive::new(&mut reader)?;
621
622 let names = inner
623 .entries()
624 .iter()
625 .enumerate()
626 .map(|(i, meta)| (meta.name().into(), i))
627 .collect();
628
629 Ok(Self {
630 inner,
631 names,
632 reader,
633 })
634 }
635
636 #[inline]
638 pub fn entries(&self) -> &[Metadata] {
639 &self.inner.entries
640 }
641
642 #[inline]
644 pub fn get_by_index(&mut self, index: usize) -> Option<File<'_, R>> {
645 let metadata = self.inner.entries().get(index)?;
646 Some(File {
647 metadata,
648 reader: &mut self.reader,
649 })
650 }
651
652 pub fn get_by_name(&mut self, name: &str) -> Option<File<'_, R>> {
654 let index = *self.names.get(name)?;
655 self.get_by_index(index)
656 }
657
658 pub fn index_of(&self, name: &str) -> Option<usize> {
660 self.names.get(name).copied()
661 }
662
663 #[inline]
665 pub fn commment(&self) -> &[u8] {
666 &self.inner.comment
667 }
668
669 #[inline]
673 pub fn extract(&mut self, at: impl AsRef<std::path::Path>) -> io::Result<()> {
674 self.inner.extract(&mut self.reader, at.as_ref())
675 }
676
677 #[cfg(feature = "parallel")]
684 #[inline]
685 pub fn parallel_extract(&self, at: impl AsRef<std::path::Path>) -> io::Result<()>
686 where
687 R: sync_file::ReadAt + sync_file::Size + Sync,
688 {
689 self.inner.parallel_extract(&self.reader, at.as_ref())
690 }
691
692 #[inline]
694 pub fn get_ref(&self) -> &R {
695 &self.reader
696 }
697
698 #[inline]
700 pub fn get_mut(&mut self) -> &mut R {
701 &mut self.reader
702 }
703}
704
/// Handle to a single archive entry: borrows the entry's metadata together
/// with the archive's reader.
pub struct File<'a, R> {
    /// Metadata of the entry this handle refers to.
    metadata: &'a Metadata,
    /// The archive's reader, borrowed exclusively while the handle lives.
    reader: &'a mut R,
}
710
711impl<'a, R: BufRead + Seek> File<'a, R> {
712 #[inline]
717 pub fn metadata(&self) -> &'a Metadata {
718 self.metadata
719 }
720
721 #[inline]
725 pub fn read(&mut self) -> io::Result<impl Read + '_> {
726 self.metadata.read(&mut *self.reader)
727 }
728
729 pub fn read_stored(self) -> io::Result<io::Take<&'a mut R>> {
737 self.metadata.read_stored(self.reader)
738 }
739
740 #[inline]
744 pub fn read_raw(&mut self) -> io::Result<io::Take<&mut R>> {
745 self.metadata.read_raw(self.reader)
746 }
747
748 pub fn into_reader(self) -> &'a mut R {
754 self.reader
755 }
756}
757
/// Spot checks for `validate_symlink`.
#[test]
fn symlink_validation() {
    // (link name, link target, expected verdict)
    let cases = [
        ("a/b", "../c", true),
        ("a/b", "../../c", false),
        ("a/b", "/c", false),
    ];
    for (name, target, expected) in cases {
        assert!(validate_symlink(name, target) == expected);
    }

    #[cfg(windows)]
    assert!(!validate_symlink("a/b", "C:/e"));
}