zip/read.rs

1//! Types for reading ZIP archives
2
3#[cfg(feature = "aes-crypto")]
4use crate::aes::{AesReader, AesReaderValid};
5use crate::compression::{CompressionMethod, Decompressor};
6use crate::cp437::FromCp437;
7use crate::crc32::Crc32Reader;
8use crate::extra_fields::{ExtendedTimestamp, ExtraField, Ntfs};
9use crate::read::zip_archive::{Shared, SharedBuilder};
10use crate::result::invalid;
11use crate::result::{ZipError, ZipResult};
12use crate::spec::{self, CentralDirectoryEndInfo, DataAndPosition, FixedSizeBlock, Pod};
13use crate::types::{
14    AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData,
15    ZipLocalEntryBlock,
16};
17use crate::write::SimpleFileOptions;
18use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
19use crate::ZIP64_BYTES_THR;
20use indexmap::IndexMap;
21use std::borrow::Cow;
22use std::ffi::OsStr;
23use std::fs::create_dir_all;
24use std::io::{self, copy, prelude::*, sink, SeekFrom};
25use std::mem;
26use std::mem::size_of;
27use std::ops::{Deref, Range};
28use std::path::{Component, Path, PathBuf};
29use std::sync::{Arc, OnceLock};
30
31mod config;
32
33pub use config::*;
34
35/// Provides high level API for reading from a stream.
36pub(crate) mod stream;
37
38pub(crate) mod magic_finder;
39
40// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
41pub(crate) mod zip_archive {
42    use indexmap::IndexMap;
43    use std::sync::Arc;
44
45    /// Extract immutable data from `ZipArchive` to make it cheap to clone
46    #[derive(Debug)]
47    pub(crate) struct Shared {
48        pub(crate) files: IndexMap<Box<str>, super::ZipFileData>,
49        pub(super) offset: u64,
50        pub(super) dir_start: u64,
51        // This isn't yet used anywhere, but it is here for use cases in the future.
52        #[allow(dead_code)]
53        pub(super) config: super::Config,
54        pub(crate) comment: Box<[u8]>,
55        pub(crate) zip64_comment: Option<Box<[u8]>>,
56    }
57
58    #[derive(Debug)]
59    pub(crate) struct SharedBuilder {
60        pub(crate) files: Vec<super::ZipFileData>,
61        pub(super) offset: u64,
62        pub(super) dir_start: u64,
63        // This isn't yet used anywhere, but it is here for use cases in the future.
64        #[allow(dead_code)]
65        pub(super) config: super::Config,
66    }
67
68    impl SharedBuilder {
69        pub fn build(self, comment: Box<[u8]>, zip64_comment: Option<Box<[u8]>>) -> Shared {
70            let mut index_map = IndexMap::with_capacity(self.files.len());
71            self.files.into_iter().for_each(|file| {
72                index_map.insert(file.file_name.clone(), file);
73            });
74            Shared {
75                files: index_map,
76                offset: self.offset,
77                dir_start: self.dir_start,
78                config: self.config,
79                comment,
80                zip64_comment,
81            }
82        }
83    }
84
85    /// ZIP archive reader
86    ///
87    /// At the moment, this type is cheap to clone as long as the reader it wraps
88    /// is cheap to clone. However, this is not guaranteed by this crate and it may
89    /// change in the future.
90    ///
91    /// ```no_run
92    /// use std::io::prelude::*;
93    /// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
94    ///     use zip::HasZipMetadata;
95    ///     let mut zip = zip::ZipArchive::new(reader)?;
96    ///
97    ///     for i in 0..zip.len() {
98    ///         let mut file = zip.by_index(i)?;
99    ///         println!("Filename: {}", file.name());
100    ///         std::io::copy(&mut file, &mut std::io::stdout())?;
101    ///     }
102    ///
103    ///     Ok(())
104    /// }
105    /// ```
106    #[derive(Clone, Debug)]
107    pub struct ZipArchive<R> {
108        pub(super) reader: R,
109        pub(super) shared: Arc<Shared>,
110    }
111}
112
113#[cfg(feature = "aes-crypto")]
114use crate::aes::PWD_VERIFY_LENGTH;
115use crate::extra_fields::UnicodeExtraField;
116use crate::result::ZipError::InvalidPassword;
117use crate::spec::is_dir;
118use crate::types::ffi::{S_IFLNK, S_IFREG};
119use crate::unstable::{path_to_string, LittleEndianReadExt};
120pub use zip_archive::ZipArchive;
121
122#[allow(clippy::large_enum_variant)]
123pub(crate) enum CryptoReader<'a, R: Read> {
124    Plaintext(io::Take<&'a mut R>),
125    ZipCrypto(ZipCryptoReaderValid<io::Take<&'a mut R>>),
126    #[cfg(feature = "aes-crypto")]
127    Aes {
128        reader: AesReaderValid<io::Take<&'a mut R>>,
129        vendor_version: AesVendorVersion,
130    },
131}
132
133impl<R: Read> Read for CryptoReader<'_, R> {
134    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
135        match self {
136            CryptoReader::Plaintext(r) => r.read(buf),
137            CryptoReader::ZipCrypto(r) => r.read(buf),
138            #[cfg(feature = "aes-crypto")]
139            CryptoReader::Aes { reader: r, .. } => r.read(buf),
140        }
141    }
142
143    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
144        match self {
145            CryptoReader::Plaintext(r) => r.read_to_end(buf),
146            CryptoReader::ZipCrypto(r) => r.read_to_end(buf),
147            #[cfg(feature = "aes-crypto")]
148            CryptoReader::Aes { reader: r, .. } => r.read_to_end(buf),
149        }
150    }
151
152    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
153        match self {
154            CryptoReader::Plaintext(r) => r.read_to_string(buf),
155            CryptoReader::ZipCrypto(r) => r.read_to_string(buf),
156            #[cfg(feature = "aes-crypto")]
157            CryptoReader::Aes { reader: r, .. } => r.read_to_string(buf),
158        }
159    }
160}
161
162impl<'a, R: Read> CryptoReader<'a, R> {
163    /// Consumes this decoder, returning the underlying reader.
164    pub fn into_inner(self) -> io::Take<&'a mut R> {
165        match self {
166            CryptoReader::Plaintext(r) => r,
167            CryptoReader::ZipCrypto(r) => r.into_inner(),
168            #[cfg(feature = "aes-crypto")]
169            CryptoReader::Aes { reader: r, .. } => r.into_inner(),
170        }
171    }
172
173    /// Returns `true` if the data is encrypted using AE2.
174    pub const fn is_ae2_encrypted(&self) -> bool {
175        #[cfg(feature = "aes-crypto")]
176        return matches!(
177            self,
178            CryptoReader::Aes {
179                vendor_version: AesVendorVersion::Ae2,
180                ..
181            }
182        );
183        #[cfg(not(feature = "aes-crypto"))]
184        false
185    }
186}
187
188#[cold]
189fn invalid_state<T>() -> io::Result<T> {
190    Err(io::Error::other("ZipFileReader was in an invalid state"))
191}
192
193pub(crate) enum ZipFileReader<'a, R: Read> {
194    NoReader,
195    Raw(io::Take<&'a mut R>),
196    Compressed(Box<Crc32Reader<Decompressor<io::BufReader<CryptoReader<'a, R>>>>>),
197}
198
199impl<R: Read> Read for ZipFileReader<'_, R> {
200    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
201        match self {
202            ZipFileReader::NoReader => invalid_state(),
203            ZipFileReader::Raw(r) => r.read(buf),
204            ZipFileReader::Compressed(r) => r.read(buf),
205        }
206    }
207
208    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
209        match self {
210            ZipFileReader::NoReader => invalid_state(),
211            ZipFileReader::Raw(r) => r.read_exact(buf),
212            ZipFileReader::Compressed(r) => r.read_exact(buf),
213        }
214    }
215
216    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
217        match self {
218            ZipFileReader::NoReader => invalid_state(),
219            ZipFileReader::Raw(r) => r.read_to_end(buf),
220            ZipFileReader::Compressed(r) => r.read_to_end(buf),
221        }
222    }
223
224    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
225        match self {
226            ZipFileReader::NoReader => invalid_state(),
227            ZipFileReader::Raw(r) => r.read_to_string(buf),
228            ZipFileReader::Compressed(r) => r.read_to_string(buf),
229        }
230    }
231}
232
233impl<'a, R: Read> ZipFileReader<'a, R> {
234    fn into_inner(self) -> io::Result<io::Take<&'a mut R>> {
235        match self {
236            ZipFileReader::NoReader => invalid_state(),
237            ZipFileReader::Raw(r) => Ok(r),
238            ZipFileReader::Compressed(r) => {
239                Ok(r.into_inner().into_inner().into_inner().into_inner())
240            }
241        }
242    }
243}
244
245/// A struct for reading a zip file
246pub struct ZipFile<'a, R: Read> {
247    pub(crate) data: Cow<'a, ZipFileData>,
248    pub(crate) reader: ZipFileReader<'a, R>,
249}
250
251/// A struct for reading and seeking a zip file
252pub struct ZipFileSeek<'a, R> {
253    data: Cow<'a, ZipFileData>,
254    reader: ZipFileSeekReader<'a, R>,
255}
256
257enum ZipFileSeekReader<'a, R> {
258    Raw(SeekableTake<'a, R>),
259}
260
261struct SeekableTake<'a, R> {
262    inner: &'a mut R,
263    inner_starting_offset: u64,
264    length: u64,
265    current_offset: u64,
266}
267
268impl<'a, R: Seek> SeekableTake<'a, R> {
269    pub fn new(inner: &'a mut R, length: u64) -> io::Result<Self> {
270        let inner_starting_offset = inner.stream_position()?;
271        Ok(Self {
272            inner,
273            inner_starting_offset,
274            length,
275            current_offset: 0,
276        })
277    }
278}
279
280impl<R: Seek> Seek for SeekableTake<'_, R> {
281    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
282        let offset = match pos {
283            SeekFrom::Start(offset) => Some(offset),
284            SeekFrom::End(offset) => self.length.checked_add_signed(offset),
285            SeekFrom::Current(offset) => self.current_offset.checked_add_signed(offset),
286        };
287        match offset {
288            None => Err(io::Error::new(
289                io::ErrorKind::InvalidInput,
290                "invalid seek to a negative or overflowing position",
291            )),
292            Some(offset) => {
293                let clamped_offset = std::cmp::min(self.length, offset);
294                let new_inner_offset = self
295                    .inner
296                    .seek(SeekFrom::Start(self.inner_starting_offset + clamped_offset))?;
297                self.current_offset = new_inner_offset - self.inner_starting_offset;
298                Ok(self.current_offset)
299            }
300        }
301    }
302}
303
304impl<R: Read> Read for SeekableTake<'_, R> {
305    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
306        let written = self
307            .inner
308            .take(self.length - self.current_offset)
309            .read(buf)?;
310        self.current_offset += written as u64;
311        Ok(written)
312    }
313}
314
315pub(crate) fn make_writable_dir_all<T: AsRef<Path>>(outpath: T) -> Result<(), ZipError> {
316    create_dir_all(outpath.as_ref())?;
317    #[cfg(unix)]
318    {
319        // Dirs must be writable until all normal files are extracted
320        use std::os::unix::fs::PermissionsExt;
321        std::fs::set_permissions(
322            outpath.as_ref(),
323            std::fs::Permissions::from_mode(
324                0o700 | std::fs::metadata(outpath.as_ref())?.permissions().mode(),
325            ),
326        )?;
327    }
328    Ok(())
329}
330
331pub(crate) fn find_content<'a, R: Read + Seek>(
332    data: &ZipFileData,
333    reader: &'a mut R,
334) -> ZipResult<io::Take<&'a mut R>> {
335    // TODO: use .get_or_try_init() once stabilized to provide a closure returning a Result!
336    let data_start = data.data_start(reader)?;
337
338    reader.seek(SeekFrom::Start(data_start))?;
339    Ok(reader.take(data.compressed_size))
340}
341
342fn find_content_seek<'a, R: Read + Seek>(
343    data: &ZipFileData,
344    reader: &'a mut R,
345) -> ZipResult<SeekableTake<'a, R>> {
346    // Parse local header
347    let data_start = data.data_start(reader)?;
348    reader.seek(SeekFrom::Start(data_start))?;
349
350    // Explicit Ok and ? are needed to convert io::Error to ZipError
351    Ok(SeekableTake::new(reader, data.compressed_size)?)
352}
353
354pub(crate) fn find_data_start(
355    data: &ZipFileData,
356    reader: &mut (impl Read + Seek + Sized),
357) -> Result<u64, ZipError> {
358    // Go to start of data.
359    reader.seek(SeekFrom::Start(data.header_start))?;
360
361    // Parse static-sized fields and check the magic value.
362    let block = ZipLocalEntryBlock::parse(reader)?;
363
364    // Calculate the end of the local header from the fields we just parsed.
365    let variable_fields_len =
366        // Each of these fields must be converted to u64 before adding, as the result may
367        // easily overflow a u16.
368        block.file_name_length as u64 + block.extra_field_length as u64;
369    let data_start =
370        data.header_start + size_of::<ZipLocalEntryBlock>() as u64 + variable_fields_len;
371
372    // Set the value so we don't have to read it again.
373    match data.data_start.set(data_start) {
374        Ok(()) => (),
375        // If the value was already set in the meantime, ensure it matches (this is probably
376        // unnecessary).
377        Err(_) => {
378            debug_assert_eq!(*data.data_start.get().unwrap(), data_start);
379        }
380    }
381
382    Ok(data_start)
383}
384
385#[allow(clippy::too_many_arguments)]
386pub(crate) fn make_crypto_reader<'a, R: Read>(
387    data: &ZipFileData,
388    reader: io::Take<&'a mut R>,
389    password: Option<&[u8]>,
390    aes_info: Option<(AesMode, AesVendorVersion, CompressionMethod)>,
391) -> ZipResult<CryptoReader<'a, R>> {
392    #[allow(deprecated)]
393    {
394        if let CompressionMethod::Unsupported(_) = data.compression_method {
395            return unsupported_zip_error("Compression method not supported");
396        }
397    }
398
399    let reader = match (password, aes_info) {
400        #[cfg(not(feature = "aes-crypto"))]
401        (Some(_), Some(_)) => {
402            return Err(ZipError::UnsupportedArchive(
403                "AES encrypted files cannot be decrypted without the aes-crypto feature.",
404            ))
405        }
406        #[cfg(feature = "aes-crypto")]
407        (Some(password), Some((aes_mode, vendor_version, _))) => CryptoReader::Aes {
408            reader: AesReader::new(reader, aes_mode, data.compressed_size).validate(password)?,
409            vendor_version,
410        },
411        (Some(password), None) => {
412            let validator = if data.using_data_descriptor {
413                ZipCryptoValidator::InfoZipMsdosTime(
414                    data.last_modified_time.map_or(0, |x| x.timepart()),
415                )
416            } else {
417                ZipCryptoValidator::PkzipCrc32(data.crc32)
418            };
419            CryptoReader::ZipCrypto(ZipCryptoReader::new(reader, password).validate(validator)?)
420        }
421        (None, Some(_)) => return Err(InvalidPassword),
422        (None, None) => CryptoReader::Plaintext(reader),
423    };
424    Ok(reader)
425}
426
427pub(crate) fn make_reader<R: Read>(
428    compression_method: CompressionMethod,
429    crc32: u32,
430    reader: CryptoReader<R>,
431) -> ZipResult<ZipFileReader<R>> {
432    let ae2_encrypted = reader.is_ae2_encrypted();
433
434    Ok(ZipFileReader::Compressed(Box::new(Crc32Reader::new(
435        Decompressor::new(io::BufReader::new(reader), compression_method)?,
436        crc32,
437        ae2_encrypted,
438    ))))
439}
440
441pub(crate) fn make_symlink<T>(
442    outpath: &Path,
443    target: &[u8],
444    #[allow(unused)] existing_files: &IndexMap<Box<str>, T>,
445) -> ZipResult<()> {
446    let Ok(target_str) = std::str::from_utf8(target) else {
447        return Err(invalid!("Invalid UTF-8 as symlink target"));
448    };
449
450    #[cfg(not(any(unix, windows)))]
451    {
452        use std::fs::File;
453        let output = File::create(outpath);
454        output?.write_all(target)?;
455    }
456    #[cfg(unix)]
457    {
458        std::os::unix::fs::symlink(Path::new(&target_str), outpath)?;
459    }
460    #[cfg(windows)]
461    {
462        let target = Path::new(OsStr::new(&target_str));
463        let target_is_dir_from_archive =
464            existing_files.contains_key(target_str) && is_dir(target_str);
465        let target_is_dir = if target_is_dir_from_archive {
466            true
467        } else if let Ok(meta) = std::fs::metadata(target) {
468            meta.is_dir()
469        } else {
470            false
471        };
472        if target_is_dir {
473            std::os::windows::fs::symlink_dir(target, outpath)?;
474        } else {
475            std::os::windows::fs::symlink_file(target, outpath)?;
476        }
477    }
478    Ok(())
479}
480
481#[derive(Debug)]
482pub(crate) struct CentralDirectoryInfo {
483    pub(crate) archive_offset: u64,
484    pub(crate) directory_start: u64,
485    pub(crate) number_of_files: usize,
486    pub(crate) disk_number: u32,
487    pub(crate) disk_with_central_directory: u32,
488}
489
490impl<'a> TryFrom<&'a CentralDirectoryEndInfo> for CentralDirectoryInfo {
491    type Error = ZipError;
492
493    fn try_from(value: &'a CentralDirectoryEndInfo) -> Result<Self, Self::Error> {
494        let (relative_cd_offset, number_of_files, disk_number, disk_with_central_directory) =
495            match &value.eocd64 {
496                Some(DataAndPosition { data: eocd64, .. }) => {
497                    if eocd64.number_of_files_on_this_disk > eocd64.number_of_files {
498                        return Err(invalid!("ZIP64 footer indicates more files on this disk than in the whole archive"));
499                    }
500                    (
501                        eocd64.central_directory_offset,
502                        eocd64.number_of_files as usize,
503                        eocd64.disk_number,
504                        eocd64.disk_with_central_directory,
505                    )
506                }
507                _ => (
508                    value.eocd.data.central_directory_offset as u64,
509                    value.eocd.data.number_of_files_on_this_disk as usize,
510                    value.eocd.data.disk_number as u32,
511                    value.eocd.data.disk_with_central_directory as u32,
512                ),
513            };
514
515        let directory_start = relative_cd_offset
516            .checked_add(value.archive_offset)
517            .ok_or(invalid!("Invalid central directory size or offset"))?;
518
519        Ok(Self {
520            archive_offset: value.archive_offset,
521            directory_start,
522            number_of_files,
523            disk_number,
524            disk_with_central_directory,
525        })
526    }
527}
528
529impl<R> ZipArchive<R> {
530    pub(crate) fn from_finalized_writer(
531        files: IndexMap<Box<str>, ZipFileData>,
532        comment: Box<[u8]>,
533        zip64_comment: Option<Box<[u8]>>,
534        reader: R,
535        central_start: u64,
536    ) -> ZipResult<Self> {
537        let initial_offset = match files.first() {
538            Some((_, file)) => file.header_start,
539            None => central_start,
540        };
541        let shared = Arc::new(Shared {
542            files,
543            offset: initial_offset,
544            dir_start: central_start,
545            config: Config {
546                archive_offset: ArchiveOffset::Known(initial_offset),
547            },
548            comment,
549            zip64_comment,
550        });
551        Ok(Self { reader, shared })
552    }
553
554    /// Total size of the files in the archive, if it can be known. Doesn't include directories or
555    /// metadata.
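    ///
    /// A minimal usage sketch (the archive path and size limit are hypothetical), e.g. to
    /// reject suspiciously large archives before extracting:
    ///
    /// ```no_run
    /// # fn main() -> zip::result::ZipResult<()> {
    /// let file = std::fs::File::open("archive.zip")?;
    /// let archive = zip::ZipArchive::new(file)?;
    /// match archive.decompressed_size() {
    ///     Some(size) if size > 10u128 * 1024 * 1024 * 1024 => {
    ///         println!("refusing to extract: would expand to {} bytes", size)
    ///     }
    ///     Some(size) => println!("will expand to {} bytes", size),
    ///     None => println!("total size unknown (an entry uses a data descriptor)"),
    /// }
    /// # Ok(())
    /// # }
    /// ```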
556    pub fn decompressed_size(&self) -> Option<u128> {
557        let mut total = 0u128;
558        for file in self.shared.files.values() {
559            if file.using_data_descriptor {
560                return None;
561            }
562            total = total.checked_add(file.uncompressed_size as u128)?;
563        }
564        Some(total)
565    }
566}
567
568impl<R: Read + Seek> ZipArchive<R> {
569    pub(crate) fn merge_contents<W: Write + Seek>(
570        &mut self,
571        mut w: W,
572    ) -> ZipResult<IndexMap<Box<str>, ZipFileData>> {
573        if self.shared.files.is_empty() {
574            return Ok(IndexMap::new());
575        }
576        let mut new_files = self.shared.files.clone();
577        /* The first file header will probably start at the beginning of the file, but zip doesn't
578         * enforce that, and executable zips like PEX files have a shebang line, so their first
579         * header start will definitely be greater than 0.
580         *
581         * assert_eq!(0, new_files[0].header_start); // Avoid this.
582         */
583
584        let first_new_file_header_start = w.stream_position()?;
585
586        /* Push back file header starts for all entries in the covered files. */
587        new_files.values_mut().try_for_each(|f| {
588            /* This is probably the only really important thing to change. */
589            f.header_start = f
590                .header_start
591                .checked_add(first_new_file_header_start)
592                .ok_or(invalid!(
593                    "new header start from merge would have been too large"
594                ))?;
595            /* This is only ever used internally to cache metadata lookups (it's not part of the
596             * zip spec), and 0 is the sentinel value. */
597            f.central_header_start = 0;
598            /* data_start is a OnceLock, so it can be initialized from another thread in the
599             * implementation (which is good!). */
600            if let Some(old_data_start) = f.data_start.take() {
601                let new_data_start = old_data_start
602                    .checked_add(first_new_file_header_start)
603                    .ok_or(invalid!(
604                        "new data start from merge would have been too large"
605                    ))?;
606                f.data_start.get_or_init(|| new_data_start);
607            }
608            Ok::<_, ZipError>(())
609        })?;
610
611        /* Rewind to the beginning of the file.
612         *
613         * NB: we *could* decide to start copying from new_files[0].header_start instead, which
614         * would avoid copying over e.g. any pex shebangs or other file contents that start before
615         * the first zip file entry. However, zip files actually shouldn't care about garbage data
616         * in *between* real entries, since the central directory header records the correct start
617         * location of each, and keeping track of that math is more complicated logic that will only
618         * rarely be used, since most zips that get merged together are likely to be produced
619         * specifically for that purpose (and therefore are unlikely to have a shebang or other
620         * preface). Finally, this preserves any data that might actually be useful.
621         */
622        self.reader.rewind()?;
623        /* Find the end of the file data. */
624        let length_to_read = self.shared.dir_start;
625        /* Produce a Read that reads bytes up until the start of the central directory header.
626         * This "as &mut dyn Read" trick is used elsewhere to avoid having to clone the underlying
627         * handle, which it really shouldn't need to anyway. */
628        let mut limited_raw = (&mut self.reader as &mut dyn Read).take(length_to_read);
629        /* Copy over file data from source archive directly. */
630        io::copy(&mut limited_raw, &mut w)?;
631
632        /* Return the files we've just written to the data stream. */
633        Ok(new_files)
634    }
635
636    /// Get the directory start offset and number of files. This is done in a
637    /// separate function to simplify the control flow.
638    pub(crate) fn get_metadata(config: Config, reader: &mut R) -> ZipResult<Shared> {
639        // End of the probed region, initially set to the end of the file
640        let file_len = reader.seek(io::SeekFrom::End(0))?;
641        let mut end_exclusive = file_len;
642
643        loop {
644            // Find the EOCD and possibly EOCD64 entries and determine the archive offset.
645            let cde = spec::find_central_directory(
646                reader,
647                config.archive_offset,
648                end_exclusive,
649                file_len,
650            )?;
651
652            // Turn EOCD into internal representation.
653            let Ok(shared) = CentralDirectoryInfo::try_from(&cde)
654                .and_then(|info| Self::read_central_header(info, config, reader))
655            else {
656                // The next EOCD candidate should start before the current one.
657                end_exclusive = cde.eocd.position;
658                continue;
659            };
660
661            return Ok(shared.build(
662                cde.eocd.data.zip_file_comment,
663                cde.eocd64.map(|v| v.data.extensible_data_sector),
664            ));
665        }
666    }
667
668    fn read_central_header(
669        dir_info: CentralDirectoryInfo,
670        config: Config,
671        reader: &mut R,
672    ) -> Result<SharedBuilder, ZipError> {
673        // If the parsed number of files is greater than the central directory start offset,
674        // then something fishy is going on and we shouldn't trust number_of_files.
675        let file_capacity = if dir_info.number_of_files > dir_info.directory_start as usize {
676            0
677        } else {
678            dir_info.number_of_files
679        };
680
681        if dir_info.disk_number != dir_info.disk_with_central_directory {
682            return unsupported_zip_error("Support for multi-disk files is not implemented");
683        }
684
685        if file_capacity.saturating_mul(size_of::<ZipFileData>()) > isize::MAX as usize {
686            return unsupported_zip_error("Oversized central directory");
687        }
688
689        let mut files = Vec::with_capacity(file_capacity);
690        reader.seek(SeekFrom::Start(dir_info.directory_start))?;
691        for _ in 0..dir_info.number_of_files {
692            let file = central_header_to_zip_file(reader, &dir_info)?;
693            files.push(file);
694        }
695
696        Ok(SharedBuilder {
697            files,
698            offset: dir_info.archive_offset,
699            dir_start: dir_info.directory_start,
700            config,
701        })
702    }
703
704    /// Returns the verification value and salt for the AES encryption of the file.
705    ///
706    /// Fails if the file number is invalid.
707    ///
708    /// # Returns
709    ///
710    /// - `None` if the file is not encrypted with AES
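    ///
    /// A minimal sketch (the archive path is hypothetical; requires the `aes-crypto` feature):
    ///
    /// ```no_run
    /// # fn main() -> zip::result::ZipResult<()> {
    /// let file = std::fs::File::open("encrypted.zip")?;
    /// let mut archive = zip::ZipArchive::new(file)?;
    /// if let Some(aes_info) = archive.get_aes_verification_key_and_salt(0)? {
    ///     println!("entry 0 uses {:?} with a {}-byte salt", aes_info.aes_mode, aes_info.salt.len());
    /// }
    /// # Ok(())
    /// # }
    /// ```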
711    #[cfg(feature = "aes-crypto")]
712    pub fn get_aes_verification_key_and_salt(
713        &mut self,
714        file_number: usize,
715    ) -> ZipResult<Option<AesInfo>> {
716        let (_, data) = self
717            .shared
718            .files
719            .get_index(file_number)
720            .ok_or(ZipError::FileNotFound)?;
721
722        let limit_reader = find_content(data, &mut self.reader)?;
723        match data.aes_mode {
724            None => Ok(None),
725            Some((aes_mode, _, _)) => {
726                let (verification_value, salt) =
727                    AesReader::new(limit_reader, aes_mode, data.compressed_size)
728                        .get_verification_value_and_salt()?;
729                let aes_info = AesInfo {
730                    aes_mode,
731                    verification_value,
732                    salt,
733                };
734                Ok(Some(aes_info))
735            }
736        }
737    }
738
739    /// Read a ZIP archive, collecting the files it contains.
740    ///
741    /// This uses the central directory record of the ZIP file, and ignores local file headers.
742    ///
743    /// A default [`Config`] is used.
744    pub fn new(reader: R) -> ZipResult<ZipArchive<R>> {
745        Self::with_config(Default::default(), reader)
746    }
747
748    /// Read a ZIP archive providing a read configuration, collecting the files it contains.
749    ///
750    /// This uses the central directory record of the ZIP file, and ignores local file headers.
751    pub fn with_config(config: Config, mut reader: R) -> ZipResult<ZipArchive<R>> {
752        let shared = Self::get_metadata(config, &mut reader)?;
753
754        Ok(ZipArchive {
755            reader,
756            shared: shared.into(),
757        })
758    }
759
760    /// Extract a Zip archive into a directory, overwriting files if they
761    /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. Symbolic links are only
762    /// created and followed if the target is within the destination directory (this is checked
763    /// conservatively using [`std::fs::canonicalize`]).
764    ///
765    /// Extraction is not atomic. If an error is encountered, some of the files
766    /// may be left on disk. However, on Unix targets, any newly-created directory whose contents
767    /// were only partially extracted will not be readable, writable, or usable as a process working
768    /// directory by any non-root user other than you.
769    ///
770    /// On Unix and Windows, symbolic links are extracted correctly. On other platforms such as
771    /// WebAssembly, symbolic links aren't supported, so they're extracted as normal files
772    /// containing the target path in UTF-8.
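    ///
    /// A minimal sketch (the archive and destination paths are hypothetical):
    ///
    /// ```no_run
    /// # fn main() -> zip::result::ZipResult<()> {
    /// let file = std::fs::File::open("archive.zip")?;
    /// let mut archive = zip::ZipArchive::new(file)?;
    /// archive.extract("out_dir")?;
    /// # Ok(())
    /// # }
    /// ```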
773    pub fn extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()> {
774        self.extract_internal(directory, None::<fn(&Path) -> bool>)
775    }
776
777    /// Extracts a Zip archive into a directory in the same fashion as
778    /// [`ZipArchive::extract`], but detects a "root" directory in the archive
779    /// (a single top-level directory that contains the rest of the archive's
780    /// entries) and extracts its contents directly.
781    ///
782    /// For a sensible default `filter`, you can use [`root_dir_common_filter`].
783    /// For a custom `filter`, see [`RootDirFilter`].
784    ///
785    /// See [`ZipArchive::root_dir`] for more information on how the root
786    /// directory is detected and the meaning of the `filter` parameter.
787    ///
788    /// ## Example
789    ///
790    /// Imagine a Zip archive with the following structure:
791    ///
792    /// ```text
793    /// root/file1.txt
794    /// root/file2.txt
795    /// root/sub/file3.txt
796    /// root/sub/subsub/file4.txt
797    /// ```
798    ///
799    /// If the archive is extracted to `foo` using [`ZipArchive::extract`],
800    /// the resulting directory structure will be:
801    ///
802    /// ```text
803    /// foo/root/file1.txt
804    /// foo/root/file2.txt
805    /// foo/root/sub/file3.txt
806    /// foo/root/sub/subsub/file4.txt
807    /// ```
808    ///
809    /// If the archive is extracted to `foo` using
810    /// [`ZipArchive::extract_unwrapped_root_dir`], the resulting directory
811    /// structure will be:
812    ///
813    /// ```text
814    /// foo/file1.txt
815    /// foo/file2.txt
816    /// foo/sub/file3.txt
817    /// foo/sub/subsub/file4.txt
818    /// ```
819    ///
820    /// ## Example - No Root Directory
821    ///
822    /// Imagine a Zip archive with the following structure:
823    ///
824    /// ```text
825    /// root/file1.txt
826    /// root/file2.txt
827    /// root/sub/file3.txt
828    /// root/sub/subsub/file4.txt
829    /// other/file5.txt
830    /// ```
831    ///
832    /// Due to the presence of the `other` directory,
833    /// [`ZipArchive::extract_unwrapped_root_dir`] will extract this in the same
834    /// fashion as [`ZipArchive::extract`] as there is now no "root directory."
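    ///
    /// A minimal sketch using the default filter (the archive and destination paths are
    /// hypothetical, and `root_dir_common_filter` is assumed to be exported from this module):
    ///
    /// ```no_run
    /// # fn main() -> zip::result::ZipResult<()> {
    /// let file = std::fs::File::open("archive.zip")?;
    /// let mut archive = zip::ZipArchive::new(file)?;
    /// archive.extract_unwrapped_root_dir("foo", zip::read::root_dir_common_filter)?;
    /// # Ok(())
    /// # }
    /// ```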
835    pub fn extract_unwrapped_root_dir<P: AsRef<Path>>(
836        &mut self,
837        directory: P,
838        root_dir_filter: impl RootDirFilter,
839    ) -> ZipResult<()> {
840        self.extract_internal(directory, Some(root_dir_filter))
841    }
842
843    fn extract_internal<P: AsRef<Path>>(
844        &mut self,
845        directory: P,
846        root_dir_filter: Option<impl RootDirFilter>,
847    ) -> ZipResult<()> {
848        use std::fs;
849
850        create_dir_all(&directory)?;
851        let directory = directory.as_ref().canonicalize()?;
852
853        let root_dir = root_dir_filter
854            .and_then(|filter| {
855                self.root_dir(&filter)
856                    .transpose()
857                    .map(|root_dir| root_dir.map(|root_dir| (root_dir, filter)))
858            })
859            .transpose()?;
860
861        // If we have a root dir, simplify the path components to be more
862        // appropriate for passing to `safe_prepare_path`
863        let root_dir = root_dir
864            .as_ref()
865            .map(|(root_dir, filter)| {
866                crate::path::simplified_components(root_dir)
867                    .ok_or_else(|| {
868                        // Should be unreachable
869                        debug_assert!(false, "Invalid root dir path");
870
871                        invalid!("Invalid root dir path")
872                    })
873                    .map(|root_dir| (root_dir, filter))
874            })
875            .transpose()?;
876
877        #[cfg(unix)]
878        let mut files_by_unix_mode = Vec::new();
879
880        for i in 0..self.len() {
881            let mut file = self.by_index(i)?;
882
883            let mut outpath = directory.clone();
884            file.safe_prepare_path(directory.as_ref(), &mut outpath, root_dir.as_ref())?;
885
886            let symlink_target = if file.is_symlink() && (cfg!(unix) || cfg!(windows)) {
887                let mut target = Vec::with_capacity(file.size() as usize);
888                file.read_to_end(&mut target)?;
889                Some(target)
890            } else {
891                if file.is_dir() {
892                    crate::read::make_writable_dir_all(&outpath)?;
893                    continue;
894                }
895                None
896            };
897
898            drop(file);
899
900            if let Some(target) = symlink_target {
901                make_symlink(&outpath, &target, &self.shared.files)?;
902                continue;
903            }
904            let mut file = self.by_index(i)?;
905            let mut outfile = fs::File::create(&outpath)?;
906            io::copy(&mut file, &mut outfile)?;
907            #[cfg(unix)]
908            {
909                // Check for real permissions, which we'll set in a second pass
910                if let Some(mode) = file.unix_mode() {
911                    files_by_unix_mode.push((outpath.clone(), mode));
912                }
913            }
914        }
915        #[cfg(unix)]
916        {
917            use std::cmp::Reverse;
918            use std::os::unix::fs::PermissionsExt;
919
920            if files_by_unix_mode.len() > 1 {
921                // Ensure we update children's permissions before making a parent unwritable
922                files_by_unix_mode.sort_by_key(|(path, _)| Reverse(path.clone()));
923            }
924            for (path, mode) in files_by_unix_mode.into_iter() {
925                fs::set_permissions(&path, fs::Permissions::from_mode(mode))?;
926            }
927        }
928        Ok(())
929    }
930
931    /// Number of files contained in this zip.
932    pub fn len(&self) -> usize {
933        self.shared.files.len()
934    }
935
936    /// Get the starting offset of the zip central directory.
937    pub fn central_directory_start(&self) -> u64 {
938        self.shared.dir_start
939    }
940
941    /// Whether this zip archive contains no files
942    pub fn is_empty(&self) -> bool {
943        self.len() == 0
944    }
945
946    /// Get the byte offset from the beginning of the underlying reader at which this zip archive begins.
947    ///
948    /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
949    /// of that prepended data.
950    pub fn offset(&self) -> u64 {
951        self.shared.offset
952    }
953
954    /// Get the comment of the zip archive.
955    pub fn comment(&self) -> &[u8] {
956        &self.shared.comment
957    }
958
959    /// Get the ZIP64 comment of the zip archive, if it is ZIP64.
960    pub fn zip64_comment(&self) -> Option<&[u8]> {
961        self.shared.zip64_comment.as_deref()
962    }
963
964    /// Returns an iterator over all the file and directory names in this archive.
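    ///
    /// A minimal sketch (the archive path is hypothetical):
    ///
    /// ```no_run
    /// # fn main() -> zip::result::ZipResult<()> {
    /// let file = std::fs::File::open("archive.zip")?;
    /// let archive = zip::ZipArchive::new(file)?;
    /// for name in archive.file_names() {
    ///     println!("{}", name);
    /// }
    /// # Ok(())
    /// # }
    /// ```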
965    pub fn file_names(&self) -> impl Iterator<Item = &str> {
966        self.shared.files.keys().map(|s| s.as_ref())
967    }
968
969    /// Returns Ok(true) if any compressed data in this archive belongs to more than one file. This
970    /// doesn't make the archive invalid, but some programs will refuse to decompress it because the
971    /// copies would take up space independently in the destination.
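    ///
    /// A minimal sketch (the archive path is hypothetical):
    ///
    /// ```no_run
    /// # fn main() -> zip::result::ZipResult<()> {
    /// let file = std::fs::File::open("archive.zip")?;
    /// let mut archive = zip::ZipArchive::new(file)?;
    /// if archive.has_overlapping_files()? {
    ///     println!("warning: some entries share compressed data");
    /// }
    /// # Ok(())
    /// # }
    /// ```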
972    pub fn has_overlapping_files(&mut self) -> ZipResult<bool> {
973        let mut ranges = Vec::<Range<u64>>::with_capacity(self.shared.files.len());
974        for file in self.shared.files.values() {
975            if file.compressed_size == 0 {
976                continue;
977            }
978            let start = file.data_start(&mut self.reader)?;
979            let end = start + file.compressed_size;
980            if ranges
981                .iter()
982                .any(|range| range.start <= end && start <= range.end)
983            {
984                return Ok(true);
985            }
986            ranges.push(start..end);
987        }
988        Ok(false)
989    }
990
991    /// Search for a file entry by name, decrypt with given password
992    ///
993    /// # Warning
994    ///
995    /// The implementation of the cryptographic algorithms has not
996    /// gone through a correctness review, and you should assume it is insecure:
997    /// passwords used with this API may be compromised.
998    ///
999    /// This function sometimes accepts a wrong password. This is because the ZIP spec only allows
1000    /// us to verify a single check byte, so a wrong password has a roughly 1/256 chance of passing.
1001    /// Many passwords other than the correct one will also pass the validity checks
1002    /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
1003    /// due to its fairly primitive approach to cryptography.
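    ///
    /// A minimal sketch (the archive path, entry name, and password are hypothetical):
    ///
    /// ```no_run
    /// # fn main() -> zip::result::ZipResult<()> {
    /// use std::io::Read;
    /// let file = std::fs::File::open("archive.zip")?;
    /// let mut archive = zip::ZipArchive::new(file)?;
    /// let mut entry = archive.by_name_decrypt("secret.txt", b"password")?;
    /// let mut contents = String::new();
    /// entry.read_to_string(&mut contents)?;
    /// # Ok(())
    /// # }
    /// ```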
1004    pub fn by_name_decrypt(&mut self, name: &str, password: &[u8]) -> ZipResult<ZipFile<'_, R>> {
1005        self.by_name_with_optional_password(name, Some(password))
1006    }
1007
1008    /// Search for a file entry by name
1009    pub fn by_name(&mut self, name: &str) -> ZipResult<ZipFile<'_, R>> {
1010        self.by_name_with_optional_password(name, None)
1011    }
1012
1013    /// Get the index of a file entry by name, if it's present.
1014    #[inline(always)]
1015    pub fn index_for_name(&self, name: &str) -> Option<usize> {
1016        self.shared.files.get_index_of(name)
1017    }
1018
1019    /// Get the index of a file entry by path, if it's present.
1020    #[inline(always)]
1021    pub fn index_for_path<T: AsRef<Path>>(&self, path: T) -> Option<usize> {
1022        self.index_for_name(&path_to_string(path))
1023    }
1024
1025    /// Get the name of a file entry, if it's present.
1026    #[inline(always)]
1027    pub fn name_for_index(&self, index: usize) -> Option<&str> {
1028        self.shared
1029            .files
1030            .get_index(index)
1031            .map(|(name, _)| name.as_ref())
1032    }
1033
1034    /// Search for a file entry by name and return a seekable object.
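    ///
    /// Only `Stored` (uncompressed) entries currently support seeking. A minimal sketch,
    /// assuming the returned `ZipFileSeek` is used via its `Read` and `Seek` implementations
    /// (the archive path and entry name are hypothetical):
    ///
    /// ```no_run
    /// # fn main() -> zip::result::ZipResult<()> {
    /// use std::io::{Read, Seek, SeekFrom};
    /// let file = std::fs::File::open("archive.zip")?;
    /// let mut archive = zip::ZipArchive::new(file)?;
    /// let mut entry = archive.by_name_seek("data.bin")?;
    /// entry.seek(SeekFrom::Start(1024))?;
    /// let mut buf = [0u8; 16];
    /// entry.read_exact(&mut buf)?;
    /// # Ok(())
    /// # }
    /// ```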
1035    pub fn by_name_seek(&mut self, name: &str) -> ZipResult<ZipFileSeek<'_, R>> {
1036        self.by_index_seek(self.index_for_name(name).ok_or(ZipError::FileNotFound)?)
1037    }
1038
1039    /// Search for a file entry by index and return a seekable object.
1040    pub fn by_index_seek(&mut self, index: usize) -> ZipResult<ZipFileSeek<'_, R>> {
1041        let reader = &mut self.reader;
1042        self.shared
1043            .files
1044            .get_index(index)
1045            .ok_or(ZipError::FileNotFound)
1046            .and_then(move |(_, data)| {
1047                let seek_reader = match data.compression_method {
1048                    CompressionMethod::Stored => {
1049                        ZipFileSeekReader::Raw(find_content_seek(data, reader)?)
1050                    }
1051                    _ => {
1052                        return Err(ZipError::UnsupportedArchive(
1053                            "Seekable compressed files are not yet supported",
1054                        ))
1055                    }
1056                };
1057                Ok(ZipFileSeek {
1058                    reader: seek_reader,
1059                    data: Cow::Borrowed(data),
1060                })
1061            })
1062    }
1063
1064    fn by_name_with_optional_password<'a>(
1065        &'a mut self,
1066        name: &str,
1067        password: Option<&[u8]>,
1068    ) -> ZipResult<ZipFile<'a, R>> {
1069        let Some(index) = self.shared.files.get_index_of(name) else {
1070            return Err(ZipError::FileNotFound);
1071        };
1072        self.by_index_with_optional_password(index, password)
1073    }
1074
1075    /// Get a contained file by index, decrypt with given password
1076    ///
1077    /// # Warning
1078    ///
1079    /// The implementation of the cryptographic algorithms has not
1080    /// gone through a correctness review, and you should assume it is insecure:
1081    /// passwords used with this API may be compromised.
1082    ///
1083    /// This function sometimes accepts a wrong password. This is because the ZIP spec only allows
1084    /// us to verify a single check byte, so a wrong password has a roughly 1/256 chance of passing.
1085    /// Many passwords other than the correct one will also pass the validity checks
1086    /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
1087    /// due to its fairly primitive approach to cryptography.
1088    pub fn by_index_decrypt(
1089        &mut self,
1090        file_number: usize,
1091        password: &[u8],
1092    ) -> ZipResult<ZipFile<'_, R>> {
1093        self.by_index_with_optional_password(file_number, Some(password))
1094    }
1095
1096    /// Get a contained file by index
1097    pub fn by_index(&mut self, file_number: usize) -> ZipResult<ZipFile<'_, R>> {
1098        self.by_index_with_optional_password(file_number, None)
1099    }
1100
1101    /// Get a contained file by index without decompressing it
1102    pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult<ZipFile<'_, R>> {
1103        let reader = &mut self.reader;
1104        let (_, data) = self
1105            .shared
1106            .files
1107            .get_index(file_number)
1108            .ok_or(ZipError::FileNotFound)?;
1109        Ok(ZipFile {
1110            reader: ZipFileReader::Raw(find_content(data, reader)?),
1111            data: Cow::Borrowed(data),
1112        })
1113    }
1114
1115    fn by_index_with_optional_password(
1116        &mut self,
1117        file_number: usize,
1118        mut password: Option<&[u8]>,
1119    ) -> ZipResult<ZipFile<'_, R>> {
1120        let (_, data) = self
1121            .shared
1122            .files
1123            .get_index(file_number)
1124            .ok_or(ZipError::FileNotFound)?;
1125
1126        match (password, data.encrypted) {
1127            (None, true) => return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)),
1128            (Some(_), false) => password = None, //Password supplied, but none needed! Discard.
1129            _ => {}
1130        }
1131        let limit_reader = find_content(data, &mut self.reader)?;
1132
1133        let crypto_reader = make_crypto_reader(data, limit_reader, password, data.aes_mode)?;
1134
1135        Ok(ZipFile {
1136            data: Cow::Borrowed(data),
1137            reader: make_reader(data.compression_method, data.crc32, crypto_reader)?,
1138        })
1139    }
1140
1141    /// Find the "root directory" of an archive if it exists, filtering out
1142    /// irrelevant entries when searching.
1143    ///
1144    /// Our definition of a "root directory" is a single top-level directory
1145    /// that contains the rest of the archive's entries. This is useful for
1146    /// extracting archives that contain a single top-level directory that
1147    /// you want to "unwrap" and extract directly.
1148    ///
1149    /// For a sensible default filter, you can use [`root_dir_common_filter`].
1150    /// For a custom filter, see [`RootDirFilter`].
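    ///
    /// A minimal sketch (the archive path is hypothetical, and `root_dir_common_filter`
    /// is assumed to be exported from this module):
    ///
    /// ```no_run
    /// # fn main() -> zip::result::ZipResult<()> {
    /// let file = std::fs::File::open("archive.zip")?;
    /// let archive = zip::ZipArchive::new(file)?;
    /// if let Some(root) = archive.root_dir(zip::read::root_dir_common_filter)? {
    ///     println!("root directory: {}", root.display());
    /// }
    /// # Ok(())
    /// # }
    /// ```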
1151    pub fn root_dir(&self, filter: impl RootDirFilter) -> ZipResult<Option<PathBuf>> {
1152        let mut root_dir: Option<PathBuf> = None;
1153
1154        for i in 0..self.len() {
1155            let (_, file) = self
1156                .shared
1157                .files
1158                .get_index(i)
1159                .ok_or(ZipError::FileNotFound)?;
1160
1161            let path = match file.enclosed_name() {
1162                Some(path) => path,
1163                None => return Ok(None),
1164            };
1165
1166            if !filter(&path) {
1167                continue;
1168            }
1169
1170            macro_rules! replace_root_dir {
1171                ($path:ident) => {
1172                    match &mut root_dir {
1173                        Some(root_dir) => {
1174                            if *root_dir != $path {
1175                                // We've found multiple root directories,
1176                                // abort.
1177                                return Ok(None);
1178                            } else {
1179                                continue;
1180                            }
1181                        }
1182
1183                        None => {
1184                            root_dir = Some($path.into());
1185                            continue;
1186                        }
1187                    }
1188                };
1189            }
1190
1191            // If this entry is located at the root of the archive...
1192            if path.components().count() == 1 {
1193                if file.is_dir() {
1194                    // If it's a directory, it could be the root directory.
1195                    replace_root_dir!(path);
1196                } else {
1197                    // If it's anything else, this archive does not have a
1198                    // root directory.
1199                    return Ok(None);
1200                }
1201            }
1202
1203            // Find the root directory for this entry.
1204            let mut path = path.as_path();
1205            while let Some(parent) = path.parent().filter(|path| *path != Path::new("")) {
1206                path = parent;
1207            }
1208
1209            replace_root_dir!(path);
1210        }
1211
1212        Ok(root_dir)
1213    }
1214
1215    /// Unwrap and return the inner reader object
1216    ///
1217    /// The position of the reader is undefined.
1218    pub fn into_inner(self) -> R {
1219        self.reader
1220    }
1221}
1222
1223/// Holds the AES information of a file in the zip archive
1224#[derive(Debug)]
1225#[cfg(feature = "aes-crypto")]
1226pub struct AesInfo {
1227    /// The AES encryption mode
1228    pub aes_mode: AesMode,
1229    /// The password verification value
1230    pub verification_value: [u8; PWD_VERIFY_LENGTH],
1231    /// The salt
1232    pub salt: Vec<u8>,
1233}
1234
1235const fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
1236    Err(ZipError::UnsupportedArchive(detail))
1237}
1238
1239/// Parse a central directory entry to collect the information for the file.
1240pub(crate) fn central_header_to_zip_file<R: Read + Seek>(
1241    reader: &mut R,
1242    central_directory: &CentralDirectoryInfo,
1243) -> ZipResult<ZipFileData> {
1244    let central_header_start = reader.stream_position()?;
1245
1246    // Parse central header
1247    let block = ZipCentralEntryBlock::parse(reader)?;
1248
1249    let file = central_header_to_zip_file_inner(
1250        reader,
1251        central_directory.archive_offset,
1252        central_header_start,
1253        block,
1254    )?;
1255
1256    let central_header_end = reader.stream_position()?;
1257
1258    if file.header_start >= central_directory.directory_start {
1259        return Err(invalid!(
1260            "A local file entry can't start after the central directory"
1261        ));
1262    }
1263
1264    reader.seek(SeekFrom::Start(central_header_end))?;
1265    Ok(file)
1266}
1267
1268#[inline]
1269fn read_variable_length_byte_field<R: Read>(reader: &mut R, len: usize) -> io::Result<Box<[u8]>> {
1270    let mut data = vec![0; len].into_boxed_slice();
1271    reader.read_exact(&mut data)?;
1272    Ok(data)
1273}
1274
1275/// Parse a central directory entry to collect the information for the file.
1276fn central_header_to_zip_file_inner<R: Read>(
1277    reader: &mut R,
1278    archive_offset: u64,
1279    central_header_start: u64,
1280    block: ZipCentralEntryBlock,
1281) -> ZipResult<ZipFileData> {
1282    let ZipCentralEntryBlock {
1283        // magic,
1284        version_made_by,
1285        // version_to_extract,
1286        flags,
1287        compression_method,
1288        last_mod_time,
1289        last_mod_date,
1290        crc32,
1291        compressed_size,
1292        uncompressed_size,
1293        file_name_length,
1294        extra_field_length,
1295        file_comment_length,
1296        // disk_number,
1297        // internal_file_attributes,
1298        external_file_attributes,
1299        offset,
1300        ..
1301    } = block;
1302
1303    let encrypted = flags & 1 == 1;
1304    let is_utf8 = flags & (1 << 11) != 0;
1305    let using_data_descriptor = flags & (1 << 3) != 0;
1306
1307    let file_name_raw = read_variable_length_byte_field(reader, file_name_length as usize)?;
1308    let extra_field = read_variable_length_byte_field(reader, extra_field_length as usize)?;
1309    let file_comment_raw = read_variable_length_byte_field(reader, file_comment_length as usize)?;
1310    let file_name: Box<str> = match is_utf8 {
1311        true => String::from_utf8_lossy(&file_name_raw).into(),
1312        false => file_name_raw.clone().from_cp437(),
1313    };
1314    let file_comment: Box<str> = match is_utf8 {
1315        true => String::from_utf8_lossy(&file_comment_raw).into(),
1316        false => file_comment_raw.from_cp437(),
1317    };
1318
1319    // Construct the result
1320    let mut result = ZipFileData {
1321        system: System::from((version_made_by >> 8) as u8),
1322        /* NB: this strips the top 8 bits! */
1323        version_made_by: version_made_by as u8,
1324        encrypted,
1325        using_data_descriptor,
1326        is_utf8,
1327        compression_method: CompressionMethod::parse_from_u16(compression_method),
1328        compression_level: None,
1329        last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
1330        crc32,
1331        compressed_size: compressed_size.into(),
1332        uncompressed_size: uncompressed_size.into(),
1333        file_name,
1334        file_name_raw,
1335        extra_field: Some(Arc::new(extra_field.to_vec())),
1336        central_extra_field: None,
1337        file_comment,
1338        header_start: offset.into(),
1339        extra_data_start: None,
1340        central_header_start,
1341        data_start: OnceLock::new(),
1342        external_attributes: external_file_attributes,
1343        large_file: false,
1344        aes_mode: None,
1345        aes_extra_data_start: 0,
1346        extra_fields: Vec::new(),
1347    };
1348    match parse_extra_field(&mut result) {
1349        Ok(stripped_extra_field) => {
1350            result.extra_field = stripped_extra_field;
1351        }
1352        Err(ZipError::Io(..)) => {}
1353        Err(e) => return Err(e),
1354    }
1355
1356    let aes_enabled = result.compression_method == CompressionMethod::AES;
1357    if aes_enabled && result.aes_mode.is_none() {
1358        return Err(invalid!("AES encryption without AES extra data field"));
1359    }
1360
1361    // Account for shifted zip offsets.
1362    result.header_start = result
1363        .header_start
1364        .checked_add(archive_offset)
1365        .ok_or(invalid!("Archive header is too large"))?;
1366
1367    Ok(result)
1368}
1369
1370pub(crate) fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<Option<Arc<Vec<u8>>>> {
1371    let Some(ref extra_field) = file.extra_field else {
1372        return Ok(None);
1373    };
1374    let extra_field = extra_field.clone();
1375    let mut processed_extra_field = extra_field.clone();
1376    let len = extra_field.len();
1377    let mut reader = io::Cursor::new(&**extra_field);
1378
1379    /* TODO: codify this structure into Zip64ExtraFieldBlock fields! */
1380    let mut position = reader.position() as usize;
1381    while (position) < len {
1382        let old_position = position;
1383        let remove = parse_single_extra_field(file, &mut reader, position as u64, false)?;
1384        position = reader.position() as usize;
1385        if remove {
1386            let remaining = len - (position - old_position);
1387            if remaining == 0 {
1388                return Ok(None);
1389            }
1390            let mut new_extra_field = Vec::with_capacity(remaining);
1391            new_extra_field.extend_from_slice(&extra_field[0..old_position]);
1392            new_extra_field.extend_from_slice(&extra_field[position..]);
1393            processed_extra_field = Arc::new(new_extra_field);
1394        }
1395    }
1396    Ok(Some(processed_extra_field))
1397}
1398
1399pub(crate) fn parse_single_extra_field<R: Read>(
1400    file: &mut ZipFileData,
1401    reader: &mut R,
1402    bytes_already_read: u64,
1403    disallow_zip64: bool,
1404) -> ZipResult<bool> {
1405    let kind = reader.read_u16_le()?;
1406    let len = reader.read_u16_le()?;
1407    match kind {
1408        // Zip64 extended information extra field
1409        0x0001 => {
1410            if disallow_zip64 {
1411                return Err(invalid!("Can't write a custom field using the ZIP64 ID"));
1412            }
1413            file.large_file = true;
1414            let mut consumed_len = 0;
1415            if len >= 24 || file.uncompressed_size == spec::ZIP64_BYTES_THR {
1416                file.uncompressed_size = reader.read_u64_le()?;
1417                consumed_len += size_of::<u64>();
1418            }
1419            if len >= 24 || file.compressed_size == spec::ZIP64_BYTES_THR {
1420                file.compressed_size = reader.read_u64_le()?;
1421                consumed_len += size_of::<u64>();
1422            }
1423            if len >= 24 || file.header_start == spec::ZIP64_BYTES_THR {
1424                file.header_start = reader.read_u64_le()?;
1425                consumed_len += size_of::<u64>();
1426            }
1427            let Some(leftover_len) = (len as usize).checked_sub(consumed_len) else {
1428                return Err(invalid!("ZIP64 extra-data field is the wrong length"));
1429            };
1430            reader.read_exact(&mut vec![0u8; leftover_len])?;
1431            return Ok(true);
1432        }
1433        0x000a => {
1434            // NTFS extra field
1435            file.extra_fields
1436                .push(ExtraField::Ntfs(Ntfs::try_from_reader(reader, len)?));
1437        }
1438        0x9901 => {
1439            // AES
1440            if len != 7 {
1441                return Err(ZipError::UnsupportedArchive(
1442                    "AES extra data field has an unsupported length",
1443                ));
1444            }
1445            let vendor_version = reader.read_u16_le()?;
1446            let vendor_id = reader.read_u16_le()?;
1447            let mut out = [0u8];
1448            reader.read_exact(&mut out)?;
1449            let aes_mode = out[0];
1450            let compression_method = CompressionMethod::parse_from_u16(reader.read_u16_le()?);
1451
1452            if vendor_id != 0x4541 {
1453                return Err(invalid!("Invalid AES vendor"));
1454            }
1455            let vendor_version = match vendor_version {
1456                0x0001 => AesVendorVersion::Ae1,
1457                0x0002 => AesVendorVersion::Ae2,
1458                _ => return Err(invalid!("Invalid AES vendor version")),
1459            };
1460            match aes_mode {
1461                0x01 => file.aes_mode = Some((AesMode::Aes128, vendor_version, compression_method)),
1462                0x02 => file.aes_mode = Some((AesMode::Aes192, vendor_version, compression_method)),
1463                0x03 => file.aes_mode = Some((AesMode::Aes256, vendor_version, compression_method)),
1464                _ => return Err(invalid!("Invalid AES encryption strength")),
1465            };
1466            file.compression_method = compression_method;
1467            file.aes_extra_data_start = bytes_already_read;
1468        }
1469        0x5455 => {
1470            // extended timestamp
1471            // https://libzip.org/specifications/extrafld.txt
1472
1473            file.extra_fields.push(ExtraField::ExtendedTimestamp(
1474                ExtendedTimestamp::try_from_reader(reader, len)?,
1475            ));
1476        }
1477        0x6375 => {
1478            // Info-ZIP Unicode Comment Extra Field
1479            // APPNOTE 4.6.8 and https://libzip.org/specifications/extrafld.txt
1480            file.file_comment = String::from_utf8(
1481                UnicodeExtraField::try_from_reader(reader, len)?
1482                    .unwrap_valid(file.file_comment.as_bytes())?
1483                    .into_vec(),
1484            )?
1485            .into();
1486        }
1487        0x7075 => {
1488            // Info-ZIP Unicode Path Extra Field
1489            // APPNOTE 4.6.9 and https://libzip.org/specifications/extrafld.txt
1490            file.file_name_raw = UnicodeExtraField::try_from_reader(reader, len)?
1491                .unwrap_valid(&file.file_name_raw)?;
1492            file.file_name =
1493                String::from_utf8(file.file_name_raw.clone().into_vec())?.into_boxed_str();
1494            file.is_utf8 = true;
1495        }
1496        _ => {
1497            reader.read_exact(&mut vec![0u8; len as usize])?;
1498            // Other fields are ignored
1499        }
1500    }
1501    Ok(false)
1502}
1503
1504/// A trait for exposing file metadata inside the zip.
1505pub trait HasZipMetadata {
1506    /// Get the file metadata
1507    fn get_metadata(&self) -> &ZipFileData;
1508}
1509
1510/// Methods for retrieving information on zip files
1511impl<'a, R: Read> ZipFile<'a, R> {
1512    pub(crate) fn take_raw_reader(&mut self) -> io::Result<io::Take<&'a mut R>> {
1513        mem::replace(&mut self.reader, ZipFileReader::NoReader).into_inner()
1514    }
1515
1516    /// Get the ZIP specification version used to create the file, as a `(major, minor)` pair
1517    pub fn version_made_by(&self) -> (u8, u8) {
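        // The stored value encodes the specification version as
        // major * 10 + minor (e.g. 45 means 4.5), so split it with
        // integer division and modulo.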
1518        (
1519            self.get_metadata().version_made_by / 10,
1520            self.get_metadata().version_made_by % 10,
1521        )
1522    }
1523
1524    /// Get the name of the file
1525    ///
1526    /// # Warnings
1527    ///
1528    /// It is dangerous to use this name directly when extracting an archive.
1529    /// It may contain an absolute path (`/etc/shadow`), or break out of the
1530    /// current directory (`../runtime`). Carelessly writing to these paths
1531    /// allows an attacker to craft a ZIP archive that will overwrite critical
1532    /// files.
1533    ///
1534    /// You can use the [`ZipFile::enclosed_name`] method to validate the name
1535    /// as a safe path.
1536    pub fn name(&self) -> &str {
1537        &self.get_metadata().file_name
1538    }
1539
1540    /// Get the name of the file, in the raw (internal) byte representation.
1541    ///
1542    /// The encoding of this data is currently undefined.
1543    pub fn name_raw(&self) -> &[u8] {
1544        &self.get_metadata().file_name_raw
1545    }
1546
1547    /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte,
1548    /// removes a leading '/' and removes '..' parts.
1549    #[deprecated(
1550        since = "0.5.7",
1551        note = "by stripping `..`s from the path, the meaning of paths can change.
1552                `mangled_name` can be used if this behaviour is desirable"
1553    )]
1554    pub fn sanitized_name(&self) -> PathBuf {
1555        self.mangled_name()
1556    }
1557
1558    /// Rewrite the path, ignoring any path components with special meaning.
1559    ///
1560    /// - Absolute paths are made relative
1561    /// - [`ParentDir`]s are ignored
1562    /// - Truncates the filename at a NULL byte
1563    ///
1564    /// This is appropriate if you need to be able to extract *something* from
1565    /// any archive, but will easily misrepresent trivial paths like
1566    /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
1567    /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
1568    ///
1569    /// [`ParentDir`]: std::path::Component::ParentDir
1570    pub fn mangled_name(&self) -> PathBuf {
1571        self.get_metadata().file_name_sanitized()
1572    }
1573
1574    /// Ensure the file path is safe to use as a [`Path`].
1575    ///
1576    /// - It can't contain NULL bytes
1577    /// - It can't resolve to a path outside the current directory
1578    ///   > `foo/../bar` is fine, `foo/../../bar` is not.
1579    /// - It can't be an absolute path
1580    ///
1581    /// This will read well-formed ZIP files correctly, and is resistant
1582    /// to path-based exploits. It is recommended over
1583    /// [`ZipFile::mangled_name`].
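    ///
    /// A minimal extraction sketch built on `enclosed_name` (the archive path
    /// and output layout below are illustrative only):
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use std::io::copy;
    ///
    /// let mut zip = zip::ZipArchive::new(File::open("archive.zip")?)?;
    /// for i in 0..zip.len() {
    ///     let mut entry = zip.by_index(i)?;
    ///     // Skip entries whose stored names would escape the output directory.
    ///     let Some(safe_path) = entry.enclosed_name() else { continue };
    ///     if let Some(parent) = safe_path.parent() {
    ///         std::fs::create_dir_all(parent)?;
    ///     }
    ///     if entry.is_file() {
    ///         copy(&mut entry, &mut File::create(&safe_path)?)?;
    ///     }
    /// }
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```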
1584    pub fn enclosed_name(&self) -> Option<PathBuf> {
1585        self.get_metadata().enclosed_name()
1586    }
1587
1588    pub(crate) fn simplified_components(&self) -> Option<Vec<&OsStr>> {
1589        self.get_metadata().simplified_components()
1590    }
1591
1592    /// Prepare the path for extraction by creating any missing intermediate directories and checking that symlinks stay contained within the base path.
1593    ///
1594    /// The `base_path` parameter is assumed to be canonicalized.
1595    pub(crate) fn safe_prepare_path(
1596        &self,
1597        base_path: &Path,
1598        outpath: &mut PathBuf,
1599        root_dir: Option<&(Vec<&OsStr>, impl RootDirFilter)>,
1600    ) -> ZipResult<()> {
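        // Walk the already-simplified components one at a time, creating any
        // missing intermediate directories and re-checking every step for
        // symlinks so a link can never redirect extraction outside `base_path`.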
1601        let components = self
1602            .simplified_components()
1603            .ok_or(invalid!("Invalid file path"))?;
1604
1605        let components = match root_dir {
1606            Some((root_dir, filter)) => match components.strip_prefix(&**root_dir) {
1607                Some(components) => components,
1608
1609                // In this case, we expect that the file was not in the root
1610                // directory, but was filtered out when searching for the
1611                // root directory.
1612                None => {
1613                    // We could technically find ourselves at this code
1614                    // path if the user provides an unstable or
1615                    // non-deterministic `filter` function.
1616                    //
1617                    // If debug assertions are on, we should panic here.
1618                    // Otherwise, the safest thing to do here is to just
1619                    // extract as-is.
1620                    debug_assert!(
1621                        !filter(&PathBuf::from_iter(components.iter())),
1622                        "Root directory filter should not match at this point"
1623                    );
1624
1625                    // Extract as-is.
1626                    &components[..]
1627                }
1628            },
1629
1630            None => &components[..],
1631        };
1632
1633        let components_len = components.len();
1634
1635        for (is_last, component) in components
1636            .iter()
1637            .copied()
1638            .enumerate()
1639            .map(|(i, c)| (i == components_len - 1, c))
1640        {
1641            // we can skip the target directory itself because the base path is assumed to be "trusted" (if the user says to extract to a symlink, we can follow it)
1642            outpath.push(component);
1643
1644            // check if the path is a symlink; its target must be _inherently_ within the directory
1645            for limit in (0..5u8).rev() {
1646                let meta = match std::fs::symlink_metadata(&outpath) {
1647                    Ok(meta) => meta,
1648                    Err(e) if e.kind() == io::ErrorKind::NotFound => {
1649                        if !is_last {
1650                            crate::read::make_writable_dir_all(&outpath)?;
1651                        }
1652                        break;
1653                    }
1654                    Err(e) => return Err(e.into()),
1655                };
1656
1657                if !meta.is_symlink() {
1658                    break;
1659                }
1660
1661                if limit == 0 {
1662                    return Err(invalid!("Extraction followed a symlink too deep"));
1663                }
1664
1665                // note that we cannot accept links that do not inherently resolve to a path inside the directory, to prevent:
1666                // - disclosure of whether unrelated paths exist (probing whether a path exists and then escaping via `../`)
1667                // - issues with file-system-specific path resolution (case sensitivity, etc.)
1668                let target = std::fs::read_link(&outpath)?;
1669
1670                if !crate::path::simplified_components(&target)
1671                    .ok_or(invalid!("Invalid symlink target path"))?
1672                    .starts_with(
1673                        &crate::path::simplified_components(base_path)
1674                            .ok_or(invalid!("Invalid base path"))?,
1675                    )
1676                {
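                    // The component-prefix check failed; as a fallback, accept
                    // the target only if it matches every component of the
                    // (canonicalized) base path in order and then merely
                    // descends further into it.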
1677                    let is_absolute_enclosed = base_path
1678                        .components()
1679                        .map(Some)
1680                        .chain(std::iter::once(None))
1681                        .zip(target.components().map(Some).chain(std::iter::repeat(None)))
1682                        .all(|(a, b)| match (a, b) {
1683                            // both components are normal
1684                            (Some(Component::Normal(a)), Some(Component::Normal(b))) => a == b,
1685                            // both components consumed fully
1686                            (None, None) => true,
1687                            // target consumed fully but base path is not
1688                            (Some(_), None) => false,
1689                            // base path consumed fully but target is not (and normal)
1690                            (None, Some(Component::CurDir | Component::Normal(_))) => true,
1691                            _ => false,
1692                        });
1693
1694                    if !is_absolute_enclosed {
1695                        return Err(invalid!("Symlink is not inherently safe"));
1696                    }
1697                }
1698
1699                outpath.push(target);
1700            }
1701        }
1702        Ok(())
1703    }
1704
1705    /// Get the comment of the file
1706    pub fn comment(&self) -> &str {
1707        &self.get_metadata().file_comment
1708    }
1709
1710    /// Get the compression method used to store the file
1711    pub fn compression(&self) -> CompressionMethod {
1712        self.get_metadata().compression_method
1713    }
1714
1715    /// Get whether the file is encrypted
1716    pub fn encrypted(&self) -> bool {
1717        self.data.encrypted
1718    }
1719
1720    /// Get the size of the file, in bytes, in the archive
1721    pub fn compressed_size(&self) -> u64 {
1722        self.get_metadata().compressed_size
1723    }
1724
1725    /// Get the size of the file, in bytes, when uncompressed
1726    pub fn size(&self) -> u64 {
1727        self.get_metadata().uncompressed_size
1728    }
1729
1730    /// Get the time the file was last modified
1731    pub fn last_modified(&self) -> Option<DateTime> {
1732        self.data.last_modified_time
1733    }
1734    /// Returns whether the file is actually a directory
1735    pub fn is_dir(&self) -> bool {
1736        is_dir(self.name())
1737    }
1738
1739    /// Returns whether the file is actually a symbolic link
1740    pub fn is_symlink(&self) -> bool {
1741        self.unix_mode()
1742            .is_some_and(|mode| mode & S_IFLNK == S_IFLNK)
1743    }
1744
1745    /// Returns whether the file is a normal file (i.e. not a directory or symlink)
1746    pub fn is_file(&self) -> bool {
1747        !self.is_dir() && !self.is_symlink()
1748    }
1749
1750    /// Get unix mode for the file
1751    pub fn unix_mode(&self) -> Option<u32> {
1752        self.get_metadata().unix_mode()
1753    }
1754
1755    /// Get the CRC32 hash of the original file
1756    pub fn crc32(&self) -> u32 {
1757        self.get_metadata().crc32
1758    }
1759
1760    /// Get the extra data of the zip header for this file
1761    pub fn extra_data(&self) -> Option<&[u8]> {
1762        self.get_metadata()
1763            .extra_field
1764            .as_ref()
1765            .map(|v| v.deref().deref())
1766    }
1767
1768    /// Get the starting offset of the data of the compressed file
1769    pub fn data_start(&self) -> u64 {
1770        *self.data.data_start.get().unwrap()
1771    }
1772
1773    /// Get the starting offset of the zip header for this file
1774    pub fn header_start(&self) -> u64 {
1775        self.get_metadata().header_start
1776    }
1777    /// Get the starting offset of the zip header in the central directory for this file
1778    pub fn central_header_start(&self) -> u64 {
1779        self.get_metadata().central_header_start
1780    }
1781
1782    /// Get the [`SimpleFileOptions`] that would be used to write this file to
1783    /// a new zip archive.
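    ///
    /// A small sketch that copies a plain (unencrypted) entry into a new
    /// archive with equivalent options (the input path is illustrative):
    ///
    /// ```no_run
    /// use std::io::{copy, Cursor};
    ///
    /// let mut src = zip::ZipArchive::new(std::fs::File::open("in.zip")?)?;
    /// let mut dst = zip::ZipWriter::new(Cursor::new(Vec::new()));
    /// let mut entry = src.by_index(0)?;
    /// dst.start_file(entry.name().to_owned(), entry.options())?;
    /// copy(&mut entry, &mut dst)?;
    /// dst.finish()?;
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```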
1784    pub fn options(&self) -> SimpleFileOptions {
1785        let mut options = SimpleFileOptions::default()
1786            .large_file(self.compressed_size().max(self.size()) > ZIP64_BYTES_THR)
1787            .compression_method(self.compression())
1788            .unix_permissions(self.unix_mode().unwrap_or(0o644) | S_IFREG)
1789            .last_modified_time(
1790                self.last_modified()
1791                    .filter(|m| m.is_valid())
1792                    .unwrap_or_else(DateTime::default_for_write),
1793            );
1794
1795        options.normalize();
1796        options
1797    }
1798}
1799
1800/// Methods for retrieving information on zip files
1801impl<R: Read> ZipFile<'_, R> {
1802    /// Iterate through all extra fields
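    ///
    /// A sketch that counts the recognised extra fields of the first entry
    /// (the archive path is illustrative):
    ///
    /// ```no_run
    /// let mut zip = zip::ZipArchive::new(std::fs::File::open("archive.zip")?)?;
    /// let entry = zip.by_index(0)?;
    /// println!("{} known extra fields", entry.extra_data_fields().count());
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```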
1803    pub fn extra_data_fields(&self) -> impl Iterator<Item = &ExtraField> {
1804        self.data.extra_fields.iter()
1805    }
1806}
1807
1808impl<R: Read> HasZipMetadata for ZipFile<'_, R> {
1809    fn get_metadata(&self) -> &ZipFileData {
1810        self.data.as_ref()
1811    }
1812}
1813
1814impl<R: Read> Read for ZipFile<'_, R> {
1815    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1816        self.reader.read(buf)
1817    }
1818
1819    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
1820        self.reader.read_exact(buf)
1821    }
1822
1823    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
1824        self.reader.read_to_end(buf)
1825    }
1826
1827    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
1828        self.reader.read_to_string(buf)
1829    }
1830}
1831
1832impl<R: Read> Read for ZipFileSeek<'_, R> {
1833    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1834        match &mut self.reader {
1835            ZipFileSeekReader::Raw(r) => r.read(buf),
1836        }
1837    }
1838}
1839
1840impl<R: Seek> Seek for ZipFileSeek<'_, R> {
1841    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
1842        match &mut self.reader {
1843            ZipFileSeekReader::Raw(r) => r.seek(pos),
1844        }
1845    }
1846}
1847
1848impl<R> HasZipMetadata for ZipFileSeek<'_, R> {
1849    fn get_metadata(&self) -> &ZipFileData {
1850        self.data.as_ref()
1851    }
1852}
1853
1854impl<R: Read> Drop for ZipFile<'_, R> {
1855    fn drop(&mut self) {
1856        // When self.data is Cow::Owned, this reader was constructed by a streaming reader.
1857        // In this case, we want to exhaust the reader so that the next file is accessible.
1858        if let Cow::Owned(_) = self.data {
1859            // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped.
1860            if let Ok(mut inner) = self.take_raw_reader() {
1861                let _ = copy(&mut inner, &mut sink());
1862            }
1863        }
1864    }
1865}
1866
1867/// Read ZipFile structures from a non-seekable reader.
1868///
1869/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions
1870/// instead, as some information will be missing when reading in this manner.
1871///
1872/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is
1873/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory
1874/// is encountered. No more files should be read after this.
1875///
1876/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned for
1877/// the next entry once this ZipFile is dropped.
1878///
1879/// Missing fields are:
1880/// * `comment`: set to an empty string
1881/// * `data_start`: set to 0
1882/// * `external_attributes`: `unix_mode()`: will return None
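///
/// A minimal streaming loop over a byte source (the path is illustrative):
///
/// ```no_run
/// use zip::read::read_zipfile_from_stream;
///
/// let mut reader = std::fs::File::open("archive.zip")?;
/// while let Some(file) = read_zipfile_from_stream(&mut reader)? {
///     println!("{}: {} bytes", file.name(), file.size());
///     // Dropping `file` advances `reader` past this entry's data.
/// }
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```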
1883pub fn read_zipfile_from_stream<R: Read>(reader: &mut R) -> ZipResult<Option<ZipFile<'_, R>>> {
1884    // We can't use the typical ::parse() method, as we follow separate code paths depending on the
1885    // "magic" value (since the magic value will be from the central directory header if we've
1886    // finished iterating over all the actual files).
1887    /* TODO: smallvec? */
1888
1889    let mut block = ZipLocalEntryBlock::zeroed();
1890    reader.read_exact(block.as_bytes_mut())?;
1891
1892    match block.magic().from_le() {
1893        spec::Magic::LOCAL_FILE_HEADER_SIGNATURE => (),
1894        spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
1895        _ => return Err(ZipLocalEntryBlock::WRONG_MAGIC_ERROR),
1896    }
1897
1898    let block = block.from_le();
1899
1900    let mut result = ZipFileData::from_local_block(block, reader)?;
1901
1902    match parse_extra_field(&mut result) {
1903        Ok(..) | Err(ZipError::Io(..)) => {}
1904        Err(e) => return Err(e),
1905    }
1906
1907    let limit_reader = reader.take(result.compressed_size);
1908
1909    let result_crc32 = result.crc32;
1910    let result_compression_method = result.compression_method;
1911    let crypto_reader = make_crypto_reader(&result, limit_reader, None, None)?;
1912
1913    Ok(Some(ZipFile {
1914        data: Cow::Owned(result),
1915        reader: make_reader(result_compression_method, result_crc32, crypto_reader)?,
1916    }))
1917}
1918
1919/// A filter that determines whether an entry should be ignored when searching
1920/// for the root directory of a Zip archive.
1921///
1922/// Returns `true` if the entry should be considered, and `false` if it should
1923/// be ignored.
1924///
1925/// See [`root_dir_common_filter`] for a sensible default filter.
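///
/// Any `Fn(&Path) -> bool` closure implements this trait; a sketch with a
/// made-up rule:
///
/// ```
/// use std::path::Path;
///
/// fn accepts_filter(_f: impl zip::read::RootDirFilter) {}
/// accepts_filter(|path: &Path| !path.starts_with("__MACOSX"));
/// ```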
1926pub trait RootDirFilter: Fn(&Path) -> bool {}
1927impl<F: Fn(&Path) -> bool> RootDirFilter for F {}
1928
1929/// A common filter for finding the root directory of a Zip archive.
1930///
1931/// This filter is a sensible default for most use cases and filters out common
1932/// system files that are usually irrelevant to the contents of the archive.
1933///
1934/// Currently, the filter ignores:
1935/// - `/__MACOSX/`
1936/// - `/.DS_Store`
1937/// - `/Thumbs.db`
1938///
1939/// **This function is not guaranteed to be stable and may change in future versions.**
1940///
1941/// # Example
1942///
1943/// ```rust
1944/// # use std::path::Path;
1945/// assert!(zip::read::root_dir_common_filter(Path::new("foo.txt")));
1946/// assert!(!zip::read::root_dir_common_filter(Path::new(".DS_Store")));
1947/// assert!(!zip::read::root_dir_common_filter(Path::new("Thumbs.db")));
1948/// assert!(!zip::read::root_dir_common_filter(Path::new("__MACOSX")));
1949/// assert!(!zip::read::root_dir_common_filter(Path::new("__MACOSX/foo.txt")));
1950/// ```
1951pub fn root_dir_common_filter(path: &Path) -> bool {
1952    const COMMON_FILTER_ROOT_FILES: &[&str] = &[".DS_Store", "Thumbs.db"];
1953
1954    if path.starts_with("__MACOSX") {
1955        return false;
1956    }
1957
1958    if path.components().count() == 1
1959        && path.file_name().is_some_and(|file_name| {
1960            COMMON_FILTER_ROOT_FILES
1961                .iter()
1962                .map(OsStr::new)
1963                .any(|cmp| cmp == file_name)
1964        })
1965    {
1966        return false;
1967    }
1968
1969    true
1970}
1971
1972#[cfg(test)]
1973mod test {
1974    use crate::result::ZipResult;
1975    use crate::write::SimpleFileOptions;
1976    use crate::CompressionMethod::Stored;
1977    use crate::{ZipArchive, ZipWriter};
1978    use std::io::{Cursor, Read, Write};
1979    use tempfile::TempDir;
1980
1981    #[test]
1982    fn invalid_offset() {
1983        use super::ZipArchive;
1984
1985        let mut v = Vec::new();
1986        v.extend_from_slice(include_bytes!("../tests/data/invalid_offset.zip"));
1987        let reader = ZipArchive::new(Cursor::new(v));
1988        assert!(reader.is_err());
1989    }
1990
1991    #[test]
1992    fn invalid_offset2() {
1993        use super::ZipArchive;
1994
1995        let mut v = Vec::new();
1996        v.extend_from_slice(include_bytes!("../tests/data/invalid_offset2.zip"));
1997        let reader = ZipArchive::new(Cursor::new(v));
1998        assert!(reader.is_err());
1999    }
2000
2001    #[test]
2002    fn zip64_with_leading_junk() {
2003        use super::ZipArchive;
2004
2005        let mut v = Vec::new();
2006        v.extend_from_slice(include_bytes!("../tests/data/zip64_demo.zip"));
2007        let reader = ZipArchive::new(Cursor::new(v)).unwrap();
2008        assert_eq!(reader.len(), 1);
2009    }
2010
2011    #[test]
2012    fn zip_contents() {
2013        use super::ZipArchive;
2014
2015        let mut v = Vec::new();
2016        v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip"));
2017        let mut reader = ZipArchive::new(Cursor::new(v)).unwrap();
2018        assert_eq!(reader.comment(), b"");
2019        assert_eq!(reader.by_index(0).unwrap().central_header_start(), 77);
2020    }
2021
2022    #[test]
2023    fn zip_read_streaming() {
2024        use super::read_zipfile_from_stream;
2025
2026        let mut v = Vec::new();
2027        v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip"));
2028        let mut reader = Cursor::new(v);
2029        loop {
2030            if read_zipfile_from_stream(&mut reader).unwrap().is_none() {
2031                break;
2032            }
2033        }
2034    }
2035
2036    #[test]
2037    fn zip_clone() {
2038        use super::ZipArchive;
2039        use std::io::Read;
2040
2041        let mut v = Vec::new();
2042        v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip"));
2043        let mut reader1 = ZipArchive::new(Cursor::new(v)).unwrap();
2044        let mut reader2 = reader1.clone();
2045
2046        let mut file1 = reader1.by_index(0).unwrap();
2047        let mut file2 = reader2.by_index(0).unwrap();
2048
2049        let t = file1.last_modified().unwrap();
2050        assert_eq!(
2051            (
2052                t.year(),
2053                t.month(),
2054                t.day(),
2055                t.hour(),
2056                t.minute(),
2057                t.second()
2058            ),
2059            (1980, 1, 1, 0, 0, 0)
2060        );
2061
2062        let mut buf1 = [0; 5];
2063        let mut buf2 = [0; 5];
2064        let mut buf3 = [0; 5];
2065        let mut buf4 = [0; 5];
2066
2067        file1.read_exact(&mut buf1).unwrap();
2068        file2.read_exact(&mut buf2).unwrap();
2069        file1.read_exact(&mut buf3).unwrap();
2070        file2.read_exact(&mut buf4).unwrap();
2071
2072        assert_eq!(buf1, buf2);
2073        assert_eq!(buf3, buf4);
2074        assert_ne!(buf1, buf3);
2075    }
2076
2077    #[test]
2078    fn file_and_dir_predicates() {
2079        use super::ZipArchive;
2080
2081        let mut v = Vec::new();
2082        v.extend_from_slice(include_bytes!("../tests/data/files_and_dirs.zip"));
2083        let mut zip = ZipArchive::new(Cursor::new(v)).unwrap();
2084
2085        for i in 0..zip.len() {
2086            let zip_file = zip.by_index(i).unwrap();
2087            let full_name = zip_file.enclosed_name().unwrap();
2088            let file_name = full_name.file_name().unwrap().to_str().unwrap();
2089            assert!(
2090                (file_name.starts_with("dir") && zip_file.is_dir())
2091                    || (file_name.starts_with("file") && zip_file.is_file())
2092            );
2093        }
2094    }
2095
2096    #[test]
2097    fn zip64_magic_in_filenames() {
2098        let files = vec![
2099            include_bytes!("../tests/data/zip64_magic_in_filename_1.zip").to_vec(),
2100            include_bytes!("../tests/data/zip64_magic_in_filename_2.zip").to_vec(),
2101            include_bytes!("../tests/data/zip64_magic_in_filename_3.zip").to_vec(),
2102            include_bytes!("../tests/data/zip64_magic_in_filename_4.zip").to_vec(),
2103            include_bytes!("../tests/data/zip64_magic_in_filename_5.zip").to_vec(),
2104        ];
2105        // Although we don't allow adding files whose names contain the ZIP64 CDB-end or
2106        // CDB-end-locator signatures, we still read them when they aren't genuinely ambiguous.
2107        for file in files {
2108            ZipArchive::new(Cursor::new(file)).unwrap();
2109        }
2110    }
2111
2112    /// test case to ensure we don't preemptively over-allocate based on the
2113    /// declared number of files in the CDE of an invalid zip when the number of
2114    /// files declared is more than the alleged offset in the CDE
2115    #[test]
2116    fn invalid_cde_number_of_files_allocation_smaller_offset() {
2117        use super::ZipArchive;
2118
2119        let mut v = Vec::new();
2120        v.extend_from_slice(include_bytes!(
2121            "../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
2122        ));
2123        let reader = ZipArchive::new(Cursor::new(v));
2124        assert!(reader.is_err() || reader.unwrap().is_empty());
2125    }
2126
2127    /// test case to ensure we don't preemptively over-allocate based on the
2128    /// declared number of files in the CDE of an invalid zip when the number of
2129    /// files declared is less than the alleged offset in the CDE
2130    #[test]
2131    fn invalid_cde_number_of_files_allocation_greater_offset() {
2132        use super::ZipArchive;
2133
2134        let mut v = Vec::new();
2135        v.extend_from_slice(include_bytes!(
2136            "../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
2137        ));
2138        let reader = ZipArchive::new(Cursor::new(v));
2139        assert!(reader.is_err());
2140    }
2141
2142    #[cfg(feature = "deflate64")]
2143    #[test]
2144    fn deflate64_index_out_of_bounds() -> std::io::Result<()> {
2145        let mut v = Vec::new();
2146        v.extend_from_slice(include_bytes!(
2147            "../tests/data/raw_deflate64_index_out_of_bounds.zip"
2148        ));
2149        let mut reader = ZipArchive::new(Cursor::new(v))?;
2150        std::io::copy(&mut reader.by_index(0)?, &mut std::io::sink()).expect_err("Invalid file");
2151        Ok(())
2152    }
2153
2154    #[cfg(feature = "deflate64")]
2155    #[test]
2156    fn deflate64_not_enough_space() {
2157        let mut v = Vec::new();
2158        v.extend_from_slice(include_bytes!("../tests/data/deflate64_issue_25.zip"));
2159        ZipArchive::new(Cursor::new(v)).expect_err("Invalid file");
2160    }
2161
2162    #[cfg(feature = "deflate-flate2")]
2163    #[test]
2164    fn test_read_with_data_descriptor() {
2165        use std::io::Read;
2166
2167        let mut v = Vec::new();
2168        v.extend_from_slice(include_bytes!("../tests/data/data_descriptor.zip"));
2169        let mut reader = ZipArchive::new(Cursor::new(v)).unwrap();
2170        let mut decompressed = [0u8; 16];
2171        let mut file = reader.by_index(0).unwrap();
2172        assert_eq!(file.read(&mut decompressed).unwrap(), 12);
2173    }
2174
2175    #[test]
2176    fn test_is_symlink() -> std::io::Result<()> {
2177        let mut v = Vec::new();
2178        v.extend_from_slice(include_bytes!("../tests/data/symlink.zip"));
2179        let mut reader = ZipArchive::new(Cursor::new(v))?;
2180        assert!(reader.by_index(0)?.is_symlink());
2181        let tempdir = TempDir::with_prefix("test_is_symlink")?;
2182        reader.extract(&tempdir)?;
2183        assert!(tempdir.path().join("bar").is_symlink());
2184        Ok(())
2185    }
2186
2187    #[test]
2188    #[cfg(feature = "deflate-flate2")]
2189    fn test_utf8_extra_field() {
2190        let mut v = Vec::new();
2191        v.extend_from_slice(include_bytes!("../tests/data/chinese.zip"));
2192        let mut reader = ZipArchive::new(Cursor::new(v)).unwrap();
2193        reader.by_name("七个房间.txt").unwrap();
2194    }
2195
2196    #[test]
2197    fn test_utf8() {
2198        let mut v = Vec::new();
2199        v.extend_from_slice(include_bytes!("../tests/data/linux-7z.zip"));
2200        let mut reader = ZipArchive::new(Cursor::new(v)).unwrap();
2201        reader.by_name("你好.txt").unwrap();
2202    }
2203
2204    #[test]
2205    fn test_utf8_2() {
2206        let mut v = Vec::new();
2207        v.extend_from_slice(include_bytes!("../tests/data/windows-7zip.zip"));
2208        let mut reader = ZipArchive::new(Cursor::new(v)).unwrap();
2209        reader.by_name("你好.txt").unwrap();
2210    }
2211
2212    #[test]
2213    fn test_64k_files() -> ZipResult<()> {
2214        let mut writer = ZipWriter::new(Cursor::new(Vec::new()));
2215        let options = SimpleFileOptions {
2216            compression_method: Stored,
2217            ..Default::default()
2218        };
2219        for i in 0..=u16::MAX {
2220            let file_name = format!("{i}.txt");
2221            writer.start_file(&*file_name, options)?;
2222            writer.write_all(i.to_string().as_bytes())?;
2223        }
2224
2225        let mut reader = ZipArchive::new(writer.finish()?)?;
2226        for i in 0..=u16::MAX {
2227            let expected_name = format!("{i}.txt");
2228            let expected_contents = i.to_string();
2229            let expected_contents = expected_contents.as_bytes();
2230            let mut file = reader.by_name(&expected_name)?;
2231            let mut contents = Vec::with_capacity(expected_contents.len());
2232            file.read_to_end(&mut contents)?;
2233            assert_eq!(contents, expected_contents);
2234            drop(file);
2235            contents.clear();
2236            let mut file = reader.by_index(i as usize)?;
2237            file.read_to_end(&mut contents)?;
2238            assert_eq!(contents, expected_contents);
2239        }
2240        Ok(())
2241    }
2242
2243    /// Symlinks being extracted shouldn't be followed out of the destination directory.
2244    #[test]
2245    fn test_cannot_symlink_outside_destination() -> ZipResult<()> {
2246        use std::fs::create_dir;
2247
2248        let mut writer = ZipWriter::new(Cursor::new(Vec::new()));
2249        writer.add_symlink("symlink/", "../dest-sibling/", SimpleFileOptions::default())?;
2250        writer.start_file("symlink/dest-file", SimpleFileOptions::default())?;
2251        let mut reader = writer.finish_into_readable()?;
2252        let dest_parent = TempDir::with_prefix("read__test_cannot_symlink_outside_destination")?;
2253        let dest_sibling = dest_parent.path().join("dest-sibling");
2254        create_dir(&dest_sibling)?;
2255        let dest = dest_parent.path().join("dest");
2256        create_dir(&dest)?;
2257        assert!(reader.extract(dest).is_err());
2258        assert!(!dest_sibling.join("dest-file").exists());
2259        Ok(())
2260    }
2261
2262    #[test]
2263    fn test_can_create_destination() -> ZipResult<()> {
2264        let mut v = Vec::new();
2265        v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip"));
2266        let mut reader = ZipArchive::new(Cursor::new(v))?;
2267        let dest = TempDir::with_prefix("read__test_can_create_destination")?;
2268        reader.extract(&dest)?;
2269        assert!(dest.path().join("mimetype").exists());
2270        Ok(())
2271    }
2272}