refactor/inline fastzip implementation for patching (upstream issues)

This commit is contained in:
Caelan Sayler
2025-05-18 20:19:59 +01:00
committed by Caelan
parent 8d8c1e198f
commit cf68dcb0ea
20 changed files with 2266 additions and 1029 deletions

975
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -54,7 +54,7 @@ strum = { version = "0.27", features = ["derive"] }
file-rotate = "0.8"
simple-stopwatch = "0.1"
enum-flags = "0.4"
remove_dir_all = { git = "https://github.com/caesay/remove_dir_all.git", features = ["log"] }
remove_dir_all = "1.0"
sha1 = "0.10"
sha2 = "0.10"
sha1_smol = "1.0"
@@ -84,9 +84,12 @@ core-foundation = "0.10"
core-foundation-sys = "0.8"
uuid = { version = "1.13.1", features = ["v4", "fast-rng", "macro-diagnostics"] }
walkdir = "2.5"
mtzip = "=4.0.2"
ripunzip = "=2.0.1"
zerofrom = "=0.1.5"
rayon = "1.6"
progress-streams = "1.1"
flate2 = { version = "1.0", default-features = false }
# mtzip = "=4.0.2"
# ripunzip = "=2.0.1"
# zerofrom = "=0.1.5"
# default to small, optimized workspace release binaries
[profile.release]

View File

@@ -67,9 +67,9 @@ zstd.workspace = true
zip.workspace = true
walkdir.workspace = true
sha1_smol.workspace = true
mtzip.workspace = true
ripunzip.workspace = true
zerofrom.workspace = true
rayon.workspace = true
progress-streams.workspace = true
flate2.workspace = true
[target.'cfg(target_os="linux")'.dependencies]
waitpid-any.workspace = true

View File

@@ -1,12 +1,10 @@
use crate::shared::fastzip;
use anyhow::{anyhow, bail, Result};
use mtzip::level::CompressionLevel;
use ripunzip::{NullProgressReporter, UnzipEngine, UnzipOptions};
use std::{
collections::HashSet,
fs, io,
path::{Path, PathBuf},
};
use walkdir::WalkDir;
pub fn zstd_patch_single<P1: AsRef<Path>, P2: AsRef<Path>, P3: AsRef<Path>>(old_file: P1, patch_file: P2, output_file: P3) -> Result<()> {
let old_file = old_file.as_ref();
@@ -53,22 +51,6 @@ fn fio_highbit64(v: u64) -> u32 {
return count;
}
fn zip_extract<P1: AsRef<Path>, P2: AsRef<Path>>(archive_file: P1, target_dir: P2) -> Result<()> {
let target_dir = target_dir.as_ref().to_path_buf();
let file = fs::File::open(archive_file)?;
let engine = UnzipEngine::for_file(file)?;
let null_progress = Box::new(NullProgressReporter {});
let options = UnzipOptions {
filename_filter: None,
progress_reporter: null_progress,
output_directory: Some(target_dir),
password: None,
single_threaded: false,
};
engine.unzip(options)?;
Ok(())
}
pub fn delta<P1: AsRef<Path>, P2: AsRef<Path>, P3: AsRef<Path>>(
old_file: P1,
delta_files: Vec<&PathBuf>,
@@ -98,7 +80,7 @@ pub fn delta<P1: AsRef<Path>, P2: AsRef<Path>, P3: AsRef<Path>>(
info!("Extracting base package for delta patching: {}", temp_dir.to_string_lossy());
let work_dir = temp_dir.join("_work");
fs::create_dir_all(&work_dir)?;
zip_extract(&old_file, &work_dir)?;
fastzip::extract_to_directory(&old_file, &work_dir, None)?;
info!("Base package extracted. {} delta packages to apply.", delta_files.len());
@@ -106,9 +88,9 @@ pub fn delta<P1: AsRef<Path>, P2: AsRef<Path>, P3: AsRef<Path>>(
info!("{}: extracting apply delta patch: {}", i, delta_file.to_string_lossy());
let delta_dir = temp_dir.join(format!("delta_{}", i));
fs::create_dir_all(&delta_dir)?;
zip_extract(delta_file, &delta_dir)?;
fastzip::extract_to_directory(&delta_file, &delta_dir, None)?;
let delta_relative_paths = enumerate_files_relative(&delta_dir);
let delta_relative_paths = fastzip::enumerate_files_relative(&delta_dir);
let mut visited_paths = HashSet::new();
// apply all the zsdiff patches for files which exist in both the delta and the base package
@@ -160,7 +142,7 @@ pub fn delta<P1: AsRef<Path>, P2: AsRef<Path>, P3: AsRef<Path>>(
}
// anything in the work dir which was not visited is an old / deleted file and should be removed
let workdir_relative_paths = enumerate_files_relative(&work_dir);
let workdir_relative_paths = fastzip::enumerate_files_relative(&work_dir);
for relative_path in &workdir_relative_paths {
if !visited_paths.contains(relative_path) {
let file_to_delete = work_dir.join(relative_path);
@@ -172,32 +154,12 @@ pub fn delta<P1: AsRef<Path>, P2: AsRef<Path>, P3: AsRef<Path>>(
info!("All delta patches applied. Asembling output package at: {}", output_file.to_string_lossy());
let mut zipper = mtzip::ZipArchive::new();
let workdir_relative_paths = enumerate_files_relative(&work_dir);
for relative_path in &workdir_relative_paths {
zipper
.add_file_from_fs(work_dir.join(&relative_path), relative_path.to_string_lossy().to_string())
.compression_level(CompressionLevel::fast())
.done();
}
let mut file = fs::File::create(&output_file)?;
zipper.write(&mut file)?;
fastzip::compress_directory(&work_dir, &output_file, fastzip::CompressionLevel::fast())?;
info!("Successfully applied {} delta patches in {}s.", delta_files.len(), time.s());
Ok(())
}
fn enumerate_files_relative<P: AsRef<Path>>(dir: P) -> Vec<PathBuf> {
WalkDir::new(&dir)
.follow_links(false)
.into_iter()
.filter_map(|entry| entry.ok())
.filter(|entry| entry.file_type().is_file())
.map(|entry| entry.path().strip_prefix(&dir).map(|p| p.to_path_buf()))
.filter_map(|entry| entry.ok())
.collect()
}
// NOTE: this is some code to do checksum verification, but it is not being used
// by the current implementation because zstd patching already has checksum verification
//

View File

@@ -10,46 +10,28 @@ pub fn uninstall(locator: &VelopackLocator, delete_self: bool) -> Result<()> {
let root_path = locator.get_root_dir();
fn _uninstall_impl(locator: &VelopackLocator) -> bool {
let root_path = locator.get_root_dir();
// the real app could be running at the moment
let _ = shared::force_stop_package(&root_path);
// the real app could be running at the moment
let _ = shared::force_stop_package(&root_path);
// run uninstall hook
windows::run_hook(&locator, constants::HOOK_CLI_UNINSTALL, 60);
let mut finished_with_errors = false;
// remove all shortcuts pointing to the app
windows::remove_all_shortcuts_for_root_dir(&root_path);
// run uninstall hook
windows::run_hook(&locator, constants::HOOK_CLI_UNINSTALL, 60);
info!("Removing directory '{}'", root_path.to_string_lossy());
let _ = remove_dir_all::remove_dir_all(&root_path);
// remove all shortcuts pointing to the app
windows::remove_all_shortcuts_for_root_dir(&root_path);
info!("Removing directory '{}'", root_path.to_string_lossy());
if let Err(e) = shared::retry_io(|| remove_dir_all::remove_dir_but_not_self(&root_path)) {
error!("Unable to remove directory, some files may be in use ({}).", e);
finished_with_errors = true;
}
if let Err(e) = windows::registry::remove_uninstall_entry(&locator) {
error!("Unable to remove uninstall registry entry ({}).", e);
// finished_with_errors = true;
}
!finished_with_errors
if let Err(e) = windows::registry::remove_uninstall_entry(&locator) {
error!("Unable to remove uninstall registry entry ({}).", e);
}
// if it returns true, it was a success.
// if it returns false, it was completed with errors which the user should be notified of.
let result = _uninstall_impl(&locator);
let app_title = locator.get_manifest_title();
if result {
info!("Finished successfully.");
shared::dialogs::show_info(format!("{} Uninstall", app_title).as_str(), None, "The application was successfully uninstalled.");
} else {
error!("Finished with errors.");
shared::dialogs::show_uninstall_complete_with_errors_dialog(&app_title, None);
}
info!("Finished successfully.");
shared::dialogs::show_info(format!("{} Uninstall", app_title).as_str(), None, "The application was successfully uninstalled.");
let dead_path = root_path.join(".dead");
let _ = File::create(dead_path);

View File

@@ -0,0 +1,168 @@
// Copyright 2022 Google LLC
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::{
io::{Read, Seek, SeekFrom},
sync::{Arc, Mutex},
};
use super::ripunzip::determine_stream_len;
struct Inner<R: Read + Seek> {
/// The underlying Read implementation.
r: R,
/// The position of r.
pos: u64,
/// The length of r, lazily loaded.
len: Option<u64>,
}
impl<R: Read + Seek> Inner<R> {
fn new(r: R) -> Self {
Self { r, pos: 0, len: None }
}
/// Get the length of the data stream. This is assumed to be constant.
fn len(&mut self) -> std::io::Result<u64> {
// Return cached size
if let Some(len) = self.len {
return Ok(len);
}
let len = determine_stream_len(&mut self.r)?;
self.len = Some(len);
Ok(len)
}
/// Read into the given buffer, starting at the given offset in the data stream.
fn read_at(&mut self, offset: u64, buf: &mut [u8]) -> std::io::Result<usize> {
if offset != self.pos {
self.r.seek(SeekFrom::Start(offset))?;
}
let read_result = self.r.read(buf);
if let Ok(bytes_read) = read_result {
// TODO, once stabilised, use checked_add_signed
self.pos += bytes_read as u64;
}
read_result
}
}
/// A [`Read`] which refers to its underlying stream by reference count,
/// and thus can be cloned cheaply. It supports seeking; each cloned instance
/// maintains its own pointer into the file, and the underlying instance
/// is seeked prior to each read.
pub(crate) struct CloneableSeekableReader<R: Read + Seek> {
/// The wrapper around the Read implementation, shared between threads.
inner: Arc<Mutex<Inner<R>>>,
/// The position of _this_ reader.
pos: u64,
}
impl<R: Read + Seek> Clone for CloneableSeekableReader<R> {
fn clone(&self) -> Self {
Self { inner: self.inner.clone(), pos: self.pos }
}
}
impl<R: Read + Seek> CloneableSeekableReader<R> {
/// Constructor. Takes ownership of the underlying `Read`.
/// You should pass in only streams whose total length you expect
/// to be fixed and unchanging. Odd behavior may occur if the length
/// of the stream changes; any subsequent seeks will not take account
/// of the changed stream length.
pub(crate) fn new(r: R) -> Self {
Self { inner: Arc::new(Mutex::new(Inner::new(r))), pos: 0u64 }
}
}
impl<R: Read + Seek> Read for CloneableSeekableReader<R> {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
let mut inner = self.inner.lock().unwrap();
let read_result = inner.read_at(self.pos, buf);
if let Ok(bytes_read) = read_result {
self.pos = self
.pos
.checked_add(bytes_read as u64)
.ok_or(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Read too far forward"))?;
}
read_result
}
}
impl<R: Read + Seek> Seek for CloneableSeekableReader<R> {
fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
let new_pos = match pos {
SeekFrom::Start(pos) => pos,
SeekFrom::End(offset_from_end) => {
let file_len = self.inner.lock().unwrap().len()?;
if -offset_from_end as u64 > file_len {
return Err(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Seek too far backwards"));
}
file_len
.checked_add_signed(offset_from_end)
.ok_or(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Seek too far backward from end"))?
}
SeekFrom::Current(offset_from_pos) => self
.pos
.checked_add_signed(offset_from_pos)
.ok_or(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Seek too far forward from current pos"))?,
};
self.pos = new_pos;
Ok(new_pos)
}
}
#[cfg(test)]
mod test {
use super::CloneableSeekableReader;
use std::io::{Cursor, Read, Seek, SeekFrom};
// use test_log::test;
#[test]
fn test_cloneable_seekable_reader() -> std::io::Result<()> {
let buf: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
let buf = Cursor::new(buf);
let mut reader = CloneableSeekableReader::new(buf);
let mut out = vec![0; 2];
reader.read_exact(&mut out)?;
assert_eq!(&out, &[0, 1]);
reader.rewind()?;
reader.read_exact(&mut out)?;
assert_eq!(&out, &[0, 1]);
reader.stream_position()?;
reader.read_exact(&mut out)?;
assert_eq!(&out, &[2, 3]);
reader.seek(SeekFrom::End(-2))?;
reader.read_exact(&mut out)?;
assert_eq!(&out, &[8, 9]);
assert!(reader.read_exact(&mut out).is_err());
Ok(())
}
#[test]
fn test_cloned_independent_positions() -> std::io::Result<()> {
let buf: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
let buf = Cursor::new(buf);
let mut r1 = CloneableSeekableReader::new(buf);
let mut r2 = r1.clone();
let mut out = vec![0; 2];
r1.read_exact(&mut out)?;
assert_eq!(&out, &[0, 1]);
r2.read_exact(&mut out)?;
assert_eq!(&out, &[0, 1]);
r1.read_exact(&mut out)?;
assert_eq!(&out, &[2, 3]);
r2.seek(SeekFrom::End(-2))?;
r2.read_exact(&mut out)?;
assert_eq!(&out, &[8, 9]);
r1.read_exact(&mut out)?;
assert_eq!(&out, &[4, 5]);
Ok(())
}
}

View File

@@ -0,0 +1,86 @@
#![allow(dead_code)]
mod cloneable_seekable_reader;
mod mtzip;
mod progress_updater;
mod ripunzip;
use anyhow::Result;
pub use mtzip::level::CompressionLevel;
use ripunzip::{UnzipEngine, UnzipOptions};
use std::{
fs::File,
path::{Path, PathBuf},
};
use walkdir::WalkDir;
/// A trait of types which wish to hear progress updates on the unzip.
pub trait UnzipProgressReporter: Sync {
/// Extraction has begun on a file.
fn extraction_starting(&self, _display_name: &str) {}
/// Extraction has finished on a file.
fn extraction_finished(&self, _display_name: &str) {}
/// The total number of compressed bytes we expect to extract.
fn total_bytes_expected(&self, _expected: u64) {}
/// Some bytes of a file have been decompressed. This is probably
/// the best way to display an overall progress bar. This should eventually
/// add up to the number you're given using `total_bytes_expected`.
/// The 'count' parameter is _not_ a running total - you must add up
/// each call to this function into the running total.
/// It's a bit unfortunate that we give compressed bytes rather than
/// uncompressed bytes, but currently we can't calculate uncompressed
/// bytes without downloading the whole zip file first, which rather
/// defeats the point.
fn bytes_extracted(&self, _count: u64) {}
}
/// A progress reporter which does nothing.
struct NullProgressReporter;
impl UnzipProgressReporter for NullProgressReporter {}
pub fn extract_to_directory<'b, P1: AsRef<Path>, P2: AsRef<Path>>(
archive_file: P1,
target_dir: P2,
progress_reporter: Option<Box<dyn UnzipProgressReporter + Sync + 'b>>,
) -> Result<()> {
let target_dir = target_dir.as_ref().to_path_buf();
let file = File::open(archive_file)?;
let engine = UnzipEngine::for_file(file)?;
let null_progress = Box::new(NullProgressReporter {});
let options = UnzipOptions {
filename_filter: None,
progress_reporter: progress_reporter.unwrap_or(null_progress),
output_directory: Some(target_dir),
password: None,
single_threaded: false,
};
engine.unzip(options)?;
Ok(())
}
pub fn compress_directory<'b, P1: AsRef<Path>, P2: AsRef<Path>>(target_dir: P1, output_file: P2, level: CompressionLevel) -> Result<()> {
let target_dir = target_dir.as_ref().to_path_buf();
let mut zipper = mtzip::ZipArchive::new();
let workdir_relative_paths = enumerate_files_relative(&target_dir);
for relative_path in &workdir_relative_paths {
zipper
.add_file_from_fs(target_dir.join(&relative_path), relative_path.to_string_lossy().to_string())
.compression_level(level)
.done();
}
let mut file = File::create(&output_file)?;
zipper.write_with_rayon(&mut file)?;
Ok(())
}
pub fn enumerate_files_relative<P: AsRef<Path>>(dir: P) -> Vec<PathBuf> {
WalkDir::new(&dir)
.follow_links(false)
.into_iter()
.filter_map(|entry| entry.ok())
.filter(|entry| entry.file_type().is_file())
.map(|entry| entry.path().strip_prefix(&dir).map(|p| p.to_path_buf()))
.filter_map(|entry| entry.ok())
.collect()
}

View File

@@ -0,0 +1,126 @@
//! Compression level
use core::fmt::Display;
use std::error::Error;
use flate2::Compression;
/// Compression level that should be used when compressing a file or data.
///
/// Current compression providers support only levels from 0 to 9, so these are the only ones being
/// supported.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct CompressionLevel(u8);
impl CompressionLevel {
/// Construct a new value of a compression level setting.
///
/// The integer value must be less than or equal to 9, otherwise `None` is returned
#[inline]
pub const fn new(level: u8) -> Option<Self> {
if level <= 9 { Some(Self(level)) } else { None }
}
/// Construct a new value of a compression level setting without checking the value.
///
/// # Safety
///
/// The value must be a valid supported compression level
#[inline]
pub const unsafe fn new_unchecked(level: u8) -> Self {
Self(level)
}
/// No compression
#[inline]
pub const fn none() -> Self {
Self(0)
}
/// Fastest compression
#[inline]
pub const fn fast() -> Self {
Self(1)
}
/// Balanced level with moderate compression and speed. The raw value is 6.
#[inline]
pub const fn balanced() -> Self {
Self(6)
}
/// Best compression ratio, comes at a worse performance
#[inline]
pub const fn best() -> Self {
Self(9)
}
/// Get the compression level as an integer
#[inline]
pub const fn get(self) -> u8 {
self.0
}
}
impl Default for CompressionLevel {
/// Equivalent to [`Self::balanced`]
fn default() -> Self {
Self::balanced()
}
}
/// The number for compression level was invalid
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct InvalidCompressionLevel(u32);
impl InvalidCompressionLevel {
/// The value which was supplied
pub fn value(self) -> u32 {
self.0
}
}
impl Display for InvalidCompressionLevel {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Invalid compression level number: {}", self.0)
}
}
impl Error for InvalidCompressionLevel {}
impl From<CompressionLevel> for Compression {
#[inline]
fn from(value: CompressionLevel) -> Self {
Compression::new(value.0.into())
}
}
impl TryFrom<Compression> for CompressionLevel {
type Error = InvalidCompressionLevel;
fn try_from(value: Compression) -> Result<Self, Self::Error> {
let level = value.level();
Self::new(
level
.try_into()
.map_err(|_| InvalidCompressionLevel(level))?,
)
.ok_or(InvalidCompressionLevel(level))
}
}
impl From<CompressionLevel> for u8 {
#[inline]
fn from(value: CompressionLevel) -> Self {
value.0
}
}
impl TryFrom<u8> for CompressionLevel {
type Error = InvalidCompressionLevel;
#[inline]
fn try_from(value: u8) -> Result<Self, Self::Error> {
Self::new(value).ok_or(InvalidCompressionLevel(value.into()))
}
}

View File

@@ -0,0 +1,433 @@
//! # mtzip
//!
//! MTZIP (Stands for Multi-Threaded ZIP) is a library for making zip archives while utilising all
//! available performance available with multithreading. The amount of threads can be limited by
//! the user or detected automatically.
//!
//! Example usage:
//!
//! ```ignore
//! # use std::path::Path;
//! # use std::fs::File;
//! use mtzip::ZipArchive;
//!
//! // Creating the zipper that holds data and handles compression
//! let mut zipper = ZipArchive::new();
//!
//! // Adding a file from filesystem
//! zipper.add_file_from_fs(
//! Path::new("input/test_text_file.txt"),
//! "test_text_file.txt".to_owned(),
//! );
//!
//! // Adding a file with data from a memory location
//! zipper.add_file_from_memory(b"Hello, world!", "hello_world.txt".to_owned());
//!
//! // Adding a directory and a file to it
//! zipper.add_directory("test_dir".to_owned());
//! zipper.add_file_from_fs(
//! Path::new("input/file_that_goes_to_a_dir.txt"),
//! "test_dir/file_that_goes_to_a_dir.txt".to_owned(),
//! );
//!
//! // Writing to a file
//! // First, open the file
//! let mut file = File::create("output.zip").unwrap();
//! // Then, write to it
//! zipper.write(&mut file); // Amount of threads is chosen automatically
//! ```
use std::{
borrow::Cow,
io::{Read, Seek, Write},
num::NonZeroUsize,
panic::{RefUnwindSafe, UnwindSafe},
path::Path,
sync::{mpsc, Mutex},
};
use level::CompressionLevel;
use rayon::prelude::*;
use zip_archive_parts::{
data::ZipData,
extra_field::{ExtraField, ExtraFields},
file::ZipFile,
job::{ZipJob, ZipJobOrigin},
};
pub mod level;
mod platform;
mod zip_archive_parts;
// TODO: tests, maybe examples
/// Compression type for the file. Directories always use [`Stored`](CompressionType::Stored).
/// Default is [`Deflate`](CompressionType::Deflate).
#[repr(u16)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum CompressionType {
/// No compression at all, the data is stored as-is.
///
/// This is used for directories because they have no data (no payload)
Stored = 0,
#[default]
/// Deflate compression, the most common in ZIP files.
Deflate = 8,
}
/// Builder used to optionally add additional attributes to a file or directory.
/// The default compression type is [`CompressionType::Deflate`] and default compression level is
/// [`CompressionLevel::best`]
#[must_use]
#[derive(Debug)]
pub struct ZipFileBuilder<'a, 'b> {
archive_handle: &'a mut ZipArchive<'b>,
job: ZipJob<'b>,
}
impl<'a, 'b> ZipFileBuilder<'a, 'b> {
/// Call this when you're done configuring the file entry and it will be added to the job list,
/// or directly into the resulting dataset if it's a directory. Always needs to be called.
pub fn done(self) {
let Self { archive_handle, job } = self;
match &job.data_origin {
ZipJobOrigin::Directory => {
let file = job.into_file().expect("No failing code path");
archive_handle.push_file(file);
}
_ => archive_handle.push_job(job),
}
}
/// Read filesystem metadata from filesystem and add the properties to this file. It sets
/// external attributes (as with [`Self::external_attributes`]) and adds extra fields generated
/// with [`ExtraFields::new_from_fs`]
pub fn metadata_from_fs(self, fs_path: &Path) -> std::io::Result<Self> {
let metadata = std::fs::metadata(fs_path)?;
let external_attributes = platform::attributes_from_fs(&metadata);
let extra_fields = ExtraFields::new_from_fs(&metadata);
Ok(self.external_attributes(external_attributes).extra_fields(extra_fields))
}
/// Add a file comment.
pub fn file_comment(mut self, comment: String) -> Self {
self.job.file_comment = Some(comment);
self
}
/// Add additional [`ExtraField`].
pub fn extra_field(mut self, extra_field: ExtraField) -> Self {
self.job.extra_fields.values.push(extra_field);
self
}
/// Add additional [`ExtraField`]s.
pub fn extra_fields(mut self, extra_fields: impl IntoIterator<Item = ExtraField>) -> Self {
self.job.extra_fields.extend(extra_fields);
self
}
/// Set compression type. Ignored for directories, as they use no compression.
///
/// Default is [`CompressionType::Deflate`].
pub fn compression_type(mut self, compression_type: CompressionType) -> Self {
self.job.compression_type = compression_type;
self
}
/// Set compression level. Ignored for directories, as they use no compression.
///
/// Default is [`CompressionLevel::best`]
pub fn compression_level(mut self, compression_level: CompressionLevel) -> Self {
self.job.compression_level = compression_level;
self
}
/// Set external attributes. The format depends on a filesystem and is mostly a legacy
/// mechanism, usually a default value is used if this is not a filesystem source. When a file
/// is added from the filesystem, these attributes will be read and used and the ones set wit
/// hthis method are ignored.
pub fn external_attributes(mut self, external_attributes: u16) -> Self {
self.job.external_attributes = external_attributes;
self
}
/// Set external file attributes from a filesystem item. Use of this method is discouraged in
/// favor of [`Self::metadata_from_fs`], which also sets extra fields which contain modern
/// filesystem attributes instead of using old 16-bit system-dependent format.
pub fn external_attributes_from_fs(mut self, fs_path: &Path) -> std::io::Result<Self> {
let metadata = std::fs::metadata(fs_path)?;
self.job.external_attributes = platform::attributes_from_fs(&metadata);
Ok(self)
}
#[inline]
fn new(archive: &'a mut ZipArchive<'b>, filename: String, origin: ZipJobOrigin<'b>) -> Self {
Self {
archive_handle: archive,
job: ZipJob {
data_origin: origin,
archive_path: filename,
extra_fields: ExtraFields::default(),
file_comment: None,
external_attributes: platform::default_file_attrs(),
compression_type: CompressionType::Deflate,
compression_level: CompressionLevel::best(),
},
}
}
#[inline]
fn new_dir(archive: &'a mut ZipArchive<'b>, filename: String) -> Self {
Self {
archive_handle: archive,
job: ZipJob {
data_origin: ZipJobOrigin::Directory,
archive_path: filename,
extra_fields: ExtraFields::default(),
file_comment: None,
external_attributes: platform::default_dir_attrs(),
compression_type: CompressionType::Deflate,
compression_level: CompressionLevel::best(),
},
}
}
}
/// Structure that holds the current state of ZIP archive creation.
///
/// # Lifetimes
///
/// Because some of the methods allow supplying borrowed data, the lifetimes are used to indicate
/// that [`Self`](ZipArchive) borrows them. If you only provide owned data, such as
/// [`Vec<u8>`](Vec) or [`PathBuf`](std::path::PathBuf), you won't have to worry about lifetimes
/// and can simply use `'static`, if you ever need to specify them in your code.
///
/// The lifetime `'a` is for the borrowed data passed in
/// [`add_file_from_memory`](Self::add_file_from_memory),
/// [`add_file_from_fs`](Self::add_file_from_fs) and
/// [`add_file_from_reader`](Self::add_file_from_reader)
#[derive(Debug, Default)]
pub struct ZipArchive<'a> {
jobs_queue: Vec<ZipJob<'a>>,
data: ZipData,
}
impl<'a> ZipArchive<'a> {
fn push_job(&mut self, job: ZipJob<'a>) {
self.jobs_queue.push(job);
}
fn push_file(&mut self, file: ZipFile) {
self.data.files.push(file);
}
/// Create an empty [`ZipArchive`]
#[inline]
pub fn new() -> Self {
Self::default()
}
/// Add file from filesystem.
///
/// Opens the file and reads data from it when [`compress`](Self::compress) is called.
///
/// ```
/// # use mtzip::ZipArchive;
/// # use std::path::Path;
/// let mut zipper = ZipArchive::new();
/// zipper
/// .add_file_from_fs(Path::new("input.txt"), "input.txt".to_owned())
/// .done();
/// ```
#[inline]
pub fn add_file_from_fs(&mut self, fs_path: impl Into<Cow<'a, Path>>, archived_path: String) -> ZipFileBuilder<'_, 'a> {
ZipFileBuilder::new(self, archived_path, ZipJobOrigin::Filesystem { path: fs_path.into() })
}
/// Add file with data from memory.
///
/// The data can be either borrowed or owned by the [`ZipArchive`] struct to avoid lifetime
/// hell.
///
/// ```
/// # use mtzip::ZipArchive;
/// # use std::path::Path;
/// let mut zipper = ZipArchive::new();
/// let data: &[u8] = "Hello, world!".as_ref();
/// zipper
/// .add_file_from_memory(data, "hello_world.txt".to_owned())
/// .done();
/// ```
#[inline]
pub fn add_file_from_memory(&mut self, data: impl Into<Cow<'a, [u8]>>, archived_path: String) -> ZipFileBuilder<'_, 'a> {
ZipFileBuilder::new(self, archived_path, ZipJobOrigin::RawData(data.into()))
}
/// Add a file with data from a reader.
///
/// This method takes any type implementing [`Read`] and allows it to have borrowed data (`'r`)
///
/// ```
/// # use mtzip::ZipArchive;
/// # use std::path::Path;
/// let mut zipper = ZipArchive::new();
/// let data_input = std::io::stdin();
/// zipper
/// .add_file_from_reader(data_input, "stdin_file.txt".to_owned())
/// .done();
/// ```
#[inline]
pub fn add_file_from_reader<R: Read + Send + Sync + UnwindSafe + RefUnwindSafe + 'a>(
&mut self,
reader: R,
archived_path: String,
) -> ZipFileBuilder<'_, 'a> {
ZipFileBuilder::new(self, archived_path, ZipJobOrigin::Reader(Box::new(reader)))
}
/// Add a directory entry.
///
/// All directories in the tree should be added. This method does not asssociate any filesystem
/// properties to the entry.
///
/// ```
/// # use mtzip::ZipArchive;
/// # use std::path::Path;
/// let mut zipper = ZipArchive::new();
/// zipper.add_directory("test_dir/".to_owned()).done();
/// ```
#[inline]
pub fn add_directory(&mut self, archived_path: String) -> ZipFileBuilder<'_, 'a> {
ZipFileBuilder::new_dir(self, archived_path)
}
/// Compress contents. Will be done automatically on [`write`](Self::write) call if files were
/// added between last compression and [`write`](Self::write) call. Automatically chooses
/// amount of threads to use based on how much are available.
#[inline]
pub fn compress(&mut self) {
self.compress_with_threads(Self::get_threads());
}
/// Compress contents. Will be done automatically on
/// [`write_with_threads`](Self::write_with_threads) call if files were added between last
/// compression and [`write`](Self::write). Allows specifying amount of threads that will be
/// used.
///
/// Example of getting amount of threads that this library uses in
/// [`compress`](Self::compress):
///
/// ```
/// # use std::num::NonZeroUsize;
/// # use mtzip::ZipArchive;
/// # let mut zipper = ZipArchive::new();
/// let threads = std::thread::available_parallelism()
/// .map(NonZeroUsize::get)
/// .unwrap_or(1);
///
/// zipper.compress_with_threads(threads);
/// ```
#[inline]
pub fn compress_with_threads(&mut self, threads: usize) {
if !self.jobs_queue.is_empty() {
self.compress_with_consumer(threads, |zip_data, rx| zip_data.files.extend(rx))
}
}
/// Write compressed data to a writer (usually a file). Executes [`compress`](Self::compress)
/// if files were added between last [`compress`](Self::compress) call and this call.
/// Automatically chooses the amount of threads cpu has.
#[inline]
pub fn write<W: Write + Seek>(&mut self, writer: &mut W) -> std::io::Result<()> {
self.write_with_threads(writer, Self::get_threads())
}
/// Write compressed data to a writer (usually a file). Executes
/// [`compress_with_threads`](Self::compress_with_threads) if files were added between last
/// [`compress`](Self::compress) call and this call. Allows specifying amount of threads that
/// will be used.
///
/// Example of getting amount of threads that this library uses in [`write`](Self::write):
///
/// ```
/// # use std::num::NonZeroUsize;
/// # use mtzip::ZipArchive;
/// # let mut zipper = ZipArchive::new();
/// let threads = std::thread::available_parallelism()
/// .map(NonZeroUsize::get)
/// .unwrap_or(1);
///
/// zipper.compress_with_threads(threads);
/// ```
#[inline]
pub fn write_with_threads<W: Write + Seek>(&mut self, writer: &mut W, threads: usize) -> std::io::Result<()> {
if !self.jobs_queue.is_empty() {
self.compress_with_consumer(threads, |zip_data, rx| zip_data.write(writer, rx))
} else {
self.data.write(writer, std::iter::empty())
}
}
/// Starts the compression jobs and passes teh mpsc receiver to teh consumer function, which
/// might either store the data in [`ZipData`] - [`Self::compress_with_threads`]; or write the
/// zip data as soon as it's available - [`Self::write_with_threads`]
fn compress_with_consumer<F, T>(&mut self, threads: usize, consumer: F) -> T
where
F: FnOnce(&mut ZipData, mpsc::Receiver<ZipFile>) -> T,
{
let jobs_drain = Mutex::new(self.jobs_queue.drain(..));
let jobs_drain_ref = &jobs_drain;
std::thread::scope(|s| {
let rx = {
let (tx, rx) = mpsc::channel();
for _ in 0..threads {
let thread_tx = tx.clone();
s.spawn(move || loop {
let next_job = jobs_drain_ref.lock().unwrap().next_back();
if let Some(job) = next_job {
thread_tx.send(job.into_file().unwrap()).unwrap();
} else {
break;
}
});
}
rx
};
consumer(&mut self.data, rx)
})
}
fn get_threads() -> usize {
std::thread::available_parallelism().map(NonZeroUsize::get).unwrap_or(1)
}
}
impl ZipArchive<'_> {
/// Compress contents and use rayon for parallelism.
///
/// Uses whatever thread pool this function is executed in.
///
/// If you want to limit the amount of threads to be used, use
/// [`rayon::ThreadPoolBuilder::num_threads`] and either set it as a global pool, or
/// [`rayon::ThreadPool::install`] the call to this method in it.
pub fn compress_with_rayon(&mut self) {
if !self.jobs_queue.is_empty() {
let files_par_iter = self.jobs_queue.par_drain(..).map(|job| job.into_file().unwrap());
self.data.files.par_extend(files_par_iter)
}
}
/// Write the contents to a writer.
///
/// This method uses teh same thread logic as [`Self::compress_with_rayon`], refer to its
/// documentation for details on how to control the parallelism and thread allocation.
pub fn write_with_rayon<W: Write + Seek + Send>(&mut self, writer: &mut W) -> std::io::Result<()> {
if !self.jobs_queue.is_empty() {
let files_par_iter = self.jobs_queue.par_drain(..).map(|job| job.into_file().unwrap());
self.data.write_rayon(writer, files_par_iter)
} else {
self.data.write_rayon(writer, rayon::iter::empty())
}
}
}

View File

@@ -0,0 +1,96 @@
//! Platform-specific stuff
use std::fs::Metadata;
#[cfg(target_os = "windows")]
/// OS - Windows, id 11 per Info-Zip spec
/// Specification version 6.2
pub(crate) const VERSION_MADE_BY: u16 = (11 << 8) + 62;
#[cfg(target_os = "macos")]
/// OS - MacOS darwin, id 19
/// Specification version 6.2
pub(crate) const VERSION_MADE_BY: u16 = (19 << 8) + 62;
#[cfg(not(any(target_os = "windows", target_os = "macos")))]
// Fallback
/// OS - Unix assumed, id 3
/// Specification version 6.2
pub(crate) const VERSION_MADE_BY: u16 = (3 << 8) + 62;
#[allow(dead_code)]
pub(crate) const DEFAULT_UNIX_FILE_ATTRS: u16 = 0o100644;
#[allow(dead_code)]
pub(crate) const DEFAULT_UNIX_DIR_ATTRS: u16 = 0o040755;
#[cfg(target_os = "windows")]
pub(crate) const DEFAULT_WINDOWS_FILE_ATTRS: u16 = 128;
#[cfg(target_os = "windows")]
pub(crate) const DEFAULT_WINDOWS_DIR_ATTRS: u16 = 16;
#[inline]
#[allow(dead_code)]
const fn convert_attrs(attrs: u32) -> u16 {
attrs as u16
}
pub(crate) fn attributes_from_fs(metadata: &Metadata) -> u16 {
#[cfg(target_os = "windows")]
{
use std::os::windows::fs::MetadataExt;
return convert_attrs(metadata.file_attributes());
}
#[cfg(target_os = "linux")]
{
use std::os::linux::fs::MetadataExt;
return convert_attrs(metadata.st_mode());
}
#[cfg(target_os = "macos")]
{
use std::os::darwin::fs::MetadataExt;
return convert_attrs(metadata.st_mode());
}
#[cfg(all(unix, not(target_os = "linux"), not(target_os = "macos")))]
{
use std::os::unix::fs::PermissionsExt;
return convert_attrs(metadata.permissions().mode());
}
#[cfg(not(any(target_os = "windows", target_os = "linux", target_os = "macos", unix)))]
{
if metadata.is_dir() {
return DEFAULT_UNIX_DIR_ATTRS;
} else {
return DEFAULT_UNIX_FILE_ATTRS;
}
}
}
#[cfg(target_os = "windows")]
pub(crate) const fn default_file_attrs() -> u16 {
DEFAULT_WINDOWS_FILE_ATTRS
}
#[cfg(not(windows))]
pub(crate) const fn default_file_attrs() -> u16 {
DEFAULT_UNIX_FILE_ATTRS
}
#[cfg(target_os = "windows")]
pub(crate) const fn default_dir_attrs() -> u16 {
DEFAULT_WINDOWS_DIR_ATTRS
}
#[cfg(any(target_os = "linux", unix))]
#[cfg(not(target_os = "windows"))]
pub(crate) const fn default_dir_attrs() -> u16 {
DEFAULT_UNIX_DIR_ATTRS
}
#[cfg(not(any(target_os = "windows", target_os = "linux", unix)))]
pub(crate) const fn default_dir_attrs() -> u16 {
0
}

View File

@@ -0,0 +1,156 @@
use std::io::{Seek, Write};
use std::sync::Mutex;
use rayon::prelude::*;
use super::file::{ZipFile, ZipFileNoData};
const END_OF_CENTRAL_DIR_SIGNATURE: u32 = 0x06054B50;
#[derive(Debug, Default)]
pub struct ZipData {
pub files: Vec<ZipFile>,
}
impl ZipData {
pub fn write<W: Write + Seek, I: IntoIterator<Item = ZipFile>>(
&mut self,
buf: &mut W,
zip_file_iter: I,
) -> std::io::Result<()> {
let zip_files = self.write_files_contained_and_iter(buf, zip_file_iter)?;
let files_amount = super::files_amount_u16(&zip_files);
let central_dir_offset = super::stream_position_u32(buf)?;
self.write_central_dir(zip_files, buf)?;
let central_dir_start = super::stream_position_u32(buf)?;
self.write_end_of_central_directory(
buf,
central_dir_offset,
central_dir_start,
files_amount,
)
}
pub fn write_rayon<W: Write + Seek + Send, I: ParallelIterator<Item = ZipFile>>(
&mut self,
buf: &mut W,
zip_file_iter: I,
) -> std::io::Result<()> {
let zip_files = self.write_files_contained_and_par_iter(buf, zip_file_iter)?;
let files_amount = super::files_amount_u16(&zip_files);
let central_dir_offset = super::stream_position_u32(buf)?;
self.write_central_dir(zip_files, buf)?;
let central_dir_start = super::stream_position_u32(buf)?;
self.write_end_of_central_directory(
buf,
central_dir_offset,
central_dir_start,
files_amount,
)
}
#[inline]
fn write_files_contained_and_iter<W: Write + Seek, I: IntoIterator<Item = ZipFile>>(
&mut self,
buf: &mut W,
zip_files_iter: I,
) -> std::io::Result<Vec<ZipFileNoData>> {
let zip_files = std::mem::take(&mut self.files);
self.write_files_iter(buf, zip_files.into_iter().chain(zip_files_iter))
}
#[inline]
pub fn write_files_contained_and_par_iter<
W: Write + Seek + Send,
I: ParallelIterator<Item = ZipFile>,
>(
&mut self,
buf: &mut W,
zip_files_iter: I,
) -> std::io::Result<Vec<ZipFileNoData>> {
let zip_files = std::mem::take(&mut self.files);
self.write_files_par_iter(buf, zip_files.into_par_iter().chain(zip_files_iter))
}
pub fn write_files_iter<W: Write + Seek, I: IntoIterator<Item = ZipFile>>(
&mut self,
buf: &mut W,
zip_files: I,
) -> std::io::Result<Vec<ZipFileNoData>> {
zip_files
.into_iter()
.map(|zipfile| zipfile.write_local_file_header_with_data_consuming(buf))
.collect::<std::io::Result<Vec<_>>>()
}
pub fn write_files_par_iter<W: Write + Seek + Send, I: ParallelIterator<Item = ZipFile>>(
&mut self,
buf: &mut W,
zip_files: I,
) -> std::io::Result<Vec<ZipFileNoData>> {
let buf = Mutex::new(buf);
zip_files
.map(|zipfile| {
let mut buf_lock = buf.lock().unwrap();
zipfile.write_local_file_header_with_data_consuming(*buf_lock)
})
.collect::<std::io::Result<Vec<_>>>()
}
fn write_central_dir<W: Write, I: IntoIterator<Item = ZipFileNoData>>(
&self,
zip_files: I,
buf: &mut W,
) -> std::io::Result<()> {
zip_files
.into_iter()
.try_for_each(|zip_file| zip_file.write_central_directory_entry(buf))
}
const FOOTER_LENGTH: usize = 22;
fn write_end_of_central_directory<W: Write>(
&self,
buf: &mut W,
central_dir_offset: u32,
central_dir_start: u32,
files_amount: u16,
) -> std::io::Result<()> {
// Temporary in-memory statically sized array
let mut central_dir = [0; Self::FOOTER_LENGTH];
{
let mut central_dir_buf: &mut [u8] = &mut central_dir;
// Signature
central_dir_buf.write_all(&END_OF_CENTRAL_DIR_SIGNATURE.to_le_bytes())?;
// number of this disk
central_dir_buf.write_all(&0_u16.to_le_bytes())?;
// number of the disk with start
central_dir_buf.write_all(&0_u16.to_le_bytes())?;
// Number of entries on this disk
central_dir_buf.write_all(&files_amount.to_le_bytes())?;
// Number of entries
central_dir_buf.write_all(&files_amount.to_le_bytes())?;
// Central dir size
central_dir_buf.write_all(&(central_dir_start - central_dir_offset).to_le_bytes())?;
// Central dir offset
central_dir_buf.write_all(&central_dir_offset.to_le_bytes())?;
// Comment length
central_dir_buf.write_all(&0_u16.to_le_bytes())?;
}
buf.write_all(&central_dir)?;
Ok(())
}
}

View File

@@ -0,0 +1,273 @@
//! ZIP file extra field
use std::{fs::Metadata, io::Write};
/// This is a structure containing [`ExtraField`]s associated with a file or directory in a zip
/// file, mostly used for filesystem properties, and this is the only functionality implemented
/// here.
///
/// The [`new_from_fs`](Self::new_from_fs) method will use the metadata the filesystem provides to
/// construct the collection.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ExtraFields {
pub(crate) values: Vec<ExtraField>,
}
impl Extend<ExtraField> for ExtraFields {
fn extend<T: IntoIterator<Item = ExtraField>>(&mut self, iter: T) {
self.values.extend(iter)
}
}
impl IntoIterator for ExtraFields {
type Item = <Vec<ExtraField> as IntoIterator>::Item;
type IntoIter = <Vec<ExtraField> as IntoIterator>::IntoIter;
fn into_iter(self) -> Self::IntoIter {
self.values.into_iter()
}
}
impl ExtraFields {
/// Create a new set of [`ExtraField`]s. [`Self::new_from_fs`] should be preferred.
///
/// # Safety
///
/// All fields must have valid values depending on the field type.
pub unsafe fn new<I>(fields: I) -> Self
where
I: IntoIterator<Item = ExtraField>,
{
Self { values: fields.into_iter().collect() }
}
/// This method will use the filesystem metadata to get the properties that can be stored in
/// ZIP [`ExtraFields`].
///
/// The behavior is dependent on the target platform. Will return an empty set if the target os
/// is not Windows or Linux and not of UNIX family.
pub fn new_from_fs(metadata: &Metadata) -> Self {
#[cfg(target_os = "windows")]
{
return Self::new_windows(metadata);
}
#[cfg(target_os = "linux")]
{
return Self::new_linux(metadata);
}
#[cfg(all(unix, not(target_os = "linux")))]
{
return Self::new_unix(metadata);
}
}
#[cfg(target_os = "linux")]
fn new_linux(metadata: &Metadata) -> Self {
use std::os::linux::fs::MetadataExt;
let mod_time = Some(metadata.st_mtime() as i32);
let ac_time = Some(metadata.st_atime() as i32);
let cr_time = Some(metadata.st_ctime() as i32);
let uid = metadata.st_uid();
let gid = metadata.st_gid();
Self { values: vec![ExtraField::UnixExtendedTimestamp { mod_time, ac_time, cr_time }, ExtraField::UnixAttrs { uid, gid }] }
}
#[cfg(all(unix, not(target_os = "linux")))]
#[allow(dead_code)]
fn new_unix(metadata: &Metadata) -> Self {
use std::os::unix::fs::MetadataExt;
let mod_time = Some(metadata.mtime() as i32);
let ac_time = Some(metadata.atime() as i32);
let cr_time = Some(metadata.ctime() as i32);
let uid = metadata.uid();
let gid = metadata.gid();
Self { values: vec![ExtraField::UnixExtendedTimestamp { mod_time, ac_time, cr_time }, ExtraField::UnixAttrs { uid, gid }] }
}
#[cfg(target_os = "windows")]
fn new_windows(metadata: &Metadata) -> Self {
use std::os::windows::fs::MetadataExt;
let mtime = metadata.last_write_time();
let atime = metadata.last_access_time();
let ctime = metadata.creation_time();
Self { values: vec![ExtraField::Ntfs { mtime, atime, ctime }] }
}
pub(crate) fn data_length<const CENTRAL_HEADER: bool>(&self) -> u16 {
self.values.iter().map(|f| 4 + f.field_size::<CENTRAL_HEADER>()).sum()
}
pub(crate) fn write<W: Write, const CENTRAL_HEADER: bool>(&self, writer: &mut W) -> std::io::Result<()> {
for field in &self.values {
field.write::<_, CENTRAL_HEADER>(writer)?;
}
Ok(())
}
}
/// Extra data that can be associated with a file or directory.
///
/// This library only implements the filesystem properties in NTFS and UNIX format.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExtraField {
/// NTFS file properties.
Ntfs {
/// Last modification timestamp
mtime: u64,
/// Last access timestamp
atime: u64,
/// File/directory creation timestamp
ctime: u64,
},
/// Info-Zip extended unix timestamp. Each part is optional by definition, but will be
/// populated by [`ExtraFields::new_from_fs`].
UnixExtendedTimestamp {
/// Last modification timestamp
mod_time: Option<i32>,
/// Last access timestamp
ac_time: Option<i32>,
/// Creation timestamp
cr_time: Option<i32>,
},
/// UNIX file/directory attributes defined by Info-Zip.
UnixAttrs {
/// UID of the owner
uid: u32,
/// GID of the group
gid: u32,
},
}
const MOD_TIME_PRESENT: u8 = 1;
const AC_TIME_PRESENT: u8 = 1 << 1;
const CR_TIME_PRESENT: u8 = 1 << 2;
impl ExtraField {
#[inline]
fn header_id(&self) -> u16 {
match self {
Self::Ntfs { mtime: _, atime: _, ctime: _ } => 0x000a,
Self::UnixExtendedTimestamp { mod_time: _, ac_time: _, cr_time: _ } => 0x5455,
Self::UnixAttrs { uid: _, gid: _ } => 0x7875,
}
}
#[inline]
const fn optional_field_size<T: Sized>(field: &Option<T>) -> u16 {
match field {
Some(_) => std::mem::size_of::<T>() as u16,
None => 0,
}
}
#[inline]
const fn field_size<const CENTRAL_HEADER: bool>(&self) -> u16 {
match self {
Self::Ntfs { mtime: _, atime: _, ctime: _ } => 32,
Self::UnixExtendedTimestamp { mod_time, ac_time, cr_time } => {
1 + Self::optional_field_size(mod_time) + {
if !CENTRAL_HEADER {
Self::optional_field_size(ac_time) + Self::optional_field_size(cr_time)
} else {
0
}
}
}
Self::UnixAttrs { uid: _, gid: _ } => 11,
}
}
#[inline]
const fn if_present(val: Option<i32>, if_present: u8) -> u8 {
match val {
Some(_) => if_present,
None => 0,
}
}
const NTFS_FIELD_LEN: usize = 32;
const UNIX_ATTRS_LEN: usize = 11;
pub(crate) fn write<W: Write, const CENTRAL_HEADER: bool>(self, writer: &mut W) -> std::io::Result<()> {
// Header ID
writer.write_all(&self.header_id().to_le_bytes())?;
// Field data size
writer.write_all(&self.field_size::<CENTRAL_HEADER>().to_le_bytes())?;
match self {
Self::Ntfs { mtime, atime, ctime } => {
// Writing to a temporary in-memory array
let mut field = [0; Self::NTFS_FIELD_LEN];
{
let mut field_buf: &mut [u8] = &mut field;
// Reserved field
field_buf.write_all(&0_u32.to_le_bytes())?;
// Tag1 number
field_buf.write_all(&1_u16.to_le_bytes())?;
// Tag1 size
field_buf.write_all(&24_u16.to_le_bytes())?;
// Mtime
field_buf.write_all(&mtime.to_le_bytes())?;
// Atime
field_buf.write_all(&atime.to_le_bytes())?;
// Ctime
field_buf.write_all(&ctime.to_le_bytes())?;
}
writer.write_all(&field)?;
}
Self::UnixExtendedTimestamp { mod_time, ac_time, cr_time } => {
let flags = Self::if_present(mod_time, MOD_TIME_PRESENT)
| Self::if_present(ac_time, AC_TIME_PRESENT)
| Self::if_present(cr_time, CR_TIME_PRESENT);
writer.write_all(&[flags])?;
if let Some(mod_time) = mod_time {
writer.write_all(&mod_time.to_le_bytes())?;
}
if !CENTRAL_HEADER {
if let Some(ac_time) = ac_time {
writer.write_all(&ac_time.to_le_bytes())?;
}
if let Some(cr_time) = cr_time {
writer.write_all(&cr_time.to_le_bytes())?;
}
}
}
Self::UnixAttrs { uid, gid } => {
// Writing to a temporary in-memory array
let mut field = [0; Self::UNIX_ATTRS_LEN];
{
let mut field_buf: &mut [u8] = &mut field;
// Version of the field
field_buf.write_all(&[1])?;
// UID size
field_buf.write_all(&[4])?;
// UID
field_buf.write_all(&uid.to_le_bytes())?;
// GID size
field_buf.write_all(&[4])?;
// GID
field_buf.write_all(&gid.to_le_bytes())?;
}
writer.write_all(&field)?;
}
}
Ok(())
}
}

View File

@@ -0,0 +1,201 @@
use std::io::{Seek, Write};
use super::extra_field::ExtraFields;
use super::super::{CompressionType, platform::VERSION_MADE_BY};
const LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x04034B50;
const CENTRAL_FILE_HEADER_SIGNATURE: u32 = 0x02014B50;
const VERSION_NEEDED_TO_EXTRACT: u16 = 20;
/// Set bit 11 to indicate that the file names are in UTF-8, because all strings in rust are valid
/// UTF-8
const GENERAL_PURPOSE_BIT_FLAG: u16 = 1 << 11;
#[derive(Debug)]
pub struct ZipFile {
pub header: ZipFileHeader,
pub data: Vec<u8>,
}
#[derive(Debug)]
pub struct ZipFileHeader {
pub compression_type: CompressionType,
pub crc: u32,
pub uncompressed_size: u32,
pub filename: String,
pub file_comment: Option<String>,
pub external_file_attributes: u32,
pub extra_fields: ExtraFields,
}
#[derive(Debug)]
pub struct ZipFileNoData {
pub header: ZipFileHeader,
pub local_header_offset: u32,
pub compressed_size: u32,
}
impl ZipFile {
pub fn write_local_file_header_with_data_consuming<W: Write + Seek>(
self,
buf: &mut W,
) -> std::io::Result<ZipFileNoData> {
let local_header_offset = super::stream_position_u32(buf)?;
self.write_local_file_header_and_data(buf)?;
let Self { header, data } = self;
Ok(ZipFileNoData {
header,
local_header_offset,
compressed_size: data.len() as u32,
})
}
const LOCAL_FILE_HEADER_LEN: usize = 30;
pub fn write_local_file_header_and_data<W: Write>(&self, buf: &mut W) -> std::io::Result<()> {
// Writing to a temporary in-memory statically sized array first
let mut header = [0; Self::LOCAL_FILE_HEADER_LEN];
{
let mut header_buf: &mut [u8] = &mut header;
// signature
header_buf.write_all(&LOCAL_FILE_HEADER_SIGNATURE.to_le_bytes())?;
// version needed to extract
header_buf.write_all(&VERSION_NEEDED_TO_EXTRACT.to_le_bytes())?;
// general purpose bit flag
header_buf.write_all(&GENERAL_PURPOSE_BIT_FLAG.to_le_bytes())?;
// compression type
header_buf.write_all(&(self.header.compression_type as u16).to_le_bytes())?;
// Last modification time // moved to extra fields
header_buf.write_all(&0_u16.to_le_bytes())?;
// Last modification date // moved to extra fields
header_buf.write_all(&0_u16.to_le_bytes())?;
// crc
header_buf.write_all(&self.header.crc.to_le_bytes())?;
// Compressed size
debug_assert!(self.data.len() <= u32::MAX as usize);
header_buf.write_all(&(self.data.len() as u32).to_le_bytes())?;
// Uncompressed size
header_buf.write_all(&self.header.uncompressed_size.to_le_bytes())?;
// Filename size
debug_assert!(self.header.filename.len() <= u16::MAX as usize);
header_buf.write_all(&(self.header.filename.len() as u16).to_le_bytes())?;
// extra field size
header_buf.write_all(
&self
.header
.extra_fields
.data_length::<false>()
.to_le_bytes(),
)?;
}
buf.write_all(&header)?;
// Filename
buf.write_all(self.header.filename.as_bytes())?;
// Extra field
self.header.extra_fields.write::<_, false>(buf)?;
// Data
buf.write_all(&self.data)?;
Ok(())
}
#[inline]
pub fn directory(
mut name: String,
extra_fields: ExtraFields,
external_attributes: u16,
file_comment: Option<String>,
) -> Self {
if !(name.ends_with('/') || name.ends_with('\\')) {
name += "/"
};
Self {
header: ZipFileHeader {
compression_type: CompressionType::Stored,
crc: 0,
uncompressed_size: 0,
filename: name,
external_file_attributes: (external_attributes as u32) << 16,
extra_fields,
file_comment,
},
data: vec![],
}
}
}
impl ZipFileNoData {
const CENTRAL_DIR_ENTRY_LEN: usize = 46;
pub fn write_central_directory_entry<W: Write>(&self, buf: &mut W) -> std::io::Result<()> {
// Writing to a temporary in-memory statically sized array first
let mut central_dir_entry_header = [0; Self::CENTRAL_DIR_ENTRY_LEN];
{
let mut central_dir_entry_buf: &mut [u8] = &mut central_dir_entry_header;
// signature
central_dir_entry_buf.write_all(&CENTRAL_FILE_HEADER_SIGNATURE.to_le_bytes())?;
// version made by
central_dir_entry_buf.write_all(&VERSION_MADE_BY.to_le_bytes())?;
// version needed to extract
central_dir_entry_buf.write_all(&VERSION_NEEDED_TO_EXTRACT.to_le_bytes())?;
// general purpose bit flag
central_dir_entry_buf.write_all(&GENERAL_PURPOSE_BIT_FLAG.to_le_bytes())?;
// compression type
central_dir_entry_buf
.write_all(&(self.header.compression_type as u16).to_le_bytes())?;
// Last modification time // moved to extra fields
central_dir_entry_buf.write_all(&0_u16.to_le_bytes())?;
// Last modification date // moved to extra fields
central_dir_entry_buf.write_all(&0_u16.to_le_bytes())?;
// crc
central_dir_entry_buf.write_all(&self.header.crc.to_le_bytes())?;
// Compressed size
central_dir_entry_buf.write_all(&self.compressed_size.to_le_bytes())?;
// Uncompressed size
central_dir_entry_buf.write_all(&self.header.uncompressed_size.to_le_bytes())?;
// Filename size
debug_assert!(self.header.filename.len() <= u16::MAX as usize);
central_dir_entry_buf.write_all(&(self.header.filename.len() as u16).to_le_bytes())?;
// extra field size
central_dir_entry_buf
.write_all(&self.header.extra_fields.data_length::<true>().to_le_bytes())?;
// comment size
central_dir_entry_buf.write_all(
&(self
.header
.file_comment
.as_ref()
.map(|fc| fc.len())
.unwrap_or(0) as u16)
.to_le_bytes(),
)?;
// disk number start
central_dir_entry_buf.write_all(&0_u16.to_le_bytes())?;
// internal file attributes
central_dir_entry_buf.write_all(&0_u16.to_le_bytes())?;
// external file attributes
central_dir_entry_buf.write_all(&self.header.external_file_attributes.to_le_bytes())?;
// relative offset of local header
central_dir_entry_buf.write_all(&self.local_header_offset.to_le_bytes())?;
}
buf.write_all(&central_dir_entry_header)?;
// Filename
buf.write_all(self.header.filename.as_bytes())?;
// Extra field
self.header.extra_fields.write::<_, true>(buf)?;
// File comment
if let Some(file_comment) = &self.header.file_comment {
buf.write_all(file_comment.as_bytes())?;
}
Ok(())
}
}

View File

@@ -0,0 +1,179 @@
use std::{
borrow::Cow,
fs::File,
io::Read,
panic::{RefUnwindSafe, UnwindSafe},
path::Path,
};
use flate2::{CrcReader, read::DeflateEncoder};
use super::{extra_field::ExtraFields, file::ZipFile};
use super::super::{
CompressionType, level::CompressionLevel, platform::attributes_from_fs,
zip_archive_parts::file::ZipFileHeader,
};
pub enum ZipJobOrigin<'a> {
Directory,
Filesystem { path: Cow<'a, Path> },
RawData(Cow<'a, [u8]>),
Reader(Box<dyn Read + Send + Sync + UnwindSafe + RefUnwindSafe + 'a>),
}
impl core::fmt::Debug for ZipJobOrigin<'_> {
#[inline]
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
Self::Directory => f.write_str("Directory"),
Self::Filesystem { path } => f.debug_struct("Filesystem").field("path", &path).finish(),
Self::RawData(raw_data) => f.debug_tuple("RawData").field(&raw_data).finish(),
Self::Reader(_reader) => f.debug_tuple("Reader").finish_non_exhaustive(),
}
}
}
#[derive(Debug)]
struct FileDigest {
data: Vec<u8>,
uncompressed_size: u32,
crc: u32,
}
#[derive(Debug)]
pub struct ZipJob<'a> {
pub data_origin: ZipJobOrigin<'a>,
pub extra_fields: ExtraFields,
pub archive_path: String,
pub file_comment: Option<String>,
pub external_attributes: u16,
/// Ignored when [`data_origin`](Self::data_origin) is a [`ZipJobOrigin::Directory`]
pub compression_level: CompressionLevel,
/// Ignored when [`data_origin`](Self::data_origin) is a [`ZipJobOrigin::Directory`]
pub compression_type: CompressionType,
}
impl ZipJob<'_> {
fn compress_file<R: Read>(
source: R,
uncompressed_size_approx: Option<u32>,
compression_type: CompressionType,
compression_level: CompressionLevel,
) -> std::io::Result<FileDigest> {
let mut crc_reader = CrcReader::new(source);
let mut data = Vec::with_capacity(uncompressed_size_approx.unwrap_or(0) as usize);
let uncompressed_size = match compression_type {
CompressionType::Deflate => {
let mut encoder = DeflateEncoder::new(&mut crc_reader, compression_level.into());
encoder.read_to_end(&mut data)?;
encoder.total_in() as usize
}
CompressionType::Stored => crc_reader.read_to_end(&mut data)?,
};
debug_assert!(uncompressed_size <= u32::MAX as usize);
let uncompressed_size = uncompressed_size as u32;
data.shrink_to_fit();
let crc = crc_reader.crc().sum();
Ok(FileDigest {
data,
uncompressed_size,
crc,
})
}
pub fn into_file(self) -> std::io::Result<ZipFile> {
match self.data_origin {
ZipJobOrigin::Directory => Ok(ZipFile::directory(
self.archive_path,
self.extra_fields,
self.external_attributes,
self.file_comment,
)),
ZipJobOrigin::Filesystem { path } => {
let file = File::open(path).unwrap();
let file_metadata = file.metadata().unwrap();
let uncompressed_size_approx = file_metadata.len();
debug_assert!(uncompressed_size_approx <= u32::MAX.into());
let uncompressed_size_approx = uncompressed_size_approx as u32;
let external_file_attributes = attributes_from_fs(&file_metadata);
let mut extra_fields = ExtraFields::new_from_fs(&file_metadata);
extra_fields.extend(self.extra_fields);
let FileDigest {
data,
uncompressed_size,
crc,
} = Self::compress_file(
file,
Some(uncompressed_size_approx),
self.compression_type,
self.compression_level,
)?;
Ok(ZipFile {
header: ZipFileHeader {
compression_type: CompressionType::Deflate,
crc,
uncompressed_size,
filename: self.archive_path,
external_file_attributes: (external_file_attributes as u32) << 16,
extra_fields,
file_comment: self.file_comment,
},
data,
})
}
ZipJobOrigin::RawData(data) => {
let uncompressed_size_approx = data.len();
debug_assert!(uncompressed_size_approx <= u32::MAX as usize);
let uncompressed_size_approx = uncompressed_size_approx as u32;
let FileDigest {
data,
uncompressed_size,
crc,
} = Self::compress_file(
data.as_ref(),
Some(uncompressed_size_approx),
self.compression_type,
self.compression_level,
)?;
Ok(ZipFile {
header: ZipFileHeader {
compression_type: CompressionType::Deflate,
crc,
uncompressed_size,
filename: self.archive_path,
external_file_attributes: (self.external_attributes as u32) << 16,
extra_fields: self.extra_fields,
file_comment: self.file_comment,
},
data,
})
}
ZipJobOrigin::Reader(reader) => {
let FileDigest {
data,
uncompressed_size,
crc,
} = Self::compress_file(
reader,
None,
self.compression_type,
self.compression_level,
)?;
Ok(ZipFile {
header: ZipFileHeader {
compression_type: CompressionType::Deflate,
crc,
uncompressed_size,
filename: self.archive_path,
external_file_attributes: (self.external_attributes as u32) << 16,
extra_fields: self.extra_fields,
file_comment: self.file_comment,
},
data,
})
}
}
}
}

View File

@@ -0,0 +1,17 @@
pub mod data;
pub mod extra_field;
pub mod file;
pub mod job;
use std::io::Seek;
#[inline]
pub fn stream_position_u32<W: Seek>(buf: &mut W) -> std::io::Result<u32> {
let offset = buf.stream_position()?;
debug_assert!(offset <= u32::MAX.into());
Ok(offset as u32)
}
#[inline]
pub fn files_amount_u16<T>(files: &[T]) -> u16 {
let amount = files.len();
debug_assert!(amount <= u16::MAX as usize);
amount as u16
}

View File

@@ -0,0 +1,142 @@
// Copyright 2023 Google LLC
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::cmp::min;
/// A struct which can issue periodic updates indicating progress towards
/// an external total, based on updates towards an internal goal.
pub struct ProgressUpdater<F: Fn(u64)> {
callback: F,
internal_progress: u64,
per_update_internal: u64,
update_external_amount: u64,
external_updates_sent: u64,
remainder_external: u64,
internal_total: u64,
}
impl<F: Fn(u64)> ProgressUpdater<F> {
/// Create a new progress updater, with a callback to be called periodically.
pub fn new(callback: F, external_total: u64, internal_total: u64, per_update_internal: u64) -> Self {
let per_update_internal = min(internal_total, per_update_internal);
let total_updates_expected = if per_update_internal == 0 { 0 } else { internal_total / per_update_internal };
let (update_external_amount, remainder_external) = if total_updates_expected == 0 {
(0, external_total)
} else {
(external_total / total_updates_expected, external_total % total_updates_expected)
};
Self {
callback,
internal_progress: 0u64,
per_update_internal,
update_external_amount,
external_updates_sent: 0u64,
remainder_external,
internal_total,
}
}
/// Indicate some progress towards the internal goal. May call back the
/// external callback function to show some progress towards the external
/// goal.
pub fn progress(&mut self, amount_internal: u64) {
self.internal_progress += amount_internal;
self.send_due_updates();
}
fn send_due_updates(&mut self) {
let updates_due = if self.per_update_internal == 0 { 0 } else { self.internal_progress / self.per_update_internal };
while updates_due > self.external_updates_sent {
(self.callback)(self.update_external_amount);
self.external_updates_sent += 1;
}
}
/// Indicate completion of the task. Fully update the callback towards the
/// external state.
pub fn finish(&mut self) {
self.internal_progress = self.internal_total;
self.send_due_updates();
if self.remainder_external > 0 {
(self.callback)(self.remainder_external);
}
}
}
#[test]
fn test_progress_updater() {
let amount_received = std::rc::Rc::new(std::cell::RefCell::new(0u64));
let mut progresser = ProgressUpdater::new(
|progress| {
*(amount_received.borrow_mut()) += progress;
},
100,
1000,
100,
);
assert_eq!(*amount_received.borrow(), 0);
progresser.progress(1);
assert_eq!(*amount_received.borrow(), 0);
progresser.progress(100);
assert_eq!(*amount_received.borrow(), 10);
progresser.progress(800);
assert_eq!(*amount_received.borrow(), 90);
progresser.finish();
assert_eq!(*amount_received.borrow(), 100);
}
#[test]
fn test_progress_updater_zero_external() {
let amount_received = std::rc::Rc::new(std::cell::RefCell::new(0u64));
let mut progresser = ProgressUpdater::new(
|progress| {
*(amount_received.borrow_mut()) += progress;
},
0,
1000,
100,
);
assert_eq!(*amount_received.borrow(), 0);
progresser.progress(1);
progresser.progress(800);
progresser.finish();
assert_eq!(*amount_received.borrow(), 0);
}
#[test]
fn test_progress_updater_small_internal() {
let amount_received = std::rc::Rc::new(std::cell::RefCell::new(0u64));
let mut progresser = ProgressUpdater::new(
|progress| {
*(amount_received.borrow_mut()) += progress;
},
100,
5,
100,
);
assert_eq!(*amount_received.borrow(), 0);
progresser.progress(1);
progresser.finish();
assert_eq!(*amount_received.borrow(), 100);
}
#[test]
fn test_progress_updater_zero_internal() {
let amount_received = std::rc::Rc::new(std::cell::RefCell::new(0u64));
let mut progresser = ProgressUpdater::new(
|progress| {
*(amount_received.borrow_mut()) += progress;
},
100,
0,
100,
);
assert_eq!(*amount_received.borrow(), 0);
progresser.finish();
assert_eq!(*amount_received.borrow(), 100);
}

View File

@@ -0,0 +1,327 @@
// Copyright 2022 Google LLC
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::{
borrow::Cow,
fs::File,
io::{ErrorKind, Read, Seek, SeekFrom},
path::{Path, PathBuf},
sync::Mutex,
};
use anyhow::{Context, Result};
use rayon::prelude::*;
use zip::{read::ZipFile, ZipArchive};
use super::{cloneable_seekable_reader::CloneableSeekableReader, progress_updater::ProgressUpdater, UnzipProgressReporter};
pub(crate) fn determine_stream_len<R: Seek>(stream: &mut R) -> std::io::Result<u64> {
let old_pos = stream.stream_position()?;
let len = stream.seek(SeekFrom::End(0))?;
if old_pos != len {
stream.seek(SeekFrom::Start(old_pos))?;
}
Ok(len)
}
/// Options for unzipping.
pub struct UnzipOptions<'a, 'b> {
/// The destination directory.
pub output_directory: Option<PathBuf>,
/// Password if encrypted.
pub password: Option<String>,
/// Whether to run in single-threaded mode.
pub single_threaded: bool,
/// A filename filter, optionally
pub filename_filter: Option<Box<dyn FilenameFilter + Sync + 'a>>,
/// An object to receive notifications of unzip progress.
pub progress_reporter: Box<dyn UnzipProgressReporter + Sync + 'b>,
}
/// An object which can unzip a zip file, in its entirety, from a local
/// file or from a network stream. It tries to do this in parallel wherever
/// possible.
pub struct UnzipEngine {
zipfile: Box<dyn UnzipEngineImpl>,
compressed_length: u64,
directory_creator: DirectoryCreator,
}
/// Code which can determine whether to unzip a given filename.
pub trait FilenameFilter {
/// Returns true if the given filename should be unzipped.
fn should_unzip(&self, filename: &str) -> bool;
}
/// The underlying engine used by the unzipper. This is different
/// for files and URIs.
trait UnzipEngineImpl {
fn unzip(&mut self, options: UnzipOptions, directory_creator: &DirectoryCreator) -> Vec<anyhow::Error>;
// Due to lack of RPITIT we'll return a Vec<String> here
fn list(&self) -> Result<Vec<String>, anyhow::Error>;
}
/// Engine which knows how to unzip a file.
#[derive(Clone)]
struct UnzipFileEngine(ZipArchive<CloneableSeekableReader<File>>);
impl UnzipEngineImpl for UnzipFileEngine {
fn unzip(&mut self, options: UnzipOptions, directory_creator: &DirectoryCreator) -> Vec<anyhow::Error> {
unzip_serial_or_parallel(self.0.len(), options, directory_creator, || self.0.clone(), || {})
}
fn list(&self) -> Result<Vec<String>, anyhow::Error> {
list(&self.0)
}
}
impl UnzipEngine {
/// Create an unzip engine which knows how to unzip a file.
pub fn for_file(mut zipfile: File) -> Result<Self> {
// The following line doesn't actually seem to make any significant
// performance difference.
// let zipfile = BufReader::new(zipfile);
let compressed_length = determine_stream_len(&mut zipfile)?;
let zipfile = CloneableSeekableReader::new(zipfile);
Ok(Self {
zipfile: Box::new(UnzipFileEngine(ZipArchive::new(zipfile)?)),
compressed_length,
directory_creator: DirectoryCreator::default(),
})
}
/// The total compressed length that we expect to retrieve over
/// the network or from the compressed file.
pub fn zip_length(&self) -> u64 {
self.compressed_length
}
// Perform the unzip.
pub fn unzip(mut self, options: UnzipOptions) -> Result<()> {
log::debug!("Starting extract");
options.progress_reporter.total_bytes_expected(self.compressed_length);
let errors = self.zipfile.unzip(options, &self.directory_creator);
// Return the first error code, if any.
errors.into_iter().next().map(Result::Err).unwrap_or(Ok(()))
}
/// List the filenames in the archive
pub fn list(self) -> Result<impl Iterator<Item = String>> {
// In future this might be a more dynamic iterator type.
self.zipfile.list().map(|mut v| {
// Names are returned in a HashMap iteration order so let's
// sort thme to be more reasonable
v.sort();
v.into_iter()
})
}
}
/// Return a list of filenames from the zip. For now this is infallible
/// but provide the option of an error code in case we do something
/// smarter in future.
fn list<'a, T: Read + Seek + 'a>(zip_archive: &ZipArchive<T>) -> Result<Vec<String>> {
Ok(zip_archive.file_names().map(|s| s.to_string()).collect())
}
fn unzip_serial_or_parallel<'a, T: Read + Seek + 'a>(
len: usize,
options: UnzipOptions,
directory_creator: &DirectoryCreator,
get_ziparchive_clone: impl Fn() -> ZipArchive<T> + Sync,
// Call when a file is going to be skipped
file_skip_callback: impl Fn() + Sync + Send + Clone,
) -> Vec<anyhow::Error> {
let progress_reporter: &dyn UnzipProgressReporter = options.progress_reporter.as_ref();
match (options.filename_filter, options.single_threaded) {
(None, true) => (0..len)
.map(|i| {
extract_file_by_index(
&get_ziparchive_clone,
i,
&options.output_directory,
&options.password,
progress_reporter,
directory_creator,
)
})
.filter_map(Result::err)
.collect(),
(None, false) => {
// We use par_bridge here rather than into_par_iter because it turns
// out to better preserve ordering of the IDs in the input range,
// i.e. we're more likely to ask our initial threads to act upon
// file IDs 0, 1, 2, 3, 4, 5 rather than 0, 1000, 2000, 3000 etc.
// On a device which is CPU-bound or IO-bound (rather than network
// bound) that's beneficial because we can start to decompress
// and write data to disk as soon as it arrives from the network.
(0..len)
.par_bridge()
.map(|i| {
extract_file_by_index(
&get_ziparchive_clone,
i,
&options.output_directory,
&options.password,
progress_reporter,
directory_creator,
)
})
.filter_map(Result::err)
.collect()
}
(Some(filename_filter), single_threaded) => {
// If we have a filename filter, an easy thing would be to
// iterate through each file index as above, and check to see if its
// name matches. Unfortunately, that seeks all over the place
// to get the filename from the local header.
// Instead, let's get a list of the filenames we need
// and request them from the zip library directly.
// As we can't predict their order in the file, this may involve
// arbitrary rewinds, so let's do it single-threaded.
if !single_threaded {
log::warn!("Unzipping specific files - assuming --single-threaded since we currently cannot unzip specific files in a multi-threaded mode. If you need that, consider launching multiple copies of ripunzip in parallel.");
}
let mut filenames: Vec<_> = get_ziparchive_clone()
.file_names()
.filter(|name| filename_filter.as_ref().should_unzip(name))
.map(|s| s.to_string())
.collect();
// The filenames returned by the file_names() method above are in
// HashMap iteration order (i.e. random). To avoid creating lots
// of HTTPS streams for files which are nearby each other in the
// zip, we'd ideally extract them in order of file position.
// We have no way of knowing file position (without iterating the
// whole file) so instead let's sort them and hope that files were
// zipped in alphabetical order, or close to it. If we're wrong,
// we'll just end up rewinding, that is, creating extra redundant
// HTTP(S) streams.
filenames.sort();
log::info!("Will unzip {} matching filenames", filenames.len());
file_skip_callback();
// let progress_reporter: &dyn UnzipProgressReporter = options.progress_reporter.as_ref();
filenames
.into_iter()
.map(|name| {
let myzip: &mut zip::ZipArchive<T> = &mut get_ziparchive_clone();
let file: ZipFile<T> = match &options.password {
None => myzip.by_name(&name)?,
Some(string) => myzip.by_name_decrypt(&name, string.as_bytes())?,
};
let r = extract_file(file, &options.output_directory, progress_reporter, directory_creator);
file_skip_callback();
r
})
.filter_map(Result::err)
.collect()
}
}
}
fn extract_file_by_index<'a, T: Read + Seek + 'a>(
get_ziparchive_clone: impl Fn() -> ZipArchive<T> + Sync,
i: usize,
output_directory: &Option<PathBuf>,
password: &Option<String>,
progress_reporter: &dyn UnzipProgressReporter,
directory_creator: &DirectoryCreator,
) -> Result<(), anyhow::Error> {
let myzip: &mut zip::ZipArchive<T> = &mut get_ziparchive_clone();
let file: ZipFile<T> = match password {
None => myzip.by_index(i)?,
Some(string) => myzip.by_index_decrypt(i, string.as_bytes())?,
};
extract_file(file, output_directory, progress_reporter, directory_creator)
}
fn extract_file<R: Read>(
file: ZipFile<R>,
output_directory: &Option<PathBuf>,
progress_reporter: &dyn UnzipProgressReporter,
directory_creator: &DirectoryCreator,
) -> Result<(), anyhow::Error> {
let name = file.enclosed_name().as_deref().map(Path::to_string_lossy).unwrap_or_else(|| Cow::Borrowed("<unprintable>")).to_string();
extract_file_inner(file, output_directory, progress_reporter, directory_creator).with_context(|| format!("Failed to extract {name}"))
}
/// Extracts a file from a zip file.
fn extract_file_inner<R: Read>(
mut file: ZipFile<R>,
output_directory: &Option<PathBuf>,
progress_reporter: &dyn UnzipProgressReporter,
directory_creator: &DirectoryCreator,
) -> Result<()> {
let name = file.enclosed_name().ok_or_else(|| std::io::Error::new(ErrorKind::Unsupported, "path not safe to extract"))?;
let display_name = name.display().to_string();
let out_path = match output_directory {
Some(output_directory) => output_directory.join(name),
None => name,
};
progress_reporter.extraction_starting(&display_name);
log::debug!("Start extract of file at {:x}, length {:x}, name {}", file.data_start(), file.compressed_size(), display_name);
if file.name().ends_with('/') {
directory_creator.create_dir_all(&out_path)?;
} else {
if let Some(parent) = out_path.parent() {
directory_creator.create_dir_all(parent)?;
}
let out_file = File::create(&out_path).with_context(|| "Failed to create file")?;
// Progress bar strategy. The overall progress across the entire zip file must be
// denoted in terms of *compressed* bytes, since at the outset we don't know the uncompressed
// size of each file. Yet, within a given file, we update progress based on the bytes
// of uncompressed data written, once per 1MB, because that's the information that we happen
// to have available. So, calculate how many compressed bytes relate to 1MB of uncompressed
// data, and the remainder.
let uncompressed_size = file.size();
let compressed_size = file.compressed_size();
let mut progress_updater = ProgressUpdater::new(
|external_progress| {
progress_reporter.bytes_extracted(external_progress);
},
compressed_size,
uncompressed_size,
1024 * 1024,
);
let mut out_file = progress_streams::ProgressWriter::new(out_file, |bytes_written| progress_updater.progress(bytes_written as u64));
// Using a BufWriter here doesn't improve performance even on a VM with
// spinny disks.
std::io::copy(&mut file, &mut out_file).with_context(|| "Failed to write directory")?;
progress_updater.finish();
}
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
if let Some(mode) = file.unix_mode() {
std::fs::set_permissions(&out_path, std::fs::Permissions::from_mode(mode)).with_context(|| "Failed to set permissions")?;
}
}
log::debug!("Finished extract of file at {:x}, length {:x}, name {}", file.data_start(), file.compressed_size(), display_name);
progress_reporter.extraction_finished(&display_name);
Ok(())
}
/// An engine used to ensure we don't conflict in creating directories
/// between threads
#[derive(Default)]
struct DirectoryCreator(Mutex<()>);
impl DirectoryCreator {
fn create_dir_all(&self, path: &Path) -> Result<()> {
// Fast path - avoid locking if the directory exists
if path.exists() {
return Ok(());
}
let _exclusivity = self.0.lock().unwrap();
if path.exists() {
return Ok(());
}
std::fs::create_dir_all(path).with_context(|| "Failed to create directory")
}
}

View File

@@ -1,5 +1,6 @@
pub mod runtime_arch;
pub mod cli_host;
pub mod fastzip;
mod dialogs_const;
mod dialogs_common;

View File

@@ -78,7 +78,7 @@ fn check_arch_windows() -> Option<RuntimeArch> {
#[cfg(target_os = "windows")]
type IsWow64Process2Fn = unsafe extern "system" fn(
hProcess: windows::Win32::Foundation::HANDLE,
hprocess: windows::Win32::Foundation::HANDLE,
pprocessmachine: *mut windows::Win32::System::SystemInformation::IMAGE_FILE_MACHINE,
pnativemachine: *mut windows::Win32::System::SystemInformation::IMAGE_FILE_MACHINE,
) -> windows::core::BOOL;

View File

@@ -3,7 +3,7 @@ use windows::Win32::System::LibraryLoader::LOAD_LIBRARY_SEARCH_SYSTEM32;
use windows::Win32::System::LibraryLoader::LOAD_LIBRARY_FLAGS;
#[cfg(target_os = "windows")]
type SetDefaultDllDirectoriesFn = unsafe extern "system" fn(DirectoryFlags: u32) -> BOOL;
type SetDefaultDllDirectoriesFn = unsafe extern "system" fn(directory_flags: u32) -> BOOL;
#[cfg(target_os = "windows")]
unsafe fn set_default_dll_directories(flags: LOAD_LIBRARY_FLAGS) {