refactor/inline fastzip implementation for patching (upstream issues)

Caelan Sayler
2025-05-18 20:19:59 +01:00
committed by Caelan
parent 8d8c1e198f
commit cf68dcb0ea
20 changed files with 2266 additions and 1029 deletions

View File

@@ -67,9 +67,9 @@ zstd.workspace = true
zip.workspace = true
walkdir.workspace = true
sha1_smol.workspace = true
-mtzip.workspace = true
-ripunzip.workspace = true
zerofrom.workspace = true
+rayon.workspace = true
progress-streams.workspace = true
+flate2.workspace = true
[target.'cfg(target_os="linux")'.dependencies]
waitpid-any.workspace = true

View File

@@ -1,12 +1,10 @@
+use crate::shared::fastzip;
use anyhow::{anyhow, bail, Result};
-use mtzip::level::CompressionLevel;
-use ripunzip::{NullProgressReporter, UnzipEngine, UnzipOptions};
use std::{
collections::HashSet,
fs, io,
path::{Path, PathBuf},
};
-use walkdir::WalkDir;
pub fn zstd_patch_single<P1: AsRef<Path>, P2: AsRef<Path>, P3: AsRef<Path>>(old_file: P1, patch_file: P2, output_file: P3) -> Result<()> {
let old_file = old_file.as_ref();
@@ -53,22 +51,6 @@ fn fio_highbit64(v: u64) -> u32 {
return count;
}
-fn zip_extract<P1: AsRef<Path>, P2: AsRef<Path>>(archive_file: P1, target_dir: P2) -> Result<()> {
-    let target_dir = target_dir.as_ref().to_path_buf();
-    let file = fs::File::open(archive_file)?;
-    let engine = UnzipEngine::for_file(file)?;
-    let null_progress = Box::new(NullProgressReporter {});
-    let options = UnzipOptions {
-        filename_filter: None,
-        progress_reporter: null_progress,
-        output_directory: Some(target_dir),
-        password: None,
-        single_threaded: false,
-    };
-    engine.unzip(options)?;
-    Ok(())
-}
pub fn delta<P1: AsRef<Path>, P2: AsRef<Path>, P3: AsRef<Path>>(
old_file: P1,
delta_files: Vec<&PathBuf>,
@@ -98,7 +80,7 @@ pub fn delta<P1: AsRef<Path>, P2: AsRef<Path>, P3: AsRef<Path>>(
info!("Extracting base package for delta patching: {}", temp_dir.to_string_lossy());
let work_dir = temp_dir.join("_work");
fs::create_dir_all(&work_dir)?;
-zip_extract(&old_file, &work_dir)?;
+fastzip::extract_to_directory(&old_file, &work_dir, None)?;
info!("Base package extracted. {} delta packages to apply.", delta_files.len());
@@ -106,9 +88,9 @@ pub fn delta<P1: AsRef<Path>, P2: AsRef<Path>, P3: AsRef<Path>>(
info!("{}: extracting apply delta patch: {}", i, delta_file.to_string_lossy());
let delta_dir = temp_dir.join(format!("delta_{}", i));
fs::create_dir_all(&delta_dir)?;
-zip_extract(delta_file, &delta_dir)?;
+fastzip::extract_to_directory(&delta_file, &delta_dir, None)?;
-let delta_relative_paths = enumerate_files_relative(&delta_dir);
+let delta_relative_paths = fastzip::enumerate_files_relative(&delta_dir);
let mut visited_paths = HashSet::new();
// apply all the zsdiff patches for files which exist in both the delta and the base package
@@ -160,7 +142,7 @@ pub fn delta<P1: AsRef<Path>, P2: AsRef<Path>, P3: AsRef<Path>>(
}
// anything in the work dir which was not visited is an old / deleted file and should be removed
-let workdir_relative_paths = enumerate_files_relative(&work_dir);
+let workdir_relative_paths = fastzip::enumerate_files_relative(&work_dir);
for relative_path in &workdir_relative_paths {
if !visited_paths.contains(relative_path) {
let file_to_delete = work_dir.join(relative_path);
@@ -172,32 +154,12 @@ pub fn delta<P1: AsRef<Path>, P2: AsRef<Path>, P3: AsRef<Path>>(
info!("All delta patches applied. Asembling output package at: {}", output_file.to_string_lossy());
-let mut zipper = mtzip::ZipArchive::new();
-let workdir_relative_paths = enumerate_files_relative(&work_dir);
-for relative_path in &workdir_relative_paths {
-    zipper
-        .add_file_from_fs(work_dir.join(&relative_path), relative_path.to_string_lossy().to_string())
-        .compression_level(CompressionLevel::fast())
-        .done();
-}
-let mut file = fs::File::create(&output_file)?;
-zipper.write(&mut file)?;
+fastzip::compress_directory(&work_dir, &output_file, fastzip::CompressionLevel::fast())?;
info!("Successfully applied {} delta patches in {}s.", delta_files.len(), time.s());
Ok(())
}
-fn enumerate_files_relative<P: AsRef<Path>>(dir: P) -> Vec<PathBuf> {
-    WalkDir::new(&dir)
-        .follow_links(false)
-        .into_iter()
-        .filter_map(|entry| entry.ok())
-        .filter(|entry| entry.file_type().is_file())
-        .map(|entry| entry.path().strip_prefix(&dir).map(|p| p.to_path_buf()))
-        .filter_map(|entry| entry.ok())
-        .collect()
-}
// NOTE: this is some code to do checksum verification, but it is not being used
// by the current implementation because zstd patching already has checksum verification
//
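For reference, the zstd delta patches applied above use the old file as a decompression dictionary (zstd's --patch-from scheme). A minimal, hypothetical sketch of that core step inside `zstd_patch_single`, assuming the `zstd` crate; the real function presumably also raises the decoder's window limit, which is what `fio_highbit64` helps compute:

use std::{fs, io};
use zstd::stream::read::Decoder;

// Hypothetical sketch, not the committed code: reconstruct the new file by
// decoding the patch with the old file's bytes as the decompression dictionary.
fn apply_patch_sketch(old_bytes: &[u8], patch: fs::File, out: &mut fs::File) -> io::Result<()> {
    let mut decoder = Decoder::with_dictionary(io::BufReader::new(patch), old_bytes)?;
    io::copy(&mut decoder, out)?;
    Ok(())
}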

View File

@@ -7,49 +7,31 @@ use std::fs::File;
pub fn uninstall(locator: &VelopackLocator, delete_self: bool) -> Result<()> {
    info!("Command: Uninstall");
    let root_path = locator.get_root_dir();
    fn _uninstall_impl(locator: &VelopackLocator) -> bool {
        let root_path = locator.get_root_dir();
        // the real app could be running at the moment
        let _ = shared::force_stop_package(&root_path);
        let mut finished_with_errors = false;
        // run uninstall hook
        windows::run_hook(&locator, constants::HOOK_CLI_UNINSTALL, 60);
        // remove all shortcuts pointing to the app
        windows::remove_all_shortcuts_for_root_dir(&root_path);
        info!("Removing directory '{}'", root_path.to_string_lossy());
-       let _ = remove_dir_all::remove_dir_all(&root_path);
+       if let Err(e) = shared::retry_io(|| remove_dir_all::remove_dir_but_not_self(&root_path)) {
+           error!("Unable to remove directory, some files may be in use ({}).", e);
+           finished_with_errors = true;
+       }
        if let Err(e) = windows::registry::remove_uninstall_entry(&locator) {
            error!("Unable to remove uninstall registry entry ({}).", e);
            // finished_with_errors = true;
        }
        !finished_with_errors
    }
    // if it returns true, it was a success.
    // if it returns false, it was completed with errors which the user should be notified of.
    let result = _uninstall_impl(&locator);
    let app_title = locator.get_manifest_title();
    if result {
        info!("Finished successfully.");
        shared::dialogs::show_info(format!("{} Uninstall", app_title).as_str(), None, "The application was successfully uninstalled.");
    } else {
        error!("Finished with errors.");
        shared::dialogs::show_uninstall_complete_with_errors_dialog(&app_title, None);
    }
    let dead_path = root_path.join(".dead");
    let _ = File::create(dead_path);

View File

@@ -0,0 +1,168 @@
// Copyright 2022 Google LLC
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::{
io::{Read, Seek, SeekFrom},
sync::{Arc, Mutex},
};
use super::ripunzip::determine_stream_len;
struct Inner<R: Read + Seek> {
/// The underlying Read implementation.
r: R,
/// The position of r.
pos: u64,
/// The length of r, lazily loaded.
len: Option<u64>,
}
impl<R: Read + Seek> Inner<R> {
fn new(r: R) -> Self {
Self { r, pos: 0, len: None }
}
/// Get the length of the data stream. This is assumed to be constant.
fn len(&mut self) -> std::io::Result<u64> {
// Return cached size
if let Some(len) = self.len {
return Ok(len);
}
let len = determine_stream_len(&mut self.r)?;
self.len = Some(len);
Ok(len)
}
/// Read into the given buffer, starting at the given offset in the data stream.
fn read_at(&mut self, offset: u64, buf: &mut [u8]) -> std::io::Result<usize> {
if offset != self.pos {
self.r.seek(SeekFrom::Start(offset))?;
}
let read_result = self.r.read(buf);
if let Ok(bytes_read) = read_result {
// TODO, once stabilised, use checked_add_signed
self.pos += bytes_read as u64;
}
read_result
}
}
/// A [`Read`] which refers to its underlying stream by reference count,
/// and thus can be cloned cheaply. It supports seeking; each cloned instance
/// maintains its own pointer into the file, and the underlying instance
/// is seeked prior to each read.
pub(crate) struct CloneableSeekableReader<R: Read + Seek> {
/// The wrapper around the Read implementation, shared between threads.
inner: Arc<Mutex<Inner<R>>>,
/// The position of _this_ reader.
pos: u64,
}
impl<R: Read + Seek> Clone for CloneableSeekableReader<R> {
fn clone(&self) -> Self {
Self { inner: self.inner.clone(), pos: self.pos }
}
}
impl<R: Read + Seek> CloneableSeekableReader<R> {
/// Constructor. Takes ownership of the underlying `Read`.
/// You should pass in only streams whose total length you expect
/// to be fixed and unchanging. Odd behavior may occur if the length
/// of the stream changes; any subsequent seeks will not take account
/// of the changed stream length.
pub(crate) fn new(r: R) -> Self {
Self { inner: Arc::new(Mutex::new(Inner::new(r))), pos: 0u64 }
}
}
impl<R: Read + Seek> Read for CloneableSeekableReader<R> {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
let mut inner = self.inner.lock().unwrap();
let read_result = inner.read_at(self.pos, buf);
if let Ok(bytes_read) = read_result {
self.pos = self
.pos
.checked_add(bytes_read as u64)
.ok_or(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Read too far forward"))?;
}
read_result
}
}
impl<R: Read + Seek> Seek for CloneableSeekableReader<R> {
fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
let new_pos = match pos {
SeekFrom::Start(pos) => pos,
SeekFrom::End(offset_from_end) => {
let file_len = self.inner.lock().unwrap().len()?;
if -offset_from_end as u64 > file_len {
return Err(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Seek too far backwards"));
}
file_len
.checked_add_signed(offset_from_end)
.ok_or(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Seek too far backward from end"))?
}
SeekFrom::Current(offset_from_pos) => self
.pos
.checked_add_signed(offset_from_pos)
.ok_or(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Seek too far forward from current pos"))?,
};
self.pos = new_pos;
Ok(new_pos)
}
}
#[cfg(test)]
mod test {
use super::CloneableSeekableReader;
use std::io::{Cursor, Read, Seek, SeekFrom};
// use test_log::test;
#[test]
fn test_cloneable_seekable_reader() -> std::io::Result<()> {
let buf: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
let buf = Cursor::new(buf);
let mut reader = CloneableSeekableReader::new(buf);
let mut out = vec![0; 2];
reader.read_exact(&mut out)?;
assert_eq!(&out, &[0, 1]);
reader.rewind()?;
reader.read_exact(&mut out)?;
assert_eq!(&out, &[0, 1]);
reader.stream_position()?;
reader.read_exact(&mut out)?;
assert_eq!(&out, &[2, 3]);
reader.seek(SeekFrom::End(-2))?;
reader.read_exact(&mut out)?;
assert_eq!(&out, &[8, 9]);
assert!(reader.read_exact(&mut out).is_err());
Ok(())
}
#[test]
fn test_cloned_independent_positions() -> std::io::Result<()> {
let buf: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
let buf = Cursor::new(buf);
let mut r1 = CloneableSeekableReader::new(buf);
let mut r2 = r1.clone();
let mut out = vec![0; 2];
r1.read_exact(&mut out)?;
assert_eq!(&out, &[0, 1]);
r2.read_exact(&mut out)?;
assert_eq!(&out, &[0, 1]);
r1.read_exact(&mut out)?;
assert_eq!(&out, &[2, 3]);
r2.seek(SeekFrom::End(-2))?;
r2.read_exact(&mut out)?;
assert_eq!(&out, &[8, 9]);
r1.read_exact(&mut out)?;
assert_eq!(&out, &[4, 5]);
Ok(())
}
}
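The tests above exercise the key property: clones share one underlying stream but keep independent positions. A short sketch (not part of the commit) of how that is typically used:

use std::fs::File;
use std::io::{Read, Seek, SeekFrom};

// Sketch: two handles over one file; each clone re-seeks the shared inner
// reader before reading, so their positions never interfere.
fn read_two_ranges(file: File) -> std::io::Result<()> {
    let mut r1 = CloneableSeekableReader::new(file);
    let mut r2 = r1.clone();
    r2.seek(SeekFrom::Start(1024))?; // does not move r1
    let mut a = [0u8; 16];
    let mut b = [0u8; 16];
    r1.read_exact(&mut a)?; // bytes 0..16
    r2.read_exact(&mut b)?; // bytes 1024..1040 (assumes the file is big enough)
    Ok(())
}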

View File

@@ -0,0 +1,86 @@
#![allow(dead_code)]
mod cloneable_seekable_reader;
mod mtzip;
mod progress_updater;
mod ripunzip;
use anyhow::Result;
pub use mtzip::level::CompressionLevel;
use ripunzip::{UnzipEngine, UnzipOptions};
use std::{
fs::File,
path::{Path, PathBuf},
};
use walkdir::WalkDir;
/// A trait of types which wish to hear progress updates on the unzip.
pub trait UnzipProgressReporter: Sync {
/// Extraction has begun on a file.
fn extraction_starting(&self, _display_name: &str) {}
/// Extraction has finished on a file.
fn extraction_finished(&self, _display_name: &str) {}
/// The total number of compressed bytes we expect to extract.
fn total_bytes_expected(&self, _expected: u64) {}
/// Some bytes of a file have been decompressed. This is probably
/// the best way to display an overall progress bar. This should eventually
/// add up to the number you're given using `total_bytes_expected`.
/// The 'count' parameter is _not_ a running total - you must add up
/// each call to this function into the running total.
/// It's a bit unfortunate that we give compressed bytes rather than
/// uncompressed bytes, but currently we can't calculate uncompressed
/// bytes without downloading the whole zip file first, which rather
/// defeats the point.
fn bytes_extracted(&self, _count: u64) {}
}
/// A progress reporter which does nothing.
struct NullProgressReporter;
impl UnzipProgressReporter for NullProgressReporter {}
pub fn extract_to_directory<'b, P1: AsRef<Path>, P2: AsRef<Path>>(
archive_file: P1,
target_dir: P2,
progress_reporter: Option<Box<dyn UnzipProgressReporter + Sync + 'b>>,
) -> Result<()> {
let target_dir = target_dir.as_ref().to_path_buf();
let file = File::open(archive_file)?;
let engine = UnzipEngine::for_file(file)?;
let null_progress = Box::new(NullProgressReporter {});
let options = UnzipOptions {
filename_filter: None,
progress_reporter: progress_reporter.unwrap_or(null_progress),
output_directory: Some(target_dir),
password: None,
single_threaded: false,
};
engine.unzip(options)?;
Ok(())
}
pub fn compress_directory<'b, P1: AsRef<Path>, P2: AsRef<Path>>(target_dir: P1, output_file: P2, level: CompressionLevel) -> Result<()> {
let target_dir = target_dir.as_ref().to_path_buf();
let mut zipper = mtzip::ZipArchive::new();
let workdir_relative_paths = enumerate_files_relative(&target_dir);
for relative_path in &workdir_relative_paths {
zipper
.add_file_from_fs(target_dir.join(&relative_path), relative_path.to_string_lossy().to_string())
.compression_level(level)
.done();
}
let mut file = File::create(&output_file)?;
zipper.write_with_rayon(&mut file)?;
Ok(())
}
pub fn enumerate_files_relative<P: AsRef<Path>>(dir: P) -> Vec<PathBuf> {
WalkDir::new(&dir)
.follow_links(false)
.into_iter()
.filter_map(|entry| entry.ok())
.filter(|entry| entry.file_type().is_file())
.map(|entry| entry.path().strip_prefix(&dir).map(|p| p.to_path_buf()))
.filter_map(|entry| entry.ok())
.collect()
}
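A minimal round-trip sketch of the new public API (paths are hypothetical):

// Sketch: compress a staging directory with the fast deflate level, then
// extract it again with the multithreaded unzip engine and no progress reporter.
fn roundtrip_sketch() -> anyhow::Result<()> {
    compress_directory("staging", "bundle.zip", CompressionLevel::fast())?;
    extract_to_directory("bundle.zip", "unpacked", None)?;
    Ok(())
}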

View File

@@ -0,0 +1,126 @@
//! Compression level
use core::fmt::Display;
use std::error::Error;
use flate2::Compression;
/// Compression level that should be used when compressing a file or data.
///
/// Current compression providers support only levels from 0 to 9, so only those levels are
/// supported.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct CompressionLevel(u8);
impl CompressionLevel {
/// Construct a new value of a compression level setting.
///
/// The integer value must be less than or equal to 9, otherwise `None` is returned
#[inline]
pub const fn new(level: u8) -> Option<Self> {
if level <= 9 { Some(Self(level)) } else { None }
}
/// Construct a new value of a compression level setting without checking the value.
///
/// # Safety
///
/// The value must be a valid supported compression level
#[inline]
pub const unsafe fn new_unchecked(level: u8) -> Self {
Self(level)
}
/// No compression
#[inline]
pub const fn none() -> Self {
Self(0)
}
/// Fastest compression
#[inline]
pub const fn fast() -> Self {
Self(1)
}
/// Balanced level with moderate compression and speed. The raw value is 6.
#[inline]
pub const fn balanced() -> Self {
Self(6)
}
/// Best compression ratio, comes at a worse performance
#[inline]
pub const fn best() -> Self {
Self(9)
}
/// Get the compression level as an integer
#[inline]
pub const fn get(self) -> u8 {
self.0
}
}
impl Default for CompressionLevel {
/// Equivalent to [`Self::balanced`]
fn default() -> Self {
Self::balanced()
}
}
/// The number for compression level was invalid
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct InvalidCompressionLevel(u32);
impl InvalidCompressionLevel {
/// The value which was supplied
pub fn value(self) -> u32 {
self.0
}
}
impl Display for InvalidCompressionLevel {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Invalid compression level number: {}", self.0)
}
}
impl Error for InvalidCompressionLevel {}
impl From<CompressionLevel> for Compression {
#[inline]
fn from(value: CompressionLevel) -> Self {
Compression::new(value.0.into())
}
}
impl TryFrom<Compression> for CompressionLevel {
type Error = InvalidCompressionLevel;
fn try_from(value: Compression) -> Result<Self, Self::Error> {
let level = value.level();
Self::new(
level
.try_into()
.map_err(|_| InvalidCompressionLevel(level))?,
)
.ok_or(InvalidCompressionLevel(level))
}
}
impl From<CompressionLevel> for u8 {
#[inline]
fn from(value: CompressionLevel) -> Self {
value.0
}
}
impl TryFrom<u8> for CompressionLevel {
type Error = InvalidCompressionLevel;
#[inline]
fn try_from(value: u8) -> Result<Self, Self::Error> {
Self::new(value).ok_or(InvalidCompressionLevel(value.into()))
}
}
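A quick sketch (not in the commit) of the level API and its flate2 interop:

// Sketch: levels outside 0..=9 are rejected; valid levels convert losslessly
// into flate2's Compression for use by the deflate encoder.
fn level_demo() {
    use flate2::Compression;
    assert_eq!(CompressionLevel::new(9), Some(CompressionLevel::best()));
    assert_eq!(CompressionLevel::new(10), None); // out of range
    let level = CompressionLevel::try_from(6u8).expect("6 is in range");
    let c: Compression = level.into();
    assert_eq!(c.level(), 6);
}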

View File

@@ -0,0 +1,433 @@
//! # mtzip
//!
//! MTZIP (Stands for Multi-Threaded ZIP) is a library for making zip archives while utilising all
//! available performance available with multithreading. The amount of threads can be limited by
//! the user or detected automatically.
//!
//! Example usage:
//!
//! ```ignore
//! # use std::path::Path;
//! # use std::fs::File;
//! use mtzip::ZipArchive;
//!
//! // Creating the zipper that holds data and handles compression
//! let mut zipper = ZipArchive::new();
//!
//! // Adding a file from filesystem
//! zipper.add_file_from_fs(
//! Path::new("input/test_text_file.txt"),
//! "test_text_file.txt".to_owned(),
//! );
//!
//! // Adding a file with data from a memory location
//! zipper.add_file_from_memory(b"Hello, world!", "hello_world.txt".to_owned());
//!
//! // Adding a directory and a file to it
//! zipper.add_directory("test_dir".to_owned());
//! zipper.add_file_from_fs(
//! Path::new("input/file_that_goes_to_a_dir.txt"),
//! "test_dir/file_that_goes_to_a_dir.txt".to_owned(),
//! );
//!
//! // Writing to a file
//! // First, open the file
//! let mut file = File::create("output.zip").unwrap();
//! // Then, write to it
//! zipper.write(&mut file); // Amount of threads is chosen automatically
//! ```
use std::{
borrow::Cow,
io::{Read, Seek, Write},
num::NonZeroUsize,
panic::{RefUnwindSafe, UnwindSafe},
path::Path,
sync::{mpsc, Mutex},
};
use level::CompressionLevel;
use rayon::prelude::*;
use zip_archive_parts::{
data::ZipData,
extra_field::{ExtraField, ExtraFields},
file::ZipFile,
job::{ZipJob, ZipJobOrigin},
};
pub mod level;
mod platform;
mod zip_archive_parts;
// TODO: tests, maybe examples
/// Compression type for the file. Directories always use [`Stored`](CompressionType::Stored).
/// Default is [`Deflate`](CompressionType::Deflate).
#[repr(u16)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum CompressionType {
/// No compression at all, the data is stored as-is.
///
/// This is used for directories because they have no data (no payload)
Stored = 0,
#[default]
/// Deflate compression, the most common in ZIP files.
Deflate = 8,
}
/// Builder used to optionally add additional attributes to a file or directory.
/// The default compression type is [`CompressionType::Deflate`] and default compression level is
/// [`CompressionLevel::best`]
#[must_use]
#[derive(Debug)]
pub struct ZipFileBuilder<'a, 'b> {
archive_handle: &'a mut ZipArchive<'b>,
job: ZipJob<'b>,
}
impl<'a, 'b> ZipFileBuilder<'a, 'b> {
/// Call this when you're done configuring the file entry and it will be added to the job list,
/// or directly into the resulting dataset if it's a directory. Always needs to be called.
pub fn done(self) {
let Self { archive_handle, job } = self;
match &job.data_origin {
ZipJobOrigin::Directory => {
let file = job.into_file().expect("No failing code path");
archive_handle.push_file(file);
}
_ => archive_handle.push_job(job),
}
}
/// Read metadata from the filesystem and add its properties to this file entry. It sets
/// external attributes (as with [`Self::external_attributes`]) and adds extra fields generated
/// with [`ExtraFields::new_from_fs`]
pub fn metadata_from_fs(self, fs_path: &Path) -> std::io::Result<Self> {
let metadata = std::fs::metadata(fs_path)?;
let external_attributes = platform::attributes_from_fs(&metadata);
let extra_fields = ExtraFields::new_from_fs(&metadata);
Ok(self.external_attributes(external_attributes).extra_fields(extra_fields))
}
/// Add a file comment.
pub fn file_comment(mut self, comment: String) -> Self {
self.job.file_comment = Some(comment);
self
}
/// Add additional [`ExtraField`].
pub fn extra_field(mut self, extra_field: ExtraField) -> Self {
self.job.extra_fields.values.push(extra_field);
self
}
/// Add additional [`ExtraField`]s.
pub fn extra_fields(mut self, extra_fields: impl IntoIterator<Item = ExtraField>) -> Self {
self.job.extra_fields.extend(extra_fields);
self
}
/// Set compression type. Ignored for directories, as they use no compression.
///
/// Default is [`CompressionType::Deflate`].
pub fn compression_type(mut self, compression_type: CompressionType) -> Self {
self.job.compression_type = compression_type;
self
}
/// Set compression level. Ignored for directories, as they use no compression.
///
/// Default is [`CompressionLevel::best`]
pub fn compression_level(mut self, compression_level: CompressionLevel) -> Self {
self.job.compression_level = compression_level;
self
}
/// Set external attributes. The format depends on the filesystem and is mostly a legacy
/// mechanism; a default value is usually used when the source is not a filesystem. When a file
/// is added from the filesystem, these attributes are read from it, and the ones set with
/// this method are ignored.
pub fn external_attributes(mut self, external_attributes: u16) -> Self {
self.job.external_attributes = external_attributes;
self
}
/// Set external file attributes from a filesystem item. Use of this method is discouraged in
/// favor of [`Self::metadata_from_fs`], which also sets extra fields containing modern
/// filesystem attributes instead of the old 16-bit system-dependent format.
pub fn external_attributes_from_fs(mut self, fs_path: &Path) -> std::io::Result<Self> {
let metadata = std::fs::metadata(fs_path)?;
self.job.external_attributes = platform::attributes_from_fs(&metadata);
Ok(self)
}
#[inline]
fn new(archive: &'a mut ZipArchive<'b>, filename: String, origin: ZipJobOrigin<'b>) -> Self {
Self {
archive_handle: archive,
job: ZipJob {
data_origin: origin,
archive_path: filename,
extra_fields: ExtraFields::default(),
file_comment: None,
external_attributes: platform::default_file_attrs(),
compression_type: CompressionType::Deflate,
compression_level: CompressionLevel::best(),
},
}
}
#[inline]
fn new_dir(archive: &'a mut ZipArchive<'b>, filename: String) -> Self {
Self {
archive_handle: archive,
job: ZipJob {
data_origin: ZipJobOrigin::Directory,
archive_path: filename,
extra_fields: ExtraFields::default(),
file_comment: None,
external_attributes: platform::default_dir_attrs(),
compression_type: CompressionType::Deflate,
compression_level: CompressionLevel::best(),
},
}
}
}
/// Structure that holds the current state of ZIP archive creation.
///
/// # Lifetimes
///
/// Because some of the methods allow supplying borrowed data, the lifetimes are used to indicate
/// that [`Self`](ZipArchive) borrows them. If you only provide owned data, such as
/// [`Vec<u8>`](Vec) or [`PathBuf`](std::path::PathBuf), you won't have to worry about lifetimes
/// and can simply use `'static`, if you ever need to specify them in your code.
///
/// The lifetime `'a` is for the borrowed data passed in
/// [`add_file_from_memory`](Self::add_file_from_memory),
/// [`add_file_from_fs`](Self::add_file_from_fs) and
/// [`add_file_from_reader`](Self::add_file_from_reader)
#[derive(Debug, Default)]
pub struct ZipArchive<'a> {
jobs_queue: Vec<ZipJob<'a>>,
data: ZipData,
}
impl<'a> ZipArchive<'a> {
fn push_job(&mut self, job: ZipJob<'a>) {
self.jobs_queue.push(job);
}
fn push_file(&mut self, file: ZipFile) {
self.data.files.push(file);
}
/// Create an empty [`ZipArchive`]
#[inline]
pub fn new() -> Self {
Self::default()
}
/// Add file from filesystem.
///
/// Opens the file and reads data from it when [`compress`](Self::compress) is called.
///
/// ```
/// # use mtzip::ZipArchive;
/// # use std::path::Path;
/// let mut zipper = ZipArchive::new();
/// zipper
/// .add_file_from_fs(Path::new("input.txt"), "input.txt".to_owned())
/// .done();
/// ```
#[inline]
pub fn add_file_from_fs(&mut self, fs_path: impl Into<Cow<'a, Path>>, archived_path: String) -> ZipFileBuilder<'_, 'a> {
ZipFileBuilder::new(self, archived_path, ZipJobOrigin::Filesystem { path: fs_path.into() })
}
/// Add file with data from memory.
///
/// The data can be either borrowed or owned by the [`ZipArchive`] struct to avoid lifetime
/// hell.
///
/// ```
/// # use mtzip::ZipArchive;
/// # use std::path::Path;
/// let mut zipper = ZipArchive::new();
/// let data: &[u8] = "Hello, world!".as_ref();
/// zipper
/// .add_file_from_memory(data, "hello_world.txt".to_owned())
/// .done();
/// ```
#[inline]
pub fn add_file_from_memory(&mut self, data: impl Into<Cow<'a, [u8]>>, archived_path: String) -> ZipFileBuilder<'_, 'a> {
ZipFileBuilder::new(self, archived_path, ZipJobOrigin::RawData(data.into()))
}
/// Add a file with data from a reader.
///
/// This method takes any type implementing [`Read`] and allows it to have borrowed data (`'a`)
///
/// ```
/// # use mtzip::ZipArchive;
/// # use std::path::Path;
/// let mut zipper = ZipArchive::new();
/// let data_input = std::io::stdin();
/// zipper
/// .add_file_from_reader(data_input, "stdin_file.txt".to_owned())
/// .done();
/// ```
#[inline]
pub fn add_file_from_reader<R: Read + Send + Sync + UnwindSafe + RefUnwindSafe + 'a>(
&mut self,
reader: R,
archived_path: String,
) -> ZipFileBuilder<'_, 'a> {
ZipFileBuilder::new(self, archived_path, ZipJobOrigin::Reader(Box::new(reader)))
}
/// Add a directory entry.
///
/// All directories in the tree should be added. This method does not associate any filesystem
/// properties to the entry.
///
/// ```
/// # use mtzip::ZipArchive;
/// # use std::path::Path;
/// let mut zipper = ZipArchive::new();
/// zipper.add_directory("test_dir/".to_owned()).done();
/// ```
#[inline]
pub fn add_directory(&mut self, archived_path: String) -> ZipFileBuilder<'_, 'a> {
ZipFileBuilder::new_dir(self, archived_path)
}
/// Compress contents. Will be done automatically on the [`write`](Self::write) call if files
/// were added between the last compression and the [`write`](Self::write) call. Automatically
/// chooses the number of threads to use based on how many are available.
#[inline]
pub fn compress(&mut self) {
self.compress_with_threads(Self::get_threads());
}
/// Compress contents. Will be done automatically on the
/// [`write_with_threads`](Self::write_with_threads) call if files were added between the last
/// compression and [`write`](Self::write). Allows specifying the number of threads that will
/// be used.
///
/// Example of getting amount of threads that this library uses in
/// [`compress`](Self::compress):
///
/// ```
/// # use std::num::NonZeroUsize;
/// # use mtzip::ZipArchive;
/// # let mut zipper = ZipArchive::new();
/// let threads = std::thread::available_parallelism()
/// .map(NonZeroUsize::get)
/// .unwrap_or(1);
///
/// zipper.compress_with_threads(threads);
/// ```
#[inline]
pub fn compress_with_threads(&mut self, threads: usize) {
if !self.jobs_queue.is_empty() {
self.compress_with_consumer(threads, |zip_data, rx| zip_data.files.extend(rx))
}
}
/// Write compressed data to a writer (usually a file). Executes [`compress`](Self::compress)
/// if files were added between the last [`compress`](Self::compress) call and this call.
/// Automatically chooses the number of threads based on available CPU parallelism.
#[inline]
pub fn write<W: Write + Seek>(&mut self, writer: &mut W) -> std::io::Result<()> {
self.write_with_threads(writer, Self::get_threads())
}
/// Write compressed data to a writer (usually a file). Executes
/// [`compress_with_threads`](Self::compress_with_threads) if files were added between the last
/// [`compress`](Self::compress) call and this call. Allows specifying the number of threads
/// that will be used.
///
/// Example of getting amount of threads that this library uses in [`write`](Self::write):
///
/// ```
/// # use std::num::NonZeroUsize;
/// # use mtzip::ZipArchive;
/// # let mut zipper = ZipArchive::new();
/// let threads = std::thread::available_parallelism()
/// .map(NonZeroUsize::get)
/// .unwrap_or(1);
///
/// zipper.compress_with_threads(threads);
/// ```
#[inline]
pub fn write_with_threads<W: Write + Seek>(&mut self, writer: &mut W, threads: usize) -> std::io::Result<()> {
if !self.jobs_queue.is_empty() {
self.compress_with_consumer(threads, |zip_data, rx| zip_data.write(writer, rx))
} else {
self.data.write(writer, std::iter::empty())
}
}
/// Starts the compression jobs and passes the mpsc receiver to the consumer function, which
/// might either store the data in [`ZipData`] - [`Self::compress_with_threads`]; or write the
/// zip data as soon as it's available - [`Self::write_with_threads`]
fn compress_with_consumer<F, T>(&mut self, threads: usize, consumer: F) -> T
where
F: FnOnce(&mut ZipData, mpsc::Receiver<ZipFile>) -> T,
{
let jobs_drain = Mutex::new(self.jobs_queue.drain(..));
let jobs_drain_ref = &jobs_drain;
std::thread::scope(|s| {
let rx = {
let (tx, rx) = mpsc::channel();
for _ in 0..threads {
let thread_tx = tx.clone();
s.spawn(move || loop {
let next_job = jobs_drain_ref.lock().unwrap().next_back();
if let Some(job) = next_job {
thread_tx.send(job.into_file().unwrap()).unwrap();
} else {
break;
}
});
}
rx
};
consumer(&mut self.data, rx)
})
}
fn get_threads() -> usize {
std::thread::available_parallelism().map(NonZeroUsize::get).unwrap_or(1)
}
}
impl ZipArchive<'_> {
/// Compress contents and use rayon for parallelism.
///
/// Uses whatever thread pool this function is executed in.
///
/// If you want to limit the amount of threads to be used, use
/// [`rayon::ThreadPoolBuilder::num_threads`] and either set it as a global pool, or
/// [`rayon::ThreadPool::install`] the call to this method in it.
pub fn compress_with_rayon(&mut self) {
if !self.jobs_queue.is_empty() {
let files_par_iter = self.jobs_queue.par_drain(..).map(|job| job.into_file().unwrap());
self.data.files.par_extend(files_par_iter)
}
}
/// Write the contents to a writer.
///
/// This method uses the same thread logic as [`Self::compress_with_rayon`]; refer to its
/// documentation for details on how to control the parallelism and thread allocation.
pub fn write_with_rayon<W: Write + Seek + Send>(&mut self, writer: &mut W) -> std::io::Result<()> {
if !self.jobs_queue.is_empty() {
let files_par_iter = self.jobs_queue.par_drain(..).map(|job| job.into_file().unwrap());
self.data.write_rayon(writer, files_par_iter)
} else {
self.data.write_rayon(writer, rayon::iter::empty())
}
}
}
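As the doc comments above note, the rayon variants inherit whatever pool they run in. A sketch (not in the commit; output path hypothetical) of bounding the parallelism:

// Sketch: run write_with_rayon inside a 2-thread rayon pool so compression
// never uses more than two worker threads.
fn write_bounded(zipper: &mut ZipArchive<'_>) -> anyhow::Result<()> {
    let pool = rayon::ThreadPoolBuilder::new().num_threads(2).build()?;
    let mut file = std::fs::File::create("output.zip")?;
    pool.install(|| zipper.write_with_rayon(&mut file))?;
    Ok(())
}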

View File

@@ -0,0 +1,96 @@
//! Platform-specific stuff
use std::fs::Metadata;
#[cfg(target_os = "windows")]
/// OS - Windows, id 11 per Info-Zip spec
/// Specification version 6.2
pub(crate) const VERSION_MADE_BY: u16 = (11 << 8) + 62;
#[cfg(target_os = "macos")]
/// OS - MacOS darwin, id 19
/// Specification version 6.2
pub(crate) const VERSION_MADE_BY: u16 = (19 << 8) + 62;
#[cfg(not(any(target_os = "windows", target_os = "macos")))]
// Fallback
/// OS - Unix assumed, id 3
/// Specification version 6.2
pub(crate) const VERSION_MADE_BY: u16 = (3 << 8) + 62;
#[allow(dead_code)]
pub(crate) const DEFAULT_UNIX_FILE_ATTRS: u16 = 0o100644;
#[allow(dead_code)]
pub(crate) const DEFAULT_UNIX_DIR_ATTRS: u16 = 0o040755;
#[cfg(target_os = "windows")]
pub(crate) const DEFAULT_WINDOWS_FILE_ATTRS: u16 = 128;
#[cfg(target_os = "windows")]
pub(crate) const DEFAULT_WINDOWS_DIR_ATTRS: u16 = 16;
#[inline]
#[allow(dead_code)]
const fn convert_attrs(attrs: u32) -> u16 {
attrs as u16
}
pub(crate) fn attributes_from_fs(metadata: &Metadata) -> u16 {
#[cfg(target_os = "windows")]
{
use std::os::windows::fs::MetadataExt;
return convert_attrs(metadata.file_attributes());
}
#[cfg(target_os = "linux")]
{
use std::os::linux::fs::MetadataExt;
return convert_attrs(metadata.st_mode());
}
#[cfg(target_os = "macos")]
{
use std::os::darwin::fs::MetadataExt;
return convert_attrs(metadata.st_mode());
}
#[cfg(all(unix, not(target_os = "linux"), not(target_os = "macos")))]
{
use std::os::unix::fs::PermissionsExt;
return convert_attrs(metadata.permissions().mode());
}
#[cfg(not(any(target_os = "windows", target_os = "linux", target_os = "macos", unix)))]
{
if metadata.is_dir() {
return DEFAULT_UNIX_DIR_ATTRS;
} else {
return DEFAULT_UNIX_FILE_ATTRS;
}
}
}
#[cfg(target_os = "windows")]
pub(crate) const fn default_file_attrs() -> u16 {
DEFAULT_WINDOWS_FILE_ATTRS
}
#[cfg(not(windows))]
pub(crate) const fn default_file_attrs() -> u16 {
DEFAULT_UNIX_FILE_ATTRS
}
#[cfg(target_os = "windows")]
pub(crate) const fn default_dir_attrs() -> u16 {
DEFAULT_WINDOWS_DIR_ATTRS
}
#[cfg(any(target_os = "linux", unix))]
#[cfg(not(target_os = "windows"))]
pub(crate) const fn default_dir_attrs() -> u16 {
DEFAULT_UNIX_DIR_ATTRS
}
#[cfg(not(any(target_os = "windows", target_os = "linux", unix)))]
pub(crate) const fn default_dir_attrs() -> u16 {
0
}
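The `VERSION_MADE_BY` constants pack two bytes: the host OS id in the high byte and the ZIP specification version (6.2, i.e. 62) in the low byte. A tiny sketch of the decoding:

// Sketch: split the packed "version made by" field back into its parts.
const fn split_version_made_by(v: u16) -> (u8, u8) {
    ((v >> 8) as u8, (v & 0xFF) as u8) // (os_id, spec_version)
}
// On the Unix fallback above: split_version_made_by(VERSION_MADE_BY) == (3, 62)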

View File

@@ -0,0 +1,156 @@
use std::io::{Seek, Write};
use std::sync::Mutex;
use rayon::prelude::*;
use super::file::{ZipFile, ZipFileNoData};
const END_OF_CENTRAL_DIR_SIGNATURE: u32 = 0x06054B50;
#[derive(Debug, Default)]
pub struct ZipData {
pub files: Vec<ZipFile>,
}
impl ZipData {
pub fn write<W: Write + Seek, I: IntoIterator<Item = ZipFile>>(
&mut self,
buf: &mut W,
zip_file_iter: I,
) -> std::io::Result<()> {
let zip_files = self.write_files_contained_and_iter(buf, zip_file_iter)?;
let files_amount = super::files_amount_u16(&zip_files);
let central_dir_offset = super::stream_position_u32(buf)?;
self.write_central_dir(zip_files, buf)?;
let central_dir_start = super::stream_position_u32(buf)?;
self.write_end_of_central_directory(
buf,
central_dir_offset,
central_dir_start,
files_amount,
)
}
pub fn write_rayon<W: Write + Seek + Send, I: ParallelIterator<Item = ZipFile>>(
&mut self,
buf: &mut W,
zip_file_iter: I,
) -> std::io::Result<()> {
let zip_files = self.write_files_contained_and_par_iter(buf, zip_file_iter)?;
let files_amount = super::files_amount_u16(&zip_files);
let central_dir_offset = super::stream_position_u32(buf)?;
self.write_central_dir(zip_files, buf)?;
let central_dir_start = super::stream_position_u32(buf)?;
self.write_end_of_central_directory(
buf,
central_dir_offset,
central_dir_start,
files_amount,
)
}
#[inline]
fn write_files_contained_and_iter<W: Write + Seek, I: IntoIterator<Item = ZipFile>>(
&mut self,
buf: &mut W,
zip_files_iter: I,
) -> std::io::Result<Vec<ZipFileNoData>> {
let zip_files = std::mem::take(&mut self.files);
self.write_files_iter(buf, zip_files.into_iter().chain(zip_files_iter))
}
#[inline]
pub fn write_files_contained_and_par_iter<
W: Write + Seek + Send,
I: ParallelIterator<Item = ZipFile>,
>(
&mut self,
buf: &mut W,
zip_files_iter: I,
) -> std::io::Result<Vec<ZipFileNoData>> {
let zip_files = std::mem::take(&mut self.files);
self.write_files_par_iter(buf, zip_files.into_par_iter().chain(zip_files_iter))
}
pub fn write_files_iter<W: Write + Seek, I: IntoIterator<Item = ZipFile>>(
&mut self,
buf: &mut W,
zip_files: I,
) -> std::io::Result<Vec<ZipFileNoData>> {
zip_files
.into_iter()
.map(|zipfile| zipfile.write_local_file_header_with_data_consuming(buf))
.collect::<std::io::Result<Vec<_>>>()
}
pub fn write_files_par_iter<W: Write + Seek + Send, I: ParallelIterator<Item = ZipFile>>(
&mut self,
buf: &mut W,
zip_files: I,
) -> std::io::Result<Vec<ZipFileNoData>> {
let buf = Mutex::new(buf);
zip_files
.map(|zipfile| {
let mut buf_lock = buf.lock().unwrap();
zipfile.write_local_file_header_with_data_consuming(*buf_lock)
})
.collect::<std::io::Result<Vec<_>>>()
}
fn write_central_dir<W: Write, I: IntoIterator<Item = ZipFileNoData>>(
&self,
zip_files: I,
buf: &mut W,
) -> std::io::Result<()> {
zip_files
.into_iter()
.try_for_each(|zip_file| zip_file.write_central_directory_entry(buf))
}
const FOOTER_LENGTH: usize = 22;
fn write_end_of_central_directory<W: Write>(
&self,
buf: &mut W,
central_dir_offset: u32,
central_dir_start: u32,
files_amount: u16,
) -> std::io::Result<()> {
// Temporary in-memory statically sized array
let mut central_dir = [0; Self::FOOTER_LENGTH];
{
let mut central_dir_buf: &mut [u8] = &mut central_dir;
// Signature
central_dir_buf.write_all(&END_OF_CENTRAL_DIR_SIGNATURE.to_le_bytes())?;
// number of this disk
central_dir_buf.write_all(&0_u16.to_le_bytes())?;
// number of the disk with start
central_dir_buf.write_all(&0_u16.to_le_bytes())?;
// Number of entries on this disk
central_dir_buf.write_all(&files_amount.to_le_bytes())?;
// Number of entries
central_dir_buf.write_all(&files_amount.to_le_bytes())?;
// Central dir size
central_dir_buf.write_all(&(central_dir_start - central_dir_offset).to_le_bytes())?;
// Central dir offset
central_dir_buf.write_all(&central_dir_offset.to_le_bytes())?;
// Comment length
central_dir_buf.write_all(&0_u16.to_le_bytes())?;
}
buf.write_all(&central_dir)?;
Ok(())
}
}
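One way to see the footer logic in isolation: an archive with no entries is exactly the 22-byte end-of-central-directory record. A test sketch (not in the commit):

#[cfg(test)]
mod eocd_test {
    use super::*;
    use std::io::Cursor;

    #[test]
    fn empty_archive_is_bare_footer() -> std::io::Result<()> {
        let mut data = ZipData::default();
        let mut buf = Cursor::new(Vec::new());
        data.write(&mut buf, std::iter::empty())?;
        let bytes = buf.into_inner();
        assert_eq!(bytes.len(), 22); // FOOTER_LENGTH
        assert_eq!(&bytes[..4], &[0x50, 0x4B, 0x05, 0x06]); // 0x06054B50, little-endian
        Ok(())
    }
}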

View File

@@ -0,0 +1,273 @@
//! ZIP file extra field
use std::{fs::Metadata, io::Write};
/// This is a structure containing [`ExtraField`]s associated with a file or directory in a zip
/// file, mostly used for filesystem properties, and this is the only functionality implemented
/// here.
///
/// The [`new_from_fs`](Self::new_from_fs) method will use the metadata the filesystem provides to
/// construct the collection.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ExtraFields {
pub(crate) values: Vec<ExtraField>,
}
impl Extend<ExtraField> for ExtraFields {
fn extend<T: IntoIterator<Item = ExtraField>>(&mut self, iter: T) {
self.values.extend(iter)
}
}
impl IntoIterator for ExtraFields {
type Item = <Vec<ExtraField> as IntoIterator>::Item;
type IntoIter = <Vec<ExtraField> as IntoIterator>::IntoIter;
fn into_iter(self) -> Self::IntoIter {
self.values.into_iter()
}
}
impl ExtraFields {
/// Create a new set of [`ExtraField`]s. [`Self::new_from_fs`] should be preferred.
///
/// # Safety
///
/// All fields must have valid values depending on the field type.
pub unsafe fn new<I>(fields: I) -> Self
where
I: IntoIterator<Item = ExtraField>,
{
Self { values: fields.into_iter().collect() }
}
/// This method will use the filesystem metadata to get the properties that can be stored in
/// ZIP [`ExtraFields`].
///
/// The behavior depends on the target platform. Returns an empty set if the target OS is not
/// Windows, Linux, or another UNIX-family system.
pub fn new_from_fs(metadata: &Metadata) -> Self {
#[cfg(target_os = "windows")]
{
return Self::new_windows(metadata);
}
#[cfg(target_os = "linux")]
{
return Self::new_linux(metadata);
}
#[cfg(all(unix, not(target_os = "linux")))]
{
return Self::new_unix(metadata);
}
}
#[cfg(target_os = "linux")]
fn new_linux(metadata: &Metadata) -> Self {
use std::os::linux::fs::MetadataExt;
let mod_time = Some(metadata.st_mtime() as i32);
let ac_time = Some(metadata.st_atime() as i32);
let cr_time = Some(metadata.st_ctime() as i32);
let uid = metadata.st_uid();
let gid = metadata.st_gid();
Self { values: vec![ExtraField::UnixExtendedTimestamp { mod_time, ac_time, cr_time }, ExtraField::UnixAttrs { uid, gid }] }
}
#[cfg(all(unix, not(target_os = "linux")))]
#[allow(dead_code)]
fn new_unix(metadata: &Metadata) -> Self {
use std::os::unix::fs::MetadataExt;
let mod_time = Some(metadata.mtime() as i32);
let ac_time = Some(metadata.atime() as i32);
let cr_time = Some(metadata.ctime() as i32);
let uid = metadata.uid();
let gid = metadata.gid();
Self { values: vec![ExtraField::UnixExtendedTimestamp { mod_time, ac_time, cr_time }, ExtraField::UnixAttrs { uid, gid }] }
}
#[cfg(target_os = "windows")]
fn new_windows(metadata: &Metadata) -> Self {
use std::os::windows::fs::MetadataExt;
let mtime = metadata.last_write_time();
let atime = metadata.last_access_time();
let ctime = metadata.creation_time();
Self { values: vec![ExtraField::Ntfs { mtime, atime, ctime }] }
}
pub(crate) fn data_length<const CENTRAL_HEADER: bool>(&self) -> u16 {
self.values.iter().map(|f| 4 + f.field_size::<CENTRAL_HEADER>()).sum()
}
pub(crate) fn write<W: Write, const CENTRAL_HEADER: bool>(&self, writer: &mut W) -> std::io::Result<()> {
for field in &self.values {
field.write::<_, CENTRAL_HEADER>(writer)?;
}
Ok(())
}
}
/// Extra data that can be associated with a file or directory.
///
/// This library only implements the filesystem properties in NTFS and UNIX format.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExtraField {
/// NTFS file properties.
Ntfs {
/// Last modification timestamp
mtime: u64,
/// Last access timestamp
atime: u64,
/// File/directory creation timestamp
ctime: u64,
},
/// Info-Zip extended unix timestamp. Each part is optional by definition, but will be
/// populated by [`ExtraFields::new_from_fs`].
UnixExtendedTimestamp {
/// Last modification timestamp
mod_time: Option<i32>,
/// Last access timestamp
ac_time: Option<i32>,
/// Creation timestamp
cr_time: Option<i32>,
},
/// UNIX file/directory attributes defined by Info-Zip.
UnixAttrs {
/// UID of the owner
uid: u32,
/// GID of the group
gid: u32,
},
}
const MOD_TIME_PRESENT: u8 = 1;
const AC_TIME_PRESENT: u8 = 1 << 1;
const CR_TIME_PRESENT: u8 = 1 << 2;
impl ExtraField {
#[inline]
fn header_id(&self) -> u16 {
match self {
Self::Ntfs { mtime: _, atime: _, ctime: _ } => 0x000a,
Self::UnixExtendedTimestamp { mod_time: _, ac_time: _, cr_time: _ } => 0x5455,
Self::UnixAttrs { uid: _, gid: _ } => 0x7875,
}
}
#[inline]
const fn optional_field_size<T: Sized>(field: &Option<T>) -> u16 {
match field {
Some(_) => std::mem::size_of::<T>() as u16,
None => 0,
}
}
#[inline]
const fn field_size<const CENTRAL_HEADER: bool>(&self) -> u16 {
match self {
Self::Ntfs { mtime: _, atime: _, ctime: _ } => 32,
Self::UnixExtendedTimestamp { mod_time, ac_time, cr_time } => {
1 + Self::optional_field_size(mod_time) + {
if !CENTRAL_HEADER {
Self::optional_field_size(ac_time) + Self::optional_field_size(cr_time)
} else {
0
}
}
}
Self::UnixAttrs { uid: _, gid: _ } => 11,
}
}
#[inline]
const fn if_present(val: Option<i32>, if_present: u8) -> u8 {
match val {
Some(_) => if_present,
None => 0,
}
}
const NTFS_FIELD_LEN: usize = 32;
const UNIX_ATTRS_LEN: usize = 11;
pub(crate) fn write<W: Write, const CENTRAL_HEADER: bool>(self, writer: &mut W) -> std::io::Result<()> {
// Header ID
writer.write_all(&self.header_id().to_le_bytes())?;
// Field data size
writer.write_all(&self.field_size::<CENTRAL_HEADER>().to_le_bytes())?;
match self {
Self::Ntfs { mtime, atime, ctime } => {
// Writing to a temporary in-memory array
let mut field = [0; Self::NTFS_FIELD_LEN];
{
let mut field_buf: &mut [u8] = &mut field;
// Reserved field
field_buf.write_all(&0_u32.to_le_bytes())?;
// Tag1 number
field_buf.write_all(&1_u16.to_le_bytes())?;
// Tag1 size
field_buf.write_all(&24_u16.to_le_bytes())?;
// Mtime
field_buf.write_all(&mtime.to_le_bytes())?;
// Atime
field_buf.write_all(&atime.to_le_bytes())?;
// Ctime
field_buf.write_all(&ctime.to_le_bytes())?;
}
writer.write_all(&field)?;
}
Self::UnixExtendedTimestamp { mod_time, ac_time, cr_time } => {
let flags = Self::if_present(mod_time, MOD_TIME_PRESENT)
| Self::if_present(ac_time, AC_TIME_PRESENT)
| Self::if_present(cr_time, CR_TIME_PRESENT);
writer.write_all(&[flags])?;
if let Some(mod_time) = mod_time {
writer.write_all(&mod_time.to_le_bytes())?;
}
if !CENTRAL_HEADER {
if let Some(ac_time) = ac_time {
writer.write_all(&ac_time.to_le_bytes())?;
}
if let Some(cr_time) = cr_time {
writer.write_all(&cr_time.to_le_bytes())?;
}
}
}
Self::UnixAttrs { uid, gid } => {
// Writing to a temporary in-memory array
let mut field = [0; Self::UNIX_ATTRS_LEN];
{
let mut field_buf: &mut [u8] = &mut field;
// Version of the field
field_buf.write_all(&[1])?;
// UID size
field_buf.write_all(&[4])?;
// UID
field_buf.write_all(&uid.to_le_bytes())?;
// GID size
field_buf.write_all(&[4])?;
// GID
field_buf.write_all(&gid.to_le_bytes())?;
}
writer.write_all(&field)?;
}
}
Ok(())
}
}
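The `CENTRAL_HEADER` const generic matters mainly for the extended timestamp, which stores all three times in the local header but only the modification time in the central directory entry. A test sketch (not in the commit):

#[cfg(test)]
mod timestamp_size_test {
    use super::ExtraField;

    #[test]
    fn timestamp_sizes_differ_by_header() {
        let ts = ExtraField::UnixExtendedTimestamp { mod_time: Some(0), ac_time: Some(0), cr_time: Some(0) };
        assert_eq!(ts.field_size::<false>(), 13); // 1 flag byte + three i32 timestamps (local)
        assert_eq!(ts.field_size::<true>(), 5); // 1 flag byte + mod_time only (central)
    }
}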

View File

@@ -0,0 +1,201 @@
use std::io::{Seek, Write};
use super::extra_field::ExtraFields;
use super::super::{CompressionType, platform::VERSION_MADE_BY};
const LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x04034B50;
const CENTRAL_FILE_HEADER_SIGNATURE: u32 = 0x02014B50;
const VERSION_NEEDED_TO_EXTRACT: u16 = 20;
/// Set bit 11 to indicate that the file names are in UTF-8, because all strings in rust are valid
/// UTF-8
const GENERAL_PURPOSE_BIT_FLAG: u16 = 1 << 11;
#[derive(Debug)]
pub struct ZipFile {
pub header: ZipFileHeader,
pub data: Vec<u8>,
}
#[derive(Debug)]
pub struct ZipFileHeader {
pub compression_type: CompressionType,
pub crc: u32,
pub uncompressed_size: u32,
pub filename: String,
pub file_comment: Option<String>,
pub external_file_attributes: u32,
pub extra_fields: ExtraFields,
}
#[derive(Debug)]
pub struct ZipFileNoData {
pub header: ZipFileHeader,
pub local_header_offset: u32,
pub compressed_size: u32,
}
impl ZipFile {
pub fn write_local_file_header_with_data_consuming<W: Write + Seek>(
self,
buf: &mut W,
) -> std::io::Result<ZipFileNoData> {
let local_header_offset = super::stream_position_u32(buf)?;
self.write_local_file_header_and_data(buf)?;
let Self { header, data } = self;
Ok(ZipFileNoData {
header,
local_header_offset,
compressed_size: data.len() as u32,
})
}
const LOCAL_FILE_HEADER_LEN: usize = 30;
pub fn write_local_file_header_and_data<W: Write>(&self, buf: &mut W) -> std::io::Result<()> {
// Writing to a temporary in-memory statically sized array first
let mut header = [0; Self::LOCAL_FILE_HEADER_LEN];
{
let mut header_buf: &mut [u8] = &mut header;
// signature
header_buf.write_all(&LOCAL_FILE_HEADER_SIGNATURE.to_le_bytes())?;
// version needed to extract
header_buf.write_all(&VERSION_NEEDED_TO_EXTRACT.to_le_bytes())?;
// general purpose bit flag
header_buf.write_all(&GENERAL_PURPOSE_BIT_FLAG.to_le_bytes())?;
// compression type
header_buf.write_all(&(self.header.compression_type as u16).to_le_bytes())?;
// Last modification time // moved to extra fields
header_buf.write_all(&0_u16.to_le_bytes())?;
// Last modification date // moved to extra fields
header_buf.write_all(&0_u16.to_le_bytes())?;
// crc
header_buf.write_all(&self.header.crc.to_le_bytes())?;
// Compressed size
debug_assert!(self.data.len() <= u32::MAX as usize);
header_buf.write_all(&(self.data.len() as u32).to_le_bytes())?;
// Uncompressed size
header_buf.write_all(&self.header.uncompressed_size.to_le_bytes())?;
// Filename size
debug_assert!(self.header.filename.len() <= u16::MAX as usize);
header_buf.write_all(&(self.header.filename.len() as u16).to_le_bytes())?;
// extra field size
header_buf.write_all(
&self
.header
.extra_fields
.data_length::<false>()
.to_le_bytes(),
)?;
}
buf.write_all(&header)?;
// Filename
buf.write_all(self.header.filename.as_bytes())?;
// Extra field
self.header.extra_fields.write::<_, false>(buf)?;
// Data
buf.write_all(&self.data)?;
Ok(())
}
#[inline]
pub fn directory(
mut name: String,
extra_fields: ExtraFields,
external_attributes: u16,
file_comment: Option<String>,
) -> Self {
if !(name.ends_with('/') || name.ends_with('\\')) {
name += "/"
};
Self {
header: ZipFileHeader {
compression_type: CompressionType::Stored,
crc: 0,
uncompressed_size: 0,
filename: name,
external_file_attributes: (external_attributes as u32) << 16,
extra_fields,
file_comment,
},
data: vec![],
}
}
}
impl ZipFileNoData {
const CENTRAL_DIR_ENTRY_LEN: usize = 46;
pub fn write_central_directory_entry<W: Write>(&self, buf: &mut W) -> std::io::Result<()> {
// Writing to a temporary in-memory statically sized array first
let mut central_dir_entry_header = [0; Self::CENTRAL_DIR_ENTRY_LEN];
{
let mut central_dir_entry_buf: &mut [u8] = &mut central_dir_entry_header;
// signature
central_dir_entry_buf.write_all(&CENTRAL_FILE_HEADER_SIGNATURE.to_le_bytes())?;
// version made by
central_dir_entry_buf.write_all(&VERSION_MADE_BY.to_le_bytes())?;
// version needed to extract
central_dir_entry_buf.write_all(&VERSION_NEEDED_TO_EXTRACT.to_le_bytes())?;
// general purpose bit flag
central_dir_entry_buf.write_all(&GENERAL_PURPOSE_BIT_FLAG.to_le_bytes())?;
// compression type
central_dir_entry_buf
.write_all(&(self.header.compression_type as u16).to_le_bytes())?;
// Last modification time // moved to extra fields
central_dir_entry_buf.write_all(&0_u16.to_le_bytes())?;
// Last modification date // moved to extra fields
central_dir_entry_buf.write_all(&0_u16.to_le_bytes())?;
// crc
central_dir_entry_buf.write_all(&self.header.crc.to_le_bytes())?;
// Compressed size
central_dir_entry_buf.write_all(&self.compressed_size.to_le_bytes())?;
// Uncompressed size
central_dir_entry_buf.write_all(&self.header.uncompressed_size.to_le_bytes())?;
// Filename size
debug_assert!(self.header.filename.len() <= u16::MAX as usize);
central_dir_entry_buf.write_all(&(self.header.filename.len() as u16).to_le_bytes())?;
// extra field size
central_dir_entry_buf
.write_all(&self.header.extra_fields.data_length::<true>().to_le_bytes())?;
// comment size
central_dir_entry_buf.write_all(
&(self
.header
.file_comment
.as_ref()
.map(|fc| fc.len())
.unwrap_or(0) as u16)
.to_le_bytes(),
)?;
// disk number start
central_dir_entry_buf.write_all(&0_u16.to_le_bytes())?;
// internal file attributes
central_dir_entry_buf.write_all(&0_u16.to_le_bytes())?;
// external file attributes
central_dir_entry_buf.write_all(&self.header.external_file_attributes.to_le_bytes())?;
// relative offset of local header
central_dir_entry_buf.write_all(&self.local_header_offset.to_le_bytes())?;
}
buf.write_all(&central_dir_entry_header)?;
// Filename
buf.write_all(self.header.filename.as_bytes())?;
// Extra field
self.header.extra_fields.write::<_, true>(buf)?;
// File comment
if let Some(file_comment) = &self.header.file_comment {
buf.write_all(file_comment.as_bytes())?;
}
Ok(())
}
}
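A small test sketch (not in the commit) of the directory-entry invariant established by `ZipFile::directory`:

#[cfg(test)]
mod dir_entry_test {
    use super::*;

    #[test]
    fn directory_names_get_trailing_slash() {
        let dir = ZipFile::directory("assets".to_owned(), ExtraFields::default(), 0o040755, None);
        assert_eq!(dir.header.filename, "assets/");
        assert!(dir.data.is_empty()); // directories carry no payload
        assert_eq!(dir.header.compression_type, CompressionType::Stored);
    }
}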

View File

@@ -0,0 +1,179 @@
use std::{
borrow::Cow,
fs::File,
io::Read,
panic::{RefUnwindSafe, UnwindSafe},
path::Path,
};
use flate2::{CrcReader, read::DeflateEncoder};
use super::{extra_field::ExtraFields, file::ZipFile};
use super::super::{
CompressionType, level::CompressionLevel, platform::attributes_from_fs,
zip_archive_parts::file::ZipFileHeader,
};
pub enum ZipJobOrigin<'a> {
Directory,
Filesystem { path: Cow<'a, Path> },
RawData(Cow<'a, [u8]>),
Reader(Box<dyn Read + Send + Sync + UnwindSafe + RefUnwindSafe + 'a>),
}
impl core::fmt::Debug for ZipJobOrigin<'_> {
#[inline]
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
Self::Directory => f.write_str("Directory"),
Self::Filesystem { path } => f.debug_struct("Filesystem").field("path", &path).finish(),
Self::RawData(raw_data) => f.debug_tuple("RawData").field(&raw_data).finish(),
Self::Reader(_reader) => f.debug_tuple("Reader").finish_non_exhaustive(),
}
}
}
#[derive(Debug)]
struct FileDigest {
data: Vec<u8>,
uncompressed_size: u32,
crc: u32,
}
#[derive(Debug)]
pub struct ZipJob<'a> {
pub data_origin: ZipJobOrigin<'a>,
pub extra_fields: ExtraFields,
pub archive_path: String,
pub file_comment: Option<String>,
pub external_attributes: u16,
/// Ignored when [`data_origin`](Self::data_origin) is a [`ZipJobOrigin::Directory`]
pub compression_level: CompressionLevel,
/// Ignored when [`data_origin`](Self::data_origin) is a [`ZipJobOrigin::Directory`]
pub compression_type: CompressionType,
}
impl ZipJob<'_> {
fn compress_file<R: Read>(
source: R,
uncompressed_size_approx: Option<u32>,
compression_type: CompressionType,
compression_level: CompressionLevel,
) -> std::io::Result<FileDigest> {
let mut crc_reader = CrcReader::new(source);
let mut data = Vec::with_capacity(uncompressed_size_approx.unwrap_or(0) as usize);
let uncompressed_size = match compression_type {
CompressionType::Deflate => {
let mut encoder = DeflateEncoder::new(&mut crc_reader, compression_level.into());
encoder.read_to_end(&mut data)?;
encoder.total_in() as usize
}
CompressionType::Stored => crc_reader.read_to_end(&mut data)?,
};
debug_assert!(uncompressed_size <= u32::MAX as usize);
let uncompressed_size = uncompressed_size as u32;
data.shrink_to_fit();
let crc = crc_reader.crc().sum();
Ok(FileDigest {
data,
uncompressed_size,
crc,
})
}
pub fn into_file(self) -> std::io::Result<ZipFile> {
match self.data_origin {
ZipJobOrigin::Directory => Ok(ZipFile::directory(
self.archive_path,
self.extra_fields,
self.external_attributes,
self.file_comment,
)),
ZipJobOrigin::Filesystem { path } => {
let file = File::open(path).unwrap();
let file_metadata = file.metadata().unwrap();
let uncompressed_size_approx = file_metadata.len();
debug_assert!(uncompressed_size_approx <= u32::MAX.into());
let uncompressed_size_approx = uncompressed_size_approx as u32;
let external_file_attributes = attributes_from_fs(&file_metadata);
let mut extra_fields = ExtraFields::new_from_fs(&file_metadata);
extra_fields.extend(self.extra_fields);
let FileDigest {
data,
uncompressed_size,
crc,
} = Self::compress_file(
file,
Some(uncompressed_size_approx),
self.compression_type,
self.compression_level,
)?;
Ok(ZipFile {
header: ZipFileHeader {
compression_type: CompressionType::Deflate,
crc,
uncompressed_size,
filename: self.archive_path,
external_file_attributes: (external_file_attributes as u32) << 16,
extra_fields,
file_comment: self.file_comment,
},
data,
})
}
ZipJobOrigin::RawData(data) => {
let uncompressed_size_approx = data.len();
debug_assert!(uncompressed_size_approx <= u32::MAX as usize);
let uncompressed_size_approx = uncompressed_size_approx as u32;
let FileDigest {
data,
uncompressed_size,
crc,
} = Self::compress_file(
data.as_ref(),
Some(uncompressed_size_approx),
self.compression_type,
self.compression_level,
)?;
Ok(ZipFile {
header: ZipFileHeader {
compression_type: CompressionType::Deflate,
crc,
uncompressed_size,
filename: self.archive_path,
external_file_attributes: (self.external_attributes as u32) << 16,
extra_fields: self.extra_fields,
file_comment: self.file_comment,
},
data,
})
}
ZipJobOrigin::Reader(reader) => {
let FileDigest {
data,
uncompressed_size,
crc,
} = Self::compress_file(
reader,
None,
self.compression_type,
self.compression_level,
)?;
Ok(ZipFile {
header: ZipFileHeader {
compression_type: CompressionType::Deflate,
crc,
uncompressed_size,
filename: self.archive_path,
external_file_attributes: (self.external_attributes as u32) << 16,
extra_fields: self.extra_fields,
file_comment: self.file_comment,
},
data,
})
}
}
}
}
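The heart of `compress_file` is flate2's read-side pipeline: the `CrcReader` hashes the uncompressed bytes as the `DeflateEncoder` pulls them through. A standalone sketch (not in the commit):

// Sketch: deflate `input` while computing the CRC over the uncompressed bytes.
fn deflate_with_crc(input: &[u8]) -> std::io::Result<(Vec<u8>, u32)> {
    use flate2::{read::DeflateEncoder, Compression, CrcReader};
    use std::io::Read;
    let mut crc_reader = CrcReader::new(input);
    let mut compressed = Vec::new();
    DeflateEncoder::new(&mut crc_reader, Compression::fast()).read_to_end(&mut compressed)?;
    Ok((compressed, crc_reader.crc().sum()))
}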

View File

@@ -0,0 +1,17 @@
pub mod data;
pub mod extra_field;
pub mod file;
pub mod job;
use std::io::Seek;
#[inline]
pub fn stream_position_u32<W: Seek>(buf: &mut W) -> std::io::Result<u32> {
let offset = buf.stream_position()?;
debug_assert!(offset <= u32::MAX.into());
Ok(offset as u32)
}
#[inline]
pub fn files_amount_u16<T>(files: &[T]) -> u16 {
let amount = files.len();
debug_assert!(amount <= u16::MAX as usize);
amount as u16
}

View File

@@ -0,0 +1,142 @@
// Copyright 2023 Google LLC
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::cmp::min;
/// A struct which can issue periodic updates indicating progress towards
/// an external total, based on updates towards an internal goal.
pub struct ProgressUpdater<F: Fn(u64)> {
callback: F,
internal_progress: u64,
per_update_internal: u64,
update_external_amount: u64,
external_updates_sent: u64,
remainder_external: u64,
internal_total: u64,
}
impl<F: Fn(u64)> ProgressUpdater<F> {
/// Create a new progress updater, with a callback to be called periodically.
pub fn new(callback: F, external_total: u64, internal_total: u64, per_update_internal: u64) -> Self {
let per_update_internal = min(internal_total, per_update_internal);
let total_updates_expected = if per_update_internal == 0 { 0 } else { internal_total / per_update_internal };
let (update_external_amount, remainder_external) = if total_updates_expected == 0 {
(0, external_total)
} else {
(external_total / total_updates_expected, external_total % total_updates_expected)
};
Self {
callback,
internal_progress: 0u64,
per_update_internal,
update_external_amount,
external_updates_sent: 0u64,
remainder_external,
internal_total,
}
}
/// Indicate some progress towards the internal goal. May call back the
/// external callback function to show some progress towards the external
/// goal.
pub fn progress(&mut self, amount_internal: u64) {
self.internal_progress += amount_internal;
self.send_due_updates();
}
fn send_due_updates(&mut self) {
let updates_due = if self.per_update_internal == 0 { 0 } else { self.internal_progress / self.per_update_internal };
while updates_due > self.external_updates_sent {
(self.callback)(self.update_external_amount);
self.external_updates_sent += 1;
}
}
/// Indicate completion of the task. Fully update the callback towards the
/// external state.
pub fn finish(&mut self) {
self.internal_progress = self.internal_total;
self.send_due_updates();
if self.remainder_external > 0 {
(self.callback)(self.remainder_external);
}
}
}
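// A minimal usage sketch (hypothetical payload and sizes): map bytes written
// (internal units) onto a fixed percentage scale (external units). Because
// finish() flushes the remainder, the callback always lands exactly on the
// external total.
#[test]
fn percentage_mapping_sketch() {
    let src = vec![0u8; 10_000];
    let percent = std::cell::Cell::new(0u64);
    let mut updater = ProgressUpdater::new(
        |delta| percent.set(percent.get() + delta), // external units arrive here
        100,              // external total: a percentage scale
        src.len() as u64, // internal total: bytes to process
        4096,             // issue an external update per 4 KiB processed
    );
    for chunk in src.chunks(4096) {
        updater.progress(chunk.len() as u64);
    }
    updater.finish();
    assert_eq!(percent.get(), 100);
}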
#[test]
fn test_progress_updater() {
let amount_received = std::rc::Rc::new(std::cell::RefCell::new(0u64));
let mut progresser = ProgressUpdater::new(
|progress| {
*(amount_received.borrow_mut()) += progress;
},
100,
1000,
100,
);
assert_eq!(*amount_received.borrow(), 0);
progresser.progress(1);
assert_eq!(*amount_received.borrow(), 0);
progresser.progress(100);
assert_eq!(*amount_received.borrow(), 10);
progresser.progress(800);
assert_eq!(*amount_received.borrow(), 90);
progresser.finish();
assert_eq!(*amount_received.borrow(), 100);
}
#[test]
fn test_progress_updater_zero_external() {
let amount_received = std::rc::Rc::new(std::cell::RefCell::new(0u64));
let mut progresser = ProgressUpdater::new(
|progress| {
*(amount_received.borrow_mut()) += progress;
},
0,
1000,
100,
);
assert_eq!(*amount_received.borrow(), 0);
progresser.progress(1);
progresser.progress(800);
progresser.finish();
assert_eq!(*amount_received.borrow(), 0);
}
#[test]
fn test_progress_updater_small_internal() {
let amount_received = std::rc::Rc::new(std::cell::RefCell::new(0u64));
let mut progresser = ProgressUpdater::new(
|progress| {
*(amount_received.borrow_mut()) += progress;
},
100,
5,
100,
);
assert_eq!(*amount_received.borrow(), 0);
progresser.progress(1);
progresser.finish();
assert_eq!(*amount_received.borrow(), 100);
}
#[test]
fn test_progress_updater_zero_internal() {
let amount_received = std::rc::Rc::new(std::cell::RefCell::new(0u64));
let mut progresser = ProgressUpdater::new(
|progress| {
*(amount_received.borrow_mut()) += progress;
},
100,
0,
100,
);
assert_eq!(*amount_received.borrow(), 0);
progresser.finish();
assert_eq!(*amount_received.borrow(), 100);
}

View File

@@ -0,0 +1,327 @@
// Copyright 2022 Google LLC
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::{
borrow::Cow,
fs::File,
io::{ErrorKind, Read, Seek, SeekFrom},
path::{Path, PathBuf},
sync::Mutex,
};
use anyhow::{Context, Result};
use rayon::prelude::*;
use zip::{read::ZipFile, ZipArchive};
use super::{cloneable_seekable_reader::CloneableSeekableReader, progress_updater::ProgressUpdater, UnzipProgressReporter};
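/// Length of a seekable stream, found by seeking to the end and then restoring the original position.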
pub(crate) fn determine_stream_len<R: Seek>(stream: &mut R) -> std::io::Result<u64> {
let old_pos = stream.stream_position()?;
let len = stream.seek(SeekFrom::End(0))?;
if old_pos != len {
stream.seek(SeekFrom::Start(old_pos))?;
}
Ok(len)
}
/// Options for unzipping.
pub struct UnzipOptions<'a, 'b> {
/// The destination directory.
pub output_directory: Option<PathBuf>,
/// The password, if the archive is encrypted.
pub password: Option<String>,
/// Whether to run in single-threaded mode.
pub single_threaded: bool,
/// An optional filename filter.
pub filename_filter: Option<Box<dyn FilenameFilter + Sync + 'a>>,
/// An object to receive notifications of unzip progress.
pub progress_reporter: Box<dyn UnzipProgressReporter + Sync + 'b>,
}
/// An object which can unzip a zip file, in its entirety, from a local
/// file or from a network stream. It tries to do this in parallel wherever
/// possible.
pub struct UnzipEngine {
zipfile: Box<dyn UnzipEngineImpl>,
compressed_length: u64,
directory_creator: DirectoryCreator,
}
/// Code which can determine whether to unzip a given filename.
pub trait FilenameFilter {
/// Returns true if the given filename should be unzipped.
fn should_unzip(&self, filename: &str) -> bool;
}
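// For illustration, a filter that only unzips entries under a fixed prefix
// (hypothetical type, not part of this module):
//
// struct PrefixFilter(&'static str);
// impl FilenameFilter for PrefixFilter {
//     fn should_unzip(&self, filename: &str) -> bool {
//         filename.starts_with(self.0)
//     }
// }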
/// The underlying engine used by the unzipper. This is different
/// for files and URIs.
trait UnzipEngineImpl {
fn unzip(&mut self, options: UnzipOptions, directory_creator: &DirectoryCreator) -> Vec<anyhow::Error>;
// Due to lack of RPITIT we'll return a Vec<String> here
fn list(&self) -> Result<Vec<String>, anyhow::Error>;
}
/// Engine which knows how to unzip a file.
#[derive(Clone)]
struct UnzipFileEngine(ZipArchive<CloneableSeekableReader<File>>);
impl UnzipEngineImpl for UnzipFileEngine {
fn unzip(&mut self, options: UnzipOptions, directory_creator: &DirectoryCreator) -> Vec<anyhow::Error> {
unzip_serial_or_parallel(self.0.len(), options, directory_creator, || self.0.clone(), || {})
}
fn list(&self) -> Result<Vec<String>, anyhow::Error> {
list(&self.0)
}
}
impl UnzipEngine {
/// Create an unzip engine which knows how to unzip a file.
pub fn for_file(mut zipfile: File) -> Result<Self> {
// The following line doesn't actually seem to make any significant
// performance difference.
// let zipfile = BufReader::new(zipfile);
let compressed_length = determine_stream_len(&mut zipfile)?;
let zipfile = CloneableSeekableReader::new(zipfile);
Ok(Self {
zipfile: Box::new(UnzipFileEngine(ZipArchive::new(zipfile)?)),
compressed_length,
directory_creator: DirectoryCreator::default(),
})
}
/// The total compressed length that we expect to retrieve over
/// the network or from the compressed file.
pub fn zip_length(&self) -> u64 {
self.compressed_length
}
/// Perform the unzip.
pub fn unzip(mut self, options: UnzipOptions) -> Result<()> {
log::debug!("Starting extract");
options.progress_reporter.total_bytes_expected(self.compressed_length);
let errors = self.zipfile.unzip(options, &self.directory_creator);
// Return the first error, if any.
errors.into_iter().next().map(Result::Err).unwrap_or(Ok(()))
}
/// List the filenames in the archive
pub fn list(self) -> Result<impl Iterator<Item = String>> {
// In future this might be a more dynamic iterator type.
self.zipfile.list().map(|mut v| {
// Names are returned in HashMap iteration order, so let's
// sort them into something more predictable
v.sort();
v.into_iter()
})
}
}
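// Putting the engine and options together: a minimal driver sketch
// (hypothetical names; assumes the UnzipProgressReporter trait methods have
// default no-op bodies, as in upstream ripunzip, so an empty impl is quiet).
//
// struct QuietReporter;
// impl UnzipProgressReporter for QuietReporter {}
// fn extract_all(zip: File, dest: PathBuf) -> Result<()> {
//     UnzipEngine::for_file(zip)?.unzip(UnzipOptions {
//         output_directory: Some(dest),
//         password: None,
//         single_threaded: false,
//         filename_filter: None,
//         progress_reporter: Box::new(QuietReporter),
//     })
// }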
/// Return a list of filenames from the zip. For now this can't
/// fail, but it returns a `Result` in case we do something
/// smarter in future.
fn list<'a, T: Read + Seek + 'a>(zip_archive: &ZipArchive<T>) -> Result<Vec<String>> {
Ok(zip_archive.file_names().map(|s| s.to_string()).collect())
}
fn unzip_serial_or_parallel<'a, T: Read + Seek + 'a>(
len: usize,
options: UnzipOptions,
directory_creator: &DirectoryCreator,
get_ziparchive_clone: impl Fn() -> ZipArchive<T> + Sync,
// Called whenever a file is going to be skipped
file_skip_callback: impl Fn() + Sync + Send + Clone,
) -> Vec<anyhow::Error> {
let progress_reporter: &dyn UnzipProgressReporter = options.progress_reporter.as_ref();
match (options.filename_filter, options.single_threaded) {
(None, true) => (0..len)
.map(|i| {
extract_file_by_index(
&get_ziparchive_clone,
i,
&options.output_directory,
&options.password,
progress_reporter,
directory_creator,
)
})
.filter_map(Result::err)
.collect(),
(None, false) => {
// We use par_bridge here rather than into_par_iter because it turns
// out to better preserve ordering of the IDs in the input range,
// i.e. we're more likely to ask our initial threads to act upon
// file IDs 0, 1, 2, 3, 4, 5 rather than 0, 1000, 2000, 3000 etc.
// On a device which is CPU-bound or IO-bound (rather than network
// bound) that's beneficial because we can start to decompress
// and write data to disk as soon as it arrives from the network.
(0..len)
.par_bridge()
.map(|i| {
extract_file_by_index(
&get_ziparchive_clone,
i,
&options.output_directory,
&options.password,
progress_reporter,
directory_creator,
)
})
.filter_map(Result::err)
.collect()
}
(Some(filename_filter), single_threaded) => {
// If we have a filename filter, an easy thing would be to
// iterate through each file index as above, and check to see if its
// name matches. Unfortunately, that seeks all over the place
// to get the filename from the local header.
// Instead, let's get a list of the filenames we need
// and request them from the zip library directly.
// As we can't predict their order in the file, this may involve
// arbitrary rewinds, so let's do it single-threaded.
if !single_threaded {
log::warn!("Unzipping specific files - assuming --single-threaded since we currently cannot unzip specific files in a multi-threaded mode. If you need that, consider launching multiple copies of ripunzip in parallel.");
}
let mut filenames: Vec<_> = get_ziparchive_clone()
.file_names()
.filter(|name| filename_filter.as_ref().should_unzip(name))
.map(|s| s.to_string())
.collect();
// The filenames returned by the file_names() method above are in
// HashMap iteration order (i.e. random). To avoid creating lots
// of HTTPS streams for files which are nearby each other in the
// zip, we'd ideally extract them in order of file position.
// We have no way of knowing file position (without iterating the
// whole file) so instead let's sort them and hope that files were
// zipped in alphabetical order, or close to it. If we're wrong,
// we'll just end up rewinding, that is, creating extra redundant
// HTTP(S) streams.
filenames.sort();
log::info!("Will unzip {} matching filenames", filenames.len());
file_skip_callback();
filenames
.into_iter()
.map(|name| {
let myzip: &mut zip::ZipArchive<T> = &mut get_ziparchive_clone();
let file: ZipFile<T> = match &options.password {
None => myzip.by_name(&name)?,
Some(string) => myzip.by_name_decrypt(&name, string.as_bytes())?,
};
let r = extract_file(file, &options.output_directory, progress_reporter, directory_creator);
file_skip_callback();
r
})
.filter_map(Result::err)
.collect()
}
}
}
fn extract_file_by_index<'a, T: Read + Seek + 'a>(
get_ziparchive_clone: impl Fn() -> ZipArchive<T> + Sync,
i: usize,
output_directory: &Option<PathBuf>,
password: &Option<String>,
progress_reporter: &dyn UnzipProgressReporter,
directory_creator: &DirectoryCreator,
) -> Result<(), anyhow::Error> {
let myzip: &mut zip::ZipArchive<T> = &mut get_ziparchive_clone();
let file: ZipFile<T> = match password {
None => myzip.by_index(i)?,
Some(string) => myzip.by_index_decrypt(i, string.as_bytes())?,
};
extract_file(file, output_directory, progress_reporter, directory_creator)
}
fn extract_file<R: Read>(
file: ZipFile<R>,
output_directory: &Option<PathBuf>,
progress_reporter: &dyn UnzipProgressReporter,
directory_creator: &DirectoryCreator,
) -> Result<(), anyhow::Error> {
let name = file.enclosed_name().as_deref().map(Path::to_string_lossy).unwrap_or_else(|| Cow::Borrowed("<unprintable>")).to_string();
extract_file_inner(file, output_directory, progress_reporter, directory_creator).with_context(|| format!("Failed to extract {name}"))
}
/// Extracts a file from a zip file.
fn extract_file_inner<R: Read>(
mut file: ZipFile<R>,
output_directory: &Option<PathBuf>,
progress_reporter: &dyn UnzipProgressReporter,
directory_creator: &DirectoryCreator,
) -> Result<()> {
let name = file.enclosed_name().ok_or_else(|| std::io::Error::new(ErrorKind::Unsupported, "path not safe to extract"))?;
let display_name = name.display().to_string();
let out_path = match output_directory {
Some(output_directory) => output_directory.join(name),
None => name,
};
progress_reporter.extraction_starting(&display_name);
log::debug!("Start extract of file at {:x}, length {:x}, name {}", file.data_start(), file.compressed_size(), display_name);
if file.name().ends_with('/') {
directory_creator.create_dir_all(&out_path)?;
} else {
if let Some(parent) = out_path.parent() {
directory_creator.create_dir_all(parent)?;
}
let out_file = File::create(&out_path).with_context(|| "Failed to create file")?;
// Progress bar strategy. The overall progress across the entire zip file must be
// denoted in terms of *compressed* bytes, since at the outset we don't know the uncompressed
// size of each file. Yet, within a given file, we update progress based on the bytes
// of uncompressed data written, once per 1MB, because that's the information that we happen
// to have available. So, calculate how many compressed bytes relate to 1MB of uncompressed
// data, and the remainder.
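// For example, a 10 MiB file that deflates to 2 MiB yields ten updates of
// roughly 205 KiB of compressed progress each, plus a tiny remainder at the end.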
let uncompressed_size = file.size();
let compressed_size = file.compressed_size();
let mut progress_updater = ProgressUpdater::new(
|external_progress| {
progress_reporter.bytes_extracted(external_progress);
},
compressed_size,
uncompressed_size,
1024 * 1024,
);
let mut out_file = progress_streams::ProgressWriter::new(out_file, |bytes_written| progress_updater.progress(bytes_written as u64));
// Using a BufWriter here doesn't improve performance even on a VM with
// spinny disks.
std::io::copy(&mut file, &mut out_file).with_context(|| "Failed to write file contents")?;
progress_updater.finish();
}
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
if let Some(mode) = file.unix_mode() {
std::fs::set_permissions(&out_path, std::fs::Permissions::from_mode(mode)).with_context(|| "Failed to set permissions")?;
}
}
log::debug!("Finished extract of file at {:x}, length {:x}, name {}", file.data_start(), file.compressed_size(), display_name);
progress_reporter.extraction_finished(&display_name);
Ok(())
}
/// An engine used to ensure threads don't conflict when
/// creating directories.
#[derive(Default)]
struct DirectoryCreator(Mutex<()>);
impl DirectoryCreator {
fn create_dir_all(&self, path: &Path) -> Result<()> {
// Fast path - avoid locking if the directory exists
if path.exists() {
return Ok(());
}
let _exclusivity = self.0.lock().unwrap();
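// Re-check under the lock: another thread may have created the directory
// between the unlocked probe above and acquiring the mutex.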
if path.exists() {
return Ok(());
}
std::fs::create_dir_all(path).with_context(|| "Failed to create directory")
}
}

View File

@@ -1,5 +1,6 @@
pub mod runtime_arch;
pub mod cli_host;
pub mod fastzip;
mod dialogs_const;
mod dialogs_common;

View File

@@ -78,7 +78,7 @@ fn check_arch_windows() -> Option<RuntimeArch> {
#[cfg(target_os = "windows")]
type IsWow64Process2Fn = unsafe extern "system" fn(
hProcess: windows::Win32::Foundation::HANDLE,
hprocess: windows::Win32::Foundation::HANDLE,
pprocessmachine: *mut windows::Win32::System::SystemInformation::IMAGE_FILE_MACHINE,
pnativemachine: *mut windows::Win32::System::SystemInformation::IMAGE_FILE_MACHINE,
) -> windows::core::BOOL;

View File

@@ -3,7 +3,7 @@ use windows::Win32::System::LibraryLoader::LOAD_LIBRARY_SEARCH_SYSTEM32;
use windows::Win32::System::LibraryLoader::LOAD_LIBRARY_FLAGS;
#[cfg(target_os = "windows")]
type SetDefaultDllDirectoriesFn = unsafe extern "system" fn(DirectoryFlags: u32) -> BOOL;
type SetDefaultDllDirectoriesFn = unsafe extern "system" fn(directory_flags: u32) -> BOOL;
#[cfg(target_os = "windows")]
unsafe fn set_default_dll_directories(flags: LOAD_LIBRARY_FLAGS) {