From decb820cf9a1f27acb871f8cae30e8df2a8e9cc6 Mon Sep 17 00:00:00 2001 From: MihailRis Date: Sat, 22 Feb 2025 05:47:27 +0300 Subject: [PATCH 01/12] add memory_istream --- src/io/memory_istream.hpp | 34 ++++++++++++++++++++++++++++++++++ test/io/memory_istream.cpp | 19 +++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 src/io/memory_istream.hpp create mode 100644 test/io/memory_istream.cpp diff --git a/src/io/memory_istream.hpp b/src/io/memory_istream.hpp new file mode 100644 index 00000000..09f6f418 --- /dev/null +++ b/src/io/memory_istream.hpp @@ -0,0 +1,34 @@ +#pragma once + +#include +#include "util/Buffer.hpp" + +class memory_istream : public std::istream { +public: + explicit memory_istream(util::Buffer buffer) + : std::istream(&buf), buf(std::move(buffer)) {} + +private: + class memory_streambuf : public std::streambuf { + public: + explicit memory_streambuf(util::Buffer buffer) + : buffer(std::move(buffer)) { + char* base = this->buffer.data(); + char* end = base + this->buffer.size(); + setg(base, base, end); + } + + memory_streambuf(const memory_streambuf&) = delete; + memory_streambuf& operator=(const memory_streambuf&) = delete; + + protected: + int_type underflow() override { + return traits_type::eof(); + } + + private: + util::Buffer buffer; + }; + + memory_streambuf buf; +}; diff --git a/test/io/memory_istream.cpp b/test/io/memory_istream.cpp new file mode 100644 index 00000000..faabd23c --- /dev/null +++ b/test/io/memory_istream.cpp @@ -0,0 +1,19 @@ +#include + +#include "io/memory_istream.hpp" + +TEST(io, memory_istream) { + const char data[] = "Hello, world!"; + const int n = std::strlen(data); + + util::Buffer buffer(data, n); + memory_istream stream(std::move(buffer)); + + ASSERT_TRUE(stream.good()); + + std::string text(n, '\0'); + stream.read(text.data(), n); + ASSERT_EQ(text, std::string(data)); + stream.read(text.data(), 1); + ASSERT_TRUE(stream.eof()); +} From 7f4b074d70b3c3869d8d183a5ddca0e076f91d58 Mon Sep 17 00:00:00 2001 From: MihailRis Date: Sat, 22 Feb 2025 05:48:48 +0300 Subject: [PATCH 02/12] add ZipFileDevice (WIP) --- src/io/devices/ZipFileDevice.cpp | 175 +++++++++++++++++++++++++++++++ src/io/devices/ZipFileDevice.hpp | 48 +++++++++ 2 files changed, 223 insertions(+) create mode 100644 src/io/devices/ZipFileDevice.cpp create mode 100644 src/io/devices/ZipFileDevice.hpp diff --git a/src/io/devices/ZipFileDevice.cpp b/src/io/devices/ZipFileDevice.cpp new file mode 100644 index 00000000..79e3e006 --- /dev/null +++ b/src/io/devices/ZipFileDevice.cpp @@ -0,0 +1,175 @@ +#include "ZipFileDevice.hpp" + +#include +#include "debug/Logger.hpp" +#include "util/data_io.hpp" + +static debug::Logger logger("zip-file"); + +using namespace io; + +static constexpr uint32_t EOCD_SIGNATURE = 0x06054b50; +static constexpr uint32_t CENTRAL_DIR_SIGNATURE = 0x02014b50; +static constexpr uint32_t LOCAL_FILE_SIGNATURE = 0x04034b50; + +template +static T read_int(std::unique_ptr& file) { + T value = 0; + file->read(reinterpret_cast(&value), sizeof(value)); + return dataio::le2h(value); +} + +template +static void read_int(std::unique_ptr& file, T& value) { + file->read(reinterpret_cast(&value), sizeof(value)); + value = dataio::le2h(value); +} + +ZipFileDevice::Entry ZipFileDevice::readEntry() { + // Read entry info + Entry entry {}; + read_int(file, entry.versionMadeBy); + read_int(file, entry.versionNeeded); + read_int(file, entry.flags); + read_int(file, entry.compressionMethod); + read_int(file, entry.modTime); + read_int(file, entry.modDate); + read_int(file, entry.crc32); + read_int(file, entry.compressedSize); + read_int(file, entry.uncompressedSize); + auto fileNameLength = read_int(file); + auto extraFieldLength = read_int(file); + auto fileCommentLength = read_int(file); + read_int(file, entry.diskNumberStart); + read_int(file, entry.internalAttributes); + read_int(file, entry.externalAttributes); + read_int(file, entry.localHeaderOffset); + + entry.fileName.resize(fileNameLength, '\0'); + file->read(entry.fileName.data(), fileNameLength); + + // Skip extra field and file comment + file->seekg(extraFieldLength + fileCommentLength, std::ios::cur); + + if (entry.diskNumberStart == 0xFF) { + throw std::runtime_error("zip64 is not supported"); + } + return entry; +} + +void ZipFileDevice::findBlob(Entry& entry) { + file->seekg(entry.localHeaderOffset); + if (read_int(file) != LOCAL_FILE_SIGNATURE) { + throw std::runtime_error("invalid local file signature"); + } + read_int(file); // version + read_int(file); // flags + read_int(file); // compression method + read_int(file); // last modification time + read_int(file); // last modification date + read_int(file); // crc32 + read_int(file); // compressed size + read_int(file); // uncompressed size + auto nameLength = read_int(file); + auto extraFieldLength = read_int(file); + + // Skip extra field and file comment + file->seekg(nameLength + extraFieldLength, std::ios::cur); + entry.blobOffset = file->tellg(); + + std::cout << entry.fileName << ": " << entry.blobOffset << " " << entry.compressionMethod << std::endl; +} + +ZipFileDevice::ZipFileDevice(std::unique_ptr filePtr) + : file(std::move(filePtr)) { + + // Searching for EOCD + file->seekg(0, std::ios::end); + std::streampos fileSize = file->tellg(); + + bool foundEOCD = false; + for (int pos = static_cast(fileSize)-4; pos >= 0; --pos) { + file->seekg(pos); + if (read_int(file) == EOCD_SIGNATURE) { + foundEOCD = true; + break; + } + } + if (!foundEOCD) { + throw std::runtime_error("EOCD not found, ZIP file is invalid"); + } + + // Reading EOCD + read_int(file); // diskNumber + read_int(file); // centralDirDisk + read_int(file); // numEntriesThisDisk + auto totalEntries = read_int(file); + read_int(file); // centralDirSize + auto centralDirOffset = read_int(file); + read_int(file); // commentLength + + file->seekg(centralDirOffset); + + for (uint16_t i = 0; i < totalEntries; i++) { + if (read_int(file) != CENTRAL_DIR_SIGNATURE) { + logger.error() << "invalid central directory entry"; + break; + } + // Read entry info + Entry entry = readEntry(); + entries[entry.fileName] = std::move(entry); + } + + for (auto& [_, entry] : entries) { + findBlob(entry); + } +} + + +std::filesystem::path ZipFileDevice::resolve(std::string_view path) { + throw std::runtime_error("unable to resolve filesystem path"); +} + +std::unique_ptr ZipFileDevice::write(std::string_view path) { + return nullptr; +} + +std::unique_ptr ZipFileDevice::read(std::string_view path) { + return nullptr; +} + +size_t ZipFileDevice::size(std::string_view path) { + return 0; +} + +bool ZipFileDevice::exists(std::string_view path) { + return false; +} + +bool ZipFileDevice::isdir(std::string_view path) { + return false; +} + +bool ZipFileDevice::isfile(std::string_view path) { + return false; +} + +bool ZipFileDevice::mkdir(std::string_view path) { + return false; +} + +bool ZipFileDevice::mkdirs(std::string_view path) { + return false; +} + +bool ZipFileDevice::remove(std::string_view path) { + return false; +} + +uint64_t ZipFileDevice::removeAll(std::string_view path) { + return 0; +} + +std::unique_ptr ZipFileDevice::list(std::string_view path) { + return nullptr; +} diff --git a/src/io/devices/ZipFileDevice.hpp b/src/io/devices/ZipFileDevice.hpp new file mode 100644 index 00000000..dba02a98 --- /dev/null +++ b/src/io/devices/ZipFileDevice.hpp @@ -0,0 +1,48 @@ +#pragma once + +#include + +#include "Device.hpp" + +namespace io { + class ZipFileDevice : public Device { + struct Entry { + uint16_t versionMadeBy; + uint16_t versionNeeded; + uint16_t flags; + uint16_t compressionMethod; + uint16_t modTime; + uint16_t modDate; + uint32_t crc32; + uint32_t compressedSize; + uint32_t uncompressedSize; + uint16_t diskNumberStart; + uint16_t internalAttributes; + uint32_t externalAttributes; + uint32_t localHeaderOffset; + std::string fileName; + size_t blobOffset = 0; + }; + public: + ZipFileDevice(std::unique_ptr file); + + std::filesystem::path resolve(std::string_view path) override; + std::unique_ptr write(std::string_view path) override; + std::unique_ptr read(std::string_view path) override; + size_t size(std::string_view path) override; + bool exists(std::string_view path) override; + bool isdir(std::string_view path) override; + bool isfile(std::string_view path) override; + bool mkdir(std::string_view path) override; + bool mkdirs(std::string_view path) override; + bool remove(std::string_view path) override; + uint64_t removeAll(std::string_view path) override; + std::unique_ptr list(std::string_view path) override; + private: + std::unique_ptr file; + std::unordered_map entries; + + Entry readEntry(); + void findBlob(Entry& entry); + }; +} From 9389d63a5f2e64759c91a5dd114aebb14032de4d Mon Sep 17 00:00:00 2001 From: MihailRis Date: Sat, 22 Feb 2025 07:03:18 +0300 Subject: [PATCH 03/12] add deflate_istream --- src/io/deflate_istream.hpp | 101 +++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 src/io/deflate_istream.hpp diff --git a/src/io/deflate_istream.hpp b/src/io/deflate_istream.hpp new file mode 100644 index 00000000..ecbc738a --- /dev/null +++ b/src/io/deflate_istream.hpp @@ -0,0 +1,101 @@ +#pragma once + +#define ZLIB_CONST +#include +#include +#include +#include +#include + +class deflate_istream : public std::istream { +public: + explicit deflate_istream(std::unique_ptr src) + : std::istream(&buf), source(std::move(src)), buf(*source) {} + +private: + class deflate_streambuf : public std::streambuf { + public: + explicit deflate_streambuf(std::istream& src) : src(src) { + zstream.zalloc = Z_NULL; + zstream.zfree = Z_NULL; + zstream.opaque = Z_NULL; + zstream.avail_in = 0; + zstream.next_in = Z_NULL; + + int ret = inflateInit2(&zstream, -15); + if (ret != Z_OK) { + throw std::runtime_error("zlib init failed"); + } + } + + ~deflate_streambuf() { + inflateEnd(&zstream); + } + + deflate_streambuf(const deflate_streambuf&) = delete; + deflate_streambuf& operator=(const deflate_streambuf&) = delete; + + protected: + int_type underflow() override { + if (gptr() < egptr()) { + return traits_type::to_int_type(*gptr()); + } + + if (eof) { + return traits_type::eof(); + } + + zstream.next_out = reinterpret_cast(outBuf.data()); + zstream.avail_out = outBuf.size(); + + do { + if (zstream.avail_in == 0) { + src.read(inBuf.data(), inBuf.size()); + zstream.avail_in = static_cast(src.gcount()); + zstream.next_in = reinterpret_cast(inBuf.data()); + + if (src.bad()) { + return traits_type::eof(); + } + } + + int ret = inflate(&zstream, Z_NO_FLUSH); + if (ret == Z_STREAM_END) { + eof = true; + } else if (ret != Z_OK) { + if (ret == Z_BUF_ERROR && zstream.avail_out == outBuf.size()) { + continue; + } + return traits_type::eof(); + } + + const auto decompressed = outBuf.size() - zstream.avail_out; + if (decompressed > 0) { + setg(outBuf.data(), + outBuf.data(), + outBuf.data() + decompressed); + return traits_type::to_int_type(*gptr()); + } + + if (eof) { + return traits_type::eof(); + } + + } while (zstream.avail_in > 0 || !src.eof()); + + return traits_type::eof(); + } + + private: + static constexpr size_t BUFFER_SIZE = 16384; + + std::istream& src; + z_stream zstream {}; + std::array inBuf {}; + std::array outBuf {}; + bool eof = false; + }; + + std::unique_ptr source; + deflate_streambuf buf; +}; From 58acc1b2eccb4727d287b3a5600a2c4aa56f7d08 Mon Sep 17 00:00:00 2001 From: MihailRis Date: Sat, 22 Feb 2025 07:04:04 +0300 Subject: [PATCH 04/12] implement ZipFileDevice methods (WIP) --- src/coders/gzip.hpp | 12 +++---- src/io/devices/ZipFileDevice.cpp | 59 ++++++++++++++++++++++++++++---- src/io/devices/ZipFileDevice.hpp | 1 + 3 files changed, 60 insertions(+), 12 deletions(-) diff --git a/src/coders/gzip.hpp b/src/coders/gzip.hpp index f18b1c14..cd14d478 100644 --- a/src/coders/gzip.hpp +++ b/src/coders/gzip.hpp @@ -7,13 +7,13 @@ namespace gzip { const unsigned char MAGIC[] = "\x1F\x8B"; - /* Compress bytes array to GZIP format - @param src source bytes array - @param size length of source bytes array */ + /// Compress bytes array to GZIP format + /// @param src source bytes array + /// @param size length of source bytes array std::vector compress(const ubyte* src, size_t size); - /* Decompress bytes array from GZIP - @param src GZIP data - @param size length of GZIP data */ + /// Decompress bytes array from GZIP + /// @param src GZIP data + /// @param size length of GZIP data std::vector decompress(const ubyte* src, size_t size); } diff --git a/src/io/devices/ZipFileDevice.cpp b/src/io/devices/ZipFileDevice.cpp index 79e3e006..cc54af34 100644 --- a/src/io/devices/ZipFileDevice.cpp +++ b/src/io/devices/ZipFileDevice.cpp @@ -2,7 +2,10 @@ #include #include "debug/Logger.hpp" +#include "io/memory_istream.hpp" +#include "io/deflate_istream.hpp" #include "util/data_io.hpp" +#include "util/Buffer.hpp" static debug::Logger logger("zip-file"); @@ -11,6 +14,8 @@ using namespace io; static constexpr uint32_t EOCD_SIGNATURE = 0x06054b50; static constexpr uint32_t CENTRAL_DIR_SIGNATURE = 0x02014b50; static constexpr uint32_t LOCAL_FILE_SIGNATURE = 0x04034b50; +static constexpr uint32_t COMPRESSION_NONE = 0; +static constexpr uint32_t COMPRESSION_DEFLATE = 8; template static T read_int(std::unique_ptr& file) { @@ -77,7 +82,15 @@ void ZipFileDevice::findBlob(Entry& entry) { file->seekg(nameLength + extraFieldLength, std::ios::cur); entry.blobOffset = file->tellg(); - std::cout << entry.fileName << ": " << entry.blobOffset << " " << entry.compressionMethod << std::endl; + for (size_t i = 0; i < entry.fileName.length(); i++) { + if (entry.fileName[i] == '\\') { + entry.fileName[i] = '/'; + } + } + if (entry.fileName[entry.fileName.length() - 1] == '/') { + entry.isDirectory = true; + entry.fileName = entry.fileName.substr(0, entry.fileName.length() - 1); + } } ZipFileDevice::ZipFileDevice(std::unique_ptr filePtr) @@ -135,23 +148,57 @@ std::unique_ptr ZipFileDevice::write(std::string_view path) { } std::unique_ptr ZipFileDevice::read(std::string_view path) { - return nullptr; + const auto& found = entries.find(std::string(path)); + if (found == entries.end()) { + throw std::runtime_error("could not to open file zip://" + std::string(path)); + } + auto& entry = found->second; + if (entry.isDirectory) { + throw std::runtime_error("zip://" + std::string(path) + " is directory"); + } + if (entry.blobOffset == 0) { + findBlob(entry); + } + file->seekg(entry.blobOffset); + + util::Buffer buffer(entry.compressedSize); + file->read(buffer.data(), buffer.size()); + auto memoryStream = std::make_unique(std::move(buffer)); + if (entry.compressionMethod == COMPRESSION_NONE) { + return memoryStream; + } else if (entry.compressionMethod == COMPRESSION_DEFLATE) { + return std::make_unique(std::move(memoryStream)); + } else { + throw std::runtime_error("unsupported compression method"); + } } size_t ZipFileDevice::size(std::string_view path) { - return 0; + const auto& found = entries.find(std::string(path)); + if (found == entries.end()) { + return false; + } + return found->second.uncompressedSize; } bool ZipFileDevice::exists(std::string_view path) { - return false; + return entries.find(std::string(path)) != entries.end(); } bool ZipFileDevice::isdir(std::string_view path) { - return false; + const auto& found = entries.find(std::string(path)); + if (found == entries.end()) { + return false; + } + return found->second.isDirectory; } bool ZipFileDevice::isfile(std::string_view path) { - return false; + const auto& found = entries.find(std::string(path)); + if (found == entries.end()) { + return false; + } + return !found->second.isDirectory; } bool ZipFileDevice::mkdir(std::string_view path) { diff --git a/src/io/devices/ZipFileDevice.hpp b/src/io/devices/ZipFileDevice.hpp index dba02a98..88d2213c 100644 --- a/src/io/devices/ZipFileDevice.hpp +++ b/src/io/devices/ZipFileDevice.hpp @@ -22,6 +22,7 @@ namespace io { uint32_t localHeaderOffset; std::string fileName; size_t blobOffset = 0; + bool isDirectory = false; }; public: ZipFileDevice(std::unique_ptr file); From 310bef1723eb34275a209fa64426994a04fd2d04 Mon Sep 17 00:00:00 2001 From: MihailRis Date: Sat, 22 Feb 2025 07:30:16 +0300 Subject: [PATCH 05/12] implement ZipFileDevice::list --- src/io/devices/ZipFileDevice.cpp | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/io/devices/ZipFileDevice.cpp b/src/io/devices/ZipFileDevice.cpp index cc54af34..eb87844f 100644 --- a/src/io/devices/ZipFileDevice.cpp +++ b/src/io/devices/ZipFileDevice.cpp @@ -1,6 +1,7 @@ #include "ZipFileDevice.hpp" -#include +#include + #include "debug/Logger.hpp" #include "io/memory_istream.hpp" #include "io/deflate_istream.hpp" @@ -217,6 +218,30 @@ uint64_t ZipFileDevice::removeAll(std::string_view path) { return 0; } +class ListPathsGenerator : public PathsGenerator { +public: + ListPathsGenerator(std::vector names) + : names(std::move(names)) {}; + + bool next(path& dst) override { + if (current == names.size()) { + return false; + } + dst = names[current++]; + return true; + } +private: + std::vector names; + size_t current = 0; +}; + std::unique_ptr ZipFileDevice::list(std::string_view path) { - return nullptr; + std::vector names; + auto folder = std::string(path) + "/"; + for (const auto& [name, entry] : entries) { + if (name.find(folder) == 0) { + names.push_back(name); + } + } + return std::make_unique(std::move(names)); } From 9cc55e30e3a85ad65d65f2c13e17732314462c20 Mon Sep 17 00:00:00 2001 From: MihailRis Date: Mon, 24 Feb 2025 20:06:55 +0300 Subject: [PATCH 06/12] add separateFunc ZipFileDevice constructor argument --- src/io/devices/ZipFileDevice.cpp | 38 ++++++++++++++++++++------------ src/io/devices/ZipFileDevice.hpp | 12 +++++++++- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/src/io/devices/ZipFileDevice.cpp b/src/io/devices/ZipFileDevice.cpp index eb87844f..1e237a3f 100644 --- a/src/io/devices/ZipFileDevice.cpp +++ b/src/io/devices/ZipFileDevice.cpp @@ -94,9 +94,10 @@ void ZipFileDevice::findBlob(Entry& entry) { } } -ZipFileDevice::ZipFileDevice(std::unique_ptr filePtr) - : file(std::move(filePtr)) { - +ZipFileDevice::ZipFileDevice( + std::unique_ptr filePtr, FileSeparateFunc separateFunc +) + : file(std::move(filePtr)), separateFunc(std::move(separateFunc)) { // Searching for EOCD file->seekg(0, std::ios::end); std::streampos fileSize = file->tellg(); @@ -139,7 +140,6 @@ ZipFileDevice::ZipFileDevice(std::unique_ptr filePtr) } } - std::filesystem::path ZipFileDevice::resolve(std::string_view path) { throw std::runtime_error("unable to resolve filesystem path"); } @@ -160,17 +160,27 @@ std::unique_ptr ZipFileDevice::read(std::string_view path) { if (entry.blobOffset == 0) { findBlob(entry); } - file->seekg(entry.blobOffset); - - util::Buffer buffer(entry.compressedSize); - file->read(buffer.data(), buffer.size()); - auto memoryStream = std::make_unique(std::move(buffer)); - if (entry.compressionMethod == COMPRESSION_NONE) { - return memoryStream; - } else if (entry.compressionMethod == COMPRESSION_DEFLATE) { - return std::make_unique(std::move(memoryStream)); + std::unique_ptr srcStream; + if (separateFunc) { + // Create new istream for concurrent data reading + srcStream = separateFunc(); + srcStream->seekg(entry.blobOffset); } else { - throw std::runtime_error("unsupported compression method"); + // Read compressed data to memory if istream cannot be separated + file->seekg(entry.blobOffset); + util::Buffer buffer(entry.compressedSize); + file->read(buffer.data(), buffer.size()); + srcStream = std::make_unique(std::move(buffer)); + } + if (entry.compressionMethod == COMPRESSION_NONE) { + return srcStream; + } else if (entry.compressionMethod == COMPRESSION_DEFLATE) { + return std::make_unique(std::move(srcStream)); + } else { + throw std::runtime_error( + "unsupported compression method [" + + std::to_string(entry.compressionMethod) + "]" + ); } } diff --git a/src/io/devices/ZipFileDevice.hpp b/src/io/devices/ZipFileDevice.hpp index 88d2213c..2598c750 100644 --- a/src/io/devices/ZipFileDevice.hpp +++ b/src/io/devices/ZipFileDevice.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include "Device.hpp" @@ -25,7 +26,15 @@ namespace io { bool isDirectory = false; }; public: - ZipFileDevice(std::unique_ptr file); + using FileSeparateFunc = std::function()>; + + /// @param file ZIP file seekable istream + /// @param separateFunc Optional function that creates new seekable + /// istream for the ZIP file. + ZipFileDevice( + std::unique_ptr file, + FileSeparateFunc separateFunc = nullptr + ); std::filesystem::path resolve(std::string_view path) override; std::unique_ptr write(std::string_view path) override; @@ -41,6 +50,7 @@ namespace io { std::unique_ptr list(std::string_view path) override; private: std::unique_ptr file; + FileSeparateFunc separateFunc; std::unordered_map entries; Entry readEntry(); From edb581bee30f47a241db6865e4422e4f1304130c Mon Sep 17 00:00:00 2001 From: MihailRis Date: Mon, 24 Feb 2025 21:00:23 +0300 Subject: [PATCH 07/12] add io::copy, io::copy_all & fix ZipFileDevice::list --- src/io/devices/ZipFileDevice.cpp | 29 +++++++++++------- src/io/io.cpp | 50 ++++++++++++++++++++++++++++++++ src/io/io.hpp | 9 ++++++ 3 files changed, 77 insertions(+), 11 deletions(-) diff --git a/src/io/devices/ZipFileDevice.cpp b/src/io/devices/ZipFileDevice.cpp index 1e237a3f..0496fc57 100644 --- a/src/io/devices/ZipFileDevice.cpp +++ b/src/io/devices/ZipFileDevice.cpp @@ -60,6 +60,16 @@ ZipFileDevice::Entry ZipFileDevice::readEntry() { if (entry.diskNumberStart == 0xFF) { throw std::runtime_error("zip64 is not supported"); } + + for (size_t i = 0; i < entry.fileName.length(); i++) { + if (entry.fileName[i] == '\\') { + entry.fileName[i] = '/'; + } + } + if (entry.fileName[entry.fileName.length() - 1] == '/') { + entry.isDirectory = true; + entry.fileName = entry.fileName.substr(0, entry.fileName.length() - 1); + } return entry; } @@ -82,16 +92,6 @@ void ZipFileDevice::findBlob(Entry& entry) { // Skip extra field and file comment file->seekg(nameLength + extraFieldLength, std::ios::cur); entry.blobOffset = file->tellg(); - - for (size_t i = 0; i < entry.fileName.length(); i++) { - if (entry.fileName[i] == '\\') { - entry.fileName[i] = '/'; - } - } - if (entry.fileName[entry.fileName.length() - 1] == '/') { - entry.isDirectory = true; - entry.fileName = entry.fileName.substr(0, entry.fileName.length() - 1); - } } ZipFileDevice::ZipFileDevice( @@ -248,9 +248,16 @@ private: std::unique_ptr ZipFileDevice::list(std::string_view path) { std::vector names; auto folder = std::string(path) + "/"; + size_t folderLen = folder.length(); for (const auto& [name, entry] : entries) { if (name.find(folder) == 0) { - names.push_back(name); + size_t pos = name.find('/', folderLen); + if (pos == std::string::npos) { + names.push_back(name.substr(folderLen, pos - folderLen)); + } + if (pos == name.length() - 1) { + names.push_back(name.substr(folderLen, pos - folderLen)); + } } } return std::make_unique(std::move(names)); diff --git a/src/io/io.cpp b/src/io/io.cpp index bef794b4..8ee9f8ce 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -252,6 +252,56 @@ uint64_t io::remove_all(const io::path& file) { return device.removeAll(file.pathPart()); } +bool io::copy(const io::path& src, const io::path& dst) { + auto& srcDevice = io::require_device(src.entryPoint()); + auto& dstDevice = io::require_device(dst.entryPoint()); + if (!srcDevice.isfile(src.pathPart())) { + return false; + } + auto input = srcDevice.read(src.pathPart()); + auto output = dstDevice.write(dst.pathPart()); + size_t size = srcDevice.size(src.pathPart()); + std::vector buffer(16'384); + while (size > 0) { + size_t read = std::min(size, buffer.size()); + input->read(buffer.data(), read); + auto gcount = input->gcount(); + output->write(buffer.data(), gcount); + size -= gcount; + if (input->eof()) { + break; + } + if (!input->good() || !output->good()) { + return false; + } + } + return output->good(); +} + +uint64_t io::copy_all(const io::path& src, const io::path& dst) { + auto& srcDevice = io::require_device(src.entryPoint()); + auto& dstDevice = io::require_device(dst.entryPoint()); + auto dstPath = dst.pathPart(); + if (!dstDevice.isdir(dstPath) && !dstDevice.mkdirs(dstPath)) { + return 0; + } + uint64_t count = 0; + for (auto& srcSubFile : directory_iterator(src)) { + auto dstSubFile = dst / srcSubFile.name(); + auto srcSubPath = srcSubFile.pathPart(); + auto dstSubPath = dstSubFile.pathPart(); + if (srcDevice.isdir(srcSubPath)) { + if (!dstDevice.mkdirs(dstSubPath)) { + continue; + } + count += copy_all(srcSubFile, dstSubFile); + } else if (copy(srcSubFile, dstSubFile)) { + count++; + } + } + return count; +} + size_t io::file_size(const io::path& file) { auto& device = io::require_device(file.entryPoint()); return device.size(file.pathPart()); diff --git a/src/io/io.hpp b/src/io/io.hpp index 46582400..0b2f950d 100644 --- a/src/io/io.hpp +++ b/src/io/io.hpp @@ -187,6 +187,15 @@ namespace io { /// @brief Remove file or empty directory bool remove(const io::path& file); + /// @brief Copy src file to dst file + /// @param src source file path + /// @param dst destination file path + /// @return true if success + bool copy(const io::path& src, const io::path& dst); + + /// @brief Copy all files and directories in the folder recursively + uint64_t copy_all(const io::path& src, const io::path& dst); + /// @brief Remove all files and directories in the folder recursively uint64_t remove_all(const io::path& file); From 57d05bde57e5d65aa856d4b73cecb9f855abf42b Mon Sep 17 00:00:00 2001 From: MihailRis Date: Tue, 25 Feb 2025 02:03:01 +0300 Subject: [PATCH 08/12] add io::last_write_time & add io::write_zip --- src/io/devices/Device.hpp | 8 +- src/io/devices/StdfsDevice.cpp | 16 +-- src/io/devices/StdfsDevice.hpp | 1 + src/io/devices/ZipFileDevice.cpp | 166 +++++++++++++++++++++++++++++-- src/io/devices/ZipFileDevice.hpp | 3 + src/io/io.cpp | 13 +++ src/io/io.hpp | 7 ++ src/io/path.hpp | 2 + 8 files changed, 197 insertions(+), 19 deletions(-) diff --git a/src/io/devices/Device.hpp b/src/io/devices/Device.hpp index a3addaf5..cfbe4b1e 100644 --- a/src/io/devices/Device.hpp +++ b/src/io/devices/Device.hpp @@ -8,7 +8,6 @@ #include "../path.hpp" namespace io { - /// @brief Device interface for file system operations class Device { public: @@ -28,6 +27,9 @@ namespace io { /// @brief Get file size in bytes virtual size_t size(std::string_view path) = 0; + /// @brief Get file last write timestamp + virtual file_time_type lastWriteTime(std::string_view path) = 0; + /// @brief Check if file or directory exists virtual bool exists(std::string_view path) = 0; @@ -82,6 +84,10 @@ namespace io { return parent->size((root / path).pathPart()); } + file_time_type lastWriteTime(std::string_view path) override { + return parent->lastWriteTime((root / path).pathPart()); + } + bool exists(std::string_view path) override { return parent->exists((root / path).pathPart()); } diff --git a/src/io/devices/StdfsDevice.cpp b/src/io/devices/StdfsDevice.cpp index cb47c2e6..fc0602b5 100644 --- a/src/io/devices/StdfsDevice.cpp +++ b/src/io/devices/StdfsDevice.cpp @@ -45,23 +45,23 @@ std::unique_ptr StdfsDevice::read(std::string_view path) { } size_t StdfsDevice::size(std::string_view path) { - auto resolved = resolve(path); - return fs::file_size(resolved); + return fs::file_size(resolve(path)); +} + +file_time_type StdfsDevice::lastWriteTime(std::string_view path) { + return fs::last_write_time(resolve(path)); } bool StdfsDevice::exists(std::string_view path) { - auto resolved = resolve(path); - return fs::exists(resolved); + return fs::exists(resolve(path)); } bool StdfsDevice::isdir(std::string_view path) { - auto resolved = resolve(path); - return fs::is_directory(resolved); + return fs::is_directory(resolve(path)); } bool StdfsDevice::isfile(std::string_view path) { - auto resolved = resolve(path); - return fs::is_regular_file(resolved); + return fs::is_regular_file(resolve(path)); } bool StdfsDevice::mkdir(std::string_view path) { diff --git a/src/io/devices/StdfsDevice.hpp b/src/io/devices/StdfsDevice.hpp index ec1a9526..69b12234 100644 --- a/src/io/devices/StdfsDevice.hpp +++ b/src/io/devices/StdfsDevice.hpp @@ -10,6 +10,7 @@ namespace io { std::unique_ptr write(std::string_view path) override; std::unique_ptr read(std::string_view path) override; size_t size(std::string_view path) override; + file_time_type lastWriteTime(std::string_view path) override; bool exists(std::string_view path) override; bool isdir(std::string_view path) override; bool isfile(std::string_view path) override; diff --git a/src/io/devices/ZipFileDevice.cpp b/src/io/devices/ZipFileDevice.cpp index 0496fc57..5171ee83 100644 --- a/src/io/devices/ZipFileDevice.cpp +++ b/src/io/devices/ZipFileDevice.cpp @@ -11,6 +11,7 @@ static debug::Logger logger("zip-file"); using namespace io; +using namespace std::chrono; static constexpr uint32_t EOCD_SIGNATURE = 0x06054b50; static constexpr uint32_t CENTRAL_DIR_SIGNATURE = 0x02014b50; @@ -18,17 +19,52 @@ static constexpr uint32_t LOCAL_FILE_SIGNATURE = 0x04034b50; static constexpr uint32_t COMPRESSION_NONE = 0; static constexpr uint32_t COMPRESSION_DEFLATE = 8; -template -static T read_int(std::unique_ptr& file) { - T value = 0; - file->read(reinterpret_cast(&value), sizeof(value)); - return dataio::le2h(value); -} +namespace { + template + T read_int(std::unique_ptr& file) { + T value = 0; + file->read(reinterpret_cast(&value), sizeof(value)); + return dataio::le2h(value); + } -template -static void read_int(std::unique_ptr& file, T& value) { - file->read(reinterpret_cast(&value), sizeof(value)); - value = dataio::le2h(value); + template + void read_int(std::unique_ptr& file, T& value) { + file->read(reinterpret_cast(&value), sizeof(value)); + value = dataio::le2h(value); + } + file_time_type msdos_to_file_time(uint16_t date, uint16_t time) { + uint16_t year = ((date >> 9) & 0x7F) + 1980; + uint16_t month = (date >> 5) & 0x0F; + uint16_t day = date & 0x1F; + + uint16_t hours = (time >> 11) & 0x1F; + uint16_t minutes = (time >> 5) & 0x3F; + uint16_t seconds = (time & 0x1F) * 2; + + std::tm time_struct = {}; + time_struct.tm_year = year - 1900; + time_struct.tm_mon = month - 1; + time_struct.tm_mday = day; + time_struct.tm_hour = hours; + time_struct.tm_min = minutes; + time_struct.tm_sec = seconds; + time_struct.tm_isdst = -1; + + std::time_t time_t_value = std::mktime(&time_struct); + auto time_point = system_clock::from_time_t(time_t_value); + return file_time_type::clock::now() + (time_point - system_clock::now()); + } + + uint32_t to_ms_dos_timestamp(const file_time_type& fileTime) { + auto timePoint = time_point_cast( + fileTime - file_time_type::clock::now() + system_clock::now() + ); + std::time_t timeT = system_clock::to_time_t(timePoint); + std::tm tm = *std::localtime(&timeT); + uint16_t date = (tm.tm_year - 80) << 9 | (tm.tm_mon + 1) << 5 | tm.tm_mday; + uint16_t time = (tm.tm_hour << 11) | (tm.tm_min << 5) | (tm.tm_sec / 2); + return (date << 16) | time; + } } ZipFileDevice::Entry ZipFileDevice::readEntry() { @@ -192,6 +228,14 @@ size_t ZipFileDevice::size(std::string_view path) { return found->second.uncompressedSize; } +file_time_type ZipFileDevice::lastWriteTime(std::string_view path) { + const auto& found = entries.find(std::string(path)); + if (found == entries.end()) { + return file_time_type::min(); + } + return msdos_to_file_time(found->second.modDate, found->second.modTime); +} + bool ZipFileDevice::exists(std::string_view path) { return entries.find(std::string(path)) != entries.end(); } @@ -262,3 +306,105 @@ std::unique_ptr ZipFileDevice::list(std::string_view path) { } return std::make_unique(std::move(names)); } + +#include "io/io.hpp" +#include "coders/byte_utils.hpp" + +static void write_headers( + std::ostream& file, + const std::string& name, + size_t srcSize, + size_t compressedSize, + uint32_t crc, + const file_time_type& modificationTime, + ByteBuilder& centralDir +) { + auto timestamp = to_ms_dos_timestamp(modificationTime); + ByteBuilder header; + header.putInt32(LOCAL_FILE_SIGNATURE); + header.putInt16(10); // version + header.putInt16(0); // flags + header.putInt16(0); // compression method + header.putInt32(timestamp); // last modification datetime + header.putInt32(crc); // crc32 + header.putInt32(compressedSize); + header.putInt32(srcSize); + header.putInt16(name.length()); + header.putInt16(0); // extra field length + header.put(reinterpret_cast(name.data()), name.length()); + + size_t localHeaderOffset = file.tellp(); + file.write(reinterpret_cast(header.data()), header.size()); + + centralDir.putInt32(CENTRAL_DIR_SIGNATURE); + centralDir.putInt16(10); // version + centralDir.putInt16(0); // version + centralDir.putInt16(0); // flags + centralDir.putInt16(0); // compression method + centralDir.putInt32(timestamp); // last modification datetime + centralDir.putInt32(crc); // crc32 + centralDir.putInt32(compressedSize); + centralDir.putInt32(srcSize); + centralDir.putInt16(name.length()); + centralDir.putInt16(0); // extra field length + centralDir.putInt16(0); // file comment length + centralDir.putInt16(0); // disk number start + centralDir.putInt16(0); // internal attributes + centralDir.putInt32(0); // external attributes + centralDir.putInt32(localHeaderOffset); // local header offset + centralDir.put(reinterpret_cast(name.data()), name.length()); +} + +static size_t write_zip( + const std::string& root, + const path& folder, + std::ostream& file, + ByteBuilder& centralDir +) { + size_t entries = 0; + ByteBuilder localHeader; + for (const auto& entry : io::directory_iterator(folder)) { + auto name = entry.pathPart().substr(root.length() + 1); + auto modificationTime = io::last_write_time(entry); + if (io::is_directory(entry)) { + name = name + "/"; + write_headers(file, name, 0, 0, 0, modificationTime, centralDir); + entries += write_zip(root, entry, file, centralDir) + 1; + } else { + auto data = io::read_bytes_buffer(entry); + uint32_t crc = crc32(0, data.data(), data.size()); + write_headers( + file, + name, + data.size(), + data.size(), + crc, + modificationTime, + centralDir + ); + file.write(reinterpret_cast(data.data()), data.size()); + entries++; + } + } + return entries; +} + +void io::write_zip(const path& folder, const path& file) { + ByteBuilder centralDir; + auto out = io::write(file); + size_t entries = write_zip(folder.pathPart(), folder, *out, centralDir); + + size_t centralDirOffset = out->tellp(); + out->write(reinterpret_cast(centralDir.data()), centralDir.size()); + + ByteBuilder eocd; + eocd.putInt32(EOCD_SIGNATURE); + eocd.putInt16(0); // disk number + eocd.putInt16(0); // central dir disk + eocd.putInt16(entries); // num entries + eocd.putInt16(entries); // total entries + eocd.putInt32(centralDir.size()); // central dir size + eocd.putInt32(centralDirOffset); // central dir offset + eocd.putInt16(0); // comment length + out->write(reinterpret_cast(eocd.data()), eocd.size()); +} diff --git a/src/io/devices/ZipFileDevice.hpp b/src/io/devices/ZipFileDevice.hpp index 2598c750..69042821 100644 --- a/src/io/devices/ZipFileDevice.hpp +++ b/src/io/devices/ZipFileDevice.hpp @@ -40,6 +40,7 @@ namespace io { std::unique_ptr write(std::string_view path) override; std::unique_ptr read(std::string_view path) override; size_t size(std::string_view path) override; + io::file_time_type lastWriteTime(std::string_view path) override; bool exists(std::string_view path) override; bool isdir(std::string_view path) override; bool isfile(std::string_view path) override; @@ -56,4 +57,6 @@ namespace io { Entry readEntry(); void findBlob(Entry& entry); }; + + void write_zip(const path& folder, const path& file); } diff --git a/src/io/io.cpp b/src/io/io.cpp index 8ee9f8ce..2f4eab11 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -107,6 +107,14 @@ bool io::read(const io::path& filename, char* data, size_t size) { return stream->good(); } +std::unique_ptr io::write(const io::path& file) { + auto device = io::get_device(file.entryPoint()); + if (device == nullptr) { + throw std::runtime_error("io-device not found: " + file.entryPoint()); + } + return device->write(file.pathPart()); +} + std::unique_ptr io::read(const io::path& filename) { auto device = io::get_device(filename.entryPoint()); if (device == nullptr) { @@ -307,6 +315,11 @@ size_t io::file_size(const io::path& file) { return device.size(file.pathPart()); } +io::file_time_type io::last_write_time(const io::path& file) { + auto& device = io::require_device(file.entryPoint()); + return device.lastWriteTime(file.pathPart()); +} + std::filesystem::path io::resolve(const io::path& file) { auto device = io::get_device(file.entryPoint()); if (device == nullptr) { diff --git a/src/io/io.hpp b/src/io/io.hpp index 0b2f950d..5cc224be 100644 --- a/src/io/io.hpp +++ b/src/io/io.hpp @@ -142,6 +142,10 @@ namespace io { bool compressed = false ); + /// @brief Open file for writing + /// @throw std::runtime_error if file cannot be opened + std::unique_ptr write(const io::path& file); + /// @brief Open file for reading /// @throw std::runtime_error if file cannot be opened std::unique_ptr read(const io::path& file); @@ -202,6 +206,9 @@ namespace io { /// @brief Get file size in bytes size_t file_size(const io::path& file); + /// @brief Get file last write time timestamp + file_time_type last_write_time(const io::path& file); + std::filesystem::path resolve(const io::path& file); /// @brief Check if file is one of the supported data interchange formats diff --git a/src/io/path.hpp b/src/io/path.hpp index 5abc110e..538431b7 100644 --- a/src/io/path.hpp +++ b/src/io/path.hpp @@ -5,6 +5,8 @@ #include namespace io { + using file_time_type = std::filesystem::file_time_type; + /// @brief Access violation error class access_error : public std::runtime_error { public: From 226ee40b5acf7a9fbae81a9bca9598df8b775532 Mon Sep 17 00:00:00 2001 From: MihailRis Date: Tue, 25 Feb 2025 22:42:57 +0300 Subject: [PATCH 09/12] add output versions of deflate and memory streams --- src/io/deflate_istream.hpp | 154 ++++++++++++++++++------------------- src/io/deflate_ostream.hpp | 118 ++++++++++++++++++++++++++++ src/io/memory_istream.hpp | 42 +++++----- src/io/memory_ostream.hpp | 105 +++++++++++++++++++++++++ 4 files changed, 320 insertions(+), 99 deletions(-) create mode 100644 src/io/deflate_ostream.hpp create mode 100644 src/io/memory_ostream.hpp diff --git a/src/io/deflate_istream.hpp b/src/io/deflate_istream.hpp index ecbc738a..0dd8f6de 100644 --- a/src/io/deflate_istream.hpp +++ b/src/io/deflate_istream.hpp @@ -7,95 +7,93 @@ #include #include -class deflate_istream : public std::istream { +class deflate_istreambuf : public std::streambuf { public: - explicit deflate_istream(std::unique_ptr src) - : std::istream(&buf), source(std::move(src)), buf(*source) {} + explicit deflate_istreambuf(std::istream& src) : src(src) { + zstream.zalloc = Z_NULL; + zstream.zfree = Z_NULL; + zstream.opaque = Z_NULL; + zstream.avail_in = 0; + zstream.next_in = Z_NULL; + + int ret = inflateInit2(&zstream, -15); + if (ret != Z_OK) { + throw std::runtime_error("zlib init failed"); + } + } -private: - class deflate_streambuf : public std::streambuf { - public: - explicit deflate_streambuf(std::istream& src) : src(src) { - zstream.zalloc = Z_NULL; - zstream.zfree = Z_NULL; - zstream.opaque = Z_NULL; - zstream.avail_in = 0; - zstream.next_in = Z_NULL; - - int ret = inflateInit2(&zstream, -15); - if (ret != Z_OK) { - throw std::runtime_error("zlib init failed"); + ~deflate_istreambuf() { + inflateEnd(&zstream); + } + + deflate_istreambuf(const deflate_istreambuf&) = delete; + deflate_istreambuf& operator=(const deflate_istreambuf&) = delete; + +protected: + int_type underflow() override { + if (gptr() < egptr()) { + return traits_type::to_int_type(*gptr()); + } + + if (eof) { + return traits_type::eof(); + } + + zstream.next_out = reinterpret_cast(outBuf.data()); + zstream.avail_out = outBuf.size(); + + do { + if (zstream.avail_in == 0) { + src.read(inBuf.data(), inBuf.size()); + zstream.avail_in = static_cast(src.gcount()); + zstream.next_in = reinterpret_cast(inBuf.data()); + + if (src.bad()) { + return traits_type::eof(); + } } - } - ~deflate_streambuf() { - inflateEnd(&zstream); - } + int ret = inflate(&zstream, Z_NO_FLUSH); + if (ret == Z_STREAM_END) { + eof = true; + } else if (ret != Z_OK) { + if (ret == Z_BUF_ERROR && zstream.avail_out == outBuf.size()) { + continue; + } + return traits_type::eof(); + } - deflate_streambuf(const deflate_streambuf&) = delete; - deflate_streambuf& operator=(const deflate_streambuf&) = delete; - - protected: - int_type underflow() override { - if (gptr() < egptr()) { + const auto decompressed = outBuf.size() - zstream.avail_out; + if (decompressed > 0) { + setg(outBuf.data(), + outBuf.data(), + outBuf.data() + decompressed); return traits_type::to_int_type(*gptr()); } if (eof) { return traits_type::eof(); } + } while (zstream.avail_in > 0 || !src.eof()); - zstream.next_out = reinterpret_cast(outBuf.data()); - zstream.avail_out = outBuf.size(); + return traits_type::eof(); + } +private: + static constexpr size_t BUFFER_SIZE = 16384; - do { - if (zstream.avail_in == 0) { - src.read(inBuf.data(), inBuf.size()); - zstream.avail_in = static_cast(src.gcount()); - zstream.next_in = reinterpret_cast(inBuf.data()); - - if (src.bad()) { - return traits_type::eof(); - } - } - - int ret = inflate(&zstream, Z_NO_FLUSH); - if (ret == Z_STREAM_END) { - eof = true; - } else if (ret != Z_OK) { - if (ret == Z_BUF_ERROR && zstream.avail_out == outBuf.size()) { - continue; - } - return traits_type::eof(); - } - - const auto decompressed = outBuf.size() - zstream.avail_out; - if (decompressed > 0) { - setg(outBuf.data(), - outBuf.data(), - outBuf.data() + decompressed); - return traits_type::to_int_type(*gptr()); - } - - if (eof) { - return traits_type::eof(); - } - - } while (zstream.avail_in > 0 || !src.eof()); - - return traits_type::eof(); - } - - private: - static constexpr size_t BUFFER_SIZE = 16384; - - std::istream& src; - z_stream zstream {}; - std::array inBuf {}; - std::array outBuf {}; - bool eof = false; - }; - - std::unique_ptr source; - deflate_streambuf buf; + std::istream& src; + z_stream zstream {}; + std::array inBuf {}; + std::array outBuf {}; + bool eof = false; +}; + +class deflate_istream : public std::istream { +public: + explicit deflate_istream(std::unique_ptr src) + : std::istream(&buffer), source(std::move(src)), buffer(*source) {} + +private: + std::unique_ptr source; + deflate_istreambuf buffer; }; diff --git a/src/io/deflate_ostream.hpp b/src/io/deflate_ostream.hpp new file mode 100644 index 00000000..39678843 --- /dev/null +++ b/src/io/deflate_ostream.hpp @@ -0,0 +1,118 @@ +#include +#include +#include +#include + +class deflate_ostreambuf : public std::streambuf { +public: + deflate_ostreambuf(std::ostream& dest, int level = Z_DEFAULT_COMPRESSION) + : dest(dest) { + zstream.zalloc = Z_NULL; + zstream.zfree = Z_NULL; + zstream.opaque = Z_NULL; + int ret = deflateInit2( + &zstream, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY + ); + if (ret != Z_OK) { + throw std::runtime_error("zlib deflate initialization failed"); + } + inBuffer = std::make_unique(bufferSize); + outBuffer = std::make_unique(outBufferSize); + + setp(inBuffer.get(), inBuffer.get() + bufferSize - 1); + } + + ~deflate_ostreambuf() { + try { + close(); + } catch (...) { + std::cerr << "error in zlib output stream finalization" << std::endl; + } + } + + bool close() { + overflow(EOF); + + // Finalize the deflate stream + zstream.avail_in = 0; + zstream.next_in = nullptr; + int ret; + do { + zstream.avail_out = outBufferSize; + zstream.next_out = reinterpret_cast(outBuffer.get()); + ret = deflate(&zstream, Z_FINISH); + if (ret == Z_STREAM_ERROR) { + break; + } + size_t compressed_size = outBufferSize - zstream.avail_out; + dest.write(outBuffer.get(), compressed_size); + } while (ret != Z_STREAM_END); + + deflateEnd(&zstream); + return true; + } + +protected: + int overflow(int c) override { + if (c != EOF) { + *pptr() = static_cast(c); + pbump(1); + } + + if (process_input() == EOF) { + return EOF; + } + + return c != EOF ? 0 : EOF; + } + + int sync() override { + if (process_input(Z_SYNC_FLUSH) == EOF) { + return -1; + } + dest.flush(); + return 0; + } + +private: + static const size_t bufferSize = 512; + static const size_t outBufferSize = bufferSize * 2; + + std::ostream& dest; + z_stream zstream {}; + std::unique_ptr inBuffer; + std::unique_ptr outBuffer; + + int process_input(int flush = Z_NO_FLUSH) { + size_t input_size = pptr() - pbase(); + zstream.avail_in = static_cast(input_size); + zstream.next_in = reinterpret_cast(pbase()); + + int ret; + do { + zstream.avail_out = outBufferSize; + zstream.next_out = reinterpret_cast(outBuffer.get()); + ret = deflate(&zstream, flush); + if (ret == Z_STREAM_ERROR) { + return EOF; + } + size_t compressed_size = outBufferSize - zstream.avail_out; + dest.write(outBuffer.get(), compressed_size); + if (!dest) { + return EOF; + } + } while (zstream.avail_out == 0); + + setp(inBuffer.get(), inBuffer.get() + bufferSize - 1); + return 0; + } +}; + +class deflate_ostream : public std::ostream { +public: + explicit deflate_ostream(std::ostream& dest, int level = Z_DEFAULT_COMPRESSION) + : std::ostream(&buffer), buffer(dest, level) {} + +private: + deflate_ostreambuf buffer; +}; diff --git a/src/io/memory_istream.hpp b/src/io/memory_istream.hpp index 09f6f418..0b2131d6 100644 --- a/src/io/memory_istream.hpp +++ b/src/io/memory_istream.hpp @@ -3,32 +3,32 @@ #include #include "util/Buffer.hpp" +class memory_streambuf : public std::streambuf { +public: + explicit memory_streambuf(util::Buffer buffer) + : buffer(std::move(buffer)) { + char* base = this->buffer.data(); + char* end = base + this->buffer.size(); + setg(base, base, end); + } + + memory_streambuf(const memory_streambuf&) = delete; + memory_streambuf& operator=(const memory_streambuf&) = delete; + +protected: + int_type underflow() override { + return traits_type::eof(); + } + +private: + util::Buffer buffer; +}; + class memory_istream : public std::istream { public: explicit memory_istream(util::Buffer buffer) : std::istream(&buf), buf(std::move(buffer)) {} private: - class memory_streambuf : public std::streambuf { - public: - explicit memory_streambuf(util::Buffer buffer) - : buffer(std::move(buffer)) { - char* base = this->buffer.data(); - char* end = base + this->buffer.size(); - setg(base, base, end); - } - - memory_streambuf(const memory_streambuf&) = delete; - memory_streambuf& operator=(const memory_streambuf&) = delete; - - protected: - int_type underflow() override { - return traits_type::eof(); - } - - private: - util::Buffer buffer; - }; - memory_streambuf buf; }; diff --git a/src/io/memory_ostream.hpp b/src/io/memory_ostream.hpp new file mode 100644 index 00000000..76ad8623 --- /dev/null +++ b/src/io/memory_ostream.hpp @@ -0,0 +1,105 @@ +#include +#include +#include +#include +#include + +#include "util/Buffer.hpp" + +class memory_buffer : public std::streambuf { +public: + explicit memory_buffer(size_t initial_size = 64) + : capacity(initial_size), + buffer(std::make_unique(initial_size)) { + setp(buffer.get(), buffer.get() + initial_size); + } + + std::string_view view() const { + return std::string_view(pbase(), pptr() - pbase()); + } + + util::Buffer release() { + return {std::move(buffer), size()}; + } + + size_t size() const { + return pptr()-pbase(); + } +protected: + int_type overflow(int_type c) override { + if (c == traits_type::eof()) + return traits_type::eof(); + + const size_t data_size = pptr() - pbase(); + const size_t new_capacity = std::max(capacity * 2, data_size + 1); + auto new_buffer = std::make_unique(new_capacity); + + std::memcpy(new_buffer.get(), pbase(), data_size); + + buffer = std::move(new_buffer); + capacity = new_capacity; + + setp(buffer.get(), buffer.get() + new_capacity); + pbump(data_size); + + *pptr() = traits_type::to_char_type(c); + pbump(1); + + return c; + } + + std::streamsize xsputn(const char* s, std::streamsize count) override { + const std::streamsize avail = epptr() - pptr(); + + if (avail >= count) { + std::memcpy(pptr(), s, count); + pbump(count); + return count; + } + + std::streamsize written = 0; + if (avail > 0) { + std::memcpy(pptr(), s, avail); + written += avail; + s += avail; + count -= avail; + pbump(avail); + } + + const size_t data_size = pptr() - pbase(); + const size_t required_capacity = data_size + count; + const size_t new_capacity = std::max(capacity * 2, required_capacity); + auto new_buffer = std::make_unique(new_capacity); + + std::memcpy(new_buffer.get(), pbase(), data_size); + std::memcpy(new_buffer.get() + data_size, s, count); + + buffer = std::move(new_buffer); + capacity = new_capacity; + + setp(buffer.get(), buffer.get() + new_capacity); + pbump(data_size + count); + written += count; + + return written; + } +private: + std::unique_ptr buffer; + size_t capacity; +}; + +class memory_ostream : public std::ostream { +public: + explicit memory_ostream(size_t initialCapacity = 64) + : std::ostream(&buffer), buffer(initialCapacity) {} + + std::string_view view() const { + return buffer.view(); + } + + util::Buffer release() { + return buffer.release(); + } +private: + memory_buffer buffer; +}; From bd614c5f2eee3d7bcf67c14dd3c676c6a15f5ebe Mon Sep 17 00:00:00 2001 From: MihailRis Date: Tue, 25 Feb 2025 22:49:31 +0300 Subject: [PATCH 10/12] enable deflate compression in io::write_zip --- src/io/devices/ZipFileDevice.cpp | 46 ++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/src/io/devices/ZipFileDevice.cpp b/src/io/devices/ZipFileDevice.cpp index 5171ee83..c5be9b57 100644 --- a/src/io/devices/ZipFileDevice.cpp +++ b/src/io/devices/ZipFileDevice.cpp @@ -4,7 +4,9 @@ #include "debug/Logger.hpp" #include "io/memory_istream.hpp" +#include "io/memory_ostream.hpp" #include "io/deflate_istream.hpp" +#include "io/deflate_ostream.hpp" #include "util/data_io.hpp" #include "util/Buffer.hpp" @@ -316,6 +318,7 @@ static void write_headers( size_t srcSize, size_t compressedSize, uint32_t crc, + int compressionMethod, const file_time_type& modificationTime, ByteBuilder& centralDir ) { @@ -324,7 +327,7 @@ static void write_headers( header.putInt32(LOCAL_FILE_SIGNATURE); header.putInt16(10); // version header.putInt16(0); // flags - header.putInt16(0); // compression method + header.putInt16(compressionMethod); // compression method header.putInt32(timestamp); // last modification datetime header.putInt32(crc); // crc32 header.putInt32(compressedSize); @@ -340,7 +343,7 @@ static void write_headers( centralDir.putInt16(10); // version centralDir.putInt16(0); // version centralDir.putInt16(0); // flags - centralDir.putInt16(0); // compression method + centralDir.putInt16(compressionMethod); // compression method centralDir.putInt32(timestamp); // last modification datetime centralDir.putInt32(crc); // crc32 centralDir.putInt32(compressedSize); @@ -368,21 +371,42 @@ static size_t write_zip( auto modificationTime = io::last_write_time(entry); if (io::is_directory(entry)) { name = name + "/"; - write_headers(file, name, 0, 0, 0, modificationTime, centralDir); - entries += write_zip(root, entry, file, centralDir) + 1; - } else { - auto data = io::read_bytes_buffer(entry); - uint32_t crc = crc32(0, data.data(), data.size()); write_headers( file, name, - data.size(), - data.size(), - crc, + 0, + 0, + 0, + COMPRESSION_NONE, modificationTime, centralDir ); - file.write(reinterpret_cast(data.data()), data.size()); + entries += write_zip(root, entry, file, centralDir) + 1; + } else { + auto uncompressed = io::read_bytes_buffer(entry); + uint32_t crc = crc32(0, uncompressed.data(), uncompressed.size()); + memory_ostream memoryStream; + { + deflate_ostream deflateStream(memoryStream); + deflateStream.write( + reinterpret_cast(uncompressed.data()), + uncompressed.size() + ); + deflateStream.flush(); + } + auto data = memoryStream.release(); + size_t dataSize = data.size(); + write_headers( + file, + name, + uncompressed.size(), + dataSize, + crc, + COMPRESSION_DEFLATE, + modificationTime, + centralDir + ); + file.write(reinterpret_cast(data.data()), dataSize); entries++; } } From b52cc1aea07f9c4a2caa78bb0a16f84fe939a8ce Mon Sep 17 00:00:00 2001 From: MihailRis Date: Tue, 25 Feb 2025 23:04:31 +0300 Subject: [PATCH 11/12] fix ZipFileDevice::list for root path --- src/io/devices/ZipFileDevice.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/io/devices/ZipFileDevice.cpp b/src/io/devices/ZipFileDevice.cpp index c5be9b57..8fd2e977 100644 --- a/src/io/devices/ZipFileDevice.cpp +++ b/src/io/devices/ZipFileDevice.cpp @@ -293,10 +293,19 @@ private: std::unique_ptr ZipFileDevice::list(std::string_view path) { std::vector names; - auto folder = std::string(path) + "/"; - size_t folderLen = folder.length(); - for (const auto& [name, entry] : entries) { - if (name.find(folder) == 0) { + if (path.empty()) { + for (const auto& [name, entry] : entries) { + if (name.find('/') == std::string::npos) { + names.push_back(name); + } + } + } else { + auto folder = std::string(path) + "/"; + size_t folderLen = folder.length(); + for (const auto& [name, entry] : entries) { + if (name.find(folder) != 0) { + continue; + } size_t pos = name.find('/', folderLen); if (pos == std::string::npos) { names.push_back(name.substr(folderLen, pos - folderLen)); From 7facc33a7e27fda3de587ba539ae3623da75e738 Mon Sep 17 00:00:00 2001 From: MihailRis Date: Wed, 26 Feb 2025 00:22:57 +0300 Subject: [PATCH 12/12] cleanup --- src/io/devices/ZipFileDevice.cpp | 147 +++++++++++++++---------------- src/io/memory_ostream.hpp | 2 +- 2 files changed, 74 insertions(+), 75 deletions(-) diff --git a/src/io/devices/ZipFileDevice.cpp b/src/io/devices/ZipFileDevice.cpp index 8fd2e977..15b8809c 100644 --- a/src/io/devices/ZipFileDevice.cpp +++ b/src/io/devices/ZipFileDevice.cpp @@ -81,19 +81,19 @@ ZipFileDevice::Entry ZipFileDevice::readEntry() { read_int(file, entry.crc32); read_int(file, entry.compressedSize); read_int(file, entry.uncompressedSize); - auto fileNameLength = read_int(file); - auto extraFieldLength = read_int(file); - auto fileCommentLength = read_int(file); + auto filename_len = read_int(file); + auto extra_field_len = read_int(file); + auto file_comment_len = read_int(file); read_int(file, entry.diskNumberStart); read_int(file, entry.internalAttributes); read_int(file, entry.externalAttributes); read_int(file, entry.localHeaderOffset); - entry.fileName.resize(fileNameLength, '\0'); - file->read(entry.fileName.data(), fileNameLength); + entry.fileName.resize(filename_len, '\0'); + file->read(entry.fileName.data(), filename_len); // Skip extra field and file comment - file->seekg(extraFieldLength + fileCommentLength, std::ios::cur); + file->seekg(extra_field_len + file_comment_len, std::ios::cur); if (entry.diskNumberStart == 0xFF) { throw std::runtime_error("zip64 is not supported"); @@ -124,11 +124,11 @@ void ZipFileDevice::findBlob(Entry& entry) { read_int(file); // crc32 read_int(file); // compressed size read_int(file); // uncompressed size - auto nameLength = read_int(file); - auto extraFieldLength = read_int(file); + auto name_len = read_int(file); + auto extra_field_len = read_int(file); // Skip extra field and file comment - file->seekg(nameLength + extraFieldLength, std::ios::cur); + file->seekg(name_len + extra_field_len, std::ios::cur); entry.blobOffset = file->tellg(); } @@ -138,10 +138,10 @@ ZipFileDevice::ZipFileDevice( : file(std::move(filePtr)), separateFunc(std::move(separateFunc)) { // Searching for EOCD file->seekg(0, std::ios::end); - std::streampos fileSize = file->tellg(); + std::streampos file_size = file->tellg(); bool foundEOCD = false; - for (int pos = static_cast(fileSize)-4; pos >= 0; --pos) { + for (int pos = static_cast(file_size)-4; pos >= 0; --pos) { file->seekg(pos); if (read_int(file) == EOCD_SIGNATURE) { foundEOCD = true; @@ -156,14 +156,14 @@ ZipFileDevice::ZipFileDevice( read_int(file); // diskNumber read_int(file); // centralDirDisk read_int(file); // numEntriesThisDisk - auto totalEntries = read_int(file); + auto total_entries = read_int(file); read_int(file); // centralDirSize - auto centralDirOffset = read_int(file); + auto central_dir_offset = read_int(file); read_int(file); // commentLength - file->seekg(centralDirOffset); + file->seekg(central_dir_offset); - for (uint16_t i = 0; i < totalEntries; i++) { + for (uint16_t i = 0; i < total_entries; i++) { if (read_int(file) != CENTRAL_DIR_SIGNATURE) { logger.error() << "invalid central directory entry"; break; @@ -198,22 +198,22 @@ std::unique_ptr ZipFileDevice::read(std::string_view path) { if (entry.blobOffset == 0) { findBlob(entry); } - std::unique_ptr srcStream; + std::unique_ptr src_stream; if (separateFunc) { // Create new istream for concurrent data reading - srcStream = separateFunc(); - srcStream->seekg(entry.blobOffset); + src_stream = separateFunc(); + src_stream->seekg(entry.blobOffset); } else { // Read compressed data to memory if istream cannot be separated file->seekg(entry.blobOffset); util::Buffer buffer(entry.compressedSize); file->read(buffer.data(), buffer.size()); - srcStream = std::make_unique(std::move(buffer)); + src_stream = std::make_unique(std::move(buffer)); } if (entry.compressionMethod == COMPRESSION_NONE) { - return srcStream; + return src_stream; } else if (entry.compressionMethod == COMPRESSION_DEFLATE) { - return std::make_unique(std::move(srcStream)); + return std::make_unique(std::move(src_stream)); } else { throw std::runtime_error( "unsupported compression method [" + @@ -301,17 +301,17 @@ std::unique_ptr ZipFileDevice::list(std::string_view path) { } } else { auto folder = std::string(path) + "/"; - size_t folderLen = folder.length(); + size_t folder_len = folder.length(); for (const auto& [name, entry] : entries) { if (name.find(folder) != 0) { continue; } - size_t pos = name.find('/', folderLen); + size_t pos = name.find('/', folder_len); if (pos == std::string::npos) { - names.push_back(name.substr(folderLen, pos - folderLen)); + names.push_back(name.substr(folder_len, pos - folder_len)); } if (pos == name.length() - 1) { - names.push_back(name.substr(folderLen, pos - folderLen)); + names.push_back(name.substr(folder_len, pos - folder_len)); } } } @@ -324,60 +324,59 @@ std::unique_ptr ZipFileDevice::list(std::string_view path) { static void write_headers( std::ostream& file, const std::string& name, - size_t srcSize, - size_t compressedSize, + size_t source_Size, + size_t compressed_size, uint32_t crc, - int compressionMethod, - const file_time_type& modificationTime, - ByteBuilder& centralDir + int compression_method, + const file_time_type& last_write_time, + ByteBuilder& central_dir ) { - auto timestamp = to_ms_dos_timestamp(modificationTime); + auto timestamp = to_ms_dos_timestamp(last_write_time); ByteBuilder header; header.putInt32(LOCAL_FILE_SIGNATURE); header.putInt16(10); // version header.putInt16(0); // flags - header.putInt16(compressionMethod); // compression method + header.putInt16(compression_method); // compression method header.putInt32(timestamp); // last modification datetime header.putInt32(crc); // crc32 - header.putInt32(compressedSize); - header.putInt32(srcSize); + header.putInt32(compressed_size); + header.putInt32(source_Size); header.putInt16(name.length()); header.putInt16(0); // extra field length header.put(reinterpret_cast(name.data()), name.length()); - size_t localHeaderOffset = file.tellp(); + size_t local_header_offset = file.tellp(); file.write(reinterpret_cast(header.data()), header.size()); - centralDir.putInt32(CENTRAL_DIR_SIGNATURE); - centralDir.putInt16(10); // version - centralDir.putInt16(0); // version - centralDir.putInt16(0); // flags - centralDir.putInt16(compressionMethod); // compression method - centralDir.putInt32(timestamp); // last modification datetime - centralDir.putInt32(crc); // crc32 - centralDir.putInt32(compressedSize); - centralDir.putInt32(srcSize); - centralDir.putInt16(name.length()); - centralDir.putInt16(0); // extra field length - centralDir.putInt16(0); // file comment length - centralDir.putInt16(0); // disk number start - centralDir.putInt16(0); // internal attributes - centralDir.putInt32(0); // external attributes - centralDir.putInt32(localHeaderOffset); // local header offset - centralDir.put(reinterpret_cast(name.data()), name.length()); + central_dir.putInt32(CENTRAL_DIR_SIGNATURE); + central_dir.putInt16(10); // version + central_dir.putInt16(0); // version + central_dir.putInt16(0); // flags + central_dir.putInt16(compression_method); // compression method + central_dir.putInt32(timestamp); // last modification datetime + central_dir.putInt32(crc); // crc32 + central_dir.putInt32(compressed_size); + central_dir.putInt32(source_Size); + central_dir.putInt16(name.length()); + central_dir.putInt16(0); // extra field length + central_dir.putInt16(0); // file comment length + central_dir.putInt16(0); // disk number start + central_dir.putInt16(0); // internal attributes + central_dir.putInt32(0); // external attributes + central_dir.putInt32(local_header_offset); // local header offset + central_dir.put(reinterpret_cast(name.data()), name.length()); } static size_t write_zip( const std::string& root, const path& folder, std::ostream& file, - ByteBuilder& centralDir + ByteBuilder& central_dir ) { size_t entries = 0; - ByteBuilder localHeader; for (const auto& entry : io::directory_iterator(folder)) { auto name = entry.pathPart().substr(root.length() + 1); - auto modificationTime = io::last_write_time(entry); + auto last_write_time = io::last_write_time(entry); if (io::is_directory(entry)) { name = name + "/"; write_headers( @@ -387,35 +386,35 @@ static size_t write_zip( 0, 0, COMPRESSION_NONE, - modificationTime, - centralDir + last_write_time, + central_dir ); - entries += write_zip(root, entry, file, centralDir) + 1; + entries += write_zip(root, entry, file, central_dir) + 1; } else { auto uncompressed = io::read_bytes_buffer(entry); uint32_t crc = crc32(0, uncompressed.data(), uncompressed.size()); - memory_ostream memoryStream; + memory_ostream memory_stream; { - deflate_ostream deflateStream(memoryStream); - deflateStream.write( + deflate_ostream deflate_stream(memory_stream); + deflate_stream.write( reinterpret_cast(uncompressed.data()), uncompressed.size() ); - deflateStream.flush(); + deflate_stream.flush(); } - auto data = memoryStream.release(); - size_t dataSize = data.size(); + auto data = memory_stream.release(); + size_t data_size = data.size(); write_headers( file, name, uncompressed.size(), - dataSize, + data_size, crc, COMPRESSION_DEFLATE, - modificationTime, - centralDir + last_write_time, + central_dir ); - file.write(reinterpret_cast(data.data()), dataSize); + file.write(reinterpret_cast(data.data()), data_size); entries++; } } @@ -423,12 +422,12 @@ static size_t write_zip( } void io::write_zip(const path& folder, const path& file) { - ByteBuilder centralDir; + ByteBuilder central_dir; auto out = io::write(file); - size_t entries = write_zip(folder.pathPart(), folder, *out, centralDir); + size_t entries = write_zip(folder.pathPart(), folder, *out, central_dir); - size_t centralDirOffset = out->tellp(); - out->write(reinterpret_cast(centralDir.data()), centralDir.size()); + size_t central_dir_offset = out->tellp(); + out->write(reinterpret_cast(central_dir.data()), central_dir.size()); ByteBuilder eocd; eocd.putInt32(EOCD_SIGNATURE); @@ -436,8 +435,8 @@ void io::write_zip(const path& folder, const path& file) { eocd.putInt16(0); // central dir disk eocd.putInt16(entries); // num entries eocd.putInt16(entries); // total entries - eocd.putInt32(centralDir.size()); // central dir size - eocd.putInt32(centralDirOffset); // central dir offset + eocd.putInt32(central_dir.size()); // central dir size + eocd.putInt32(central_dir_offset); // central dir offset eocd.putInt16(0); // comment length out->write(reinterpret_cast(eocd.data()), eocd.size()); } diff --git a/src/io/memory_ostream.hpp b/src/io/memory_ostream.hpp index 76ad8623..57034d72 100644 --- a/src/io/memory_ostream.hpp +++ b/src/io/memory_ostream.hpp @@ -23,7 +23,7 @@ public: } size_t size() const { - return pptr()-pbase(); + return pptr() - pbase(); } protected: int_type overflow(int_type c) override {