From 184e9c66484854a638bd67abeaa267c687116fac Mon Sep 17 00:00:00 2001 From: MihailRis Date: Wed, 4 Sep 2024 23:37:39 +0300 Subject: [PATCH] update region file format 2 to 3 (WIP) --- doc/specs/outdated/region_file_spec_v2.md | 41 +++++++ .../outdated/region_voxels_chunk_spec_v1.md | 26 +++++ doc/specs/region_file_spec.md | 29 +++-- doc/specs/region_voxels_chunk_spec.md | 13 +-- src/coders/compression.cpp | 48 +++++--- src/coders/compression.hpp | 2 +- src/files/RegionsLayer.cpp | 4 + src/files/WorldConverter.cpp | 28 ++--- src/files/WorldConverter.hpp | 7 +- src/files/WorldRegions.cpp | 4 + src/files/WorldRegions.hpp | 4 + src/files/compatibility.cpp | 109 ++++++++++++++++++ src/files/compatibility.hpp | 14 +++ src/files/files.cpp | 6 + src/files/files.hpp | 2 + test/files/compatibility.cpp | 15 +++ 16 files changed, 295 insertions(+), 57 deletions(-) create mode 100644 doc/specs/outdated/region_file_spec_v2.md create mode 100644 doc/specs/outdated/region_voxels_chunk_spec_v1.md create mode 100644 src/files/compatibility.cpp create mode 100644 src/files/compatibility.hpp create mode 100644 test/files/compatibility.cpp diff --git a/doc/specs/outdated/region_file_spec_v2.md b/doc/specs/outdated/region_file_spec_v2.md new file mode 100644 index 00000000..4e4d2890 --- /dev/null +++ b/doc/specs/outdated/region_file_spec_v2.md @@ -0,0 +1,41 @@ +# Region File (version 2) + +File format BNF (RFC 5234): + +```bnf +file = header (*chunk) offsets complete file +header = magic %x02 %x00 magic number, version and reserved + zero byte + +magic = %x2E %x56 %x4F %x58 '.VOXREG\0' + %x52 %x45 %x47 %x00 + +chunk = int32 (*byte) byte array with size prefix +offsets = (1024*int32) offsets table +int32 = 4byte signed big-endian 32 bit integer +byte = %x00-FF 8 bit unsigned integer +``` + +C struct visualization: + +```c +typedef unsigned char byte; + +struct file { + // 10 bytes + struct { + char magic[8] = ".VOXREG"; + byte version = 2; + byte reserved = 0; + } header; + + struct { + 
int32_t size; // byteorder: big-endian + byte* data; + } chunks[1024]; // file does not contain zero sizes for missing chunks + + int32_t offsets[1024]; // byteorder: big-endian +}; +``` + +Offsets table contains chunks positions in file. 0 means that chunk is not present in the file. Minimal valid offset is 10 (header size). diff --git a/doc/specs/outdated/region_voxels_chunk_spec_v1.md b/doc/specs/outdated/region_voxels_chunk_spec_v1.md new file mode 100644 index 00000000..7467944c --- /dev/null +++ b/doc/specs/outdated/region_voxels_chunk_spec_v1.md @@ -0,0 +1,26 @@ +# Voxels Chunk (version 1) + +Voxel regions layer chunk structure. + +Values are separated for extRLE8 compression efficiency. + +File format BNF (RFC 5234): + +```bnf +chunk = (65536*byte) block indices (most significant bytes) + (65536*byte) block indices (least significant bytes) + (65536*byte) block states (most significant bytes) + (65536*byte) block states (least significant bytes) + +byte = %x00-FF 8 bit unsigned integer +``` + +65536 is number of voxels per chunk (16\*256\*16) + +## Block state + +Block state is encoded in 16 bits: +- 0-2 bits (3) - block rotation index +- 3-5 bits (3) - segment block bits +- 6-7 bits (2) - reserved +- 8-15 bits (8) - user bits diff --git a/doc/specs/region_file_spec.md b/doc/specs/region_file_spec.md index 4e4d2890..30601798 100644 --- a/doc/specs/region_file_spec.md +++ b/doc/specs/region_file_spec.md @@ -1,18 +1,21 @@ -# Region File (version 2) +# Region File (version 3) File format BNF (RFC 5234): ```bnf file = header (*chunk) offsets complete file -header = magic %x02 %x00 magic number, version and reserved - zero byte +header = magic %x03 byte magic number, version and compression + method magic = %x2E %x56 %x4F %x58 '.VOXREG\0' %x52 %x45 %x47 %x00 -chunk = int32 (*byte) byte array with size prefix -offsets = (1024*int32) offsets table -int32 = 4byte signed big-endian 32 bit integer +chunk = uint32 uint32 (*byte) byte array with size and source size +
prefix where source size is + decompressed chunk data size + +offsets = (1024*uint32) offsets table +uint32 = 4byte unsigned little-endian 32 bit integer byte = %x00-FF 8 bit unsigned integer ``` @@ -25,17 +28,23 @@ struct file { // 10 bytes struct { char magic[8] = ".VOXREG"; - byte version = 2; - byte reserved = 0; + byte version = 3; + byte compression; } header; struct { - int32_t size; // byteorder: big-endian + uint32_t size; // byteorder: little-endian + uint32_t sourceSize; // byteorder: little-endian byte* data; } chunks[1024]; // file does not contain zero sizes for missing chunks - int32_t offsets[1024]; // byteorder: big-endian + uint32_t offsets[1024]; // byteorder: little-endian }; ``` Offsets table contains chunks positions in file. 0 means that chunk is not present in the file. Minimal valid offset is 10 (header size). + +Available compression methods: +0. no compression +1. extRLE8 +2. extRLE16 diff --git a/doc/specs/region_voxels_chunk_spec.md b/doc/specs/region_voxels_chunk_spec.md index 7467944c..d43208cb 100644 --- a/doc/specs/region_voxels_chunk_spec.md +++ b/doc/specs/region_voxels_chunk_spec.md @@ -1,17 +1,14 @@ -# Voxels Chunk (version 1) +# Voxels Chunk (version 2) -Voxel regions layer chunk structure. - -Values are separated for extRLE8 compression efficiency. +IDs and states are separated for extRLE16 compression efficiency.
File format BNF (RFC 5234): ```bnf -chunk = (65536*byte) block indices (most significant bytes) - (65536*byte) block indices (least significant bytes) - (65536*byte) block states (most significant bytes) - (65536*byte) block states (least significant bytes) +chunk = (65536*uint16) block ids + (65536*uint16) block states +uint16 = 2byte 16 bit little-endian unsigned integer byte = %x00-FF 8 bit unsigned integer ``` diff --git a/src/coders/compression.cpp b/src/coders/compression.cpp index 343328cf..74f68528 100644 --- a/src/coders/compression.cpp +++ b/src/coders/compression.cpp @@ -25,29 +25,38 @@ static std::shared_ptr get_buffer(size_t minSize) { return nullptr; } +static auto compress_rle( + const ubyte* src, + size_t srclen, + size_t& len, + size_t(*encodefunc)(const ubyte*, size_t, ubyte*) +) { + auto buffer = get_buffer(srclen * 2); + auto bytes = buffer.get(); + std::unique_ptr uptr; + if (bytes == nullptr) { + uptr = std::make_unique(srclen * 2); + bytes = uptr.get(); + } + len = encodefunc(src, srclen, bytes); + if (uptr) { + return uptr; + } + auto data = std::make_unique(len); + std::memcpy(data.get(), bytes, len); + return data; +} + std::unique_ptr compression::compress( const ubyte* src, size_t srclen, size_t& len, Method method ) { switch (method) { case Method::NONE: throw std::invalid_argument("compression method is NONE"); - case Method::EXTRLE8: { - // max extrle out size is srcLen * 2 - auto buffer = get_buffer(srclen * 2); - auto bytes = buffer.get(); - std::unique_ptr uptr; - if (bytes == nullptr) { - uptr = std::make_unique(srclen * 2); - bytes = uptr.get(); - } - len = extrle::encode(src, srclen, bytes); - if (uptr) { - return uptr; - } - auto data = std::make_unique(len); - std::memcpy(data.get(), bytes, len); - return data; - } + case Method::EXTRLE8: + return compress_rle(src, srclen, len, extrle::encode); + case Method::EXTRLE16: + return compress_rle(src, srclen, len, extrle::encode16); case Method::GZIP: { auto buffer = 
gzip::compress(src, srclen); auto data = std::make_unique(buffer.size()); @@ -71,6 +80,11 @@ std::unique_ptr compression::decompress( extrle::decode(src, srclen, decompressed.get()); return decompressed; } + case Method::EXTRLE16: { + auto decompressed = std::make_unique(dstlen); + extrle::decode16(src, srclen, decompressed.get()); + return decompressed; + } case Method::GZIP: { auto buffer = gzip::decompress(src, srclen); if (buffer.size() != dstlen) { diff --git a/src/coders/compression.hpp b/src/coders/compression.hpp index 09f50b2a..98b7a0e3 100644 --- a/src/coders/compression.hpp +++ b/src/coders/compression.hpp @@ -6,7 +6,7 @@ namespace compression { enum class Method { - NONE, EXTRLE8, GZIP + NONE, EXTRLE8, EXTRLE16, GZIP }; /// @brief Compress buffer diff --git a/src/files/RegionsLayer.cpp b/src/files/RegionsLayer.cpp index 2ec8782a..3f0101f4 100644 --- a/src/files/RegionsLayer.cpp +++ b/src/files/RegionsLayer.cpp @@ -135,6 +135,10 @@ WorldRegion* RegionsLayer::getRegion(int x, int z) { return found->second.get(); } +fs::path RegionsLayer::getRegionFilePath(int x, int z) const { + return folder / get_region_filename(x, z); +} + WorldRegion* RegionsLayer::getOrCreateRegion(int x, int z) { if (auto region = getRegion(x, z)) { return region; diff --git a/src/files/WorldConverter.cpp b/src/files/WorldConverter.cpp index ee3f1fa6..4748172d 100644 --- a/src/files/WorldConverter.cpp +++ b/src/files/WorldConverter.cpp @@ -6,6 +6,7 @@ #include #include "content/ContentReport.hpp" +#include "files/compatibility.hpp" #include "data/dynamic.hpp" #include "debug/Logger.hpp" #include "files/files.hpp" @@ -48,7 +49,7 @@ void WorldConverter::addRegionsTasks( logger.error() << "could not parse region name " << name; continue; } - tasks.push(ConvertTask {taskType, file.path(), x, z}); + tasks.push(ConvertTask {taskType, file.path(), x, z, layerid}); } } @@ -58,11 +59,7 @@ void WorldConverter::createUpgradeTasks() { if (issue.issueType != 
ContentIssueType::REGION_FORMAT_UPDATE) { continue; } - if (issue.regionLayer == REGION_LAYER_VOXELS) { - addRegionsTasks(issue.regionLayer, ConvertTaskType::UPGRADE_VOXELS); - } else { - addRegionsTasks(issue.regionLayer, ConvertTaskType::UPGRADE_REGION); - } + addRegionsTasks(issue.regionLayer, ConvertTaskType::UPGRADE_REGION); } } @@ -159,12 +156,13 @@ std::shared_ptr WorldConverter::startTask( return pool; } -void WorldConverter::upgradeRegion(const fs::path& file, int x, int z) const { - throw std::runtime_error("unsupported region format"); -} - -void WorldConverter::upgradeVoxels(const fs::path& file, int x, int z) const { - throw std::runtime_error("unsupported region format"); +void WorldConverter::upgradeRegion( + const fs::path& file, int x, int z, RegionLayerIndex layer +) const { + auto path = wfile->getRegions().getRegionFilePath(layer, x, z); + auto bytes = files::read_bytes_buffer(path); + auto buffer = compatibility::convertRegion2to3(bytes, layer); + files::write_bytes(path, buffer.data(), buffer.size()); } void WorldConverter::convertVoxels(const fs::path& file, int x, int z) const { @@ -195,11 +193,7 @@ void WorldConverter::convert(const ConvertTask& task) const { switch (task.type) { case ConvertTaskType::UPGRADE_REGION: - upgradeRegion(task.file, task.x, task.z); - break; - case ConvertTaskType::UPGRADE_VOXELS: - upgradeRegion(task.file, task.x, task.z); - upgradeVoxels(task.file, task.x, task.z); + upgradeRegion(task.file, task.x, task.z, task.layer); break; case ConvertTaskType::VOXELS: convertVoxels(task.file, task.x, task.z); diff --git a/src/files/WorldConverter.hpp b/src/files/WorldConverter.hpp index 63cf5993..fe9e35de 100644 --- a/src/files/WorldConverter.hpp +++ b/src/files/WorldConverter.hpp @@ -24,8 +24,6 @@ enum class ConvertTaskType { PLAYER, /// @brief refresh region file version UPGRADE_REGION, - /// @brief rewrite voxels region file to new format - UPGRADE_VOXELS, }; struct ConvertTask { @@ -34,6 +32,7 @@ struct ConvertTask { 
/// @brief region coords int x, z; + RegionLayerIndex layer; }; class WorldConverter : public Task { @@ -45,8 +44,8 @@ class WorldConverter : public Task { uint tasksDone = 0; bool upgradeMode; - void upgradeRegion(const fs::path& file, int x, int z) const; - void upgradeVoxels(const fs::path& file, int x, int z) const; + void upgradeRegion( + const fs::path& file, int x, int z, RegionLayerIndex layer) const; void convertPlayer(const fs::path& file) const; void convertVoxels(const fs::path& file, int x, int z) const; void convertInventories(const fs::path& file, int x, int z) const; diff --git a/src/files/WorldRegions.cpp b/src/files/WorldRegions.cpp index f3e1fd88..500c21a0 100644 --- a/src/files/WorldRegions.cpp +++ b/src/files/WorldRegions.cpp @@ -290,6 +290,10 @@ const fs::path& WorldRegions::getRegionsFolder(RegionLayerIndex layerid) const { return layers[layerid].folder; } +fs::path WorldRegions::getRegionFilePath(RegionLayerIndex layerid, int x, int z) const { + return layers[layerid].getRegionFilePath(x, z); +} + void WorldRegions::writeAll() { for (auto& layer : layers) { fs::create_directories(layer.folder); diff --git a/src/files/WorldRegions.hpp b/src/files/WorldRegions.hpp index e4060002..fecd30fa 100644 --- a/src/files/WorldRegions.hpp +++ b/src/files/WorldRegions.hpp @@ -147,6 +147,8 @@ struct RegionsLayer { WorldRegion* getRegion(int x, int z); WorldRegion* getOrCreateRegion(int x, int z); + fs::path getRegionFilePath(int x, int z) const; + /// @brief Get chunk data. Read from file if not loaded yet. 
/// @param x chunk x coord /// @param z chunk z coord @@ -237,6 +239,8 @@ public: /// @return directory path const fs::path& getRegionsFolder(RegionLayerIndex layerid) const; + fs::path getRegionFilePath(RegionLayerIndex layerid, int x, int z) const; + /// @brief Write all region layers void writeAll(); diff --git a/src/files/compatibility.cpp b/src/files/compatibility.cpp new file mode 100644 index 00000000..0d9d6c08 --- /dev/null +++ b/src/files/compatibility.cpp @@ -0,0 +1,109 @@ +#include "compatibility.hpp" + +#include + +#include "constants.hpp" +#include "voxels/voxel.hpp" +#include "coders/compression.hpp" +#include "coders/byte_utils.hpp" +#include "lighting/Lightmap.hpp" +#include "util/data_io.hpp" + +static inline size_t VOXELS_DATA_SIZE_V1 = CHUNK_VOL * 4; +static inline size_t VOXELS_DATA_SIZE_V2 = CHUNK_VOL * 4; + +static util::Buffer convert_voxels_1to2(const ubyte* buffer, uint32_t size) { + auto data = compression::decompress( + buffer, size, VOXELS_DATA_SIZE_V1, compression::Method::EXTRLE8); + + util::Buffer dstBuffer(VOXELS_DATA_SIZE_V2); + auto dst = reinterpret_cast(dstBuffer.data()); + + for (size_t i = 0; i < CHUNK_VOL; i++) { + ubyte bid1 = data[i]; + ubyte bid2 = data[CHUNK_VOL + i]; + + ubyte bst1 = data[CHUNK_VOL * 2 + i]; + ubyte bst2 = data[CHUNK_VOL * 3 + i]; + + dst[i] = + (static_cast(bid1) << 8) | static_cast(bid2); + dst[CHUNK_VOL + i] = ( + (static_cast(bst1) << 8) | + static_cast(bst2) + ); + } + size_t outLen; + auto compressed = compression::compress( + dstBuffer.data(), VOXELS_DATA_SIZE_V2, outLen, compression::Method::EXTRLE16); // compress the converted v2 buffer, not the decompressed v1 source + return util::Buffer(std::move(compressed), outLen); +} + +util::Buffer compatibility::convertRegion2to3( + const util::Buffer& src, RegionLayerIndex layer +) { + const size_t REGION_CHUNKS = 1024; + const size_t HEADER_SIZE = 10; + const size_t OFFSET_TABLE_SIZE = REGION_CHUNKS * sizeof(uint32_t); + const ubyte COMPRESS_NONE = 0; + const ubyte COMPRESS_EXTRLE8 = 1; + const ubyte COMPRESS_EXTRLE16 = 2; +
+ const ubyte* const ptr = src.data(); + + ByteBuilder builder; + builder.putCStr(".VOXREG"); + builder.put(3); + switch (layer) { + case REGION_LAYER_VOXELS: builder.put(COMPRESS_EXTRLE16); break; + case REGION_LAYER_LIGHTS: builder.put(COMPRESS_EXTRLE8); break; + default: builder.put(COMPRESS_NONE); break; + } + + uint32_t offsets[REGION_CHUNKS] {}; + size_t chunkIndex = 0; + + auto tablePtr = reinterpret_cast( + ptr + src.size() - OFFSET_TABLE_SIZE + ); + + for (size_t i = 0; i < REGION_CHUNKS; i++) { + uint32_t srcOffset = dataio::be2h(tablePtr[i]); + if (srcOffset == 0) { + continue; + } + uint32_t size = *reinterpret_cast(ptr + srcOffset); + size = dataio::be2h(size); + + const ubyte* data = ptr + srcOffset + sizeof(uint32_t); + offsets[i] = builder.size(); + + switch (layer) { + case REGION_LAYER_VOXELS: { + auto dstdata = convert_voxels_1to2(data, size); + builder.putInt32(dstdata.size()); + builder.putInt32(VOXELS_DATA_SIZE_V2); + builder.put(dstdata.data(), dstdata.size()); + break; + } + case REGION_LAYER_LIGHTS: + builder.putInt32(size); + builder.putInt32(LIGHTMAP_DATA_LEN); + builder.put(data, size); + break; + case REGION_LAYER_ENTITIES: + case REGION_LAYER_INVENTORIES: { + builder.putInt32(size); + builder.putInt32(size); + builder.put(data, size); + break; + case REGION_LAYERS_COUNT: + throw std::invalid_argument("invalid enum"); + } + } + } + for (size_t i = 0; i < REGION_CHUNKS; i++) { + builder.putInt32(offsets[i]); + } + return util::Buffer(builder.build().data(), builder.size()); +} diff --git a/src/files/compatibility.hpp b/src/files/compatibility.hpp new file mode 100644 index 00000000..eef5b218 --- /dev/null +++ b/src/files/compatibility.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include "typedefs.hpp" +#include "util/Buffer.hpp" +#include "files/world_regions_fwd.hpp" + +namespace compatibility { + /// @brief Convert region file from version 2 to 3 + /// @see /doc/specs/region_file_spec.md + /// @param src region file source content + /// 
@return new region file content + util::Buffer convertRegion2to3( + const util::Buffer& src, RegionLayerIndex layer); +} diff --git a/src/files/files.cpp b/src/files/files.cpp index e0e52297..8d2a46b4 100644 --- a/src/files/files.cpp +++ b/src/files/files.cpp @@ -66,6 +66,12 @@ bool files::read(const fs::path& filename, char* data, size_t size) { return true; } +util::Buffer files::read_bytes_buffer(const fs::path& path) { + size_t size; + auto bytes = files::read_bytes(path, size); + return util::Buffer(std::move(bytes), size); +} + std::unique_ptr files::read_bytes( const fs::path& filename, size_t& length ) { diff --git a/src/files/files.hpp b/src/files/files.hpp index abcbdbb4..324b1d67 100644 --- a/src/files/files.hpp +++ b/src/files/files.hpp @@ -7,6 +7,7 @@ #include #include "typedefs.hpp" +#include "util/Buffer.hpp" namespace fs = std::filesystem; @@ -59,6 +60,7 @@ namespace files { ); bool read(const fs::path&, char* data, size_t size); + util::Buffer read_bytes_buffer(const fs::path&); std::unique_ptr read_bytes(const fs::path&, size_t& length); std::vector read_bytes(const fs::path&); std::string read_string(const fs::path& filename); diff --git a/test/files/compatibility.cpp b/test/files/compatibility.cpp new file mode 100644 index 00000000..34237e21 --- /dev/null +++ b/test/files/compatibility.cpp @@ -0,0 +1,15 @@ +#include +#include + +#include "files/files.hpp" +#include "files/compatibility.hpp" + +TEST(compatibility, convert) { + auto infile = std::filesystem::u8path( + "voxels_0_1.bin"); + auto outfile = std::filesystem::u8path( + "output_0_1.bin"); + auto input = files::read_bytes_buffer(infile); + auto output = compatibility::convertRegion2to3(input, REGION_LAYER_VOXELS); + files::write_bytes(outfile, output.data(), output.size()); +}