Files
uc2/tests/test_cli_master.cmake
Eremey Valetov 8e70d4cab9 Add custom master-block deduplication for archive creation
Content-fingerprint grouping via FNV-1a hash of file headers: files
sharing identical first 4096 bytes are assigned a custom master block
built from the largest file in the group. Masters are compressed with
SuperMaster and written as MASMETA records in the central directory.
Files below 1 KB or without a group continue using the SuperMaster.

Includes CLI integration test and documentation updates (format spec,
usage, roadmap).
2026-03-12 02:18:12 -04:00

88 lines
3.2 KiB
CMake

# CLI master-block deduplication round-trip test.
# Creates multiple files with identical first 4096 bytes (triggering
# fingerprint grouping) and verifies they survive a create/extract cycle.
#
# Inputs (pass with -D<name>=<value> ahead of -P on the cmake command line):
#   TEST_DIR - scratch directory; it is DELETED and recreated by this script
#   UC2_CLI  - the uc2 command-line executable under test

# Fail fast when an input is missing. With TEST_DIR empty, the paths below
# would collapse to "/input" and "/output" at the filesystem root, and the
# recursive remove would be handed an empty path.
if(NOT DEFINED TEST_DIR OR "${TEST_DIR}" STREQUAL "")
  message(FATAL_ERROR "cli_master: TEST_DIR must be set (-DTEST_DIR=<scratch dir>)")
endif()
if(NOT DEFINED UC2_CLI OR "${UC2_CLI}" STREQUAL "")
  message(FATAL_ERROR "cli_master: UC2_CLI must be set (-DUC2_CLI=<path to uc2>)")
endif()

# Start from a clean scratch tree so stale files from an earlier run cannot
# mask a failed extraction.
file(REMOVE_RECURSE "${TEST_DIR}")
file(MAKE_DIRECTORY "${TEST_DIR}/input" "${TEST_DIR}/output")

# Common header: 4500 bytes of repeated text (exceeds 4096-byte sample window)
# (50 bytes per line including the newline, repeated 90 times).
string(REPEAT "Master-block deduplication test data header line.\n" 90 HEADER)

# variant_a.txt: header + unique tail A
string(REPEAT "AAAA unique tail for variant A, differs from B and C.\n" 60 TAIL_A)
file(WRITE "${TEST_DIR}/input/variant_a.txt" "${HEADER}${TAIL_A}")

# variant_b.txt: header + unique tail B
string(REPEAT "BBBB unique tail for variant B, differs from A and C.\n" 40 TAIL_B)
file(WRITE "${TEST_DIR}/input/variant_b.txt" "${HEADER}${TAIL_B}")

# variant_c.txt: header + unique tail C (largest → becomes master source)
string(REPEAT "CCCC unique tail for variant C, differs from A and B.\n" 100 TAIL_C)
file(WRITE "${TEST_DIR}/input/variant_c.txt" "${HEADER}${TAIL_C}")

# unrelated.txt: different header, should NOT be grouped
string(REPEAT "Completely different file unrelated to the master group.\n" 80 UNREL)
file(WRITE "${TEST_DIR}/input/unrelated.txt" "${UNREL}")

# small.txt: below MinMasterFile (< 1024 bytes), should use SuperMaster
file(WRITE "${TEST_DIR}/input/small.txt" "Short file, no custom master.\n")
# Create archive (capture stderr for master diagnostics).
# stdout is not captured and passes straight through to the test log.
execute_process(
  COMMAND "${UC2_CLI}" -w "${TEST_DIR}/test.uc2"
    "${TEST_DIR}/input/variant_a.txt"
    "${TEST_DIR}/input/variant_b.txt"
    "${TEST_DIR}/input/variant_c.txt"
    "${TEST_DIR}/input/unrelated.txt"
    "${TEST_DIR}/input/small.txt"
  RESULT_VARIABLE RC
  ERROR_VARIABLE STDERR
)
if(NOT RC EQUAL 0)
  # stderr was diverted into STDERR above, so it has to be echoed here;
  # otherwise the tool's own error text never reaches the test log.
  message(FATAL_ERROR "uc2 -w failed: ${RC}\n${STDERR}")
endif()
message(STATUS "Compression log:\n${STDERR}")

# Verify a custom master was created (stderr should mention "master[2]").
# string(FIND) does a plain substring search, so the brackets are literal.
string(FIND "${STDERR}" "master[2]" HAS_MASTER)
if(HAS_MASTER EQUAL -1)
  message(FATAL_ERROR "Expected custom master but none was created")
endif()

# Verify custom-master files are flagged in output
string(FIND "${STDERR}" "custom master" HAS_CM_TAG)
if(HAS_CM_TAG EQUAL -1)
  message(FATAL_ERROR "Expected '(custom master)' tag in compression output")
endif()
# Ask the CLI to list the archive that was just written. The listing text is
# only echoed to the log; the exit status alone decides pass or fail here.
execute_process(
  COMMAND "${UC2_CLI}" -l "${TEST_DIR}/test.uc2"
  RESULT_VARIABLE LIST_RC
  OUTPUT_VARIABLE ARCHIVE_LISTING
)
if(LIST_RC EQUAL 0)
  message(STATUS "Archive listing:\n${ARCHIVE_LISTING}")
else()
  message(FATAL_ERROR "uc2 -l failed: ${LIST_RC}")
endif()
# Unpack the archive with "-d <dir>"; the verification step afterwards reads
# the extracted files back from ${TEST_DIR}/output.
execute_process(
  COMMAND
    "${UC2_CLI}"
    -d "${TEST_DIR}/output"
    "${TEST_DIR}/test.uc2"
  RESULT_VARIABLE EXTRACT_RC
)
# Any non-zero status (or a non-numeric launch error string) aborts the test.
if(NOT EXTRACT_RC EQUAL 0)
  message(FATAL_ERROR "uc2 extract failed: ${EXTRACT_RC}")
endif()
# Verify each file: every input must reappear in the output directory with
# identical bytes.
#
# The comparison is delegated to `cmake -E compare_files` instead of reading
# both files into variables and using if(... STREQUAL ...). CMake strings are
# not a dependable container for arbitrary bytes, and a quoted if() operand
# can be re-interpreted as a variable name when no policy version is set, so
# a string comparison is weaker than a byte-for-byte check for a round-trip.
foreach(F variant_a.txt variant_b.txt variant_c.txt unrelated.txt small.txt)
  set(ORIGINAL_FILE "${TEST_DIR}/input/${F}")
  set(EXTRACTED_FILE "${TEST_DIR}/output/${F}")

  # Report a missing file explicitly rather than as a generic mismatch.
  if(NOT EXISTS "${EXTRACTED_FILE}")
    message(FATAL_ERROR "${F}: not found after extraction (${EXTRACTED_FILE})")
  endif()

  # compare_files exits 0 only when the two files are identical.
  execute_process(
    COMMAND "${CMAKE_COMMAND}" -E compare_files "${ORIGINAL_FILE}" "${EXTRACTED_FILE}"
    RESULT_VARIABLE CMP_RC
  )
  if(NOT CMP_RC EQUAL 0)
    message(FATAL_ERROR "${F}: content mismatch after round-trip")
  endif()
endforeach()
message(STATUS "cli_master: all files verified (custom master deduplication)")