add uc2.1 man page and install rules

mdoc man page covering all modes and the OTS/ingest long options, verified with groff and NetBSD mandoc. CMake installs the binary and the man page (guarded against add_subdirectory embedding). Also corrects the stale direction-1 comment in the DOSBox round-trip script: multi-file archives created by v3 have extracted fine in the original since the custom-Huffman-tree fix.
fix rANS extraction crash and >64KB window corruption
2026-06-11 15:17:50 -04:00 · 2026-06-11 13:14:01 -04:00 · 2026-05-05 03:25:45 -04:00 · 2026-05-05 03:00:23 -04:00 · 2026-05-05 02:57:43 -04:00 · 2026-05-04 19:06:58 -04:00
117 changed files with 13995 additions and 57 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -4,3 +4,7 @@ original/UC2_source/**/*.exe binary
 original/UC2_source/**/*.DAT binary
 original/UC2_source/**/*.LIB binary
 original/UC2_source/**/*.PRJ binary
+
+# Test corpus and archives must be byte-exact
+tests/corpus/** binary
+tests/archives/** binary
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -0,0 +1,36 @@
+name: Build
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - { os: ubuntu-latest,  name: Linux }
+          - { os: macos-latest,   name: macOS }
+          - { os: windows-latest, name: Windows (MSVC) }
+    runs-on: ${{ matrix.os }}
+    name: ${{ matrix.name }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Lint -- forbid assert(side-effect)
+        if: runner.os == 'Linux'
+        run: python3 tests/scripts/check_assert_side_effects.py
+      - name: Configure
+        run: cmake -B build -DCMAKE_BUILD_TYPE=Release
+      - name: Build
+        run: cmake --build build --config Release
+      - name: Smoke test (Unix)
+        if: runner.os != 'Windows'
+        run: ./build/cli/uc2 -h
+      - name: Smoke test (Windows)
+        if: runner.os == 'Windows'
+        run: .\build\cli\Release\uc2.exe -h
+      - name: Test
+        run: ctest --test-dir build --output-on-failure -C Release
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,45 @@
+name: Docs
+
+on:
+  push:
+    branches: [main]
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+concurrency:
+  group: pages
+  cancel-in-progress: false
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install Sphinx
+        run: pip install -r docs/requirements.txt
+
+      - name: Build docs
+        run: sphinx-build -b html docs docs/_build/html
+
+      - uses: actions/upload-pages-artifact@v3
+        with:
+          path: docs/_build/html
+
+  deploy:
+    needs: build
+    runs-on: ubuntu-latest
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    steps:
+      - id: deployment
+        uses: actions/deploy-pages@v4
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,18 @@
 # Build
 build/
+build-*/
 cmake-build-*/

+# CTest run outputs (when ctest is invoked outside of build/)
+Testing/
+
+# Python bytecode
+__pycache__/
+*.pyc
+
+# Docs build
+docs/_build/
+
 # IDE
 .idea/
 .vscode/
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,3 +9,10 @@ include(cmake/UC2Platform.cmake)

 add_subdirectory(lib)
 add_subdirectory(cli)
+add_subdirectory(contrib/libarchive)
+
+option(UC2_BUILD_TESTS "Build tests" ON)
+if(UC2_BUILD_TESTS AND NOT DJGPP)
+    enable_testing()
+    add_subdirectory(tests)
+endif()
--- a/CREDITS.md
+++ b/CREDITS.md
@@ -8,30 +8,71 @@ versioning.  The original source code is preserved in `original/UC2_source/`.

 - Website: <https://nicodevries.com/professional/>

-## Source Code Release
+## 2015 LGPL Source Release

-**Danny Bezemer** facilitated the public release of the UC2 source code in
-2015 under the LGPL-3.0 license.
+In December 2015, **Vladislav Sagunov** asked Nico de Vries whether
+the UC2 source could be re-released under a free licence.  De Vries
+agreed and personally re-published the full UC2 source under the GNU
+LGPL-3.0 (with a small Borland C/C++ runtime carve-out for DOS-specific
+code).  The release notes are preserved verbatim in
+`original/UC2_source/Read Me First.txt`.

 ## Portable Decompressor (unuc2 / libunuc2)

 **Jan Bobrowski** wrote a clean-room portable decompressor (2020--2021) that
 forms the foundation of this project's decompression engine.  The library
 (`libunuc2`) is licensed under LGPL-3.0; the CLI tool (`unuc2`) is licensed
-under GPL-3.0.
+under GPL-3.0-or-later.
+
+The following files in this repository derive directly from Bobrowski's
+work and retain his licence (see `docs/license-audit.md` for the full
+provenance table):
+
+- `lib/src/decompress.c` (LGPL-3.0-only)  --  derived from `libunuc2.c`
+- `lib/src/list.h` (LGPL-3.0-only)  --  byte-identical to upstream
+- `lib/include/uc2/libuc2.h` (LGPL-3.0-only)  --  derived from `libunuc2.h`
+- `cli/src/main.c` (GPL-3.0-or-later)  --  derived from `unuc2.c`,
+  with substantial additions for archive creation, OTS, and benchmarking
+- `cli/src/compat/compat_win32.c` (LGPL-3.0-only)
+- `cli/src/compat/compat_dos.c` (LGPL-3.0-only, DOS adaptation by Valetov)
+
+The SuperMaster dictionary (`lib/src/super.bin`) is bit-identical to the
+copy shipped in `original/unuc2-0.6/` and to the data extracted from
+de Vries's 1992 binaries.

 - Website: <http://torinak.com/~jb/unuc2/>
 - Original source preserved in `original/unuc2-0.6/`

 ## Additional Contributors

- **Jan-Pieter Cornet** -- early testing, archive samples, and format
+- **Danny Bezemer**  --  co-authored UC2 with de Vries during the
+  original 1992-1996 development.
+- **Jan-Pieter Cornet**  --  early testing, archive samples, and format
  documentation contributions to the unuc2 project.
- **Vladislav Sagunov** -- maintained UC2 resources and documentation.

 ## UC2 v3.0.0 Revival

-**Eremey Valetov** -- project revival, CMake build system, cross-platform
-porting, and ongoing development.
+**Eremey Valetov**  --  project revival, including:
+
+- CMake build system and cross-platform porting (Linux, macOS, MSVC, DJGPP)
+- LZ77+Huffman compression engine (compatible with original UC2 Pro)
+- rANS entropy coder (method 10, levels 6--9)
+- Content-defined chunking (CDC) with Gear rolling hash
+- Merkle DAG content addressing
+- Cross-archive block store for shared deduplication
+- SimHash near-duplicate detection
+- Delta compression for binary patching
+- Content-aware preprocessing (BCJ, BWT, delta filter)
+- LZ4 ultra-fast compression
+- BLAKE3 cryptographic hashing
+- SHA-256 (FIPS 180-4) implementation
+- OpenTimestamps integration (proof parser, walker, archive trailer)
+- Dictionary metadata for cross-archive sharing
+- Backward compatibility with original UC2 Pro (verified via DOSBox-X)
+- Automated test infrastructure (19 unit tests, DOSBox-X cross-tool testing)
+
+All files under "UC2 v3.0.0 Revival" are licensed GPL-3.0-or-later by
+Eremey Valetov (2026).  See `docs/license-audit.md` for the per-file
+provenance table and the LGPL-3.0 / GPL-3.0 chain rationale.

 - GitHub: <https://github.com/evvaletov/uc2>
--- a/README.md
+++ b/README.md
@@ -1,15 +1,32 @@
-# UC2 v3.0.0 — UltraCompressor II
+# UC2 v3.0.0  --  UltraCompressor II

 A cross-platform revival of UltraCompressor II, the DOS-era archiver by
 Nico de Vries (1992--1996).  UC2 was notable for its advanced deduplication
 ("master blocks"), file versioning, and competitive compression ratios on
 the hardware of its day.

-This project brings UC2 back as a modern, portable C99 tool.  Phase 1
-(current) provides decompression and archive listing, built on Jan
-Bobrowski's clean-room portable decompressor
-([unuc2/libunuc2](http://torinak.com/~jb/unuc2/)).  Phase 2 will add
-compression using the original algorithms.
+UC2 v3 brings it back as a modern, portable C99 tool with full
+backward compatibility  --  archives created by UC2 v3 can be extracted
+by the original 1992 UC2 Pro, and vice versa.
+
+## Features
+
+- **Full compression and decompression**  --  LZ77+Huffman (levels 2--5),
+  rANS entropy coding (levels 6--9), LZ4 ultra-fast mode
+- **Backward compatible** with the original UC2 Pro (verified via
+  automated DOSBox-X cross-tool round-trip testing)
+- **Content-defined chunking** (CDC) with Gear rolling hash for
+  position-independent deduplication
+- **Merkle DAG** content addressing with 64-bit hashes
+- **Cross-archive dedup** via shared block store
+- **Near-duplicate detection** via SimHash
+- **Delta compression** for binary patching
+- **Content-aware preprocessing**  --  BCJ (x86), BWT (text), delta filter
+- **BLAKE3** cryptographic hashing for archive integrity
+- **Benchmark mode**  --  test all methods on input data
+- **Personality**  --  warm, confident status messages (`-q` for scripting)
+- Directory archival with nested hierarchies
+- Cross-platform: Linux, macOS, Windows (MSVC), DOS (DJGPP)

 ## Building

@@ -18,53 +35,84 @@ Requires CMake >= 3.16 and a C99 compiler (GCC, Clang, or MSVC).
 ```sh
 cmake -B build
 cmake --build build
+ctest --test-dir build    # run the unit tests
 ```

-The binary is at `build/cli/uc2`.
-
 ## Usage

+```sh
+uc2 -w archive.uc2 files...            # Create archive
+uc2 archive.uc2                        # Extract all files
+uc2 -l archive.uc2                     # List contents
+uc2 -t archive.uc2                     # Test integrity
+uc2 -d /tmp/out archive.uc2            # Extract to directory
+uc2 -w -L 8 archive.uc2 files...      # Create with rANS Tight
+uc2 -B files...                        # Benchmark all methods
 ```
-uc2 archive.uc2                       # Extract all files
-uc2 -l archive.uc2                    # List contents
-uc2 -t archive.uc2                    # Test archive integrity
-uc2 -d /tmp/out archive.uc2           # Extract to directory
-uc2 -l archive.uc2 '*.txt'            # List matching files
-uc2 -p archive.uc2 readme.txt         # Extract to stdout
-```
+
+### Compression Levels
+
+| Level | Method | Description |
+|-------|--------|-------------|
+| 2 | Huffman | Fast |
+| 3 | Huffman | Normal |
+| 4 | Huffman | Tight (default) |
+| 5 | Huffman | Ultra |
+| 6 | rANS | Fast |
+| 7 | rANS | Normal |
+| 8 | rANS | Tight |
+| 9 | rANS | Ultra |
+
+Levels 2--5 produce archives readable by the original 1992 UC2 Pro.
+Levels 6--9 use rANS entropy coding (UC2 v3 only, better compression).

 ### Options

 | Flag | Description |
 |------|-------------|
+| `-w` | Create archive |
 | `-l` | List archive contents |
 | `-t` | Test archive integrity |
-| `-a` | Include all file versions (not just latest) |
+| `-L n` | Compression level (2--9) |
+| `-B` | Benchmark all methods on input files |
 | `-d path` | Extract to specified directory |
 | `-f` | Overwrite existing files |
 | `-p` | Extract to stdout |
+| `-q` | Quiet (suppress status messages) |
+| `-a` | Include all file versions |
 | `-D` | Skip directory metadata; `-DD` also skips file metadata |
-| `-T` | Tab-separated output (for scripting) |
+| `-T` | Tab-separated output |

 ## Project Structure

 ```
 UC2/
-  lib/            libuc2 decompression library
-  cli/            uc2 command-line tool
-  original/       preserved original sources (reference only)
-  cmake/          build system modules
-  tests/          test archives and test programs
+  lib/              libuc2 compression/decompression library
+    include/uc2/    public headers (libuc2, uc2_cdc, uc2_merkle, uc2_rans, ...)
+    src/            library implementation
+  cli/              uc2 command-line tool
+  tests/            unit tests and test corpus
+  original/         preserved original UC2 Pro sources (reference only)
+  docs/             Sphinx documentation
 ```

+## Credits
+
+- **Nico de Vries**  --  Original UltraCompressor II (1992--1996); re-released the source under LGPL-3.0 (2015)
+- **Danny Bezemer**  --  Co-author of the original UltraCompressor II (1992--1996)
+- **Jan Bobrowski**  --  Clean-room portable decompressor (unuc2/libunuc2, 2020--2021)
+- **Eremey Valetov**  --  UC2 v3 revival, compression engine, deduplication, and ongoing development
+
+See [CREDITS.md](CREDITS.md) for full attribution.
+
 ## History

 - **v1.0--v2.3** (1992--1996) Original DOS releases by Nico de Vries
 - **2015** Source code released under LGPL-3.0 by Danny Bezemer
 - **2020--2021** Jan Bobrowski writes unuc2/libunuc2 (portable decompressor)
- **2026** UC2 v3.0.0: cross-platform revival
+- **2026** UC2 v3.0.0: cross-platform revival with full compression engine,
+  backward compatibility, and modern deduplication

 ## License

-GPL-3.0.  See [LICENSE](LICENSE) and [CREDITS.md](CREDITS.md) for full
-attribution.
+GPL-3.0.  See [LICENSE](LICENSE) and [CREDITS.md](CREDITS.md).
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -0,0 +1,296 @@
+# UC2 Roadmap
+
+## Phase 1: Decompression MVP (DONE)
+
+- [x] Port Bobrowski's libunuc2 decompression engine
+- [x] CLI tool with list/extract/test/pipe modes
+- [x] CMake build system (Linux, MSVC fallback for super.bin)
+- [x] Win32 compat layer carried over
+- [x] Tagged v3.0.0-alpha.1
+
+## Phase 2: Original Compression Engine (DONE)
+
+- [x] Port LZ77+Huffman compressor from `ULTRACMP.CPP`, `TREEGEN.CPP`, `TREEENC.CPP`
+- [x] Write as the inverse of the decompressor (Bobrowski's code is the spec)
+- [x] Compression levels 2=Fast, 3=Normal, 4=Tight, 5=Ultra
+- [x] CLI create mode (`uc2 -w`), compression level flag (`-L`)
+- [x] SuperMaster dictionary support (built-in 49 KB dictionary)
+- [x] Round-trip testing: 37 unit tests + CLI integration tests
+- [x] Round-trip testing vs original `uc2pro.exe` in DOSBox
+      (Direction: original creates -> UC2 v3 extracts  --  verified.
+       Reverse direction, UC2 v3 creates -> original extracts, was
+       a limitation of early builds and has worked since the
+       custom-Huffman-tree fix; see the next two items.)
+- [x] Backward compatibility with original UC2 Pro (listing +
+      extraction verified for multi-file archives in both directions
+      in automated DOSBox-X test).
+- [x] Custom Huffman tree optimization: use default tree for first
+      small block (< 256 ibuf entries), custom trees for larger
+      blocks.  Matches the original's bFlag logic.  37% compression
+      improvement on text data while maintaining backward compat.
+- [x] UC2 personality: status messages continuing the original's
+      tradition ("Everything went OK", compression level names,
+      "Fast, reliable and superior compression").  Suppressed by -q.
+
+## Phase 3: Modernized Master-Block Deduplication
+
+UC2's signature feature from 1992, ahead of its time.  Modernize into
+something no mainstream archiver offers.
+
+- [x] Content-fingerprint file grouping (FNV-1a hash of first 4096 bytes)
+- [x] Custom master-block generation from largest file in each group
+- [x] MASMETA central directory records with full metadata
+- [x] Masters compressed with SuperMaster, files compressed with custom master
+- [x] CLI integration test validating master deduplication round-trip
+- [x] Content-defined chunking (CDC) with Gear rolling hash
+      (`uc2_cdc.h`): chunker library + integration into archive
+      creation.  Files sharing content at ANY position (not just
+      identical prefixes) are now grouped for master-block dedup.
+- [x] Merkle DAG of deduplicated blocks (`uc2_merkle.h`):
+      content-addressable chunk trees with 64-bit FNV-1a hashes,
+      structural similarity comparison, single-byte-change resilience.
+      8 unit tests including partial overlap detection.
+- [x] Cross-archive dedup via shared block store (`uc2_blockstore.h`):
+      content-addressable chunk storage with two-level directory
+      layout, dedup statistics, read-back verification.
+      6 unit tests including cross-archive dedup scenario.
+- [x] Near-duplicate detection via SimHash (`uc2_simhash.h`):
+      64-bit locality-sensitive fingerprint with Hamming distance,
+      detects patched executables (16 changed bytes in 8KB: dist <= 8).
+      6 unit tests.
+- [x] Delta compression (`uc2_delta.h`): binary diff with COPY/INSERT
+      instructions, hash-based source matching. 96-byte patch in 16KB
+      file -> >50% size savings.  6 unit tests including round-trip.
+
+## Phase 4: Modern Compression Backends
+
+Pluggable algorithms behind new method IDs; original Method 4 kept for
+backward compatibility.
+
+- [x] rANS entropy coder (`uc2_rans.h`) integrated into archive format
+      as method 10.  Levels 6-9 use rANS (vs 2-5 Huffman).  32-bit
+      table-based rANS, <5% overhead vs Shannon entropy.  End-to-end
+      round-trip verified (create/list/extract/verify).
+- [x] zstd-inspired dictionary compression (`uc2_dict.h`): formal
+      dictionary metadata with content-hash IDs, integrity checksums,
+      serialization format, and cross-archive sharing via block store.
+      6 unit tests including round-trip and corruption detection.
+- [x] LZ4 ultra-fast mode (`uc2_lz4.h`): single-probe hash table,
+      O(1) match finding, 4-byte minimum match, variable-length
+      literal/match token encoding.  6 unit tests including
+      text, binary, all-same, incompressible, and small inputs.
+- [x] Content-aware preprocessing (`uc2_preprocess.h`):
+      BCJ (E8/E9 x86 address normalization), BWT (Burrows-Wheeler
+      for text), delta filter (byte-wise with configurable stride),
+      automatic content detection (text/x86/structured/binary).
+      11 unit tests.
+- [x] Built-in benchmark mode (`uc2 -B files...`): tests all 8 Huffman/rANS
+      levels plus LZ4, reports compressed size, ratio, and timing.
+
+## Phase 5: Quantum-Resistant Encryption
+
+No mainstream archiver offers post-quantum encryption.
+
+- [ ] CRYSTALS-Kyber (NIST PQC standard) for key encapsulation, pure C
+      (PQClean project, public domain)
+- [ ] AES-256-GCM for authenticated payload encryption
+- [ ] Hybrid mode: classical ECDH + Kyber for transition period
+- [ ] Passphrase-based key derivation via Argon2
+- [ ] Per-file selective encryption within archives
+- [ ] Plausible deniability: multi-archive-in-one with separate passwords.
+      Each password decrypts a different archive layer.  Under hostile
+      pressure, revealing one password gives access to a decoy layer
+      while the real archive remains hidden and indistinguishable from
+      random padding.  (Inspired by VeraCrypt hidden volumes.)
+
+## Phase 6: DOS / FreeDOS / Retro-Computing
+
+- [x] DJGPP cross-compilation toolchain: `cmake/djgpp-toolchain.cmake`
+      builds `uc2.exe` against the prebuilt DJGPP gcc 7.2 / 12.2 from
+      `andrewwutw/build-djgpp`.  Output is a 32-bit DPMI DOS executable
+      (MZ + COFF + go32 stub).  See `cmake/README-djgpp.md` for the
+      one-time setup (CPATH unset is required on hosts that export it).
+- [x] DOSBox-X smoke test: `tests/scripts/dos_smoke.sh` runs `uc2 -h`
+      and `uc2 -l <archive>` under DOSBox-X via the flatpak; verifies
+      the cross-compiled binary actually loads under a real DPMI host.
+      Real vintage hardware test still pending.
+- [ ] Method 80 (Turbo) support
+- [ ] Multi-volume archive spanning across physical media (floppies)
+- [ ] Self-extracting archives per platform (DOS COM/EXE, Linux ELF, Windows PE)
+- [ ] ANSI art progress display, CP850 codepage handling
+- [ ] Position as the archiver for retrocomputing preservation:
+      disk images, ROM collections, BBS archive redistribution
+
+## Phase 7: Cryptographic Integrity & Timestamping
+
+- [x] BLAKE3 content hashing (`uc2_blake3.h`): pure C implementation,
+      256-bit digests, incremental and one-shot API, constant-time
+      comparison, tree hashing structure.  7 unit tests including
+      avalanche, incremental-vs-oneshot, and single-byte updates.
+- [x] SHA-256 (`uc2_sha256.h`): pure-C FIPS 180-4 implementation,
+      one-shot and incremental API.  6 unit tests against published
+      test vectors (empty, "abc", 56-byte, 1M `'a'`, byte-by-byte
+      incremental, every-split-point boundary).
+- [x] OpenTimestamps integration (`uc2_ots.h`): pure-C parser,
+      serializer, and walker for the standard `.ots` proof format.
+      Append-only sidecar trailer (magic-bracketed, reverse-scan-safe)
+      stores the proof verbatim and preserves backward compatibility
+      with the original UC2 Pro reader.  Walker supports the
+      calendar-path subset (APPEND, PREPEND, SHA256); proofs with other
+      crypto ops are accepted as structurally valid but flagged for
+      `ots verify` follow-up.  CLI: `--ots-attach`, `--ots-extract`,
+      `--ots-info`; `uc2 -t` recomputes archive SHA-256 and verifies
+      the leaf and walk.  Strict-canonical-varint parser, 64-bit
+      overflow check, depth-bounded recursion, varbytes cap.
+      17 unit tests.
+- [ ] OTS upgrade: fetch the upgraded proof from the calendar after
+      the Bitcoin attestation has been minted (~1-6h), replace the
+      pending-only trailer with the Bitcoin block-header attestation.
+- [ ] Useful for legal/forensic archiving, software provenance, digital
+      preservation
+
+## Phase 8: Decentralized & Cloud Integration
+
+- [ ] IPFS pinning: `uc2 --ipfs-pin archive.uc2` to publish,
+      `uc2 --ipfs-get <CID>` to retrieve
+- [ ] Content-addressable dedup maps directly to IPFS CIDs;
+      master blocks become sharable across users ("swarm dedup")
+- [ ] Cloud archiving backend: `uc2 --s3 s3://bucket/path` for
+      streaming compress-to-cloud with dedup-aware incremental uploads
+- [ ] Filecoin/Sia for decentralized paid storage (optional)
+
+## Phase 9: Zero-Knowledge Proofs (Experimental)
+
+ZK proofs extend the Merkle DAG and encryption layers with
+privacy-preserving verification.  Most valuable for decentralized and
+compliance scenarios; heavyweight, so implemented as an optional module.
+
+- [ ] **Prove archive integrity without revealing contents**  --  ZK proof
+      that the archive's Merkle root matches claimed file hashes, without
+      exposing the tree structure.  Enables auditing of IPFS-shared
+      encrypted archives.
+- [ ] **Selective disclosure from encrypted archives**  --  prove a specific
+      file (by hash) exists in an encrypted archive without decrypting
+      anything else.  Useful for collaborative encrypted team archives.
+- [ ] **Verifiable deduplication**  --  ZK proof that master-block dedup was
+      performed correctly across archives without revealing block contents.
+      Builds trust in distributed dedup without data leaks.
+- [ ] **Compliance proofs**  --  prove properties ("archive created before
+      date Y", "archive does not contain file with hash H") without
+      revealing contents.  For regulatory/legal use cases.
+- [ ] Implementation: Halo2 or Bulletproofs (no trusted setup) via
+      Rust-to-C wrapper or WASM bridge; compile-time optional module.
+      STARKs preferred over SNARKs for quantum resistance alignment
+      with Phase 5.
+
+### ZK Feasibility Notes
+
+ZK adds genuine value for privacy-focused decentralized archiving (Phases
+7--8) but is heavyweight for a CLI tool.  SNARKs require pairing-friendly
+curves (not quantum-resistant); **STARKs are preferred** as they align
+with the post-quantum direction and need no trusted setup.  Proof
+generation is slow (seconds to minutes for complex circuits) so this is
+an opt-in feature, not on the critical path.  Prototype in a fork first.
+
+## Phase 10: Ecosystem Integrations
+
+### libarchive plugin
+
+Highest-leverage integration.  Adding UC2 read/write support to libarchive
+makes `.uc2` a first-class format for `bsdtar`, `cmake`, `pkg(8)`,
+file-roller, Ark, and dozens of other tools across the Linux ecosystem.
+
+- [-] libarchive read handler (decompression/listing): milestones
+      1-3 shipped -- bid() recognises UC2 magic; read_header() slurps
+      the archive, walks uc2_read_cdir, yields each entry mapped onto
+      archive_entry; read_data() drives uc2_extract through a buffering
+      write callback and yields the result via libarchive's pull API.
+      Memory scales with archive size in v1.  Remaining: master-block
+      dependency tracking (M4), seekable adapter (deferred), bsdtar
+      round-trip test (M7), upstream PR (M8).
+- [ ] libarchive write handler (compression; unblocked now that Phase 2 is done)
+
+### Streaming dedup ingestion
+
+Position UC2 as a deduplicating storage layer that other tools pipe into.
+No other CLI archiver offers this.
+
+```sh
+rsync -a /data/ | uc2 --ingest repo.uc2      # dedup on receive
+tar cf - /project | uc2 --ingest backup.uc2   # dedup tar stream
+cp -a /snapshot/ | uc2 --ingest backup.uc2    # incremental dedup
+```
+
+- [x] `uc2 --ingest` mode v1: stdin -> CDC -> sidecar blockstore at
+      `<archive>.blocks/` -> chunk-hash manifest.  `uc2 --ingest-restore`
+      reverses the round-trip.  Tested: small/multichunk round-trip,
+      idempotent dedup on repeat ingest, empty stream, bad-magic
+      rejection.  Now legacy: writer defaults to v2.
+- [x] `uc2 --ingest` v2 (default): self-contained archive with the
+      chunk pool embedded inside the archive file itself.  No sidecar
+      directory.  Manifest entries carry absolute file offsets;
+      duplicate hashes share an offset (intra-call dedup).
+      Cross-archive dedup is not preserved -- the trade-off is the
+      single-file UX.  v1 archives still readable for restore.
+- [ ] `uc2 --ingest` v3: integrate with master-block archive layout
+      so output is a real UC2 v3 archive consumable by uc2 -x / -l
+- [ ] Tar-entry preservation: parse tar boundaries inside --ingest
+      so individual files are recoverable as archive entries
+- [ ] Incremental snapshots: `uc2 snapshot /path repo.uc2`
+      (borg/restic-style deduplicating backups without filesystem support)
+
+### Foreign archive format support
+
+Read (and optionally write) other archive formats, enabling UC2 as a
+universal archive tool and migration path for legacy collections.
+
+- [ ] ZIP read/write (deflate, store; the universal baseline format)
+- [ ] RAR read (v4/v5; for extraction from existing collections)
+- [ ] TGZ/tar.gz read/write (tar + gzip; Unix ecosystem staple)
+- [ ] ISO 9660 read (CD/DVD images; retro-computing preservation)
+
+### File manager plugins
+
+Bobrowski already shipped prototypes; update for UC2 v3.
+
+- [ ] Midnight Commander VFS plugin (update `misc/mc.ext` and `misc/uuc2`)
+- [ ] Total Commander WCX plugin (update `misc/unuc2-wcx.c`)
+
+## Phase 11: Advanced Features
+
+- [ ] Archive-as-filesystem: FUSE mount for `.uc2` on Linux (read-only,
+      decompress-on-the-fly with master-block caching)
+- [ ] Compression tournaments / community challenges
+- [ ] Neural/learned compression preprocessor (modern platforms only,
+      not DOS  --  optional compile-time module)
+- [ ] Jupyter kernel for interactive archive exploration and compression
+      research (Python, building on foxkernel experience):
+      - Rich HTML tables for archive listings with compression ratios
+      - Interactive dedup graph visualization (master-block DAG: which
+        files share blocks, space savings)
+      - Inline benchmark charts comparing methods/levels (ratio vs speed)
+      - Version diff visualization between archive snapshots
+      - Huffman tree / ANS state table visualization for algorithm
+        development
+
+## Testing Strategy
+
+- Create reference UC2 archives using original `uc2pro.exe` in DOSBox
+- Unit tests: magic detection, Fletcher checksum, CP850->UTF-8
+- Integration: extract test archives, compare SHA-256 against manifest
+- Phase 2: round-trip (new compress -> old extract in DOSBox, and vice versa)
+- Phase 3+: dedup correctness, cross-archive block sharing
+- Phase 5: encryption round-trip, key derivation vectors
+- Phase 9: ZK proof soundness and completeness
+
+## Maintenance Log
+
+- 2026-06-11: Fixed the rANS (L6-9) extraction crash and >64KB silent
+  corruption (git-bug d747658, closed): master COMPRESS records now
+  carry the real method (10 at L6-9); the rANS decoder consumes the
+  EOB pair instead of desyncing the bit cursor; bits_feed handles
+  short reads without overrunning its buffer; compressor chunk loads
+  and rANS output flushing respect the 64KB circular-window edge.
+  Found debugging extraction on sdf.org (NetBSD 10) but reproducible
+  everywhere. New regression test: cli_bigfile. Follow-up filed:
+  bf73896 (ftell offsets >4GB truncate silently; P2).
--- a/cli/CMakeLists.txt
+++ b/cli/CMakeLists.txt
@@ -1,5 +1,7 @@
 # uc2 command-line tool

+include(GNUInstallDirs)
+
 add_executable(uc2-cli src/main.c)
 set_target_properties(uc2-cli PROPERTIES OUTPUT_NAME uc2)

@@ -13,13 +15,36 @@ target_include_directories(uc2-cli PRIVATE

 target_compile_features(uc2-cli PRIVATE c_std_99)

+# Skip installation when uc2 is embedded via add_subdirectory()
+if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR)
+    install(TARGETS uc2-cli RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+    if(UNIX)
+        install(FILES uc2.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
+    endif()
+endif()
+
 if(WIN32)
    target_sources(uc2-cli PRIVATE src/compat/compat_win32.c)
+    # Shared compat headers (err.h, fnmatch.h) — both MSVC and MinGW lack these
    target_include_directories(uc2-cli PRIVATE src/compat/include)
+    if(MSVC)
+        # MSVC standalone headers (unistd.h, utime.h, getopt.h) — no #include_next
+        target_sources(uc2-cli PRIVATE src/compat/getopt.c)
+        target_include_directories(uc2-cli PRIVATE src/compat/include/msvc)
+    else()
+        # MinGW/Clang: headers that wrap system headers via #include_next
+        target_include_directories(uc2-cli PRIVATE src/compat/include/posix)
+    endif()
    target_compile_definitions(uc2-cli PRIVATE
+        NO_OLDNAMES
        g_err g_errx g_warn g_warnx g_vwarn g_vwarnx g_verr g_verrx
        g_getprogname g_setlinebuf g_fnmatch
-        g_compat__utf8_console g_compat__wpath g_fopen
-        g_access g_mkdir g_utime
+        g_compat__utf8_console g_compat__wpath
+        g_access g_unlink g_chdir g_mkdir g_chmod g_utime
+        g_opendir
    )
+elseif(DJGPP)
+    target_sources(uc2-cli PRIVATE src/compat/compat_dos.c)
+    # Only add the err.h and fnmatch.h headers, not sys/ overrides
+    target_include_directories(uc2-cli PRIVATE src/compat/include/dos)
 endif()
--- a/cli/src/compat/compat_dos.c
+++ b/cli/src/compat/compat_dos.c
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: LGPL-3.0-only */
+
+/* DOS/DJGPP compatibility layer for UC2.
+   Provides BSD err.h functions and fnmatch for DJGPP,
+   which lacks these POSIX/BSD extensions.
+   Copyright © Jan Bobrowski 2020 / Licence: LGPL
+   Adapted for DOS by Eremey Valetov 2026 */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+
+/* err/errx/warn/warnx family */
+
+#include "err.h"
+
+static const char *_progname = "uc2";
+
+/* Return the name used as the prefix of diagnostics: "uc2" unless
+   setprogname() has replaced it. */
+const char *getprogname(void)
+{
+	return _progname;
+}
+
+/* Remember the program name for diagnostics: the part of argv0 that
+   follows its last '/' or '\\', or all of argv0 if it has neither.
+   Only the pointer is stored, so argv0 must outlive its use. */
+void setprogname(const char *argv0)
+{
+	const char *base = argv0;
+	const char *cur = argv0;
+
+	while (*cur) {
+		char ch = *cur++;
+		if (ch == '/' || ch == '\\')
+			base = cur;
+	}
+	_progname = base;
+}
+
+/* Print "progname: [formatted message: ]<description of errno>\n" to
+   stderr.  f may be NULL, in which case only the errno text follows the
+   program name.
+   errno is captured on entry: the stdio calls that emit the prefix and
+   the message are allowed to modify it, which would make the description
+   printed afterwards refer to the wrong error. */
+void vwarn(const char *f, va_list a)
+{
+	int saved_errno = errno;
+
+	fprintf(stderr, "%s: ", getprogname());
+	if (f) {
+		vfprintf(stderr, f, a);
+		fprintf(stderr, ": ");
+	}
+	fprintf(stderr, "%s\n", strerror(saved_errno));
+	fflush(stderr);
+}
+
+/* Print "progname: [formatted message]\n" to stderr, without any errno
+   text.  f may be NULL, which leaves the message part empty. */
+void vwarnx(const char *f, va_list a)
+{
+	FILE *out = stderr;
+
+	fprintf(out, "%s: ", getprogname());
+	if (f != NULL) {
+		vfprintf(out, f, a);
+	}
+	fprintf(out, "\n");
+	fflush(out);
+}
+
+/* printf-style front end to vwarn(): message followed by the errno text. */
+void warn(const char *f, ...)
+{
+	va_list args;
+
+	va_start(args, f);
+	vwarn(f, args);
+	va_end(args);
+}
+
+/* printf-style front end to vwarnx(): message only, no errno text. */
+void warnx(const char *f, ...)
+{
+	va_list args;
+
+	va_start(args, f);
+	vwarnx(f, args);
+	va_end(args);
+}
+
+/* Report the message via vwarn() (which appends the errno text), then
+   terminate the process with exit status x. */
+void verr(int x, const char *f, va_list a)
+{
+	vwarn(f, a);
+	exit(x);
+}
+
+/* Report the message via vwarnx() (no errno text), then terminate the
+   process with exit status x. */
+void verrx(int x, const char *f, va_list a)
+{
+	vwarnx(f, a);
+	exit(x);
+}
+
+/* printf-style front end to verr(): print the message with the errno
+   text and exit with status x.  There is no va_end because verr() ends
+   in exit() and control never comes back here. */
+void err(int x, const char *f, ...)
+{
+	va_list args;
+
+	va_start(args, f);
+	verr(x, f, args);
+}
+
+/* printf-style front end to verrx(): print the message (no errno text)
+   and exit with status x.  There is no va_end because verrx() ends in
+   exit() and control never comes back here. */
+void errx(int x, const char *f, ...)
+{
+	va_list args;
+
+	va_start(args, f);
+	verrx(x, f, args);
+}
+
+/* fnmatch */
+
+#include "fnmatch.h"
+
+/* Match `string` against the shell-style `pattern`.
+   Returns 0 on a match and FNM_NOMATCH otherwise.
+   Handled syntax: literal characters, '\\' escapes (unless FNM_NOESCAPE
+   is set), '?', '*', and '[...]' sets with "lo-hi" ranges.  Of the flags
+   only FNM_NOESCAPE and FNM_PATHNAME are consulted in this function, and
+   a leading '!' or '^' in a set gets no special treatment. */
+int fnmatch(const char *pattern, const char *string, int flags)
+{
+	for (;;) {
+		char c = *pattern++;
+		switch (c) {
+		case '\\':
+			/* Escape: compare the following pattern character literally.
+			   A trailing backslash, or any backslash under FNM_NOESCAPE,
+			   is compared as itself. */
+			if (*pattern && !(flags & FNM_NOESCAPE))
+				c = *pattern++;
+			/* fall through to the literal comparison */
+		default:
+			if (c != *string++)
+				return FNM_NOMATCH;
+			if (!c)
+				return 0;	/* pattern and string ended together */
+			continue;
+		case '?':
+			/* Any one character, but not the end of the string and,
+			   under FNM_PATHNAME, not a '/'. */
+			c = *string++;
+			if (!c || (flags & FNM_PATHNAME && c == '/'))
+				return FNM_NOMATCH;
+			continue;
+		case '*':
+			/* Try the rest of the pattern at each remaining position of
+			   the string, the terminating NUL included.  Under
+			   FNM_PATHNAME the scan gives up at a '/', so '*' never
+			   consumes one. */
+			do {
+				if (fnmatch(pattern, string, flags) == 0)
+					return 0;
+				if (flags & FNM_PATHNAME && *string == '/')
+					return FNM_NOMATCH;
+			} while (*string++);
+			return FNM_NOMATCH;
+		case '[':;
+			const char *p = pattern;	/* first character of the set */
+			/* The first set character is stepped over unconditionally, so
+			   a ']' in that position is a member, not the closer. */
+			if (!*pattern++)
+				return FNM_NOMATCH;
+			/* Advance pattern to the closing ']'; an unterminated set
+			   never matches. */
+			for (;;) {
+				c = *pattern;
+				if (c == ']')
+					break;
+				if (!c)
+					return FNM_NOMATCH;
+				pattern++;
+			}
+			c = *string++;
+			if (flags & FNM_PATHNAME && c == '/')
+				return FNM_NOMATCH;
+			/* Walk the members in [p, pattern): succeed on an equal
+			   character or on a "lo-hi" range that contains c; running
+			   into the closing ']' means no member matched. */
+			for (;;) {
+				if (c == *p++)
+					break;
+				if (*p == '-' && p + 1 < pattern) {
+					if (p[-1] <= c && c <= p[1])
+						break;
+					p++;
+				}
+				if (p == pattern)
+					return FNM_NOMATCH;
+			}
+			pattern++;	/* step over the closing ']' */
+			continue;
+		}
+	}
+}
--- a/cli/src/compat/compat_win32.c
+++ b/cli/src/compat/compat_win32.c
@@ -1,4 +1,9 @@
-/* Copyright © Jan Bobrowski 2020 / Licence: LGPL */
+/* SPDX-License-Identifier: LGPL-3.0-only */
+
+/* Win32 compatibility layer for UC2 CLI.
+   Provides POSIX/BSD functions missing from MSVC and MinGW.
+   All file operations use wide-char Windows APIs for UTF-8 support.
+   Copyright (c) Jan Bobrowski 2020 / Licence: LGPL */

 #define NO_OLDNAMES
 #include <stdlib.h>
@@ -110,7 +115,7 @@ const char *getprogname(void)
 {
 	static char name[256];
 	if (!name[0]) {
-#ifdef _pgmptr
+#ifdef _WIN32
 		char *p = _pgmptr;
 		char *q = p;
 		int n;
@@ -215,6 +220,18 @@ wchar_t *compat__wpath(const char *path);

 #ifdef g_compat__utf8_console
 #include <fcntl.h>
+#ifdef _MSC_VER
+/* MSVC: use CRT initializer table (.CRT$XCU) instead of GCC constructor */
+/* Start-up hook: gives stdout and stderr fully-buffered 64 KiB stream
+   buffers and switches the console output code page to UTF-8. */
+static void __cdecl compat__utf8_console_init(void)
+{
+	setvbuf(stdout, 0, _IOFBF, 1<<16);
+	setvbuf(stderr, 0, _IOFBF, 1<<16);
+	SetConsoleOutputCP(CP_UTF8);
+}
+/* Register the hook by placing its address in the .CRT$XCU initializer
+   section. */
+#pragma section(".CRT$XCU", read)
+__declspec(allocate(".CRT$XCU"))
+static void (__cdecl *compat__utf8_console_p)(void) = compat__utf8_console_init;
+#else
 __attribute__((constructor))
 void compat__utf8_console(void)
 {
@@ -223,6 +240,7 @@ void compat__utf8_console(void)
 	SetConsoleOutputCP(CP_UTF8);
 }
 #endif
+#endif

 #ifdef g_compat__wpath
 wchar_t *compat__wpath(const char *path)
@@ -253,6 +271,22 @@ int access(const char *path, int mode)
 }
 #endif

+#ifdef g_unlink
+/* unlink() on a path converted by compat__wpath(); returns -1 when the
+   conversion yields no path, otherwise the result of _wunlink(). */
+int unlink(const char *path)
+{
+	wchar_t *wide = compat__wpath(path);
+
+	if (!wide)
+		return -1;
+	return _wunlink(wide);
+}
+#endif
+
+#ifdef g_chdir
+/* chdir() on a path converted by compat__wpath(); returns -1 when the
+   conversion yields no path, otherwise the result of _wchdir(). */
+int chdir(const char *path)
+{
+	wchar_t *wide = compat__wpath(path);
+
+	if (!wide)
+		return -1;
+	return _wchdir(wide);
+}
+#endif
+
 #ifdef g_mkdir
 int mkdir(const char *path, int mode)
 {
@@ -266,16 +300,85 @@ int mkdir(const char *path, int mode)
 }
 #endif

+#ifdef g_chmod
+/* chmod() on a path converted by compat__wpath(); returns -1 when the
+   conversion yields no path, otherwise the result of _wchmod() with the
+   mode passed through unchanged. */
+int chmod(const char *path, int mode)
+{
+	wchar_t *wide = compat__wpath(path);
+
+	if (!wide)
+		return -1;
+	return _wchmod(wide, mode);
+}
+#endif
+
 #ifdef g_utime
 #include <sys/utime.h>
-int utime(const char *path, struct utimbuf *ut)
+/* The Windows SDK declares an inline utime() in <sys/utime.h>.  Our
+ * shim utime.h substitutes utime -> compat__utime at the call site so
+ * UC2's UTF-8 paths round-trip through compat__wpath. */
+int compat__utime(const char *path, struct utimbuf *ut)
 {
 	wchar_t *wpath = compat__wpath(path);
 	if (!wpath)
 		return -1;
-	struct __utimbuf32 wut = {.actime = ut->actime, .modtime = ut->modtime};
+	struct __utimbuf32 wut = {.actime = (long)ut->actime, .modtime = (long)ut->modtime};
 	return _wutime32(wpath, &wut);
 }
 #endif

+#ifdef g_opendir
+#include "dirent.h"
+
+/* Directory stream state behind the opaque DIR type. */
+struct UC2_DIR {
+	HANDLE handle;	/* search handle returned by FindFirstFileW */
+	WIN32_FIND_DATAW find;	/* entry filled in by the latest Find*FileW call */
+	int first;	/* nonzero until readdir() has handed out the entry fetched by opendir() */
+	struct dirent ent;	/* UTF-8 copy of the current name; readdir() returns its address */
+};
+
+/* Open a directory stream for `path`.
+   The path is converted by compat__wpath(), extended to the search
+   pattern "<path>\*" (no separator is added when the path already ends
+   in '\\' or '/'), and handed to FindFirstFileW.  Returns 0 when the
+   conversion fails, the pattern would not fit in MAX_PATH, allocation
+   fails, or the search cannot be started.  The entry found by
+   FindFirstFileW is kept for the first readdir() call. */
+DIR *opendir(const char *path)
+{
+	wchar_t pattern[MAX_PATH];
+	wchar_t *wide = compat__wpath(path);
+	if (!wide)
+		return 0;
+
+	size_t len = wcslen(wide);
+	/* room for an added separator, the '*' and the terminator */
+	if (len + 3 >= MAX_PATH)
+		return 0;
+	wcscpy(pattern, wide);
+
+	if (len > 0) {
+		wchar_t last = pattern[len - 1];
+		if (last != L'\\' && last != L'/')
+			pattern[len++] = L'\\';
+	}
+	pattern[len] = L'*';
+	pattern[len + 1] = 0;
+
+	DIR *dir = malloc(sizeof *dir);
+	if (!dir)
+		return 0;
+
+	dir->handle = FindFirstFileW(pattern, &dir->find);
+	if (dir->handle == INVALID_HANDLE_VALUE) {
+		free(dir);
+		return 0;
+	}
+	dir->first = 1;
+	return dir;
+}
+
+/* Return the next directory entry, or 0 when d is null, the search is
+   exhausted, or the name cannot be converted to UTF-8.
+   The first call hands out the entry already fetched by opendir(); later
+   calls advance with FindNextFileW.  The result points into *d and is
+   overwritten by the next call. */
+struct dirent *readdir(DIR *d)
+{
+	if (!d)
+		return 0;
+
+	if (d->first) {
+		d->first = 0;
+	} else if (!FindNextFileW(d->handle, &d->find)) {
+		return 0;
+	}
+
+	int written = WideCharToMultiByte(CP_UTF8, 0, d->find.cFileName, -1,
+	                                  d->ent.d_name, sizeof d->ent.d_name,
+	                                  0, 0);
+	return written > 0 ? &d->ent : 0;
+}
+
+/* Close the search handle (if one is open) and free the stream.
+   Returns 0, or -1 when d is null. */
+int closedir(DIR *d)
+{
+	if (d == 0)
+		return -1;
+
+	HANDLE h = d->handle;
+	if (h != INVALID_HANDLE_VALUE) {
+		FindClose(h);
+	}
+	free(d);
+	return 0;
+}
+#endif
+
 #endif
--- a/cli/src/compat/getopt.c
+++ b/cli/src/compat/getopt.c
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Minimal POSIX getopt() for MSVC.
+   Supports short options, including ones that take a required argument
+   (e.g., "d:").  GNU-style optional arguments ("d::") are not supported. */
+
+#include <stdio.h>
+#include <string.h>
+#include "include/msvc/getopt.h"
+
+char *optarg;
+int optind = 1, opterr = 1, optopt;
+
+/* Parse the next short option from argv.
+   Returns the option character; '?' for a character not in optstring;
+   and, for a missing argument, ':' when optstring starts with ':' and
+   '?' otherwise.  Returns -1 when argv[optind] is absent, does not start
+   with '-', is exactly "-", or is "--" (which is consumed).  Scanning
+   therefore stops at the first non-option argument; arguments are not
+   reordered.  optarg, optind and optopt are updated as the code below
+   shows; diagnostics go to stderr unless opterr is 0 or optstring starts
+   with ':'. */
+int getopt(int argc, char *const argv[], const char *optstring)
+{
+	/* Index of the next option character inside argv[optind]; 0 means
+	   "start a fresh argument".  Kept across calls so grouped options
+	   such as "-ab" are returned one per call. */
+	static int optpos = 0;
+
+	if (optind >= argc || !argv[optind])
+		return -1;
+
+	if (argv[optind][0] != '-' || !argv[optind][1])
+		return -1;
+
+	/* "--" ends option parsing and is itself skipped. */
+	if (argv[optind][1] == '-' && !argv[optind][2]) {
+		optind++;
+		return -1;
+	}
+
+	if (!optpos)
+		optpos = 1;	/* skip the leading '-' */
+
+	int c = argv[optind][optpos];
+	const char *p = strchr(optstring, c);
+
+	/* Unknown option (':' is never a valid option character). */
+	if (!p || c == ':') {
+		optopt = c;
+		if (opterr && optstring[0] != ':')
+			fprintf(stderr, "%s: invalid option -- '%c'\n", argv[0], c);
+		/* Move on to the next character, or to the next argument when
+		   this one is used up. */
+		if (!argv[optind][++optpos]) {
+			optind++;
+			optpos = 0;
+		}
+		return '?';
+	}
+
+	if (p[1] == ':') {
+		/* Option takes an argument: the rest of this argv element if
+		   there is one ("-dvalue"), otherwise the next element. */
+		if (argv[optind][optpos + 1]) {
+			optarg = &argv[optind][optpos + 1];
+		} else if (++optind >= argc) {
+			optopt = c;
+			if (opterr && optstring[0] != ':')
+				fprintf(stderr, "%s: option requires an argument -- '%c'\n",
+				        argv[0], c);
+			optpos = 0;
+			return optstring[0] == ':' ? ':' : '?';
+		} else {
+			optarg = argv[optind];
+		}
+		optind++;
+		optpos = 0;
+	} else {
+		/* Flag without argument: stay in this element while option
+		   characters remain. */
+		if (!argv[optind][++optpos]) {
+			optind++;
+			optpos = 0;
+		}
+	}
+
+	return c;
+}
--- a/cli/src/compat/include/dos/err.h
+++ b/cli/src/compat/include/dos/err.h
@@ -0,0 +1,18 @@
+#ifndef _ERR_H
+#define _ERR_H
+#ifdef __GNUC__
+#define err_noreturn __attribute__((noreturn))
+//#define err_noreturn [[noreturn]]
+#else
+#define err_noreturn
+#endif
+err_noreturn void err(int x, const char* f, ...);
+err_noreturn void errx(int x, const char* f, ...);
+void warn(const char* f, ...);
+void warnx(const char* f, ...);
+#include <stdarg.h>
+void verr(int x, const char* f, va_list a);
+void verrx(int x, const char* f, va_list a);
+void vwarn(const char* f, va_list a);
+void vwarnx(const char* f, va_list a);
+#endif
--- a/cli/src/compat/include/dos/fnmatch.h
+++ b/cli/src/compat/include/dos/fnmatch.h
@@ -0,0 +1,15 @@
+#ifndef _FNMATCH_H
+#define _FNMATCH_H
+
+#define	FNM_PATHNAME	0x1
+#define	FNM_NOESCAPE	0x2
+#define	FNM_PERIOD	0x4
+#define	FNM_LEADING_DIR	0x8
+#define	FNM_CASEFOLD	0x10
+
+#define	FNM_NOMATCH 1
+#define FNM_NOSYS   (-1)
+
+int fnmatch(const char *, const char *, int);
+
+#endif
--- a/cli/src/compat/include/err.h
+++ b/cli/src/compat/include/err.h
@@ -1,8 +1,11 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
 #ifndef _ERR_H
 #define _ERR_H
 #ifdef __GNUC__
 #define err_noreturn __attribute__((noreturn))
-//#define err_noreturn [[noreturn]]
+#elif defined(_MSC_VER)
+#define err_noreturn __declspec(noreturn)
 #else
 #define err_noreturn
 #endif
--- a/cli/src/compat/include/fnmatch.h
+++ b/cli/src/compat/include/fnmatch.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
 #ifndef _FNMATCH_H
 #define _FNMATCH_H

--- a/cli/src/compat/include/msvc/dirent.h
+++ b/cli/src/compat/include/msvc/dirent.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Minimal POSIX dirent.h for MSVC.
+ *
+ * Implements only what UC2's archive scanner uses: opendir, readdir,
+ * closedir, and a struct dirent with d_name.  UTF-8 paths are
+ * round-tripped through the wide-char Win32 APIs to match the rest of
+ * the compat layer.  d_name is sized to hold a Windows MAX_PATH-long
+ * filename re-encoded to UTF-8 (worst case: 4 bytes per code point). */
+
+#ifndef _COMPAT_DIRENT_H
+#define _COMPAT_DIRENT_H
+
+#include <stddef.h>
+
+#define UC2_DIRENT_NAME_MAX  1024  /* 260 UTF-16 units * 3 bytes (UTF-8 worst case per unit) = 780, plus NUL; 1024 leaves headroom */
+
+struct dirent {
+	char d_name[UC2_DIRENT_NAME_MAX];	/* NUL-terminated UTF-8 file name */
+};
+
+typedef struct UC2_DIR DIR;
+
+DIR *opendir(const char *path);
+struct dirent *readdir(DIR *d);
+int closedir(DIR *d);
+
+#endif
--- a/cli/src/compat/include/msvc/getopt.h
+++ b/cli/src/compat/include/msvc/getopt.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Minimal POSIX getopt for MSVC */
+#ifndef _COMPAT_GETOPT_H
+#define _COMPAT_GETOPT_H
+
+extern char *optarg;
+extern int optind, opterr, optopt;
+
+int getopt(int argc, char *const argv[], const char *optstring);
+
+#endif
--- a/cli/src/compat/include/msvc/unistd.h
+++ b/cli/src/compat/include/msvc/unistd.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Minimal POSIX unistd.h for MSVC */
+#ifndef _COMPAT_UNISTD_H
+#define _COMPAT_UNISTD_H
+
+#include <io.h>
+#include <direct.h>
+
+#ifndef F_OK
+#define F_OK 0
+#endif
+#ifndef R_OK
+#define R_OK 4
+#endif
+#ifndef W_OK
+#define W_OK 2
+#endif
+#ifndef X_OK
+#define X_OK 0
+#endif
+
+#ifndef PATH_MAX
+#define PATH_MAX 260
+#endif
+
+#include <sys/stat.h>
+#ifndef S_ISDIR
+#define S_ISDIR(m) (((m) & _S_IFMT) == _S_IFDIR)
+#endif
+#ifndef S_ISREG
+#define S_ISREG(m) (((m) & _S_IFMT) == _S_IFREG)
+#endif
+
+/* Provided by compat_win32.c (UTF-8-aware via wide-char APIs) */
+int access(const char *path, int mode);
+int unlink(const char *path);
+int chdir(const char *path);
+int mkdir(const char *path, int mode);
+int chmod(const char *path, int mode);
+
+#endif
--- a/cli/src/compat/include/msvc/utime.h
+++ b/cli/src/compat/include/msvc/utime.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* POSIX utime.h for MSVC.
+ *
+ * Modern MSVC SDKs (Windows 10 SDK 10.0.26100+) provide both
+ * struct utimbuf and an inline wrapper named utime() in <sys/utime.h>.
+ * The inline wrapper is not UTF-8-aware: it forwards to _utime32,
+ * which interprets the path in the local ANSI codepage.
+ *
+ * UC2 needs UTF-8 paths to round-trip correctly, so this shim
+ * substitutes utime() with our compat__utime(), which goes through
+ * compat__wpath() before calling _wutime32. */
+
+#ifndef _COMPAT_UTIME_H
+#define _COMPAT_UTIME_H
+
+#include <sys/utime.h>
+
+#ifdef _COMPAT_UTIMBUF_FALLBACK
+#include <time.h>
+struct utimbuf {
+	time_t actime;
+	time_t modtime;
+};
+#endif
+
+int compat__utime(const char *path, struct utimbuf *ut);
+#define utime compat__utime
+
+#endif
--- a/cli/src/compat/include/posix/sys/stat.h
+++ b/cli/src/compat/include/posix/sys/stat.h
--- a/cli/src/compat/include/posix/sys/utime.h
+++ b/cli/src/compat/include/posix/sys/utime.h
--- a/cli/src/compat/include/posix/unistd.h
+++ b/cli/src/compat/include/posix/unistd.h
--- a/cli/src/main.c
+++ b/cli/src/main.c
--- a/cli/uc2.1
+++ b/cli/uc2.1
@@ -0,0 +1,183 @@
+.\" SPDX-License-Identifier: GPL-3.0-or-later
+.Dd June 11, 2026
+.Dt UC2 1
+.Os
+.Sh NAME
+.Nm uc2
+.Nd UltraCompressor II archiver
+.Sh SYNOPSIS
+.Nm
+.Op Fl afpqDT
+.Op Fl C Ar directory
+.Op Fl d Ar destination
+.Ar archive.uc2
+.Op Ar file ...
+.Nm
+.Fl l
+.Op Fl aqT
+.Ar archive.uc2
+.Op Ar file ...
+.Nm
+.Fl t
+.Op Fl aq
+.Ar archive.uc2
+.Op Ar file ...
+.Nm
+.Fl w
+.Op Fl q
+.Op Fl L Ar level
+.Ar archive.uc2
+.Ar file ...
+.Nm
+.Fl B
+.Ar file ...
+.Nm
+.Fl -ots-attach Ar proof.ots
+.Op Fl f
+.Ar archive.uc2
+.Nm
+.Fl -ots-extract
+.Ar archive.uc2 out.ots
+.Nm
+.Fl -ots-info
+.Ar archive.uc2
+.Nm
+.Fl -ingest Ar archive
+.Nm
+.Fl -ingest-restore Ar archive
+.Sh DESCRIPTION
+.Nm
+is a cross-platform revival of UltraCompressor II, the MS-DOS archiver
+published by Nico de Vries (AIP-NL) between 1992 and 1996.
+It reads and writes the original UC2 archive format byte-compatibly:
+archives created by the original
+.Pa uc2pro.exe
+extract correctly, and archives created at compression levels 2 through 5
+can be read by the original DOS program.
+.Pp
+Without a mode option,
+.Nm
+extracts the named archive into the current directory (or into the
+directory given with
+.Fl d ) .
+If
+.Ar file
+arguments are given, only matching entries are processed; shell-style
+wildcards are accepted.
+.Pp
+Compression levels 2 through 5 use the original LZ77 and Huffman
+bitstream.
+Levels 6 through 9 replace the entropy stage with rANS coding for
+better ratios; such archives require
+.Nm
+version 3 and cannot be read by the original DOS program.
+Files with similar content are grouped and compressed against shared
+master blocks, the deduplication scheme UC2 introduced in 1992.
+.Sh OPTIONS
+.Bl -tag -width Ds
+.It Fl a
+Process all stored versions of each file, not only the most recent.
+.It Fl B
+Benchmark every compression method on the given input files and print
+a comparison table; no archive is written.
+.It Fl C Ar directory
+Change to
+.Ar directory
+before reading or writing any files.
+Applies in every mode.
+.It Fl c , Fl p
+Write extracted data to standard output instead of creating files.
+.It Fl D
+Do not restore modification times and permissions of directories.
+Given twice, file metadata is not restored either.
+.It Fl d Ar destination
+Extract into
+.Ar destination
+instead of the current directory.
+.It Fl f
+Overwrite existing files when extracting, and allow
+.Fl -ots-attach
+to replace an existing proof.
+.It Fl h
+Print version, usage, and option summary.
+.It Fl L Ar level
+Select the compression level for
+.Fl w :
+2 (Fast), 3 (Normal), 4 (Tight, the default), 5 (Ultra) for the
+backward-compatible Huffman methods, or 6 through 9 for rANS.
+.It Fl l
+List the archive contents.
+.It Fl q
+Quiet operation; suppress status messages.
+.It Fl T
+Separate listing columns with tabs instead of spaces.
+.It Fl t
+Test archive integrity without writing any files.
+.It Fl w
+Create (write) an archive from the given files and directories.
+.It Fl x
+Extract; this is the default mode.
+.El
+.Pp
+The long options operate on OpenTimestamps proofs and the streaming
+deduplication store:
+.Bl -tag -width Ds
+.It Fl -ots-attach Ar proof.ots
+Attach an OpenTimestamps proof to the archive after verifying that the
+proof commits to the archive contents.
+The proof is stored in a trailer that the original DOS program ignores.
+.It Fl -ots-extract
+Write the attached proof to the named output file, suitable for
+.Ql ots verify .
+.It Fl -ots-info
+Describe the attached proof.
+.It Fl -ingest Ar archive
+Read a data stream from standard input into a content-addressed
+deduplicating block store inside
+.Ar archive .
+.It Fl -ingest-restore Ar archive
+Write the ingested stream from
+.Ar archive
+to standard output.
+.El
+.Sh EXIT STATUS
+.Ex -std
+.Sh EXAMPLES
+Create an archive at the default level and list it:
+.Bd -literal -offset indent
+$ uc2 -w backup.uc2 project/ notes.txt
+$ uc2 -l backup.uc2
+.Ed
+.Pp
+Extract it elsewhere, overwriting existing files (the destination
+directory must exist):
+.Bd -literal -offset indent
+$ mkdir -p /tmp/restore
+$ uc2 -f -d /tmp/restore backup.uc2
+.Ed
+.Pp
+Create a smaller archive readable only by
+.Nm
+version 3, then verify it:
+.Bd -literal -offset indent
+$ uc2 -w -L 9 backup.uc2 project/
+$ uc2 -t backup.uc2
+.Ed
+.Sh HISTORY
+UltraCompressor II was written by Nico de Vries at AIP-NL and sold as
+shareware from 1992 to 1996.
+De Vries re-released the original source under the LGPL in 2015; Jan
+Bobrowski's libunuc2 (2020) built a portable decompressor from it.
+.Nm
+version 3 builds on that lineage as a full reimplementation in C99,
+maintained by Eremey Valetov.
+.Sh AUTHORS
+.An Nico de Vries
+(original UltraCompressor II),
+.An Jan Bobrowski
+(libunuc2),
+.An Eremey Valetov Aq Mt evaletov@protonmail.com
+(version 3).
+.Pp
+Source and issue tracker:
+.Lk https://github.com/evvaletov/uc2
--- a/cmake/README-djgpp.md
+++ b/cmake/README-djgpp.md
@@ -0,0 +1,77 @@
+# DJGPP cross-compile
+
+This builds `uc2.exe` for DOS / FreeDOS using the DJGPP toolchain.
+The output is a 32-bit protected-mode DOS executable that runs under
+the bundled `cwsdpmi.exe` extender (or any DPMI host).
+
+## One-time setup
+
+1. Get a DJGPP cross-toolchain.  The simplest source is the prebuilt
+   release from `andrewwutw/build-djgpp`:
+
+   ```sh
+   curl -fsLO https://github.com/andrewwutw/build-djgpp/releases/download/v3.4/djgpp-linux64-gcc1220.tar.bz2
+   sudo mkdir -p /opt && sudo tar xjf djgpp-linux64-gcc1220.tar.bz2 -C /opt
+   ```
+
+   This puts the toolchain at `/opt/djgpp/`.  Use any prefix; pass
+   it as `-DDJGPP_ROOT=<prefix>` when configuring.
+
+2. (Linux hosts) Make sure your shell has not exported `CPATH` or
+   `CPLUS_INCLUDE_PATH`.  Some distros and dev environments
+   (Intel oneAPI, certain conda envs) export them.  GCC honours these
+   regardless of `-nostdinc`, so any host include directory listed there
+   ends up *first* in the cross-compiler's search path -- typically
+   pulling in glibc headers that fail to compile against DJGPP libc.
+   Either `unset CPATH CPLUS_INCLUDE_PATH` for the build shell, or
+   wrap the cmake invocation in `env -u CPATH -u CPLUS_INCLUDE_PATH`.
+
+## Build
+
+```sh
+unset CPATH CPLUS_INCLUDE_PATH
+cmake -B build-djgpp \
+      -DCMAKE_TOOLCHAIN_FILE=cmake/djgpp-toolchain.cmake \
+      -DDJGPP_ROOT=/opt/djgpp
+cmake --build build-djgpp
+```
+
+Output: `build-djgpp/cli/uc2` (also linked as `uc2.exe`).  Copy it
+plus `cwsdpmi.exe` (shipped with DJGPP at
+`<DJGPP_ROOT>/i586-pc-msdosdjgpp/bin/cwsdpmi.exe`) to a DOS volume.
+
+## Status
+
+- Compiles clean against DJGPP gcc 7.2.0 and 12.2.0.
+- Library (`libuc2.a`) builds without changes.
+- CLI uses the DOS compat layer in `cli/src/compat/compat_dos.c` for
+  the BSD `err.h` and POSIX `fnmatch` shims.
+- Runtime smoke test verified: `uc2 -h` and `uc2 -l <archive>` both
+  succeed under DOSBox-X via `tests/scripts/dos_smoke.sh`.
+
+## Smoke test
+
+```sh
+# 1. Get CWSDPMI extender:
+curl -fsLO http://www.delorie.com/pub/djgpp/current/v2misc/csdpmi7b.zip
+unzip csdpmi7b.zip -d /tmp/cwsdpmi
+
+# 2. Run the smoke test (needs flatpak install com.dosbox_x.DOSBox-X):
+tests/scripts/dos_smoke.sh \
+    build-djgpp/cli/uc2.exe \
+    /tmp/cwsdpmi/bin/CWSDPMI.EXE \
+    tests/archives/basic.uc2
+```
+
+Exits 0 on success.  Skips (with a "SKIP: ..." line) if any of:
+the DJGPP build was not run, CWSDPMI.EXE is missing, or DOSBox-X is
+not installed.
+
+## Notes
+
+- The toolchain file forces `CMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY`
+  because the compiler check would otherwise try to execute a DOS .exe
+  on the host kernel and fail.
+- DJGPP's `unistd.h` provides POSIX-shaped APIs; most of the existing
+  source compiles unchanged.  The library has no DOS-specific code
+  paths.
--- a/cmake/UC2Platform.cmake
+++ b/cmake/UC2Platform.cmake
@@ -2,6 +2,11 @@

 if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
    add_compile_options(-Wall -Wextra -Wno-unused-parameter)
+    if(DJGPP)
+        # DJGPP needs gnu99 for PATH_MAX and other POSIX extensions
+        add_compile_options(-std=gnu99)
+    endif()
 elseif(MSVC)
    add_compile_options(/W3)
+    add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
 endif()
--- a/cmake/djgpp-toolchain.cmake
+++ b/cmake/djgpp-toolchain.cmake
@@ -0,0 +1,55 @@
+# CMake toolchain file for DJGPP cross-compile (DOS / FreeDOS).
+#
+# Usage:
+#   cmake -B build-djgpp -DCMAKE_TOOLCHAIN_FILE=cmake/djgpp-toolchain.cmake
+#   cmake --build build-djgpp
+#
+# Requires the DJGPP cross-toolchain on PATH or at DJGPP_ROOT.  The standard
+# layout from andrewwutw/build-djgpp and the djfdyuruiry/djgpp docker image
+# is /usr/local/bin/djgpp/.  Override with -DDJGPP_ROOT=<path> if installed
+# elsewhere.
+
+set(CMAKE_SYSTEM_NAME Generic)        # bare DJGPP DOS, no OS abstractions
+set(CMAKE_SYSTEM_PROCESSOR i386)
+
+# Project source uses `if(DJGPP)` to gate the DOS compat layer
+# (cli/src/compat/compat_dos.c and the cli/src/compat/include/dos headers).
+# Set the variable up front so those guards activate.
+set(DJGPP TRUE)
+
+# Locate the toolchain prefix.
+if(NOT DEFINED DJGPP_ROOT)
+    if(EXISTS /usr/local/bin/djgpp)
+        set(DJGPP_ROOT /usr/local/bin/djgpp)
+    elseif(EXISTS /opt/djgpp)
+        set(DJGPP_ROOT /opt/djgpp)
+    endif()
+endif()
+
+# With a usable DJGPP_ROOT the tools are addressed by absolute path.
+# Otherwise the prefix stays empty so that the bare tool names are looked
+# up on PATH.  The trailing "/" belongs to the prefix: appending it at the
+# point of use would turn an empty prefix into "/i586-pc-msdosdjgpp-gcc".
+if(DEFINED DJGPP_ROOT AND EXISTS "${DJGPP_ROOT}")
+    set(_DJGPP_BIN "${DJGPP_ROOT}/bin/")
+else()
+    set(_DJGPP_BIN "")
+endif()
+
+set(CMAKE_C_COMPILER   ${_DJGPP_BIN}i586-pc-msdosdjgpp-gcc)
+set(CMAKE_CXX_COMPILER ${_DJGPP_BIN}i586-pc-msdosdjgpp-g++)
+set(CMAKE_AR           ${_DJGPP_BIN}i586-pc-msdosdjgpp-ar     CACHE FILEPATH "")
+set(CMAKE_RANLIB       ${_DJGPP_BIN}i586-pc-msdosdjgpp-ranlib CACHE FILEPATH "")
+set(CMAKE_STRIP        ${_DJGPP_BIN}i586-pc-msdosdjgpp-strip  CACHE FILEPATH "")
+
+if(DEFINED DJGPP_ROOT)
+    set(CMAKE_FIND_ROOT_PATH ${DJGPP_ROOT}/i586-pc-msdosdjgpp)
+endif()
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+
+# DJGPP can produce static binaries; tests run inside DOSBox-X.
+set(CMAKE_EXE_LINKER_FLAGS_INIT "")
+
+# CMake's compiler check tries to build a test binary.  DJGPP-produced
+# .exe binaries are valid COFF executables that the host kernel will
+# refuse to run, so use STATIC_LIBRARY mode.
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
--- a/cmake/djgpp.cmake
+++ b/cmake/djgpp.cmake
@@ -0,0 +1,34 @@
+# CMake toolchain file for DJGPP cross-compilation (DOS target)
+#
+# Usage: cmake -B build-dos -DCMAKE_TOOLCHAIN_FILE=cmake/djgpp.cmake
+#        cmake --build build-dos
+
+set(CMAKE_SYSTEM_NAME Generic)
+set(CMAKE_SYSTEM_PROCESSOR i586)
+
+set(DJGPP_ROOT "/usr/local/djgpp" CACHE PATH "DJGPP installation root")
+set(DJGPP_PREFIX "i586-pc-msdosdjgpp" CACHE STRING "DJGPP toolchain prefix")
+
+set(CMAKE_C_COMPILER   "${DJGPP_ROOT}/bin/${DJGPP_PREFIX}-gcc")
+set(CMAKE_CXX_COMPILER "${DJGPP_ROOT}/bin/${DJGPP_PREFIX}-g++")
+set(CMAKE_ASM_COMPILER "${DJGPP_ROOT}/bin/${DJGPP_PREFIX}-gcc")
+set(CMAKE_AR           "${DJGPP_ROOT}/bin/${DJGPP_PREFIX}-ar" CACHE FILEPATH "")
+set(CMAKE_RANLIB       "${DJGPP_ROOT}/bin/${DJGPP_PREFIX}-ranlib" CACHE FILEPATH "")
+set(CMAKE_STRIP        "${DJGPP_ROOT}/bin/${DJGPP_PREFIX}-strip" CACHE FILEPATH "")
+
+# This DJGPP cross-compiler has /usr/include baked in and -nostdinc doesn't
+# remove it.  Using -I (not -isystem) puts the DJGPP paths before /usr/include
+# so the correct headers are always found first.
+# The compiler's private include directories live under a version-named
+# directory; override DJGPP_GCC_VERSION for toolchains other than 12.2.0.
+set(DJGPP_GCC_VERSION "12.2.0" CACHE STRING "GCC version of the DJGPP toolchain (names its private include directory)")
+set(_DJGPP_GCC_DIR "${DJGPP_ROOT}/lib/gcc/${DJGPP_PREFIX}/${DJGPP_GCC_VERSION}")
+set(_DJGPP_NOSTDINC "-nostdinc -I${_DJGPP_GCC_DIR}/include -I${_DJGPP_GCC_DIR}/include-fixed -I${DJGPP_ROOT}/${DJGPP_PREFIX}/sys-include")
+set(CMAKE_C_FLAGS_INIT "${_DJGPP_NOSTDINC}")
+set(CMAKE_ASM_FLAGS_INIT "${_DJGPP_NOSTDINC}")
+
+set(CMAKE_FIND_ROOT_PATH "${DJGPP_ROOT}/${DJGPP_PREFIX}")
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+set(CMAKE_EXECUTABLE_SUFFIX ".exe")
+
+set(DJGPP TRUE)
+set(DOS TRUE)
--- a/contrib/libarchive/CMakeLists.txt
+++ b/contrib/libarchive/CMakeLists.txt
@@ -0,0 +1,58 @@
+# Optional libarchive read-format plugin for UC2.
+#
+# Enable with -DUC2_BUILD_LIBARCHIVE_PLUGIN=ON.  Because libarchive's
+# read-format API is internal (archive_read_private.h, __archive_read_*),
+# the plugin links against a libarchive source tree rather than the
+# installed -devel package.  Pass -DLIBARCHIVE_SOURCE_DIR=<path> to a
+# checkout (or extracted release tarball) of libarchive.
+
+option(UC2_BUILD_LIBARCHIVE_PLUGIN
+       "Build the libarchive read-format plugin (milestone 1: bid)" OFF)
+
+if(NOT UC2_BUILD_LIBARCHIVE_PLUGIN)
+    return()
+endif()
+
+if(NOT DEFINED LIBARCHIVE_SOURCE_DIR)
+    message(WARNING
+        "UC2_BUILD_LIBARCHIVE_PLUGIN=ON but LIBARCHIVE_SOURCE_DIR is "
+        "not set.  Pass -DLIBARCHIVE_SOURCE_DIR=<path-to-libarchive-source>; "
+        "the plugin needs libarchive's private headers (archive_read_private.h, "
+        "archive_platform.h).  Skipping plugin build.")
+    return()
+endif()
+
+if(NOT EXISTS "${LIBARCHIVE_SOURCE_DIR}/libarchive/archive_read_private.h")
+    message(WARNING
+        "LIBARCHIVE_SOURCE_DIR=${LIBARCHIVE_SOURCE_DIR} does not look like "
+        "a libarchive source tree (archive_read_private.h not found).  "
+        "Skipping plugin build.")
+    return()
+endif()
+
+# libarchive's headers live alongside its own config.h; set up enough
+# private include paths for archive_platform.h to compile against the
+# build host's <archive.h>.
+add_library(uc2_libarchive STATIC archive_read_support_format_uc2.c)
+target_include_directories(uc2_libarchive PRIVATE
+    "${LIBARCHIVE_SOURCE_DIR}/libarchive"
+)
+target_link_libraries(uc2_libarchive PRIVATE uc2)
+target_compile_features(uc2_libarchive PRIVATE c_std_99)
+
+# archive_platform.h pulls in build-time configuration that is normally
+# generated by libarchive's own configure.  Bypass that path: tell the
+# compiler we are not using HAVE_CONFIG_H, and provide enough feature
+# guesses to keep the platform header satisfied.
+target_compile_definitions(uc2_libarchive PRIVATE
+    PLATFORM_CONFIG_H="archive_platform_config.uc2.h"
+)
+configure_file(archive_platform_config.uc2.h.in
+               "${CMAKE_CURRENT_BINARY_DIR}/archive_platform_config.uc2.h"
+               COPYONLY)
+target_include_directories(uc2_libarchive PRIVATE
+    "${CMAKE_CURRENT_BINARY_DIR}"
+)
+
+message(STATUS "UC2: libarchive plugin enabled "
+               "(milestone 1, source=${LIBARCHIVE_SOURCE_DIR})")
--- a/contrib/libarchive/README.md
+++ b/contrib/libarchive/README.md
@@ -0,0 +1,166 @@
+# UC2 read-format plugin for libarchive
+
+This directory contains the design and a skeleton implementation of a
+read-only `.uc2` format handler for libarchive.  The goal is to make
+UC2 archives transparently extractable by every libarchive-using tool
+(`bsdtar`, `cmake`, `pkg(8)`, file-roller, Ark, and others).
+
+## Status
+
+- **Milestones 1-3 shipped.**  `archive_read_support_format_uc2.c`
+  implements:
+  - `bid()` -- `__archive_read_ahead` reads the first 4 bytes,
+    returns 64 on UC2 magic.
+  - `read_header()` -- on first call, slurps the entire archive
+    into memory via `__archive_read_ahead` + `__archive_read_consume`,
+    opens a `libuc2` handle bound to the slurped buffer, walks
+    `uc2_read_cdir` to cache every entry (with `uc2_get_tag`
+    resolution for tagged entries), then yields entries one per call
+    via `archive_entry_set_pathname` / `set_size` / `set_mtime` /
+    `set_filetype` / `set_perm`.
+  - `read_data()` -- on first call per entry, runs `uc2_extract`
+    with a buffering write callback, then yields the whole entry
+    in one slice; subsequent calls return `ARCHIVE_EOF`.
+  - `read_data_skip()` and `cleanup()` -- correct.
+- Memory model: archive is slurped fully on the first `read_header`,
+  so memory use scales with archive size.  Acceptable for v1; future
+  work can swap in a seekable adapter when the underlying filter
+  supports `__archive_read_seek`.
+- `CMakeLists.txt` activates with `-DUC2_BUILD_LIBARCHIVE_PLUGIN=ON
+  -DLIBARCHIVE_SOURCE_DIR=<libarchive-checkout>`.  The pin against a
+  source tree (rather than `find_package(LibArchive)`) is required
+  because the read-format API is internal -- the public `-devel`
+  package ships only `archive.h` and `archive_entry.h`.
+
+## Integration recipe (manual, until upstream merge)
+
+To actually exercise the plugin from `bsdtar`, the plugin must be
+linked into the libarchive binary itself (the relevant API is internal
+and not exported from the system shared library).  Two paths:
+
+1. **Drop-in patch.**  Copy `archive_read_support_format_uc2.c` into
+   `libarchive/libarchive/`, then add one line to
+   `libarchive/libarchive/archive_read_support_format_all.c`:
+
+   ```c
+   archive_read_support_format_uc2(a);
+   ```
+
+   plus one entry in `libarchive/libarchive/CMakeLists.txt` next to
+   the other `archive_read_support_format_*.c` sources.  Rebuild
+   libarchive; then `bsdtar -tf archive.uc2` lists entries.
+
+2. **External link.**  Build `libuc2_libarchive.a` from this directory
+   (`cmake -DUC2_BUILD_LIBARCHIVE_PLUGIN=ON -DLIBARCHIVE_SOURCE_DIR=...`).
+   Build a custom `libarchive_static.a` that includes the same
+   `LIBARCHIVE_SOURCE_DIR`.  Link both into a small driver program
+   that calls `archive_read_support_format_uc2(a)`.
+
+The upstream PR (milestone 8 in the original issue) replaces both
+recipes with a single first-class `bsdtar` integration.
+
+## Why an out-of-tree skeleton?
+
+libarchive's read-format plugin API is internal.
+`archive_read_register_format` is a static function, not part of the
+public ABI.  An out-of-tree `.so` cannot be loaded into an unmodified
+libarchive at runtime.
+
+The supported integration paths are:
+
+1. **Upstream merge.**  Submit
+   `archive_read_support_format_uc2.c` as a PR against
+   `libarchive/libarchive`.  Once merged, distros pick it up and
+   every tool that links libarchive sees `.uc2` automatically.  This
+   is the long-term goal.
+
+2. **Patched libarchive build.**  Distribute a small patch that
+   includes the UC2 plugin against a known libarchive version.
+   Useful for testing before upstream merge and for users who want
+   `.uc2` support before the upstream release reaches their distro.
+
+3. **Static-library wrapper.**  Build the plugin as part of a custom
+   tool that statically links libarchive + this plugin.  Useful for
+   demo binaries; not a substitute for upstream merge because the
+   wrapper still won't be picked up by `bsdtar` etc.
+
+## Architecture
+
+UC2 archives use a fixed front header (29 bytes), a record stream
+of compressed bodies, and a compressed central directory whose
+offset is recorded in the front header.  The central directory
+holds OHEAD records for masters, dirs, and files; entry attributes
+are in OSMETA + DIRMETA / FILEMETA.
+
+The plugin uses libuc2 for parsing and decompression and adapts the
+results to libarchive's `struct archive_entry` model.  libuc2 already
+exposes a streaming read API (`uc2_open`, `uc2_read_cdir`,
+`uc2_extract`) and is GPL-3.0 / LGPL-3.0; the plugin is GPL-3.0-or-later
+to match the cli/main.c license boundary.  See
+[`docs/license-audit.md`](../../docs/license-audit.md) for the
+provenance table.
+
+### Callback responsibilities
+
+- **bid**: read the first 4 bytes via `__archive_read_ahead`, check
+  for the UC2 magic (`0x1A324355`).  Return 64 on match, 0 otherwise.
+  libarchive uses the highest bid to pick a format; 64 is the
+  conventional "format-recognised" score.
+
+- **read_header**: on first call, open the libuc2 handle and read
+  the central directory into memory.  On every call, return one
+  entry's metadata via `archive_entry_*` setters.  When entries are
+  exhausted, return `ARCHIVE_EOF`.
+
+- **read_data**: stream decompressed bytes for the current entry.
+  libuc2's `uc2_extract` invokes a write callback per chunk; the
+  plugin needs to convert this push model into libarchive's pull
+  model (the standard way: a small ring buffer, plus a generator
+  loop or coroutine).  The simplest first implementation buffers
+  the whole entry, which is correct but increases memory pressure
+  for very large files; refine later.
+
+- **read_data_skip**: advance to the next entry without producing
+  data.  Decompression cannot be safely skipped (master-block
+  dependencies), so the plugin still decompresses, just discards.
+
+- **cleanup**: close the libuc2 handle, free buffers.
+
+### libuc2 IO callbacks
+
+libuc2 takes user-supplied callbacks for read/alloc/free/warn.  The
+plugin wires these to libarchive's filter stack:
+- `read` -> `__archive_read_seek` + `__archive_read_ahead`
+- `alloc`/`free` -> `malloc`/`free`
+- `warn` -> push to libarchive's warning log via
+  `archive_set_error`.
+
+## Build
+
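+The CMake target only configures when `LIBARCHIVE_SOURCE_DIR` points at a
+libarchive source tree (a checkout or an extracted release tarball).  The
+distro packages (`libarchive-devel` on Fedora/RHEL, `libarchive-dev` on
+Debian) are not sufficient on their own: they ship only the public
+headers, and the plugin needs libarchive's private ones
+(`archive_read_private.h`).
+
+```sh
+cmake -B build -DUC2_BUILD_LIBARCHIVE_PLUGIN=ON \
+      -DLIBARCHIVE_SOURCE_DIR=<libarchive-checkout>
+cmake --build build --target uc2_libarchive
+```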
+
+The built object can be linked into a libarchive-using application or
+patched into libarchive's source tree (`libarchive/libarchive/`).
+
+## Roadmap
+
+Milestones 1-3 are implemented (see Status).  The full list of
+implementation milestones, in order:
+
+1. bid function with magic check (~20 lines)
+2. read_header for the first entry only (single-file archives)
+3. read_data for uncompressed-by-master entries
+4. Master-block decompression and dependency tracking
+5. Multi-file archives + directory entries
+6. Tagged entries (long names, extended attributes)
+7. Round-trip test against bsdtar built from a patched libarchive
+8. Upstream PR
+
+Each milestone is independently shippable as a working subset.
--- a/contrib/libarchive/archive_platform_config.uc2.h.in
+++ b/contrib/libarchive/archive_platform_config.uc2.h.in
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Minimal stand-in for libarchive's generated config.h.
+ *
+ * libarchive's archive_platform.h refuses to include if it can't find
+ * either a generated config.h (via HAVE_CONFIG_H + autoconf) or a
+ * hand-built PLATFORM_CONFIG_H.  This file is the latter.  Only the
+ * minimum needed to compile our bid() and read_header() is set; full
+ * libarchive functionality is not required from this translation unit.
+ */
+
+#ifndef UC2_ARCHIVE_PLATFORM_CONFIG_H
+#define UC2_ARCHIVE_PLATFORM_CONFIG_H
+
+/* Common POSIX feature presence (Linux/macOS/BSD).  Adjust by host
+ * if cross-compiling onto something different. */
+#define HAVE_STDINT_H        1
+#define HAVE_INTTYPES_H      1
+#define HAVE_STDLIB_H        1
+#define HAVE_STRING_H        1
+#define HAVE_SYS_TYPES_H     1
+#define HAVE_UNISTD_H        1
+#define HAVE_ERRNO_H         1
+#define HAVE_LIMITS_H        1
+#define HAVE_FCNTL_H         1
+#define HAVE_LOCALE_H        1
+#define HAVE_WCHAR_H         1
+#define HAVE_WCTYPE_H        1
+#define HAVE_TIME_H          1
+#define HAVE_ICONV_H         1
+#define HAVE_LANGINFO_H      1
+#define HAVE_DECL_NL_LANGINFO 1
+#define HAVE_DECL_INT32_MAX   1
+#define HAVE_DECL_INT32_MIN   1
+#define HAVE_DECL_UINT32_MAX  1
+#define HAVE_DECL_INT64_MAX   1
+#define HAVE_DECL_INT64_MIN   1
+#define HAVE_DECL_UINT64_MAX  1
+#define HAVE_DECL_INTMAX_MAX  1
+#define HAVE_DECL_INTMAX_MIN  1
+#define HAVE_DECL_UINTMAX_MAX 1
+#define HAVE_DECL_SIZE_MAX    1
+#define HAVE_DECL_SSIZE_MAX   1
+
+/* iconv shape on glibc / FreeBSD: iconv() takes a plain `char **`
+ * input pointer there, so the qualifier macro expands to nothing.
+ * ICONV_CONST is pasted in front of `char` as a type qualifier
+ * (either `const` or empty); a string literal in that position would
+ * not compile. */
+#define ICONV_CONST
+
+/* libarchive expects these to be defined to 0 or 1.
+ * NOTE(review): a macro defined to 0 still satisfies #ifdef and
+ * defined(); confirm that no libarchive source compiled against this
+ * header tests these names that way before widening its use. */
+#define HAVE_ZLIB_H          0
+#define HAVE_BZLIB_H         0
+#define HAVE_LZMA_H          0
+#define HAVE_LZO1X_H         0
+#define HAVE_LZ4_H           0
+#define HAVE_ZSTD_H          0
+
+#endif
--- a/contrib/libarchive/archive_read_support_format_uc2.c
+++ b/contrib/libarchive/archive_read_support_format_uc2.c
@@ -0,0 +1,418 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* libarchive read handler for UC2 v3 archives.
+ *
+ * Status: milestones 1-3.
+ *   M1 -- bid() with UC2 magic check.
+ *   M2 -- read_header iterates uc2_read_cdir, maps each cdir entry to
+ *         libarchive's archive_entry shape (name, size, mode, mtime).
+ *   M3 -- read_data uses uc2_extract to decompress an entry, buffers
+ *         the result, then yields it via libarchive's pull-style API.
+ *
+ * Strategy: on the first read_header call we slurp the entire archive
+ * into memory through __archive_read_ahead, then drive libuc2 against
+ * that buffer.  This is correct for any input but uses memory equal
+ * to the archive size; future revisions can swap in a seekable adapter
+ * when the underlying filter supports __archive_read_seek.
+ *
+ * Built against libarchive's internal API
+ * (archive_read_private.h, __archive_read_ahead,
+ * __archive_read_register_format), so it must compile inside a
+ * libarchive source tree.  Pass -DLIBARCHIVE_SOURCE_DIR=<path> to
+ * cmake to build standalone.
+ */
+
+#include "archive_platform.h"
+
+#include "archive.h"
+#include "archive_entry.h"
+#include "archive_private.h"
+#include "archive_read_private.h"
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <uc2/libuc2.h>
+
+/* Format code that uc2_la_read_header stores in
+ * archive.archive_format for every UC2 archive. */
+#define ARCHIVE_FORMAT_UC2  0xC0FF0000  /* placeholder format code */
+
+/* Per-reader state.  Allocated zeroed in
+ * archive_read_support_format_uc2 and released in uc2_la_cleanup. */
+struct uc2_la_state {
+	/* Slurped archive */
+	/* In-memory copy of the archive, filled by slurp_archive. */
+	uint8_t *data;
+	/* Number of valid bytes in data. */
+	size_t   len;
+	int      slurped;     /* 0 = not yet, 1 = done */
+
+	/* libuc2 */
+	/* Handle returned by uc2_open; closed in uc2_la_cleanup. */
+	uc2_handle handle;
+
+	/* Cached cdir entries.  uc2_read_cdir is single-pass; we capture
+	 * everything on the first read_header call. */
+	struct uc2_entry *entries;
+	/* Entries stored so far. */
+	int n_entries;
+	/* Slots allocated in entries. */
+	int n_capacity;
+	/* Index of the entry the next read_header call will return. */
+	int next_entry;
+	/* Buffer handed to uc2_finish_cdir for the label. */
+	char label[12];
+
+	/* Per-entry decompressed buffer for read_data. */
+	/* The allocation is kept and reused from one entry to the next. */
+	uint8_t *entry_data;
+	size_t   entry_cap;
+	size_t   entry_len;
+	/* Non-zero once the current entry's data was returned or skipped. */
+	int      entry_yielded;
+};
+
+/* libuc2 read callback backed by the slurped image.  Copies at most
+ * `len` bytes starting at archive offset `pos` into `buf` and returns
+ * the number of bytes copied; returns 0 when `pos` is at or past the
+ * end of the image. */
+static int
+slurp_read(void *ctx, unsigned pos, void *buf, unsigned len)
+{
+	const struct uc2_la_state *image = (const struct uc2_la_state *)ctx;
+	size_t offset = (size_t)pos;
+
+	if (offset >= image->len)
+		return 0;
+
+	/* Clamp the request to what is left after `pos`. */
+	unsigned remaining = (unsigned)(image->len - offset);
+	unsigned count = (len > remaining) ? remaining : len;
+
+	memcpy(buf, image->data + offset, count);
+	return (int)count;
+}
+
+/* libuc2 allocation callback: forwards to malloc; the context pointer
+ * is not used. */
+static void *
+slurp_alloc(void *ctx, unsigned size)
+{
+	void *block = malloc(size);
+
+	(void)ctx;
+	return block;
+}
+
+/* libuc2 release callback: forwards to free; the context pointer is
+ * not used. */
+static void
+slurp_free(void *ctx, void *ptr)
+{
+	free(ptr);
+	(void)ctx;
+}
+
+/* Callback table passed to uc2_open in collect_entries: reads come
+ * from the slurped buffer, memory from malloc/free, and no warning
+ * callback is installed. */
+static struct uc2_io slurp_io = {
+	.read  = slurp_read,
+	.alloc = slurp_alloc,
+	.free  = slurp_free,
+	.warn  = NULL,
+};
+
+/* Growable output buffer filled by extract_write, the push-style write
+ * callback given to uc2_extract.  Everything is buffered so read_data
+ * can yield the entry in one slice. */
+struct extract_buf {
+	/* Heap buffer; grown with realloc as data arrives. */
+	uint8_t *data;
+	/* Bytes allocated in data. */
+	size_t   cap;
+	/* Bytes written so far. */
+	size_t   len;
+	/* Set to 1 by extract_write when growing the buffer fails. */
+	int      err;
+};
+
+/* Write callback for uc2_extract: append `len` bytes from `p` to the
+ * extract_buf passed as `ctx`, growing it geometrically as needed.
+ * Returns `len` on success.  On failure sets eb->err, leaves the
+ * existing buffer intact and returns -1. */
+static int
+extract_write(void *ctx, const void *p, unsigned len)
+{
+	struct extract_buf *eb = (struct extract_buf *)ctx;
+
+	/* Nothing to append; also keeps memcpy away from a NULL buffer. */
+	if (len == 0)
+		return 0;
+
+	/* The end offset must be representable in size_t. */
+	if ((size_t)len > SIZE_MAX - eb->len) {
+		eb->err = 1;
+		return -1;
+	}
+
+	size_t need = eb->len + len;
+	if (need > eb->cap) {
+		size_t ncap = eb->cap ? eb->cap : 4096;
+		while (ncap < need) {
+			/* Doubling would wrap: settle for the exact size. */
+			if (ncap > SIZE_MAX / 2) {
+				ncap = need;
+				break;
+			}
+			ncap *= 2;
+		}
+		uint8_t *np = realloc(eb->data, ncap);
+		if (!np) {
+			eb->err = 1;
+			return -1;
+		}
+		eb->data = np;
+		eb->cap = ncap;
+	}
+
+	memcpy(eb->data + eb->len, p, len);
+	eb->len += len;
+	return (int)len;
+}
+
+/* Convert a packed DOS timestamp (date in the upper 16 bits, time of
+ * day in the lower 16) to a Unix time_t.  DOS times are local, but
+ * the archive carries no timezone, so the fields are interpreted as
+ * UTC wherever a UTC conversion routine is available; otherwise
+ * mktime is used. */
+static time_t
+dos_to_unix_time(unsigned dos_time)
+{
+	const unsigned date  = dos_time >> 16;
+	const unsigned clock = dos_time & 0xffffu;
+	struct tm when;
+
+	memset(&when, 0, sizeof when);
+
+	/* Date: 7 bits of years since 1980, 4 bits month (1-based),
+	 * 5 bits day of month. */
+	when.tm_year = (int)((date >> 9) & 0x7f) + 80;
+	when.tm_mon  = (int)((date >> 5) & 0x0f) - 1;
+	when.tm_mday = (int)(date & 0x1f);
+
+	/* Time: 5 bits hour, 6 bits minute, 5 bits of two-second units. */
+	when.tm_hour = (int)((clock >> 11) & 0x1f);
+	when.tm_min  = (int)((clock >> 5) & 0x3f);
+	when.tm_sec  = (int)(clock & 0x1f) * 2;
+
+#if defined(_WIN32)
+	return _mkgmtime(&when);
+#elif defined(__GLIBC__) || defined(__APPLE__) || defined(__FreeBSD__) || \
+      defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
+	return timegm(&when);
+#else
+	return mktime(&when);
+#endif
+}
+
+/* Format callbacks, defined below and handed to
+ * __archive_read_register_format. */
+static int  uc2_la_bid(struct archive_read *, int);
+static int  uc2_la_read_header(struct archive_read *, struct archive_entry *);
+static int  uc2_la_read_data(struct archive_read *, const void **,
+                             size_t *, int64_t *);
+static int  uc2_la_read_data_skip(struct archive_read *);
+static int  uc2_la_cleanup(struct archive_read *);
+
+/* Public entry point: allocate zeroed reader state and register the
+ * "uc2" format callbacks on a read archive in the NEW state.  Returns
+ * the registration result, or ARCHIVE_FATAL if the state cannot be
+ * allocated.  The state is freed here when registration does not
+ * return ARCHIVE_OK; otherwise uc2_la_cleanup releases it. */
+int
+archive_read_support_format_uc2(struct archive *_a)
+{
+	struct archive_read *reader = (struct archive_read *)_a;
+	struct uc2_la_state *st;
+	int rc;
+
+	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
+	    ARCHIVE_STATE_NEW, "archive_read_support_format_uc2");
+
+	st = (struct uc2_la_state *)calloc(1, sizeof(*st));
+	if (st == NULL) {
+		archive_set_error(_a, ENOMEM,
+		    "Out of memory allocating UC2 reader state");
+		return (ARCHIVE_FATAL);
+	}
+
+	/* Callback slots this reader does not implement are left NULL. */
+	rc = __archive_read_register_format(reader, st, "uc2",
+	    uc2_la_bid,
+	    NULL,
+	    uc2_la_read_header,
+	    uc2_la_read_data,
+	    uc2_la_read_data_skip,
+	    NULL,
+	    uc2_la_cleanup,
+	    NULL,
+	    NULL);
+	if (rc == ARCHIVE_OK)
+		return (rc);
+
+	free(st);
+	return (rc);
+}
+
+/* Bid callback: look at the first four bytes of the stream and bid 64
+ * when they are the UC2 magic "UC2\x1A", 0 otherwise.  Returns -1 when
+ * four bytes cannot be read ahead.  `best_bid` is ignored. */
+static int
+uc2_la_bid(struct archive_read *a, int best_bid)
+{
+	static const unsigned char magic[4] = { 0x55, 0x43, 0x32, 0x1A };
+	const unsigned char *head;
+
+	(void)best_bid;
+
+	head = __archive_read_ahead(a, sizeof magic, NULL);
+	if (head == NULL)
+		return (-1);
+
+	return (memcmp(head, magic, sizeof magic) == 0) ? (64) : (0);
+}
+
+/* Slurp the entire archive into st->data via __archive_read_ahead +
+ * __archive_read_consume, appending at st->len.  Returns ARCHIVE_OK
+ * on clean EOF, or ARCHIVE_FATAL (with the archive error set) on a
+ * read error, on allocation failure, or when the archive would
+ * exceed 2 GiB.  On failure any bytes already buffered stay in
+ * st->data for uc2_la_cleanup to release. */
+static int
+slurp_archive(struct archive_read *a, struct uc2_la_state *st)
+{
+	/* Size ceiling; keeps every offset well inside the `unsigned pos`
+	 * that slurp_read receives. */
+	const size_t limit = (size_t)0x80000000u;
+	/* Allocated size of st->data.  The state has no capacity field,
+	 * so a buffer inherited from an earlier call is treated as
+	 * exactly full. */
+	size_t cap = st->data ? st->len : 0;
+
+	for (;;) {
+		ssize_t avail;
+		const void *p = __archive_read_ahead(a, 1, &avail);
+		if (p == NULL) {
+			if (avail < 0) {
+				archive_set_error(&a->archive, EIO,
+				    "UC2: read error while slurping archive");
+				return (ARCHIVE_FATAL);
+			}
+			break; /* clean EOF */
+		}
+		if (avail <= 0)
+			break;
+
+		size_t chunk = (size_t)avail;
+
+		/* Compare against the remaining headroom rather than the
+		 * sum, so the addition below can never wrap. */
+		if (st->len > limit || chunk > limit - st->len) {
+			archive_set_error(&a->archive, ENOMEM,
+			    "UC2: archive too large to slurp (>2GB)");
+			return (ARCHIVE_FATAL);
+		}
+
+		size_t need = st->len + chunk;
+		if (need > cap) {
+			/* Geometric growth.  need <= limit, so doubling a
+			 * value below need cannot overflow size_t. */
+			size_t ncap = cap ? cap : 4096;
+			while (ncap < need)
+				ncap *= 2;
+			uint8_t *np = (uint8_t *)realloc(st->data, ncap);
+			if (!np) {
+				archive_set_error(&a->archive, ENOMEM,
+				    "UC2: out of memory slurping archive");
+				return (ARCHIVE_FATAL);
+			}
+			st->data = np;
+			cap = ncap;
+		}
+
+		memcpy(st->data + st->len, p, chunk);
+		st->len = need;
+		__archive_read_consume(a, avail);
+	}
+	return (ARCHIVE_OK);
+}
+
+/* Open the slurped image with libuc2, walk uc2_read_cdir and cache
+ * every entry in st->entries.  For tagged entries uc2_get_tag is
+ * called until it stops reporting UC2_TaggedEntry; the tag payloads
+ * are not kept here (presumably the calls let libuc2 finish resolving
+ * the entry, e.g. long names -- confirm against libuc2).  The
+ * directory is then closed with uc2_finish_cdir, which receives
+ * st->label.  Returns ARCHIVE_OK, or ARCHIVE_FATAL with the archive
+ * error set; partially collected state is left for uc2_la_cleanup. */
+static int
+collect_entries(struct archive_read *a, struct uc2_la_state *st)
+{
+	int ret;
+
+	st->handle = uc2_open(&slurp_io, st);
+	if (st->handle == NULL) {
+		archive_set_error(&a->archive, EINVAL,
+		    "UC2: uc2_open failed");
+		return (ARCHIVE_FATAL);
+	}
+
+	for (;;) {
+		/* Make room for one more entry before reading into it. */
+		if (st->n_entries >= st->n_capacity) {
+			int ncap = st->n_capacity ? st->n_capacity * 2 : 32;
+			struct uc2_entry *ne = (struct uc2_entry *)realloc(
+			    st->entries, (size_t)ncap * sizeof *ne);
+			if (!ne) {
+				archive_set_error(&a->archive, ENOMEM,
+				    "UC2: out of memory collecting entries");
+				return (ARCHIVE_FATAL);
+			}
+			st->entries = ne;
+			st->n_capacity = ncap;
+		}
+
+		struct uc2_entry *e = &st->entries[st->n_entries];
+		ret = uc2_read_cdir(st->handle, e);
+		if (ret == UC2_End)
+			break;
+		if (ret < 0) {
+			archive_set_error(&a->archive, EINVAL,
+			    "UC2: uc2_read_cdir failed: %s",
+			    uc2_message(st->handle, ret));
+			return (ARCHIVE_FATAL);
+		}
+
+		while (ret == UC2_TaggedEntry) {
+			char *tag;
+			void *data;
+			unsigned size;
+			ret = uc2_get_tag(st->handle, e, &tag, &data, &size);
+			if (ret < 0) {
+				archive_set_error(&a->archive, EINVAL,
+				    "UC2: uc2_get_tag failed: %s",
+				    uc2_message(st->handle, ret));
+				return (ARCHIVE_FATAL);
+			}
+		}
+
+		st->n_entries++;
+	}
+
+	/* A failure reading the directory tail is reported like the other
+	 * libuc2 calls here (negative return = error) instead of being
+	 * dropped. */
+	ret = uc2_finish_cdir(st->handle, st->label);
+	if (ret < 0) {
+		archive_set_error(&a->archive, EINVAL,
+		    "UC2: uc2_finish_cdir failed: %s",
+		    uc2_message(st->handle, ret));
+		return (ARCHIVE_FATAL);
+	}
+	return (ARCHIVE_OK);
+}
+
+/* read_header callback.  The first call slurps the archive and caches
+ * its central directory; every call then fills `entry` (path, size,
+ * mtime, file type, permissions) from the next cached entry and
+ * resets the per-entry data state.  Returns ARCHIVE_EOF once all
+ * cached entries have been returned, or the failing status from the
+ * slurp / collect step. */
+static int
+uc2_la_read_header(struct archive_read *a, struct archive_entry *entry)
+{
+	struct uc2_la_state *st = (struct uc2_la_state *)a->format->data;
+	struct uc2_entry *cur;
+	int rc;
+
+	a->archive.archive_format = ARCHIVE_FORMAT_UC2;
+	a->archive.archive_format_name = "UC2";
+
+	if (st->slurped == 0) {
+		rc = slurp_archive(a, st);
+		if (rc != ARCHIVE_OK)
+			return rc;
+		st->slurped = 1;
+
+		rc = collect_entries(a, st);
+		if (rc != ARCHIVE_OK)
+			return rc;
+	}
+
+	if (st->next_entry >= st->n_entries)
+		return (ARCHIVE_EOF);
+
+	cur = &st->entries[st->next_entry];
+	st->next_entry++;
+
+	/* Start the new entry with no data buffered or handed out. */
+	st->entry_yielded = 0;
+	st->entry_len = 0;
+
+	archive_entry_set_pathname(entry, cur->name);
+	archive_entry_set_size(entry, (la_int64_t)cur->size);
+	archive_entry_set_mtime(entry, dos_to_unix_time(cur->dos_time), 0);
+
+	if (cur->is_dir) {
+		archive_entry_set_filetype(entry, AE_IFDIR);
+		archive_entry_set_perm(entry, 0755);
+		return (ARCHIVE_OK);
+	}
+
+	/* Regular file: the read-only attribute drops the write bits. */
+	archive_entry_set_filetype(entry, AE_IFREG);
+	archive_entry_set_perm(entry,
+	    (cur->attr & UC2_Attr_R) ? (mode_t)0444 : (mode_t)0644);
+	return (ARCHIVE_OK);
+}
+
+/* read_data callback.  The first call for an entry decompresses it in
+ * full through uc2_extract into the reusable per-entry buffer and
+ * returns it as one block at offset 0 (ARCHIVE_OK).  Later calls for
+ * the same entry, and any call for a directory or an empty entry,
+ * return ARCHIVE_EOF with *buff = NULL and *size = 0.  Returns
+ * ARCHIVE_FATAL with the archive error set when extraction fails
+ * (EIO) or the buffer cannot be grown (ENOMEM). */
+static int
+uc2_la_read_data(struct archive_read *a,
+                 const void **buff, size_t *size, int64_t *offset)
+{
+	struct uc2_la_state *st = (struct uc2_la_state *)a->format->data;
+
+	/* Defined outputs on every path, including failures. */
+	*buff = NULL;
+	*size = 0;
+	*offset = 0;
+
+	if (st->next_entry == 0 || st->entry_yielded)
+		return (ARCHIVE_EOF);
+
+	struct uc2_entry *e = &st->entries[st->next_entry - 1];
+	if (e->is_dir || e->size == 0) {
+		st->entry_yielded = 1;
+		return (ARCHIVE_EOF);
+	}
+
+	/* Decompress the whole entry once. */
+	struct extract_buf eb = { .data = st->entry_data, .cap = st->entry_cap };
+	int ret = uc2_extract(st->handle, &e->xi, e->size,
+	                      extract_write, &eb);
+
+	/* Take the buffer back even on failure: realloc may have moved
+	 * it, and uc2_la_cleanup frees st->entry_data. */
+	st->entry_data = eb.data;
+	st->entry_cap = eb.cap;
+	st->entry_len = eb.len;
+
+	/* An allocation failure in extract_write is not a libuc2 error;
+	 * report it as such rather than decoding `ret`. */
+	if (eb.err) {
+		archive_set_error(&a->archive, ENOMEM,
+		    "UC2: out of memory buffering entry data");
+		return (ARCHIVE_FATAL);
+	}
+	if (ret < 0) {
+		archive_set_error(&a->archive, EIO,
+		    "UC2: uc2_extract failed: %s",
+		    uc2_message(st->handle, ret));
+		return (ARCHIVE_FATAL);
+	}
+
+	st->entry_yielded = 1;
+	*buff = st->entry_data;
+	*size = st->entry_len;
+	return (ARCHIVE_OK);
+}
+
+/* read_data_skip callback: no decompression is performed here; the
+ * current entry is only flagged as consumed, so a later read_data
+ * call for it returns ARCHIVE_EOF. */
+static int
+uc2_la_read_data_skip(struct archive_read *a)
+{
+	((struct uc2_la_state *)a->format->data)->entry_yielded = 1;
+	return (ARCHIVE_OK);
+}
+
+/* cleanup callback: close the libuc2 handle if one was opened, free
+ * the slurped image, the cached entries and the per-entry buffer,
+ * then the state itself, and clear the format's data pointer.  Does
+ * nothing when the state is already NULL.  Always returns ARCHIVE_OK. */
+static int
+uc2_la_cleanup(struct archive_read *a)
+{
+	struct uc2_la_state *st = (struct uc2_la_state *)a->format->data;
+
+	if (st != NULL) {
+		if (st->handle)
+			uc2_close(st->handle);
+		free(st->entry_data);
+		free(st->entries);
+		free(st->data);
+		free(st);
+		a->format->data = NULL;
+	}
+	return (ARCHIVE_OK);
+}
--- a/docs/_static/.gitkeep
+++ b/docs/_static/.gitkeep
--- a/docs/blog/uc2-revival-writeup.md
+++ b/docs/blog/uc2-revival-writeup.md
@@ -0,0 +1,201 @@
+# Reviving UltraCompressor II: a 1992 DOS archiver, ported forward
+
+Subtitle candidates:
+- *Show HN: UC2 v3 - 1992 DOS archiver, ported to modern C99* (HN)
+- *UltraCompressor II revival: rANS, CDC, BLAKE3, OpenTimestamps* (Lobsters)
+
+---
+
+In 1992, Nico de Vries shipped UltraCompressor II for DOS.  It
+competed with PKZIP and ARJ.  Among the things it did that were
+unusual for the era: master-block deduplication.  If an archive
+contained several similar files, UC2 could store one shared "master"
+block and represent each file as a delta against it - within a
+single archive, on a single floppy, in 4 MB of RAM.
+
+UC2 v3.0.0-alpha.1 is a port forward.  Its compressor produces
+bitstreams that the original `uc2pro.exe` (UC2 Pro v2.3, 1992)
+accepts and extracts at byte-identical fidelity.  It also adds
+content-defined chunking, an rANS entropy coder, BLAKE3 + SHA-256
+hashing, and an OpenTimestamps integration so an archive can be
+anchored to a Bitcoin block at creation time.
+
+This post is the story of how it got here.
+
+## The lineage
+
+UC2 has passed through several pairs of hands across thirty-four
+years:
+
+1. **Nico de Vries (1992-1996)** wrote UC2, with assistance from
+   Danny Bezemer, Jan-Pieter Cornet, and others credited in the
+   original `U_MANUAL.TXT`.
+
+2. **2015 LGPL release**.  In December 2015, Vladislav Sagunov asked
+   de Vries whether the source could be re-released under a free
+   licence.  De Vries agreed and published the full source under the
+   GNU LGPL-3.0 (with a small Borland C/C++ runtime carve-out for
+   DOS-specific code).  That release lives in this repo unchanged
+   under `original/UC2_source/`, including the `Read Me First.txt`
+   from de Vries himself.
+
+3. **Jan Bobrowski (2020-2021)** wrote a clean-room portable
+   *decompressor* in C, called `unuc2` / `libunuc2`.  The library is
+   LGPL-3.0; the CLI tool is GPL-3.0-or-later.  Bobrowski's
+   decompressor compiles cleanly on POSIX, MSVC, and (with care) DOS,
+   and his code is what most modern UC2 work builds on.
+
+4. **Eremey Valetov (2026)** - that's me.  What I've added is the
+   *compressor* that pairs with Bobrowski's decompressor, plus
+   several modules of compression / dedup / integrity work.
+
+Bobrowski-derived files in the repo retain LGPL-3.0; new work is
+GPL-3.0-or-later.  Per-file provenance is in
+[`docs/license-audit.md`](../license-audit.md); the 1992 source and
+the 2020 release are preserved unmodified.
+
+## What's in v3
+
+### Byte-bitstream-compatible LZ77 + Huffman
+
+The compressor in `lib/src/compress.c` is the inverse of Bobrowski's
+decompressor and produces UC2 v3 archives whose method-4 bitstream is
+accepted by `uc2pro.exe`.  Cross-tool round-trip is in CI: a
+`tests/scripts/roundtrip_dosbox.sh` job runs the original 1992 binary
+in DOSBox-X against archives built by the modern tool (and vice
+versa) and verifies that extracted files are bit-identical to the
+inputs.
+
+Compression levels 2-5 (Fast, Normal, Tight, Ultra) match the
+original's IDs.  The original's `bFlag` heuristic for choosing
+between default and custom Huffman trees on small blocks is
+preserved.
+
+### Master-block deduplication, modernised
+
+The 1992 UC2 grouped files by an exact prefix match and built one
+master block per group.  v3 layers content-defined chunking (CDC) on
+top: file pairs that share large blocks of content at *non-aligned*
+positions can also be grouped, since the chunker breaks both files
+on the same content-defined boundaries.  CDC uses a Gear rolling
+hash with an average chunk size of 4 KiB.
+
+Several additional modules ship as libraries with their own unit
+tests, used so far by the compressor's master-block selection logic
+and exposed for callers:
+- a Merkle DAG of deduplicated blocks (`uc2_merkle.h`),
+- a content-addressable cross-archive block store (`uc2_blockstore.h`),
+- SimHash near-duplicate detection (`uc2_simhash.h`),
+- byte-level delta compression (`uc2_delta.h`).
+
+These extend the format with new metadata records.  Method-4 (the
+1992 bitstream) remains untouched, so old readers handle the file
+data; new readers see the additional dedup hints.
+
+### Modern compression backends
+
+Phase 4 added pluggable backends behind new method IDs.  Method 4
+(the original Huffman) is kept as-is for round-trip with the 1992
+binary.
+
+- **Method 10**: rANS entropy coder.  32-bit table-based.  Selected
+  by levels 6-9.
+- **LZ4**: ultra-fast mode, exposed via the `uc2_lz4.h` library and
+  the `uc2 -B` benchmark; not yet a first-class archive backend.
+- **Content-aware preprocessing** (`uc2_preprocess.h`): BCJ for x86
+  address normalisation, BWT for text, byte-stride delta filter.
+- **Dictionary metadata** (`uc2_dict.h`): zstd-inspired formal
+  dictionary records with content-hash IDs and integrity checksums.
+- **`uc2 -B`**: built-in benchmark mode runs all methods on the
+  input and prints ratio + timing per method.
+
+### Cryptographic integrity
+
+Phase 7 anchored the archive's content hash:
+
+- **BLAKE3** (`uc2_blake3.h`) for fast content hashing.
+- **SHA-256** (`uc2_sha256.h`, FIPS 180-4) for OpenTimestamps
+  compatibility.
+- **OpenTimestamps integration** (`uc2_ots.h`): the archive's SHA-256
+  can be anchored to a Bitcoin block via a public calendar server,
+  and the resulting proof is stored in a magic-bracketed sidecar
+  trailer appended after the recorded archive bytes.  The 1992 reader
+  ignores the trailer (it uses the front header's recorded length),
+  preserving backward compatibility.  Extracted proofs are
+  byte-identical to standard `.ots` files; the cross-check test runs
+  them through `python-opentimestamps` to confirm round-trip parsing.
+
+The OTS parser is conservative about hostile input: strict-canonical
+varints, depth-bounded recursion, a varbytes size cap, and a leaf
+digest that must match the recomputed archive SHA-256 before
+`--ots-attach` accepts a proof.
+
+## A demonstration
+
+```sh
+# Create an archive
+$ uc2 -w -L4 demo.uc2 file1.txt file2.txt
+UC2 compression level: Tight
+Created demo.uc2 (2 files, 0 dirs, 1 master, 215 bytes)
+Everything went OK
+
+# Extract with the original UC2 Pro v2.3 in DOSBox-X
+$ dosbox -conf <(echo -e "[autoexec]\nmount C: .\nC:\nuc2pro.exe -x demo.uc2")
+# -> bit-identical files
+
+# Anchor the archive to the Bitcoin blockchain
+$ ots stamp demo.uc2          # produces demo.uc2.ots from a calendar
+$ uc2 --ots-attach demo.uc2.ots demo.uc2
+Attached 396-byte OTS proof to demo.uc2
+
+$ uc2 -t demo.uc2
+Testing archive integrity...
+OTS proof: leaf matches; structure verified
+Everything went OK
+```
+
+Cross-tool round-trip is in CI.  The OTS extracted output is
+verified against the upstream `python-opentimestamps` parser when
+that package is installed (the test skips cleanly otherwise).
+
+## What's coming
+
+The roadmap is in [`ROADMAP.md`](../../ROADMAP.md), with each item
+tracked as a `git-bug` issue.  The next things on the list are
+practical:
+
+- **DJGPP cross-compile** so v3 actually runs on DOS.  The compat
+  layer is already in `cli/src/compat/compat_dos.c`; the
+  cross-compile target and DOSBox-X CI are the missing pieces.
+- **libarchive read handler** so `.uc2` is a recognised format for
+  tools that integrate with libarchive.
+- **`uc2 --ingest` streaming mode** for piping `tar` or `rsync` into
+  a deduplicating sink.
+
+Beyond that, the issue tracker has speculative items for
+post-quantum encryption, IPFS integration, and zero-knowledge
+proofs.  Those are research directions, not promises.
+
+## Why bother?
+
+Two reasons.
+
+First, archive formats are a load-bearing piece of computing
+history.  Preserving the 1992 source unchanged, the 2015 LGPL
+re-release unchanged, and the 2020 portable decompressor
+unchanged - all in the same repository as the modern port - is
+what makes the format survive its hosting choices.
+
+Second, the master-block design from 1992 turns out to be a
+surprisingly clean substrate to bolt content-defined chunking,
+content-addressable storage, and verifiable timestamps onto.  Phase
+3 and Phase 7 work landed without breaking the 1992 reader.  Doing
+the same project as a wrapper around `gzip` would have been more
+work for less reach.
+
+The repo, with full source, license trail, test suite, and
+roadmap, is at <https://github.com/evvaletov/uc2>.
+
+---
+
+*Eremey Valetov, May 2026.*
--- a/docs/building.rst
+++ b/docs/building.rst
@@ -0,0 +1,73 @@
+Building
+========
+
+Requirements
+------------
+
+- CMake >= 3.16
+- C99 compiler: GCC, Clang, or MSVC
+- Optional: DJGPP cross-compiler for DOS builds
+
+Linux / macOS
+-------------
+
+.. code-block:: sh
+
+   cmake -B build -DCMAKE_BUILD_TYPE=Release
+   cmake --build build
+   ctest --test-dir build
+
+The binary is ``build/cli/uc2`` and the library is
+``build/lib/libuc2.a``.
+
+Windows (MSVC)
+--------------
+
+.. code-block:: sh
+
+   cmake -B build
+   cmake --build build --config Release
+   ctest --test-dir build -C Release
+
+DOS (DJGPP Cross-Compilation)
+-----------------------------
+
+Cross-compile from a Linux host using the DJGPP toolchain:
+
+.. code-block:: sh
+
+   cmake -B build-dos -DCMAKE_TOOLCHAIN_FILE=cmake/djgpp.cmake
+   cmake --build build-dos
+
+This produces a DOS executable suitable for DOSBox or real hardware.
+
+Build Options
+-------------
+
+.. list-table::
+   :header-rows: 1
+   :widths: 30 15 55
+
+   * - Option
+     - Default
+     - Description
+   * - ``UC2_BUILD_TESTS``
+     - ``ON``
+     - Build test programs
+   * - ``CMAKE_BUILD_TYPE``
+     - (none)
+     - ``Release``, ``Debug``, ``RelWithDebInfo``
+
+Running Tests
+-------------
+
+.. code-block:: sh
+
+   ctest --test-dir build --output-on-failure
+
+Tests include:
+
+- **identify**: UC2 magic detection
+- **extract**: decompression against reference archives
+- **roundtrip**: compress → archive → decompress → verify (8 patterns
+  × 4 compression levels = 32 tests)
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -0,0 +1,29 @@
+# Sphinx configuration for the UC2 documentation.
+
+# Project metadata.
+project = "UC2"
+copyright = "2026, Eremey Valetov"
+author = "Eremey Valetov"
+release = "3.0.0"
+
+# Sphinx extensions to load.
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.intersphinx",
+    "sphinx.ext.githubpages",
+]
+
+templates_path = ["_templates"]
+exclude_patterns = ["_build"]
+
+# HTML output: furo theme, static files from _static, no logo or favicon.
+html_theme = "furo"
+html_static_path = ["_static"]
+html_title = "UC2 — UltraCompressor II"
+html_logo = None
+html_favicon = None
+
+# Theme options pointing at the source repository, branch and docs
+# directory.
+html_theme_options = {
+    "source_repository": "https://github.com/evvaletov/uc2",
+    "source_branch": "main",
+    "source_directory": "docs/",
+}
+
+# Cross-reference targets for intersphinx.
+intersphinx_mapping = {
+    "python": ("https://docs.python.org/3", None),
+}
--- a/docs/format.rst
+++ b/docs/format.rst
@@ -0,0 +1,196 @@
+UC2 Archive Format
+==================
+
+This documents the binary format as implemented by the original UC2
+v2.x and supported by UC2 v3.
+
+Archive Layout
+--------------
+
+.. code-block:: none
+
+   FHEAD (13 bytes)
+   XHEAD (16 bytes)
+   File data blocks (compressed bitstreams)
+   COMPRESS + compressed central directory
+
+All multi-byte integers are little-endian.
+
+FHEAD — File Header
+~~~~~~~~~~~~~~~~~~~
+
+.. list-table::
+   :widths: 15 15 70
+
+   * - Offset
+     - Size
+     - Field
+   * - 0
+     - 4
+     - Magic: ``UC2\x1A`` (0x1A324355)
+   * - 4
+     - 4
+     - Component length
+   * - 8
+     - 4
+     - Component length + 0x01B2C3D4 (validation)
+   * - 12
+     - 1
+     - Damage protection flag
+
+XHEAD — Extended Header
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. list-table::
+   :widths: 15 15 70
+
+   * - Offset
+     - Size
+     - Field
+   * - 13
+     - 4
+     - Cdir volume (always 1)
+   * - 17
+     - 4
+     - Cdir offset
+   * - 21
+     - 2
+     - Fletcher checksum of raw cdir
+   * - 23
+     - 1
+     - Busy flag
+   * - 24
+     - 2
+     - Version made by (e.g. 200 = v2.00)
+   * - 26
+     - 2
+     - Version needed to extract
+   * - 28
+     - 1
+     - Reserved
+
+Central Directory
+-----------------
+
+The central directory is itself compressed using the UC2 compression
+engine.  It is located at the offset specified in XHEAD and preceded by
+a COMPRESS record.
+
+Each directory entry begins with a 1-byte type tag:
+
+.. list-table::
+   :widths: 15 85
+
+   * - 1
+     - Directory entry (OSMETA + DIRMETA)
+   * - 2
+     - File entry (OSMETA + FILEMETA + COMPRESS + LOCATION)
+   * - 3
+     - Master entry (MASMETA + COMPRESS + LOCATION)
+   * - 4
+     - End of central directory
+
+The directory ends with XTAIL (17 bytes) + archive serial (4 bytes).
+
+Master Blocks
+~~~~~~~~~~~~~
+
+Masters are LZ77 dictionary prefixes that pre-fill the sliding window
+before decompression, allowing back-references into shared content
+across files.  Three kinds exist:
+
+.. list-table::
+   :widths: 15 85
+
+   * - 0
+     - **SuperMaster** — built-in 49 152-byte dictionary, decompressed
+       from a static blob embedded in the library.
+   * - 1
+     - **NoMaster** — 512 zero bytes (minimal dictionary).
+   * - ≥ 2
+     - **Custom master** — archive-specific, described by a MASMETA
+       record in the central directory.
+
+MASMETA (20 bytes):
+
+.. list-table::
+   :widths: 15 15 70
+
+   * - Offset
+     - Size
+     - Field
+   * - 0
+     - 4
+     - Master index (≥ 2)
+   * - 4
+     - 4
+     - Content key (FNV-1a hash)
+   * - 8
+     - 4
+     - Total uncompressed size of referring files
+   * - 12
+     - 4
+     - Number of referring files
+   * - 16
+     - 2
+     - Master data length (uncompressed, ≤ 65 535)
+   * - 18
+     - 2
+     - Fletcher checksum of master data
+
+A master entry in the cdir is: type byte (3) + MASMETA (20) +
+COMPRESS (10) + LOCATION (8) = 39 bytes.  The compressed master data
+is stored at the location pointed to by LOCATION; it is itself
+compressed using another master (typically SuperMaster).
+
+Compression Format
+------------------
+
+UC2 uses LZ77 with Huffman entropy coding.  The bitstream consists of
+blocks, each containing:
+
+1. **Block-present flag** (1 bit): 1 = block follows, 0 = end of stream.
+
+2. **Huffman tree** encoded as:
+
+   - Tree-changed flag (1 bit): 0 = use default tree, 1 = new tree.
+   - Type flags (2 bits): ``has_lo | has_hi << 1``, controlling which
+     symbol ranges are encoded.
+   - Tree-encoding tree (15 × 3-bit lengths).
+   - Delta-coded symbol lengths with RLE (344 symbols total =
+     256 literals + 60 distance + 28 length).
+
+3. **Compressed data**: Huffman-coded literals and distance/length pairs.
+
+4. **End-of-block marker**: distance = 64001 with length = 3.
+
+Distance Encoding
+~~~~~~~~~~~~~~~~~
+
+60 distance symbols in 4 tiers:
+
+- Tier 0: distances 1--15 (0 extra bits)
+- Tier 1: distances 16--255 (4 extra bits)
+- Tier 2: distances 256--4095 (8 extra bits)
+- Tier 3: distances 4096--64000 (12 extra bits)
+
+Length Encoding
+~~~~~~~~~~~~~~~
+
+28 length symbols with varying extra bits, covering lengths 3--35482.
+
+Delta-Coded Trees
+~~~~~~~~~~~~~~~~~
+
+Symbol code lengths are delta-coded against the previous block's
+lengths using the ``vval`` lookup table.  The first block's default
+lengths are hard-coded.  The delta stream uses 14 delta codes (0--13)
+plus a repeat code for RLE compression.
+
+Fletcher Checksum
+-----------------
+
+UC2 uses an XOR-based Fletcher checksum (initial value 0xA55A) for
+both file data integrity and central directory validation.  Bytes are
+processed in little-endian 16-bit words with a carry flag for
+odd-length data.
--- a/docs/history.rst
+++ b/docs/history.rst
@@ -0,0 +1,35 @@
+History
+=======
+
+.. list-table::
+   :widths: 20 80
+
+   * - **1992--1996**
+     - UltraCompressor II created by Nico de Vries.  Releases v1.0
+       through v2.3 for DOS.  Notable features: LZ77+Huffman
+       compression, master-block deduplication, file versioning,
+       and multi-volume spanning.
+
+   * - **2015**
+     - Source code released under LGPL-3.0 by Danny Bezemer.
+
+   * - **2020--2021**
+     - Jan Bobrowski writes `unuc2/libunuc2
+       <http://torinak.com/~jb/unuc2/>`_, a clean-room portable
+       decompressor in C.
+
+   * - **2026**
+     - UC2 v3.0.0: cross-platform revival by Eremey Valetov.
+       CMake build system, Linux/macOS/Windows/DOS targets,
+       original compression engine reimplemented, Sphinx
+       documentation, CI/CD pipeline.
+
+Credits
+-------
+
+- **Nico de Vries** — original UC2 author
+- **Danny Bezemer** — source code release
+- **Jan Bobrowski** — portable decompressor (libunuc2)
+- **Jan-Pieter Cornet** — testing and format documentation
+- **Vladislav Sagunov** — UC2 resources and documentation
+- **Eremey Valetov** — v3.0.0 revival and ongoing development
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -0,0 +1,22 @@
+UC2 — UltraCompressor II
+========================
+
+A cross-platform revival of UltraCompressor II, the DOS-era archiver by
+Nico de Vries (1992--1996).  UC2 was notable for its advanced
+deduplication ("master blocks"), file versioning, and competitive
+compression ratios on the hardware of its day.
+
+UC2 v3 brings it back as a modern, portable C99 tool with both
+decompression and compression, targeting Linux, macOS, Windows, and DOS.
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents
+
+   quickstart
+   usage
+   library
+   format
+   building
+   history
+   roadmap
--- a/docs/library.rst
+++ b/docs/library.rst
@@ -0,0 +1,174 @@
+Library API (libuc2)
+====================
+
+``libuc2`` provides C99 functions for reading, extracting, and
+compressing UC2 archives.  The library is callback-based: callers supply
+I/O and memory callbacks, making it suitable for embedded, DOS, and
+freestanding environments.
+
+Header: ``<uc2/libuc2.h>``
+
+Archive Reading
+---------------
+
+.. c:function:: int uc2_identify(void *magic, unsigned magic_size)
+
+   Check whether a buffer contains a UC2 archive header.
+
+   :param magic: Pointer to the first 4--21 bytes of the file.
+   :param magic_size: Number of bytes available.
+   :returns: ``1`` if UC2, ``0`` if not, ``-1`` if more bytes needed.
+
+.. c:function:: uc2_handle uc2_open(struct uc2_io *io, void *io_ctx)
+
+   Open a UC2 archive.  The caller provides I/O callbacks via
+   :c:type:`uc2_io`.  Returns ``NULL`` on allocation failure.
+
+.. c:function:: uc2_handle uc2_close(uc2_handle h)
+
+   Close the archive and free all resources.  Always returns ``NULL``.
+
+Directory Enumeration
+---------------------
+
+.. c:function:: int uc2_read_cdir(uc2_handle h, struct uc2_entry *entry)
+
+   Read the next central directory entry.
+
+   :returns:
+      - ``UC2_End`` (0): end of directory, *entry* not filled.
+      - ``UC2_BareEntry`` (1): entry filled, no tags.
+      - ``UC2_TaggedEntry`` (3): entry filled, call :c:func:`uc2_get_tag`
+        to read tags (long filename, etc.).
+      - Negative value on error.
+
+   Directories appear before their contents.  Duplicate filenames are
+   listed oldest-first.
+
+.. c:function:: int uc2_get_tag(uc2_handle h, struct uc2_entry *entry, char **tag, void **data, unsigned *data_len)
+
+   Read a tag from a tagged entry.  Call repeatedly until it returns
+   ``UC2_End``.
+
+.. c:function:: int uc2_finish_cdir(uc2_handle h, char label[12])
+
+   Read the archive tail and retrieve the volume label.
+
+Extraction
+----------
+
+.. c:function:: int uc2_extract(uc2_handle h, struct uc2_xinfo *xi, unsigned size, int (*write)(void *ctx, const void *ptr, unsigned len), void *ctx)
+
+   Decompress a file entry.  Call only after the entire central
+   directory has been read.  The *write* callback receives decompressed
+   data in chunks.
+
+Compression
+-----------
+
+.. c:function:: int uc2_compress(int level, int (*read)(void *ctx, void *buf, unsigned len), void *read_ctx, int (*write)(void *ctx, const void *ptr, unsigned len), void *write_ctx, unsigned size, unsigned short *checksum_out, unsigned *compressed_size_out)
+
+   Compress raw data into a UC2 bitstream (no archive framing).
+
+   :param level: Compression level: 2 = Fast, 3 = Normal, 4 = Tight
+                 (default), 5 = Ultra.
+   :param read: Callback returning bytes read (0 at EOF, <0 on error).
+   :param write: Callback returning <0 on error.
+   :param size: Total input size in bytes.
+   :param checksum_out: Receives the Fletcher checksum of the input.
+   :param compressed_size_out: Receives the compressed size.
+   :returns: 0 on success, negative ``UC2_*`` error code on failure.
+
+.. c:function:: int uc2_compress_ex(int level, const void *master, unsigned master_size, int (*read)(void *ctx, void *buf, unsigned len), void *read_ctx, int (*write)(void *ctx, const void *ptr, unsigned len), void *write_ctx, unsigned size, unsigned short *checksum_out, unsigned *compressed_size_out)
+
+   Compress with a master-block dictionary prefix.  The master data
+   pre-fills the LZ77 sliding window, allowing back-references into
+   the master for cross-file deduplication.  Pass ``NULL`` / ``0`` for
+   no master (equivalent to :c:func:`uc2_compress`).
+
+   The CLI uses the built-in SuperMaster (49 KB) by default.
+
+.. c:function:: int uc2_get_supermaster(void *buf, unsigned buf_size)
+
+   Decompress the built-in SuperMaster into *buf* (must be at least
+   49152 bytes).  Returns ``49152`` on success, negative error code on
+   failure.
+
+I/O Callbacks
+-------------
+
+.. c:struct:: uc2_io
+
+   .. c:member:: int (*read)(void *io_ctx, unsigned pos, void *buf, unsigned len)
+
+      Read *len* bytes from the archive at offset *pos* into *buf*.
+      Return number of bytes read (less if EOF), or negative on error.
+
+   .. c:member:: void *(*alloc)(void *io_ctx, unsigned size)
+
+      Allocate memory.  Return ``NULL`` on failure.
+
+   .. c:member:: void (*free)(void *io_ctx, void *ptr)
+
+      Free memory.
+
+   .. c:member:: void (*warn)(void *io_ctx, char *fmt, ...)
+
+      Optional warning callback.
+
+Data Structures
+---------------
+
+.. c:struct:: uc2_entry
+
+   A directory entry.
+
+   .. c:member:: unsigned dirid
+
+      Parent directory (0 = root).
+
+   .. c:member:: unsigned id
+
+      Directory index (directories only).
+
+   .. c:member:: unsigned size
+
+      Uncompressed file size.
+
+   .. c:member:: unsigned csize
+
+      Compressed file size.
+
+   .. c:member:: unsigned dos_time
+
+      DOS-format timestamp.
+
+   .. c:member:: unsigned char attr
+
+      DOS file attributes.
+
+   .. c:member:: char name[300]
+
+      Filename (UTF-8, NUL-terminated).  Populated after tags are read.
+
+Error Codes
+-----------
+
+.. list-table::
+   :header-rows: 1
+   :widths: 30 70
+
+   * - Constant
+     - Meaning
+   * - ``UC2_UserFault`` (-2)
+     - User callback refused to cooperate
+   * - ``UC2_BadState`` (-3)
+     - API called in wrong order
+   * - ``UC2_Damaged`` (-4)
+     - Archive data is corrupt
+   * - ``UC2_Truncated`` (-5)
+     - Unexpected end of data
+   * - ``UC2_Unimplemented`` (-6)
+     - Feature not yet implemented
+   * - ``UC2_InternalError`` (-7)
+     - Internal logic error
--- a/docs/license-audit.md
+++ b/docs/license-audit.md
@@ -0,0 +1,160 @@
+# UC2 License Audit
+
+Status: 2026-05-03. Maintained by Eremey Valetov.
+
+UC2 v3 builds on three layers of prior work, each released under its
+own free-software licence.  This document records per-file provenance,
+the LGPL-3.0 -> GPL-3.0 transition rationale, and the SPDX identifiers
+applied across the source tree.
+
+## Layer 1: Nico de Vries (1992-1996), released 2015
+
+Nico de Vries authored UltraCompressor II as proprietary DOS software
+between 1992 and 1996, with co-development from Danny Bezemer,
+Jan-Pieter Cornet, and others credited in the original `U_MANUAL.TXT`.
+In December 2015, Vladislav Sagunov asked de Vries whether the source
+could be re-released under a free licence; de Vries agreed and
+personally published the full source under the GNU Lesser General
+Public License v3 (LGPL-3.0), with a small Borland C/C++ runtime
+carve-out for DOS-specific code.
+
+The 2015 release is preserved in this repository under
+`original/UC2_source/` byte-for-byte unchanged, including its license
+header (`GNU LESSER GENERAL PUBLIC LICENSE V3.txt`) and the original
+binaries (`uc2pro.exe`, `uc237b.exe`, `ue.exe`).  No file in `lib/` or
+`cli/` is a verbatim copy of any file in that release.  The 2015 source
+serves as the format specification: it is read for documentation
+purposes (the on-disk archive layout, the SuperMaster dictionary
+contents, the Huffman tree encoding) but its C code is not linked in.
+
+Relicensing impact: none.  Layer 1 is preserved under its original
+LGPL-3.0 licence; nothing is moved upward to GPL-3.0.
+
+## Layer 2: Jan Bobrowski (2020-2021), libunuc2 / unuc2
+
+Jan Bobrowski wrote a clean-room portable decompressor for UC2 v3
+archives between 2020 and 2021.  He released two products:
+
+- `libunuc2` (decompression library) under LGPL-3.0
+- `unuc2` (CLI tool) under GPL-3.0-or-later
+
+The upstream source is preserved in `original/unuc2-0.6/`.  The
+following files in this repository derive from Bobrowski's work and
+retain his original licence:
+
+| Current file                          | Upstream origin                          | Licence       |
+|---------------------------------------|------------------------------------------|---------------|
+| `lib/src/decompress.c`                | `original/unuc2-0.6/libunuc2.c`          | LGPL-3.0-only |
+| `lib/src/list.h`                      | `original/unuc2-0.6/list.h` (identical)  | LGPL-3.0-only |
+| `lib/src/super.bin`                   | `original/unuc2-0.6/super.bin` (identical) | data (de Vries) |
+| `lib/include/uc2/libuc2.h`            | `original/unuc2-0.6/libunuc2.h`          | LGPL-3.0-only |
+| `cli/src/main.c`                      | `original/unuc2-0.6/unuc2.c`             | GPL-3.0-or-later |
+| `cli/src/compat/compat_win32.c`       | `original/unuc2-0.6/compat/compat.c` (Win32 portions) | LGPL-3.0-only |
+| `cli/src/compat/compat_dos.c`         | derived from `compat/compat.c` (DOS adaptation by Valetov) | LGPL-3.0-only |
+
+Modifications by Valetov in 2026 are released under the same licence
+as the file's upstream origin (LGPL-3.0 stays LGPL-3.0; GPL-3.0 stays
+GPL-3.0).  No unilateral upgrade from LGPL to GPL has been applied to
+Bobrowski's work.
+
+`lib/src/super.bin` is the SuperMaster dictionary block from the 1992
+distribution.  It is data, not code: a fixed binary table used as a
+compression-context priming dictionary.  It is bit-identical to the
+file in Bobrowski's release and to the data extracted from de Vries's
+1992 binaries.
+
+## Layer 3: Eremey Valetov (2026), UC2 v3 revival
+
+The following files are new work by Valetov, originally authored for
+the UC2 v3 revival project, released under GPL-3.0-or-later:
+
+| File                                  | Function                                           |
+|---------------------------------------|----------------------------------------------------|
+| `lib/src/compress.c`                  | LZ77+Huffman compressor (inverse of decompress.c)  |
+| `lib/src/uc2_tables.c`                | Huffman delta-coding lookup tables                 |
+| `lib/src/uc2_internal.h`              | Shared compressor/decompressor types and constants |
+| `lib/src/uc2_cdc.c` + `.h`            | Content-defined chunking (Gear hash)               |
+| `lib/src/uc2_merkle.c` + `.h`         | Merkle DAG of deduplicated blocks                  |
+| `lib/src/uc2_blockstore.c` + `.h`     | Cross-archive content-addressable block store     |
+| `lib/src/uc2_simhash.c` + `.h`        | SimHash near-duplicate detection                   |
+| `lib/src/uc2_delta.c` + `.h`          | Binary delta compression                           |
+| `lib/src/uc2_rans.c` + `.h`           | rANS entropy coder (method 10)                     |
+| `lib/src/uc2_dict.c` + `.h`           | Dictionary metadata for cross-archive sharing      |
+| `lib/src/uc2_preprocess.c` + `.h`     | BCJ / BWT / delta-filter preprocessing             |
+| `lib/src/uc2_lz4.c` + `.h`            | LZ4 ultra-fast compression                         |
+| `lib/src/uc2_blake3.c` + `.h`         | BLAKE3 cryptographic hashing                       |
+| `lib/src/uc2_sha256.c` + `.h`         | SHA-256 (FIPS 180-4)                               |
+| `lib/src/uc2_ots.c` + `.h`            | OpenTimestamps proof parser, walker, trailer      |
+| `cli/src/compat/getopt.c`             | Minimal POSIX getopt for MSVC                      |
+| `cli/src/main.c` (post-`9525a81` additions) | OTS attach/extract/info, archive creation, scanning, benchmark (GPL-3.0-or-later, matching upstream `unuc2.c`) |
+
+The Phase 3-7 modules are independent implementations.  They reference
+the UC2 v3 archive format (which is a bitstream layout, not a
+copyrighted work) and use BLAKE3, SHA-256, LZ4, rANS, etc. from
+public-domain or self-authored reference implementations.  None of
+these modules link to or derive from Bobrowski's code beyond using
+shared header types declared in `uc2_internal.h`.
+
+## Relicensing rationale
+
+The composite project links Bobrowski's LGPL-3.0 library (`lib/`) into
+a GPL-3.0-or-later executable (`cli/`).  This combination is permitted
+by LGPL-3.0 sec. 4 (Combined Works): the LGPL library may be used in
+GPL-licensed work without requiring the library itself to be relicensed.
+
+No code has been moved from LGPL-3.0 to GPL-3.0 in this project.
+GPL-3.0 §7 allows a one-way upgrade from LGPL-3.0 to GPL-3.0 (by
+dropping the LGPL's additional permissions), but exercising it is
+unnecessary here: the LGPL files remain LGPL, the GPL files remain GPL,
+and the combined work is distributable under GPL-3.0-or-later (as
+
+If a downstream user wishes to redistribute `lib/` standalone under
+LGPL-3.0, the LGPL-3.0 files are individually identifiable via their
+SPDX-License-Identifier headers.
+
+## SPDX policy
+
+All source files in `lib/` and `cli/` carry one of two SPDX
+identifiers near the top:
+
+- `SPDX-License-Identifier: LGPL-3.0-only` for files derived from
+  Bobrowski's libunuc2 / compat code.
+- `SPDX-License-Identifier: GPL-3.0-or-later` for `cli/src/main.c`
+  (matches Bobrowski's original GPL-3.0-or-later choice for the CLI
+  tool) and for all Valetov-authored Phase 2 through Phase 7 work.
+
+Original copyright lines authored by Bobrowski are preserved verbatim
+where present.  Where Valetov has added substantial new content to a
+Bobrowski-originated file (notably `cli/src/main.c` and
+`compat_dos.c`), an additional Valetov copyright line has been added
+without removing the original.
+
+The 2015 LGPL-3.0 release in `original/UC2_source/` and the 2020-2021
+release in `original/unuc2-0.6/` are preserved unchanged and are not
+subject to this policy: they retain whatever licence headers their
+authors shipped them with.
+
+## Audit checklist
+
+- [x] LGPL-3.0 release by Bezemer/de Vries preserved unchanged in
+      `original/UC2_source/`
+- [x] LGPL-3.0 / GPL-3.0 release by Bobrowski preserved unchanged in
+      `original/unuc2-0.6/`
+- [x] Per-file provenance table above
+- [x] SPDX-License-Identifier on every source file in `lib/` and `cli/`
+- [x] CREDITS.md attributes Bobrowski specifically for libunuc2-derived
+      files, not as generic "inspiration"
+- [x] LICENSE-HISTORY summary published as this file
+      (`docs/license-audit.md`)
+- [x] No silent LGPL-to-GPL upgrade: every Bobrowski-origin file
+      retains LGPL-3.0-only
+
+## References
+
+- LGPL-3.0 text: <https://www.gnu.org/licenses/lgpl-3.0.html>
+- GPL-3.0 text: see `LICENSE` in repository root
+- Bobrowski upstream: <http://torinak.com/~jb/unuc2/>
+- Bezemer 2015 release notes: `original/UC2_source/Read Me First.txt`
+- GPL-3.0 sec. 7 (additional permissions may be removed, allowing a one-way LGPL-to-GPL upgrade):
+  <https://www.gnu.org/licenses/gpl-3.0.html#section7>
+- LGPL-3.0 sec. 4 (Combined Works): same document, sec. 4
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -0,0 +1,26 @@
+Quick Start
+===========
+
+Building
+--------
+
+Requires CMake >= 3.16 and a C99 compiler (GCC, Clang, or MSVC).
+
+.. code-block:: sh
+
+   cmake -B build
+   cmake --build build
+
+The binary is at ``build/cli/uc2``.
+
+Basic Usage
+-----------
+
+.. code-block:: sh
+
+   uc2 -w archive.uc2 file1 file2   # Create archive
+   uc2 archive.uc2                  # Extract all files
+   uc2 -l archive.uc2               # List contents
+   uc2 -t archive.uc2               # Test archive integrity
+   uc2 -d /tmp/out archive.uc2      # Extract to directory
+   uc2 -w -L 5 big.uc2 data/*      # Create with Ultra compression
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -0,0 +1,2 @@
+sphinx>=7.0
+furo
--- a/docs/roadmap.rst
+++ b/docs/roadmap.rst
@@ -0,0 +1,42 @@
+Roadmap
+=======
+
+The development roadmap is maintained in ``ROADMAP.md`` at the project
+root.  Key phases:
+
+1. **Decompression MVP**  --  Done. Portable decompressor, CLI tool,
+   CMake build system.
+
+2. **Original Compression Engine**  --  Done. LZ77+Huffman compressor
+   with custom Huffman trees, full backward compatibility, and UC2
+   personality (``-q`` for scripting).  Automated DOSBox-X round-trip
+   validates 4+5 files in both directions.
+
+3. **Modernized Master-Block Deduplication**  --  Done.
+   CDC with Gear hash, Merkle DAG with content addressing,
+   cross-archive block store, SimHash near-duplicate detection,
+   and delta compression.  All Phase 3 items complete.
+
+4. **Modern Compression Backends**  --  Done.  rANS entropy coder (method
+   10), zstd-style dictionary metadata, content-aware preprocessing
+   (BCJ, BWT, delta), LZ4 ultra-fast, benchmark mode (``uc2 -B``).
+
+5. **Quantum-Resistant Encryption**  --  CRYSTALS-Kyber + AES-256-GCM.
+
+6. **DOS / FreeDOS / Retro-Computing**  --  DJGPP toolchain, vintage
+   hardware support, self-extracting archives.
+
+7. **Cryptographic Integrity & Timestamping**  --  BLAKE3 hashing done
+   (pure C, 256-bit digests).  Remaining: integration into archive
+   blocks, OpenTimestamps.
+
+8. **Decentralized & Cloud Integration**  --  IPFS pinning,
+   content-addressable dedup, cloud archiving.
+
+9. **Zero-Knowledge Proofs**  --  Privacy-preserving archive verification.
+
+10. **Ecosystem Integrations**  --  libarchive plugin, streaming dedup
+    ingestion, file manager plugins.
+
+See the full roadmap: `ROADMAP.md on GitHub
+<https://github.com/evvaletov/uc2/blob/main/ROADMAP.md>`_.
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -0,0 +1,93 @@
+Command-Line Usage
+==================
+
+Synopsis
+--------
+
+.. code-block:: none
+
+   uc2 [options] archive.uc2 [patterns...]
+   uc2 -w [-L level] archive.uc2 files...
+
+Modes
+-----
+
+``uc2 archive.uc2``
+   Extract all files to the current directory.
+
+``uc2 -l archive.uc2``
+   List archive contents.
+
+``uc2 -t archive.uc2``
+   Test archive integrity (decompress and verify checksums without
+   writing files).
+
+``uc2 -p archive.uc2 filename``
+   Extract a file to stdout.
+
+``uc2 -w archive.uc2 files...``
+   Create a new archive from the given files.  The original LZ77+Huffman
+   algorithm is used.  Compression level defaults to 4 (Tight); use
+   ``-L`` to change it.
+
+   The archiver automatically groups similar files using content
+   fingerprinting: files sharing identical first 4096 bytes are assigned a
+   custom master block built from the largest file in the group.  This
+   pre-fills the LZ77 sliding window with shared content, improving
+   compression for collections of structurally similar files (e.g. log
+   rotations, versioned configs, same-format data files).  Files that
+   don't group (or are smaller than 1 KB) use the built-in 49 KB
+   SuperMaster dictionary.
+
+Options
+-------
+
+.. list-table::
+   :header-rows: 1
+   :widths: 15 85
+
+   * - Flag
+     - Description
+   * - ``-l``
+     - List archive contents
+   * - ``-t``
+     - Test archive integrity
+   * - ``-w``
+     - Create archive
+   * - ``-L n``
+     - Compression level: 2 = Fast, 3 = Normal, 4 = Tight (default),
+       5 = Ultra
+   * - ``-a``
+     - Include all file versions (not just latest)
+   * - ``-d path``
+     - Extract to specified directory
+   * - ``-f``
+     - Overwrite existing files
+   * - ``-p``
+     - Extract to stdout
+   * - ``-D``
+     - Skip directory metadata; ``-DD`` also skips file metadata
+   * - ``-T``
+     - Tab-separated output (for scripting)
+
+Pattern Matching
+----------------
+
+File patterns use glob syntax.  Only files matching the pattern are
+listed or extracted:
+
+.. code-block:: sh
+
+   uc2 -l archive.uc2 '*.txt'      # List only .txt files
+   uc2 archive.uc2 'src/*'          # Extract src/ subtree
+
+Exit Codes
+----------
+
+.. list-table::
+   :widths: 15 85
+
+   * - ``0``
+     - Success
+   * - ``1``
+     - Error (damaged archive, I/O failure, etc.)
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -1,6 +1,6 @@
 # libuc2 — UC2 decompression library

-set(LIBUC2_SOURCES src/decompress.c)
+set(LIBUC2_SOURCES src/decompress.c src/compress.c src/uc2_tables.c src/uc2_cdc.c src/uc2_merkle.c src/uc2_blockstore.c src/uc2_simhash.c src/uc2_delta.c src/uc2_rans.c src/uc2_dict.c src/uc2_preprocess.c src/uc2_lz4.c src/uc2_blake3.c src/uc2_sha256.c src/uc2_ots.c src/uc2_ingest.c)

 # Embed super.bin: use .S with .incbin on GCC/Clang, generated C array on MSVC
 if(MSVC)
--- a/lib/include/uc2/libuc2.h
+++ b/lib/include/uc2/libuc2.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: LGPL-3.0-only */
+
 #ifndef LIBUC2_H
 #define LIBUC2_H

@@ -83,6 +85,43 @@ UC2_API int uc2_extract(

 UC2_API const char *uc2_message(uc2_handle, int ret);

+/* Compress raw data into a UC2 bitstream (no archive framing).
+   level: 2=Fast, 3=Normal, 4=Tight(default), 5=Ultra.
+   read() should return bytes read (0 at EOF, <0 on error).
+   write() should return <0 on error.
+   Returns 0 on success, negative UC2_* error code on failure. */
+UC2_API int uc2_compress(
+	int level,
+	int (*read)(void *context, void *buf, unsigned len),
+	void *read_ctx,
+	int (*write)(void *context, const void *ptr, unsigned len),
+	void *write_ctx,
+	unsigned size,
+	unsigned short *checksum_out,
+	unsigned *compressed_size_out
+);
+
+/* Compress with a master-block dictionary prefix.
+   The master data pre-fills the LZ77 sliding window, allowing
+   back-references into the master for cross-file deduplication.
+   Set master=NULL, master_size=0 for no master (same as uc2_compress). */
+UC2_API int uc2_compress_ex(
+	int level,
+	const void *master, unsigned master_size,
+	int (*read)(void *context, void *buf, unsigned len),
+	void *read_ctx,
+	int (*write)(void *context, const void *ptr, unsigned len),
+	void *write_ctx,
+	unsigned size,
+	unsigned short *checksum_out,
+	unsigned *compressed_size_out
+);
+
+/* Decompress the built-in SuperMaster (49152 bytes).
+   buf must be at least 49152 bytes.
+   Returns 49152 on success, negative UC2_* error code on failure. */
+UC2_API int uc2_get_supermaster(void *buf, unsigned buf_size);
+
 struct uc2_io {
 	/* Read len bytes from the archive at offset pos into buf.
 	   Return number of bytes read, or less if eof.
--- a/lib/include/uc2/uc2_blake3.h
+++ b/lib/include/uc2/uc2_blake3.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* BLAKE3 cryptographic hashing for archive integrity.
+ *
+ * BLAKE3 is a fast cryptographic hash based on the Bao tree hashing
+ * mode and the BLAKE2s compression function.  It produces 256-bit
+ * (32-byte) digests suitable for content verification, integrity
+ * checking, and content-addressable storage.
+ *
+ * This is a simplified single-threaded implementation (~300 lines).
+ * For full BLAKE3 features (keyed hashing, KDF, XOF), see the
+ * reference implementation at github.com/BLAKE3-team/BLAKE3.
+ *
+ * Usage:
+ *   struct uc2_blake3 ctx;
+ *   uc2_blake3_init(&ctx);
+ *   uc2_blake3_update(&ctx, data, len);
+ *   uint8_t hash[32];
+ *   uc2_blake3_final(&ctx, hash);
+ *
+ *   // Or one-shot:
+ *   uc2_blake3_hash(data, len, hash);
+ */
+
+#ifndef UC2_BLAKE3_H
+#define UC2_BLAKE3_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+#define UC2_BLAKE3_OUT_LEN  32
+#define UC2_BLAKE3_BLOCK_LEN 64
+#define UC2_BLAKE3_CHUNK_LEN 1024
+
+struct uc2_blake3 {
+	uint32_t cv[8];        /* chaining value (8 x 32-bit words = 256 bits) */
+	uint8_t buf[UC2_BLAKE3_BLOCK_LEN]; /* UC2_BLAKE3_BLOCK_LEN (64) bytes; presumably pending input -- confirm */
+	uint8_t buf_len;       /* presumably the number of valid bytes in buf -- confirm in uc2_blake3.c */
+	uint64_t counter;      /* NOTE(review): role relative to chunk_counter is not visible from this header */
+	uint8_t flags;         /* presumably BLAKE3 domain-separation flags -- confirm */
+	/* Stack for tree hashing */
+	uint32_t cv_stack[8 * 54]; /* room for 54 chaining values of 8 words each (max tree depth) */
+	uint8_t cv_stack_len;  /* presumably the number of chaining values currently on cv_stack -- confirm */
+	uint64_t chunk_counter; /* presumably the index of the current UC2_BLAKE3_CHUNK_LEN-byte chunk -- confirm */
+	uint8_t blocks_compressed; /* presumably blocks compressed so far in the current chunk -- confirm */
+};
+
+/* Initialize hasher. */
+void uc2_blake3_init(struct uc2_blake3 *ctx);
+
+/* Feed data to the hasher. */
+void uc2_blake3_update(struct uc2_blake3 *ctx, const void *data, size_t len);
+
+/* Finalize and produce hash. */
+void uc2_blake3_final(const struct uc2_blake3 *ctx, uint8_t out[UC2_BLAKE3_OUT_LEN]);
+
+/* One-shot hash. */
+void uc2_blake3_hash(const void *data, size_t len, uint8_t out[UC2_BLAKE3_OUT_LEN]);
+
+/* Compare two hashes (constant-time). Returns 1 if equal. */
+int uc2_blake3_equal(const uint8_t a[UC2_BLAKE3_OUT_LEN],
+                     const uint8_t b[UC2_BLAKE3_OUT_LEN]);
+
+#endif
--- a/lib/include/uc2/uc2_blockstore.h
+++ b/lib/include/uc2/uc2_blockstore.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Cross-archive block store for content-addressable deduplication.
+ *
+ * Stores unique CDC chunks indexed by 64-bit content hash.  Multiple
+ * archives can share blocks through the store, enabling cross-archive
+ * and cross-version dedup.
+ *
+ * The store is a directory of chunk files named by their hash.  A
+ * manifest maps (archive, file, chunk_index) -> chunk_hash, enabling
+ * reconstruction of any file from its chunk list.
+ *
+ * Usage:
+ *   struct uc2_blockstore bs;
+ *   uc2_blockstore_open(&bs, "/path/to/store");
+ *   uc2_blockstore_ingest(&bs, &merkle_tree, data, len);
+ *   // ... later, from a different archive:
+ *   int new_chunks = uc2_blockstore_ingest(&bs, &tree2, data2, len2);
+ *   // new_chunks < tree2.nchunks means dedup happened
+ *   uc2_blockstore_close(&bs);
+ */
+
+#ifndef UC2_BLOCKSTORE_H
+#define UC2_BLOCKSTORE_H
+
+#include <stdint.h>
+#include <stddef.h>
+#include "uc2_merkle.h"
+
+/* Block store state, passed to every uc2_blockstore_*() call (open, ingest, has, read, close). */
+struct uc2_blockstore {
+	char *path;        /* store directory path; NOTE(review): presumably an owned copy released by uc2_blockstore_close() -- confirm */
+	int nblocks;       /* number of unique blocks stored */
+	int64_t total_bytes; /* total bytes of unique block data */
+	int64_t saved_bytes; /* bytes saved by dedup; returned by uc2_blockstore_saved() */
+};
+
+/* Open or create a block store at the given directory path.
+ * Returns 0 on success, -1 on error. */
+int uc2_blockstore_open(struct uc2_blockstore *bs, const char *path);
+
+/* Ingest a file's chunks into the store.  Only stores chunks not
+ * already present (dedup).  Returns the number of NEW chunks stored
+ * (0 = fully deduplicated). */
+int uc2_blockstore_ingest(struct uc2_blockstore *bs,
+                          const struct uc2_merkle *tree,
+                          const uint8_t *data, size_t len);
+
+/* Check if a chunk exists in the store. */
+int uc2_blockstore_has(const struct uc2_blockstore *bs, uint64_t hash);
+
+/* Read a chunk from the store into buf (must be large enough).
+ * Returns chunk length, or -1 on error. */
+int uc2_blockstore_read(const struct uc2_blockstore *bs,
+                        uint64_t hash, uint8_t *buf, size_t buf_size);
+
+/* Return bs->saved_bytes, the store's count of bytes saved by dedup.  bs is dereferenced unconditionally and must be non-NULL. */
+static inline int64_t uc2_blockstore_saved(const struct uc2_blockstore *bs)
+{
+	return bs->saved_bytes;
+}
+
+/* Close the block store (frees internal state, does not delete files). */
+void uc2_blockstore_close(struct uc2_blockstore *bs);
+
+#endif
--- a/lib/include/uc2/uc2_cdc.h
+++ b/lib/include/uc2/uc2_cdc.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Content-defined chunking (CDC) for UC2 deduplication.
+ *
+ * Uses the Gear rolling hash for fast, content-aware chunk boundary
+ * detection.  Gear hash is a simple multiplicative hash that XORs each
+ * byte with a pre-computed random table, giving O(1) per-byte updates.
+ *
+ * Typical usage:
+ *   struct uc2_chunker c;
+ *   uc2_chunker_init(&c, 13, 0, 0);  // avg chunk ~8KB (2^13); 0, 0 = default min/max
+ *   while (uc2_chunker_next(&c, data, len, &chunk_off, &chunk_len))
+ *       process(data + chunk_off, chunk_len);
+ */
+
+#ifndef UC2_CDC_H
+#define UC2_CDC_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* Gear hash: fast rolling hash with O(1) per-byte update. */
+uint32_t uc2_gear_hash(const uint8_t *data, size_t len);
+
+/* CDC chunker state: initialised by uc2_chunker_init(), advanced by uc2_chunker_next(), rewound by uc2_chunker_reset(). */
+struct uc2_chunker {
+	uint32_t mask;       /* boundary mask: (1 << bits) - 1, where bits is the uc2_chunker_init() exponent */
+	size_t   min_chunk;  /* minimum chunk size (presumably bytes -- confirm) */
+	size_t   max_chunk;  /* maximum chunk size (presumably bytes -- confirm) */
+	size_t   pos;        /* current position in data; presumably an offset into the buffer given to uc2_chunker_next() -- confirm */
+};
+
+/* Initialize chunker.
+ *   bits:      target chunk size exponent (avg chunk = 2^bits bytes).
+ *              Recommended: 13 (8KB), 14 (16KB), or 15 (32KB).
+ *   min_chunk: minimum chunk size (0 = bits-2 default)
+ *   max_chunk: maximum chunk size (0 = bits+2 default) */
+void uc2_chunker_init(struct uc2_chunker *c, int bits,
+                      size_t min_chunk, size_t max_chunk);
+
+/* Find the next chunk boundary in [data, data+len).
+ * Returns 1 and sets *chunk_off and *chunk_len if a chunk was found.
+ * Returns 0 when all data has been consumed (final chunk).
+ * Call repeatedly until it returns 0. */
+int uc2_chunker_next(struct uc2_chunker *c,
+                     const uint8_t *data, size_t len,
+                     size_t *chunk_off, size_t *chunk_len);
+
+/* Reset chunker for a new data stream. */
+void uc2_chunker_reset(struct uc2_chunker *c);
+
+/* FNV-1a hash for chunk content addressing. */
+uint32_t uc2_fnv1a(const uint8_t *data, size_t len);
+
+#endif
--- a/lib/include/uc2/uc2_delta.h
+++ b/lib/include/uc2/uc2_delta.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Delta compression for file versioning.
+ *
+ * Computes a compact binary delta between a source (old) and target
+ * (new) file.  The delta encodes copy-from-source and insert-new-data
+ * instructions, similar to xdelta/bsdiff.
+ *
+ * The delta can be applied to reconstruct the target from the source.
+ * Combined with master blocks, this enables version-level dedup:
+ * store the first version as a master, subsequent versions as deltas.
+ *
+ * Usage:
+ *   uint8_t *delta; size_t delta_len;
+ *   uc2_delta_encode(src, src_len, tgt, tgt_len, &delta, &delta_len);
+ *   uint8_t *reconstructed; size_t recon_len;
+ *   uc2_delta_apply(src, src_len, delta, delta_len, &reconstructed, &recon_len);
+ *   // reconstructed == tgt
+ *   free(delta); free(reconstructed);
+ */
+
+#ifndef UC2_DELTA_H
+#define UC2_DELTA_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* Encode a delta from source to target.
+ * Allocates *out_delta (caller must free).
+ * Returns 0 on success, -1 on error. */
+int uc2_delta_encode(const uint8_t *src, size_t src_len,
+                     const uint8_t *tgt, size_t tgt_len,
+                     uint8_t **out_delta, size_t *out_delta_len);
+
+/* Apply a delta to source to reconstruct target.
+ * Allocates *out_tgt (caller must free).
+ * Returns 0 on success, -1 on error. */
+int uc2_delta_apply(const uint8_t *src, size_t src_len,
+                    const uint8_t *delta, size_t delta_len,
+                    uint8_t **out_tgt, size_t *out_tgt_len);
+
+/* Delta format:
+ *   Header: "UC2D" (4 bytes) + target_len (4 bytes LE)
+ *   Instructions:
+ *     COPY:   0x01 + offset(4 LE) + length(4 LE)  — copy from source
+ *     INSERT: 0x02 + length(4 LE) + data[length]  — insert new bytes
+ *     END:    0x00                                 — end of delta
+ */
+
+#endif
--- a/lib/include/uc2/uc2_dict.h
+++ b/lib/include/uc2/uc2_dict.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Dictionary management for zstd-inspired dictionary compression.
+ *
+ * Formalizes UC2's master blocks as proper dictionaries with content
+ * hashes (IDs), integrity checksums, and cross-archive sharing.
+ * Combined with the block store (uc2_blockstore.h), this enables
+ * distributed dedup: archives in different locations can reference
+ * shared dictionaries by content hash.
+ *
+ * Usage:
+ *   struct uc2_dict dict;
+ *   uc2_dict_create(&dict, master_data, master_size);
+ *   uint64_t id = uc2_dict_id(&dict);
+ *   // Store/share/reference by id...
+ *   uc2_dict_verify(&dict);  // check integrity
+ *   uc2_dict_free(&dict);
+ */
+
+#ifndef UC2_DICT_H
+#define UC2_DICT_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* Dictionary header (serialized in archive or block store). */
+#define UC2_DICT_MAGIC 0x44324355  /* "UC2D" */
+
+struct uc2_dict {
+	uint64_t id;          /* content hash (FNV-1a 64-bit of data) */
+	uint32_t checksum;    /* FNV-1a 32-bit integrity check */
+	uint32_t size;        /* dictionary data size; note this is uint32_t while uc2_dict_create() takes a size_t */
+	uint8_t *data;        /* dictionary content (owned); presumably released by uc2_dict_free() -- confirm */
+};
+
+/* Serialized dictionary header (24 bytes of fields: 4+8+4+4+4; stored in archive/block store). */
+struct uc2_dict_header {
+	uint32_t magic;       /* UC2_DICT_MAGIC */
+	uint64_t id;          /* content hash; NOTE(review): 8-byte alignment after the 4-byte magic likely adds padding, so sizeof this struct may exceed 24 -- confirm the serializer writes fields individually */
+	uint32_t checksum;    /* integrity */
+	uint32_t size;        /* data size following header */
+	uint32_t reserved;    /* future use */
+};
+
+/* Create a dictionary from raw master data.
+ * Computes id (content hash) and checksum.  Copies data (caller
+ * can free the original after this call). */
+int uc2_dict_create(struct uc2_dict *dict, const uint8_t *data, size_t size);
+
+/* Return dict->id (the content hash used for cross-archive sharing).  dict is dereferenced unconditionally and must be non-NULL. */
+static inline uint64_t uc2_dict_id(const struct uc2_dict *dict)
+{
+	return dict->id;
+}
+
+/* Verify dictionary integrity (returns 1 if valid, 0 if corrupted). */
+int uc2_dict_verify(const struct uc2_dict *dict);
+
+/* Serialize dictionary to a buffer (header + data).
+ * Allocates *out (caller must free).  Returns total size. */
+size_t uc2_dict_serialize(const struct uc2_dict *dict, uint8_t **out);
+
+/* Deserialize dictionary from a buffer.
+ * Returns 0 on success, -1 on error. */
+int uc2_dict_deserialize(struct uc2_dict *dict, const uint8_t *buf, size_t len);
+
+/* Return 1 if a and b have equal ids, else 0.  Only the 64-bit id is compared; the data bytes are not.  Both pointers must be non-NULL. */
+static inline int uc2_dict_match(const struct uc2_dict *a, const struct uc2_dict *b)
+{
+	return a->id == b->id;
+}
+
+/* Free dictionary data. */
+void uc2_dict_free(struct uc2_dict *dict);
+
+#endif
--- a/lib/include/uc2/uc2_ingest.h
+++ b/lib/include/uc2/uc2_ingest.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Streaming dedup ingest for UC2.
+ *
+ * uc2 --ingest <archive> reads a byte stream (typically stdin from
+ * tar / rsync / cp -a), splits it via CDC, deduplicates chunks, and
+ * writes a self-contained archive file.  uc2 --ingest-restore <archive>
+ * reverses this.
+ *
+ * Two on-disk formats are supported:
+ *
+ *   v1 (legacy): manifest in <archive>, chunk data in a sidecar
+ *   blockstore directory at <archive>.blocks/.  Cross-archive dedup
+ *   works through shared blockstore directories.  Read-only now;
+ *   writer defaults to v2.
+ *
+ *   v2 (default): archive is self-contained -- chunks are stored in
+ *   an embedded pool inside the archive itself.  No sidecar
+ *   directory.  Each manifest entry carries its chunk's absolute
+ *   file offset; deduplicated chunks share a single offset.
+ *
+ * Manifest layouts (all little-endian):
+ *
+ *   v1: +0   8B   magic "UC2INGST"
+ *       +8   1B   version (1)
+ *       +9   1B   cdc_bits
+ *      +10   2B   reserved
+ *      +12   4B   chunk_count
+ *      +16   ...  chunk_count * 12B:  8B hash, 4B length
+ *
+ *   v2: +0   8B   magic "UC2INGST"
+ *       +8   1B   version (2)
+ *       +9   1B   cdc_bits
+ *      +10   2B   reserved
+ *      +12   4B   chunk_count
+ *      +16   ...  chunk_count * 16B:  8B hash, 4B length, 4B offset
+ *      ...  chunk pool: unique chunks back-to-back at recorded offsets
+ *
+ * Limitations:
+ *   - The whole stream is buffered in memory before chunking.  Suits
+ *     CDC's locality-of-boundary requirement and is fine for streams
+ *     up to a few GB.  True streaming is a future revision.
+ *   - The format is not yet a UC2 v3 archive consumable by uc2 -x /
+ *     -l; integrating with the master-block layout is a follow-up.
+ */
+
+#ifndef UC2_INGEST_H
+#define UC2_INGEST_H
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+
+struct uc2_ingest_stats {
+	uint64_t bytes_in;       /* input stream length */
+	int      chunks_total;   /* total chunks in input */
+	int      chunks_new;     /* chunks newly stored */
+	int      chunks_dedup;   /* chunks already in the block store */
+	uint64_t bytes_stored;   /* bytes physically written this call */
+	uint64_t bytes_saved;    /* bytes saved by dedup */
+};
+
+/* Ingest len bytes of data into archive_path.  Per the format notes
+ * above, the writer defaults to v2 (chunks embedded in the archive, no
+ * sidecar).  cdc_bits: average chunk size (13 = 8 KiB; 0 = default). */
+int uc2_ingest_write(const char *archive_path,
+                     const uint8_t *data, size_t len,
+                     int cdc_bits,
+                     struct uc2_ingest_stats *stats);
+
+/* Restore the byte stream described by an ingest manifest to out.  Chunks
+ * are read from the embedded pool (v2) or <archive_path>.blocks/ (v1). */
+int uc2_ingest_restore(const char *archive_path, FILE *out);
+
+#endif
--- a/lib/include/uc2/uc2_lz4.h
+++ b/lib/include/uc2/uc2_lz4.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* LZ4-compatible ultra-fast compression.
+ *
+ * Minimal LZ4-like compressor optimized for speed over ratio.
+ * Uses a single-probe hash table (no chains) for O(1) match finding.
+ * Suitable for real-time or low-resource scenarios where decompression
+ * speed is critical and compression ratio is secondary.
+ *
+ * Format: sequence of literal/match tokens:
+ *   [token] [literal_length_ext?] [literals] [offset:16LE] [match_length_ext?]
+ *   token = (literal_len:4 << 4) | match_len:4
+ *   If literal_len == 15: read additional bytes until < 255
+ *   If match_len == 15: read additional bytes until < 255
+ *   Match lengths are +4 (minimum match = 4)
+ *
+ * Usage:
+ *   size_t bound = uc2_lz4_bound(src_len);
+ *   uint8_t *dst = malloc(bound);
+ *   size_t clen = uc2_lz4_compress(src, src_len, dst, bound);
+ *   size_t dlen = uc2_lz4_decompress(dst, clen, out, out_cap);
+ */
+
+#ifndef UC2_LZ4_H
+#define UC2_LZ4_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* Maximum compressed size for a given input length. */
+static inline size_t uc2_lz4_bound(size_t src_len)
+{
+	return src_len + src_len / 255 + 16;
+}
+
+/* Compress src into dst.  Returns compressed size, or 0 on error.
+ * dst must be at least uc2_lz4_bound(src_len) bytes. */
+size_t uc2_lz4_compress(const uint8_t *src, size_t src_len,
+                        uint8_t *dst, size_t dst_cap);
+
+/* Decompress src into dst.  Returns decompressed size, or 0 on error.
+ * dst must be large enough for the original data. */
+size_t uc2_lz4_decompress(const uint8_t *src, size_t src_len,
+                          uint8_t *dst, size_t dst_cap);
+
+#endif
--- a/lib/include/uc2/uc2_merkle.h
+++ b/lib/include/uc2/uc2_merkle.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Merkle DAG for content-addressable deduplication.
+ *
+ * Builds a Merkle tree from CDC chunks: each file is represented as a
+ * list of chunk hashes.  The file's root hash is derived from the
+ * concatenated chunk hashes, enabling structural comparison.
+ *
+ * Usage:
+ *   struct uc2_merkle tree;
+ *   uc2_merkle_build(&tree, data, len, 13);
+ *   uint64_t root = uc2_merkle_root(&tree);
+ *   int shared = uc2_merkle_common(&tree_a, &tree_b);
+ *   uc2_merkle_free(&tree);
+ */
+
+#ifndef UC2_MERKLE_H
+#define UC2_MERKLE_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* A chunk in the Merkle tree. */
+struct uc2_chunk {
+	uint64_t hash;     /* content hash of this chunk */
+	uint32_t offset;   /* offset within the file */
+	uint32_t length;   /* chunk length in bytes */
+};
+
+/* Merkle tree for one file. */
+struct uc2_merkle {
+	struct uc2_chunk *chunks;
+	int nchunks;
+	int capacity;
+	uint64_t root;     /* root hash (hash of chunk hash list) */
+};
+
+/* Build a Merkle tree from file data.
+ *   tree:  output tree (caller must call uc2_merkle_free later)
+ *   data:  file content
+ *   len:   file length
+ *   bits:  CDC chunk size exponent (13 = avg 8KB) */
+void uc2_merkle_build(struct uc2_merkle *tree,
+                      const uint8_t *data, size_t len, int bits);
+
+/* Get the root hash of a Merkle tree. */
+static inline uint64_t uc2_merkle_root(const struct uc2_merkle *tree)
+{
+	return tree->root;
+}
+
+/* Count chunks shared between two Merkle trees (by hash). */
+int uc2_merkle_common(const struct uc2_merkle *a, const struct uc2_merkle *b);
+
+/* Compute the fraction of bytes in tree A covered by shared chunks with B.
+ * Returns 0.0 (no overlap) to 1.0 (identical content). */
+double uc2_merkle_similarity(const struct uc2_merkle *a,
+                             const struct uc2_merkle *b);
+
+/* Free a Merkle tree's chunk array. */
+void uc2_merkle_free(struct uc2_merkle *tree);
+
+/* 64-bit content hash (FNV-1a 64-bit). */
+uint64_t uc2_hash64(const uint8_t *data, size_t len);
+
+#endif
--- a/lib/include/uc2/uc2_ots.h
+++ b/lib/include/uc2/uc2_ots.h
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* OpenTimestamps integration.
+ *
+ * UC2 stores an OpenTimestamps proof in a magic-bracketed sidecar
+ * trailer appended after the regular UC2 archive bytes.  The trailer
+ * does not affect compatibility with the original UC2 Pro reader,
+ * which uses the front header's recorded length.
+ *
+ * The proof itself is the standard `.ots` binary: a 31-byte header
+ * magic + version + file-hash op + leaf digest + serialized timestamp.
+ * Callers can extract the proof verbatim and run the standard
+ * `ots verify` tool on it.
+ *
+ * Local verification covers structural validity and the calendar-path
+ * subset of opcodes (APPEND, PREPEND, SHA256).  Proofs that use other
+ * crypto ops (SHA1, RIPEMD160, KECCAK256) are accepted as structurally
+ * valid but reported as not locally cryptographically verified;
+ * the standard `ots verify` should be used for full validation. */
+
+#ifndef UC2_OTS_H
+#define UC2_OTS_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* OTS opcodes. */
+enum {
+	UC2_OTS_OP_APPEND     = 0xf0, /* binary: append varbytes operand */
+	UC2_OTS_OP_PREPEND    = 0xf1, /* binary: prepend varbytes operand */
+	UC2_OTS_OP_REVERSE    = 0xf2, /* unary, deprecated */
+	UC2_OTS_OP_HEXLIFY    = 0xf3, /* unary */
+	UC2_OTS_OP_SHA1       = 0x02, /* unary */
+	UC2_OTS_OP_RIPEMD160  = 0x03, /* unary */
+	UC2_OTS_OP_SHA256     = 0x08, /* unary, file-hash op */
+	UC2_OTS_OP_KECCAK256  = 0x67, /* unary */
+	UC2_OTS_BRANCH        = 0xff,
+	UC2_OTS_ATTESTATION   = 0x00
+};
+
+#define UC2_OTS_HEADER_MAGIC \
+	"\x00OpenTimestamps\x00\x00Proof\x00\xbf\x89\xe2\xe8\x84\xe8\x92\x94"
+#define UC2_OTS_HEADER_MAGIC_LEN 31
+#define UC2_OTS_VERSION 0x01
+
+/* Attestation tags (8 bytes each). */
+#define UC2_OTS_TAG_PENDING  "\x83\xdf\xe3\x0d\x2e\xf9\x0c\x8e"
+#define UC2_OTS_TAG_BITCOIN  "\x05\x88\x96\x0d\x73\xd7\x19\x01"
+#define UC2_OTS_TAG_LITECOIN "\x06\x86\x9a\x0d\x73\xd7\x1b\x45"
+#define UC2_OTS_TAG_LEN 8
+
+/* Hard limits to bound parser cost on hostile input. */
+#define UC2_OTS_MAX_DIGEST_LEN  64
+#define UC2_OTS_MAX_VARBYTES    8192
+#define UC2_OTS_MAX_DEPTH       32
+#define UC2_OTS_MAX_VARINT      0xffffffffu
+
+/* Error codes. */
+enum {
+	UC2_OTS_OK              =  0,
+	UC2_OTS_ERR_TRUNCATED   = -1,
+	UC2_OTS_ERR_NONCANONICAL= -2,
+	UC2_OTS_ERR_OVERFLOW    = -3,
+	UC2_OTS_ERR_BAD_MAGIC   = -4,
+	UC2_OTS_ERR_BAD_VERSION = -5,
+	UC2_OTS_ERR_BAD_HASH_OP = -6,
+	UC2_OTS_ERR_DEPTH       = -7,
+	UC2_OTS_ERR_TOO_LARGE   = -8,
+	UC2_OTS_ERR_BAD_OP      = -9
+};
+
+/* Verification result reported by uc2_ots_walk. */
+enum {
+	UC2_OTS_RESULT_VERIFIED       = 1, /* leaf reaches all attestations via supported ops only */
+	UC2_OTS_RESULT_STRUCTURAL     = 2, /* parses cleanly but contains unsupported ops */
+	UC2_OTS_RESULT_LEAF_MISMATCH  = 3  /* shape OK but leaf digest doesn't match input */
+};
+
+/* Attestation summary callback.  Called once per attestation reached.
+ * `digest` is the digest at the leaf where the attestation was emitted.
+ * Return non-zero to abort the walk.
+ *
+ * Note: the digest is only meaningful when uc2_ots_walk returns
+ * UC2_OTS_RESULT_VERIFIED.  When the walker returns
+ * UC2_OTS_RESULT_STRUCTURAL the proof contains unsupported unary ops
+ * (SHA1, RIPEMD160, KECCAK256, REVERSE, HEXLIFY) which leave the digest
+ * unchanged for structural traversal; the digest passed to the callback
+ * does not represent the cryptographic state at that leaf. */
+typedef int (*uc2_ots_attest_cb)(void *ctx,
+                                 const uint8_t *tag /* 8 bytes */,
+                                 const uint8_t *payload, size_t payload_len,
+                                 const uint8_t *digest, size_t digest_len);
+
+/* OTS varint codec.  *out_value is set on success; *consumed is the
+ * number of input bytes read. */
+int uc2_ots_varint_decode(const uint8_t *in, size_t in_len,
+                          uint64_t *out_value, size_t *consumed);
+size_t uc2_ots_varint_encode(uint64_t value, uint8_t out[10]);
+
+/* Parse the .ots file envelope (header magic + version + file-hash op +
+ * leaf digest + timestamp body).  Sets out_* pointers into the input
+ * buffer; no allocation. */
+int uc2_ots_parse_file(const uint8_t *file, size_t file_len,
+                       uint8_t *out_hash_op,
+                       const uint8_t **out_leaf_digest,
+                       size_t *out_leaf_digest_len,
+                       const uint8_t **out_body,
+                       size_t *out_body_len);
+
+/* Build a .ots file from a leaf digest and a serialized timestamp body.
+ * Returns total bytes written, or a negative error code. */
+int uc2_ots_serialize_file(uint8_t hash_op,
+                           const uint8_t *leaf_digest, size_t leaf_digest_len,
+                           const uint8_t *body, size_t body_len,
+                           uint8_t *out, size_t out_cap);
+
+/* Walk a serialized timestamp body from `leaf_digest`, applying ops and
+ * invoking `cb` for each attestation reached.  Returns one of
+ * UC2_OTS_RESULT_* on structural success, or a negative error code. */
+int uc2_ots_walk(const uint8_t *body, size_t body_len,
+                 const uint8_t *leaf_digest, size_t leaf_digest_len,
+                 uc2_ots_attest_cb cb, void *ctx);
+
+/* UC2 OTS trailer.
+ *
+ * Layout (all integers little-endian, 32-bit unsigned):
+ *
+ *   [archive bytes ...]
+ *   "UC2-OTS\0"     (8 bytes, front magic)
+ *   u32  version     (= 1)
+ *   u32  archive_len (length of preceding archive bytes)
+ *   u32  proof_len
+ *   bytes  proof    (proof_len bytes, raw .ots file)
+ *   u32  proof_len   (duplicate, for reverse-scan)
+ *   "UC2-OTS\0"     (8 bytes, back magic)
+ */
+
+#define UC2_OTS_TRAILER_MAGIC      "UC2-OTS\0"
+#define UC2_OTS_TRAILER_MAGIC_LEN  8
+#define UC2_OTS_TRAILER_VERSION    1u
+#define UC2_OTS_TRAILER_HEAD_LEN   (UC2_OTS_TRAILER_MAGIC_LEN + 4 + 4 + 4)
+#define UC2_OTS_TRAILER_TAIL_LEN   (4 + UC2_OTS_TRAILER_MAGIC_LEN)
+#define UC2_OTS_TRAILER_OVERHEAD   (UC2_OTS_TRAILER_HEAD_LEN + UC2_OTS_TRAILER_TAIL_LEN)
+#define UC2_OTS_TRAILER_MAX_PROOF  (1u << 20)
+
+/* Build a trailer for an existing archive of length archive_len.
+ * Writes [front magic | version | archive_len | proof_len | proof | proof_len | back magic]
+ * to out.  Returns total bytes written, or negative on error. */
+int uc2_ots_trailer_build(uint32_t archive_len,
+                          const uint8_t *proof, size_t proof_len,
+                          uint8_t *out, size_t out_cap);
+
+/* Read a trailer from the end of a file image.  On success sets
+ *   *out_archive_len = length of preceding archive (the SHA-256 region)
+ *   *out_proof, *out_proof_len = pointer/length of proof inside `file`
+ * Returns:
+ *   UC2_OTS_OK if a well-formed trailer is present,
+ *   1 if no trailer (back magic absent),
+ *   negative error code if the back magic is present but the trailer is malformed. */
+int uc2_ots_trailer_parse(const uint8_t *file, size_t file_len,
+                          uint32_t *out_archive_len,
+                          const uint8_t **out_proof, size_t *out_proof_len);
+
+/* Convenience: get a human-readable name for a known attestation tag,
+ * or NULL if unknown. */
+const char *uc2_ots_attest_name(const uint8_t tag[UC2_OTS_TAG_LEN]);
+
+#endif
--- a/lib/include/uc2/uc2_preprocess.h
+++ b/lib/include/uc2/uc2_preprocess.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Content-aware preprocessing filters for improved compression.
+ *
+ * These transforms are applied BEFORE compression to expose redundancy
+ * that LZ77+entropy coding can exploit more efficiently.  Each filter
+ * is reversible (apply/revert) and content-type specific.
+ *
+ * Filters:
+ *   BCJ  — x86 branch/call/jump address normalization (E8/E9 transform)
+ *   BWT  — Burrows-Wheeler transform for text (groups similar contexts)
+ *   Delta — byte-wise delta encoding for structured/tabular data
+ */
+
+#ifndef UC2_PREPROCESS_H
+#define UC2_PREPROCESS_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* --- BCJ (Branch/Call/Jump) filter for x86 executables --- */
+
+/* Convert relative x86 CALL/JMP addresses to absolute.
+ * This makes the same function called from different locations produce
+ * identical byte sequences, improving LZ77 matching.
+ * Operates in-place.  Returns 0 on success. */
+int uc2_bcj_apply(uint8_t *data, size_t len);
+
+/* Revert BCJ transform (absolute → relative). */
+int uc2_bcj_revert(uint8_t *data, size_t len);
+
+/* --- BWT (Burrows-Wheeler Transform) for text --- */
+
+/* Apply BWT to data.  Allocates *out (caller must free).
+ * Sets *primary_index to the BWT primary index (needed for revert).
+ * Returns 0 on success. */
+int uc2_bwt_apply(const uint8_t *data, size_t len,
+                  uint8_t **out, uint32_t *primary_index);
+
+/* Revert BWT.  Allocates *out (caller must free).
+ * Returns 0 on success. */
+int uc2_bwt_revert(const uint8_t *data, size_t len,
+                   uint32_t primary_index, uint8_t **out);
+
+/* --- Delta filter for structured data --- */
+
+/* Apply byte-wise delta encoding (each byte = current - previous).
+ * Operates in-place.  Stride controls the delta distance (1 = adjacent
+ * bytes, 2 = every other byte, etc.).  Stride 1 is best for sequential
+ * data; stride 2+ for interleaved multi-channel data. */
+void uc2_delta_filter_apply(uint8_t *data, size_t len, int stride);
+
+/* Revert byte-wise delta encoding.  Operates in-place. */
+void uc2_delta_filter_revert(uint8_t *data, size_t len, int stride);
+
+/* --- Content detection --- */
+
+/* Detect likely content type for automatic filter selection.
+ * Returns one of the UC2_CONTENT_* constants. */
+#define UC2_CONTENT_BINARY  0  /* generic binary / unknown */
+#define UC2_CONTENT_TEXT    1  /* text (high ASCII printable ratio) */
+#define UC2_CONTENT_X86     2  /* x86 executable (MZ/PE/ELF header) */
+#define UC2_CONTENT_STRUCT  3  /* structured/tabular (regular patterns) */
+
+int uc2_detect_content(const uint8_t *data, size_t len);
+
+#endif
--- a/lib/include/uc2/uc2_rans.h
+++ b/lib/include/uc2/uc2_rans.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* rANS (range Asymmetric Numeral Systems) entropy coder.
+ *
+ * Drop-in replacement for Huffman coding with ~5-15% better compression
+ * on skewed distributions.  Uses table-based rANS with 32-bit state
+ * and frequencies normalized to a power of 2.
+ *
+ * Usage (tab built with uc2_rans_build_table(&tab, raw_freq, nsym)):
+ *   struct uc2_rans_enc enc;
+ *   uc2_rans_enc_init(&enc, &tab);
+ *   for each symbol, last first: uc2_rans_encode(&enc, sym);
+ *   size_t clen = uc2_rans_enc_finish(&enc, &out_data);
+ *
+ *   struct uc2_rans_dec dec;
+ *   uc2_rans_dec_init(&dec, &tab, in_buf, in_len);
+ *   for each symbol: int sym = uc2_rans_decode(&dec);
+ */
+
+#ifndef UC2_RANS_H
+#define UC2_RANS_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* Frequency table precision: frequencies sum to 1 << PROB_BITS */
+#define UC2_RANS_PROB_BITS 12
+#define UC2_RANS_PROB_SCALE (1 << UC2_RANS_PROB_BITS)
+
+/* Maximum symbols supported */
+#define UC2_RANS_MAX_SYMS 344
+
+/* Normalized frequency table. */
+struct uc2_rans_table {
+	uint16_t freq[UC2_RANS_MAX_SYMS];     /* normalized frequencies */
+	uint16_t cumfreq[UC2_RANS_MAX_SYMS];  /* cumulative frequencies */
+	int nsym;
+};
+
+/* Build normalized frequency table from raw counts.
+ * Frequencies are scaled to sum to UC2_RANS_PROB_SCALE. */
+void uc2_rans_build_table(struct uc2_rans_table *tab,
+                          const uint32_t *raw_freq, int nsym);
+
+/* --- Encoder --- */
+
+struct uc2_rans_enc {
+	uint32_t state;
+	const struct uc2_rans_table *tab;
+	/* Reverse buffer: rANS encodes in reverse order */
+	uint8_t *rev_buf;
+	size_t rev_pos;
+	size_t rev_cap;
+};
+
+/* Initialize encoder. */
+void uc2_rans_enc_init(struct uc2_rans_enc *enc,
+                       const struct uc2_rans_table *tab);
+
+/* Encode one symbol.  Symbols must be encoded in REVERSE order
+ * (last symbol first).  Use uc2_rans_enc_finish to finalize. */
+void uc2_rans_encode(struct uc2_rans_enc *enc, int sym);
+
+/* Finalize encoding: write state and return the compressed data.
+ * Caller must free *out_data. Returns compressed size. */
+size_t uc2_rans_enc_finish(struct uc2_rans_enc *enc,
+                           uint8_t **out_data);
+
+/* Free encoder resources. */
+void uc2_rans_enc_free(struct uc2_rans_enc *enc);
+
+/* --- Decoder --- */
+
+struct uc2_rans_dec {
+	uint32_t state;
+	const struct uc2_rans_table *tab;
+	const uint8_t *data;
+	size_t pos;
+	size_t len;
+	/* Reverse lookup: cumfreq → symbol (for fast decoding) */
+	uint16_t lookup[UC2_RANS_PROB_SCALE];
+};
+
+/* Initialize decoder from compressed data. */
+void uc2_rans_dec_init(struct uc2_rans_dec *dec,
+                       const struct uc2_rans_table *tab,
+                       const uint8_t *data, size_t len);
+
+/* Decode one symbol. */
+int uc2_rans_decode(struct uc2_rans_dec *dec);
+
+#endif
--- a/lib/include/uc2/uc2_sha256.h
+++ b/lib/include/uc2/uc2_sha256.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* SHA-256 (FIPS 180-4) -- pure C implementation.
+ *
+ * Used by the OpenTimestamps integration; calendars accept SHA-256
+ * digests as proof leaves. */
+
+#ifndef UC2_SHA256_H
+#define UC2_SHA256_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+#define UC2_SHA256_OUT_LEN 32
+#define UC2_SHA256_BLOCK_LEN 64
+
+struct uc2_sha256 {
+	uint32_t state[8];
+	uint64_t bitcount;
+	uint8_t buf[UC2_SHA256_BLOCK_LEN];
+	size_t buf_len;
+};
+
+void uc2_sha256_init(struct uc2_sha256 *ctx);
+void uc2_sha256_update(struct uc2_sha256 *ctx, const void *data, size_t len);
+void uc2_sha256_final(struct uc2_sha256 *ctx, uint8_t out[UC2_SHA256_OUT_LEN]);
+void uc2_sha256_hash(const void *data, size_t len, uint8_t out[UC2_SHA256_OUT_LEN]);
+
+#endif
--- a/lib/include/uc2/uc2_simhash.h
+++ b/lib/include/uc2/uc2_simhash.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Near-duplicate detection via SimHash.
+ *
+ * SimHash produces a fixed-size fingerprint where similar documents
+ * have fingerprints with small Hamming distance.  Two files are
+ * "near-duplicates" if their SimHash fingerprints differ in at most
+ * a threshold number of bits.
+ *
+ * This detects patched executables, slightly edited documents, and
+ * minor revisions — cases where CDC chunks might not align but the
+ * overall content is structurally similar.
+ *
+ * Usage:
+ *   uint64_t h1 = uc2_simhash(data1, len1);
+ *   uint64_t h2 = uc2_simhash(data2, len2);
+ *   int dist = uc2_hamming(h1, h2);
+ *   if (dist <= 10) // near-duplicates
+ */
+
+#ifndef UC2_SIMHASH_H
+#define UC2_SIMHASH_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* Compute a 64-bit SimHash fingerprint.
+ * Uses 4-byte shingles hashed with FNV-1a, accumulated into a
+ * 64-bit vector where each bit is the majority vote of all
+ * shingle hash bits. */
+uint64_t uc2_simhash(const uint8_t *data, size_t len);
+
+/* Hamming distance between two SimHash fingerprints (0-64). */
+int uc2_hamming(uint64_t a, uint64_t b);
+
+/* Check if two fingerprints are near-duplicates.
+ * threshold: max Hamming distance (recommended: 6-10 for text,
+ * 3-6 for binary). */
+static inline int uc2_is_near_dup(uint64_t a, uint64_t b, int threshold)
+{
+	return uc2_hamming(a, b) <= threshold;
+}
+
+#endif
--- a/lib/src/compress.c
+++ b/lib/src/compress.c
--- a/lib/src/decompress.c
+++ b/lib/src/decompress.c
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: LGPL-3.0-only */
+
 /* UltraCompressor II decompression library.
   Copyright © Jan Bobrowski 2020, 2021
   torinak.com/~jb/unuc2/
@@ -11,11 +13,13 @@
 */

 #include <stdint.h>
+#include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
 #include <assert.h>

 #include "uc2/libuc2.h"
+#include "uc2/uc2_rans.h"

 #if !defined NDEBUG && !defined NDIAG
 #include <stdio.h>
@@ -308,6 +312,7 @@ struct bits {
 	u32 bits;
 	unsigned have_bits;
 	unsigned head, tail;
+	int err;
 	struct reader *rd;
 	u8 buffer[4 << 10];
 };
@@ -318,6 +323,7 @@ static int bits_init(struct bits *bi, struct reader *rd)
 	bi->tail = 0;
 	bi->bits = 0;
 	bi->have_bits = 0;
+	bi->err = 0;
 	bi->rd = rd;
 	return 0;
 }
@@ -331,16 +337,23 @@ static void bits_skip(struct bits *bi, unsigned n)
 static int bits_feed(struct bits *bi, unsigned n)
 {
 	assert(n <= 16);
+	if (bi->err)
+		return bi->err;
 	if (bi->have_bits < n) {
-		unsigned have = bi->tail - bi->head;
-		if (have <= 1) {
-			if (have == 1)
-				bi->buffer[0] = bi->buffer[bi->tail - 1];
+		/* The stream is consumed two bytes at a time; keep reading until
+		   at least a full pair is buffered (a reader may legally return
+		   short counts, including a single byte). */
+		while (bi->tail - bi->head < 2) {
+			unsigned have = bi->tail - bi->head;
+			if (have && bi->head)
+				bi->buffer[0] = bi->buffer[bi->head];
+			bi->head = 0;
 			bi->tail = have;
 			int r = bi->rd->read(bi->rd->context, bi->buffer + have, sizeof bi->buffer - have);
-			if (r <= 0)
-				return r ? r : UC2_Truncated;
-			bi->head = 0;
+			if (r <= 0) {
+				bi->err = r ? r : UC2_Truncated;
+				return bi->err;
+			}
 			bi->tail += r;
 		}
 		bi->bits = bi->bits << 16 | bi->buffer[bi->head] | bi->buffer[bi->head + 1] << 8;
@@ -1000,6 +1013,7 @@ int uc2_extract(
 /* decompress */

 static int decompressor_ultra(struct uc2_context *uc2, unsigned master, unsigned delta, struct reader *rd, struct writer *wr, unsigned limit, u16 *csum);
+static int decompressor_rans(struct uc2_context *uc2, unsigned master_id, struct reader *rd, struct writer *wr, unsigned limit, u16 *csum);

 static int decompressor(struct uc2_context *uc2, int method, struct reader *rd, struct writer *wr, unsigned master, unsigned len, u16 *csum)
 {
@@ -1021,6 +1035,8 @@ ultra:
 	} else if (method >= 21 && method <= 29) {
 		delta = 1;
 		goto ultra;
+	} else if (method == 10) {
+		ret = decompressor_rans(uc2, master, rd, wr, len, csum);
 	} else if (method == 80) {
 		uc2->message = "Turbo compression not implemented";
 		ret = UC2_Unimplemented;
@@ -1029,6 +1045,133 @@ ultra:
 	return ret;
 }

+/* Flush the unwritten window region [*wpos, tail) in ring order. */
+static int rans_flush(struct writer *wr, struct csum *cs, const u8 *buf,
+                      u16 *wpos, u16 tail)
+{
+	while (*wpos != tail) {
+		unsigned n = (u16)(tail - *wpos);
+		unsigned lin = 0x10000u - *wpos;
+		if (n > lin) n = lin;
+		csum_update(cs, buf + *wpos, n);
+		if (wr->write(wr->context, buf + *wpos, n) < 0)
+			return UC2_UserFault;
+		*wpos = (u16)(*wpos + n);
+	}
+	return 0;
+}
+
+/* rANS decompressor (method 10) */
+static int decompressor_rans(struct uc2_context *uc2, unsigned master_id,
+                             struct reader *rd, struct writer *wr,
+                             unsigned limit, u16 *csum)
+{
+	const unsigned EOB = 64001;
+	int ret;
+
+	u8 *buf = u_alloc(uc2, 65536);
+	if (!buf) return UC2_UserFault;
+
+	ret = use_master(uc2, buf, master_id);
+	if (ret < 0) { u_free(uc2, buf); return ret; }
+	u16 tail = (u16)ret;
+	u16 wpos = tail;  /* window position of the next unwritten output byte */
+	struct csum cs;
+	csum_init(&cs);
+	unsigned remaining = limit;
+
+	struct bits bi;
+	ret = bits_init(&bi, rd);
+	if (ret < 0) { u_free(uc2, buf); return ret; }
+
+	while (bits_get(&bi, 1) == 1) {  /* block-present */
+		unsigned nsyms = (unsigned)(bits_get(&bi, 8) & 0xff) << 8;
+		nsyms |= (unsigned)(bits_get(&bi, 8) & 0xff);
+		unsigned rlen = (unsigned)(bits_get(&bi, 8) & 0xff) << 8;
+		rlen |= (unsigned)(bits_get(&bi, 8) & 0xff);
+		if (bi.err) break;
+
+		u32 freqs[344];
+		for (int i = 0; i < 344; i++)
+			freqs[i] = (u32)(bits_get(&bi, 12) & 0xfff);
+		if (bi.err) break;
+
+		struct uc2_rans_table tab;
+		uc2_rans_build_table(&tab, freqs, 344);
+
+		u8 *rdata = u_alloc(uc2, rlen ? rlen : 1);
+		if (!rdata) { bits_destroy(&bi); u_free(uc2, buf); return UC2_UserFault; }
+		for (unsigned i = 0; i < rlen; i++)
+			rdata[i] = (u8)bits_get(&bi, 8);
+		if (bi.err) { u_free(uc2, rdata); break; }
+
+		struct uc2_rans_dec dec;
+		uc2_rans_dec_init(&dec, &tab, rdata, rlen);
+
+		/* Decode all nsyms symbols, including the trailing EOB pair and
+		   its extra bits: stopping at remaining == 0 would leave the bit
+		   cursor mid-block and desynchronize the next block-present bit. */
+		for (unsigned s = 0; s < nsyms; s++) {
+			int sym = uc2_rans_decode(&dec);
+			if (sym < 256) {
+				if (remaining) {
+					buf[tail++] = (u8)sym;
+					remaining--;
+					if ((u16)(tail - wpos) >= 0x8000) {
+						ret = rans_flush(wr, &cs, buf, &wpos, tail);
+						if (ret < 0) { bi.err = ret; break; }
+					}
+				}
+			} else if (sym < 316) {
+				int ds = sym - 256;
+				unsigned dist = (ds < 15) ? ds + 1 :
+					(ds < 30) ? (ds-15+1)*16 + (bits_get(&bi, 4) & 0xf) :
+					(ds < 45) ? (ds-30+1)*256 + (bits_get(&bi, 8) & 0xff) :
+					            (ds-45+1)*4096 + (bits_get(&bi, 12) & 0xfff);
+				if (bi.err) break;
+				if (dist == EOB) { s++; if (s < nsyms) uc2_rans_decode(&dec); break; }
+				s++;
+				if (s >= nsyms) break;
+				int ls = uc2_rans_decode(&dec) - 316;
+				if (ls < 0) ls = 0;
+				unsigned length = (ls < 8) ? ls + 3 :
+					(ls < 16) ? (ls-8)*2+11+(bits_get(&bi,1) & 0x1) :
+					(ls < 24) ? (ls-16)*8+27+(bits_get(&bi,3) & 0x7) :
+					(ls == 24) ? 91+(bits_get(&bi,6) & 0x3f) :
+					(ls == 25) ? 155+(bits_get(&bi,9) & 0x1ff) :
+					(ls == 26) ? 667+(bits_get(&bi,11) & 0x7ff) :
+					             2715+(bits_get(&bi,15) & 0x7fff);
+				if (bi.err) break;
+				for (unsigned j = 0; j < length && remaining > 0; j++) {
+					buf[tail] = buf[(u16)(tail - dist)];
+					tail++; remaining--;
+					if ((u16)(tail - wpos) >= 0x8000) {
+						ret = rans_flush(wr, &cs, buf, &wpos, tail);
+						if (ret < 0) { bi.err = ret; break; }
+					}
+				}
+				if (bi.err) break;
+			}
+		}
+		u_free(uc2, rdata);
+		if (bi.err) break;
+	}
+	if (bi.err) {
+		bits_destroy(&bi);
+		u_free(uc2, buf);
+		return bi.err;
+	}
+
+	/* Flush remaining output */
+	ret = rans_flush(wr, &cs, buf, &wpos, tail);
+	if (ret < 0) { bits_destroy(&bi); u_free(uc2, buf); return ret; }
+
+	bits_destroy(&bi);
+	u_free(uc2, buf);
+	if (csum) *csum = csum_get(&cs);
+	return limit - remaining;
+}
+
 /* cbuf */

 struct cbuffer {
@@ -1117,7 +1260,7 @@ enum {
 	NumLenCodes = NumDeltaCodes + NumExtraCodes,
 };

-const u8 vval[NumDeltaCodes][NumDeltaCodes] = {
+static const u8 vval[NumDeltaCodes][NumDeltaCodes] = {
 	{ 0,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1},
 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0},
 	{ 2, 1, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0},
@@ -1519,3 +1662,26 @@ const char *uc2_message(struct uc2_context *uc2, int ret)
 	}
 	return s;
 }
+
+/* Decompress the built-in SuperMaster (49152 bytes) into caller's buffer.
+   Returns 49152 on success, negative UC2_* error code on failure. */
+static void *sm_alloc(void *ctx, unsigned size) { (void)ctx; return malloc(size); }
+static void sm_free(void *ctx, void *ptr) { (void)ctx; free(ptr); }
+
+int uc2_get_supermaster(void *buf, unsigned buf_size)
+{
+	if (buf_size < 49152)
+		return UC2_UserFault;
+
+	struct uc2_io io = { .alloc = sm_alloc, .free = sm_free };
+	struct uc2_context *uc2 = uc2_open(&io, NULL);
+	if (!uc2)
+		return UC2_UserFault;
+
+	int ret = resolve_master(uc2, SuperMaster);
+	if (ret >= 0)
+		memcpy(buf, uc2->supermaster, 49152);
+
+	uc2_close(uc2);
+	return ret < 0 ? ret : 49152;
+}
--- a/lib/src/list.h
+++ b/lib/src/list.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: LGPL-3.0-only */
+
 /* list.h by Jan Bobrowski. Inspired by list.h from Linux */

 #ifndef LIST_H
--- a/lib/src/uc2_blake3.c
+++ b/lib/src/uc2_blake3.c
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* BLAKE3 cryptographic hashing — simplified single-threaded implementation.
+ *
+ * Based on the BLAKE3 specification (github.com/BLAKE3-team/BLAKE3).
+ * Uses the BLAKE2s round function with Bao tree structure.
+ *
+ * This implementation handles the common case (single chunk, sequential
+ * hashing) and supports the tree structure for inputs > 1024 bytes. */
+
+#include "uc2/uc2_blake3.h"
+#include <string.h>
+
+/* BLAKE3 IV (same as BLAKE2s): initial chaining value, and state words 8..11 in compress() */
+static const uint32_t IV[8] = {
+	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
+};
+
+/* Flags: OR-ed together and passed to compress(), which stores them in the last state word */
+enum {
+	CHUNK_START         = 1 << 0,	/* added while no block of the chunk has been compressed */
+	CHUNK_END           = 1 << 1,	/* added by chunk_finalize() */
+	PARENT              = 1 << 2,	/* used by parent_cv() */
+	ROOT                = 1 << 3,	/* used only in uc2_blake3_final() */
+};
+
+/* Message schedule (BLAKE3 permutation): row r lists the message word indices fed, in order, to round r */
+static const uint8_t MSG_SCHEDULE[7][16] = {
+	{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15},
+	{2,6,3,10,7,0,4,13,1,11,12,5,9,14,15,8},
+	{3,4,10,12,13,2,7,14,6,5,9,0,11,15,8,1},
+	{10,7,12,9,14,3,13,15,4,0,11,2,5,8,1,6},
+	{12,13,9,11,15,10,14,8,7,2,5,3,0,1,6,4},
+	{9,14,11,5,8,12,15,1,13,3,0,10,2,6,4,7},
+	{11,15,5,0,1,9,8,6,14,10,2,12,3,4,7,13},
+};
+
+static uint32_t rotr(uint32_t x, int n) { n &= 31; return n ? (x >> n) | (x << (32 - n)) : x; } /* rotate right; n is reduced mod 32 so n == 0 never shifts by 32 (undefined) */
+
+static void g(uint32_t *s, int a, int b, int c, int d, uint32_t mx, uint32_t my)
+{	/* Quarter-round: mixes state words a, b, c, d with message words mx, my. */
+	s[a] += s[b] + mx;  s[d] ^= s[a];  s[d] = rotr(s[d], 16);
+	s[c] += s[d];       s[b] ^= s[c];  s[b] = rotr(s[b], 12);
+	s[a] += s[b] + my;  s[d] ^= s[a];  s[d] = rotr(s[d], 8);
+	s[c] += s[d];       s[b] ^= s[c];  s[b] = rotr(s[b], 7);
+}
+
+static void round_fn(uint32_t *s, const uint32_t *m)
+{	/* One round: the first four g() calls mix columns, the last four mix diagonals. */
+	static const uint8_t idx[8][4] = { {0,4,8,12}, {1,5,9,13}, {2,6,10,14}, {3,7,11,15},
+	                                   {0,5,10,15}, {1,6,11,12}, {2,7,8,13}, {3,4,9,14} };
+	for (int k = 0; k < 8; k++)
+		g(s, idx[k][0], idx[k][1], idx[k][2], idx[k][3], m[2*k], m[2*k+1]);
+}
+
+static void compress(const uint32_t cv[8], const uint8_t block[64],
+                     uint8_t block_len, uint64_t counter, uint8_t flags,
+                     uint32_t out[16])
+{
+	/* Load the 64-byte block as sixteen little-endian 32-bit words. */
+	uint32_t words[16];
+	for (int w = 0; w < 16; w++) {
+		const uint8_t *b = block + 4 * w;
+		words[w] = (uint32_t)b[0] | (uint32_t)b[1] << 8 | (uint32_t)b[2] << 16 | (uint32_t)b[3] << 24;
+	}
+	/* State: chaining value, four IV words, counter (low, high), block length, flags. */
+	uint32_t st[16];
+	for (int k = 0; k < 8; k++) st[k] = cv[k];
+	for (int k = 0; k < 4; k++) st[8 + k] = IV[k];
+	st[12] = (uint32_t)counter; st[13] = (uint32_t)(counter >> 32);
+	st[14] = block_len;
+	st[15] = flags;
+	for (int rnd = 0; rnd < 7; rnd++) {	/* seven rounds, each on a re-ordered message */
+		uint32_t sched[16];
+		for (int k = 0; k < 16; k++) sched[k] = words[MSG_SCHEDULE[rnd][k]];
+		round_fn(st, sched);
+	}
+	for (int k = 0; k < 16; k++)	/* low half folds the state; high half mixes in the input cv */
+		out[k] = k < 8 ? st[k] ^ st[k + 8] : st[k] ^ cv[k - 8];
+}
+
+static void cv_from_out(const uint32_t out[16], uint32_t cv[8])
+{	/* The chaining value is the first eight words of a compress() output. */
+	for (int w = 7; w >= 0; w--) cv[w] = out[w];
+}
+
+/* Compress one block of the current chunk into ctx->cv and count it.
+   CHUNK_START is added when it is the first block compressed for the chunk. */
+static void chunk_block(struct uc2_blake3 *ctx, const uint8_t block[64],
+                        uint8_t block_len, uint8_t extra_flags)
+{
+	uint32_t words[16];
+	uint8_t start = ctx->blocks_compressed == 0 ? CHUNK_START : 0;
+	uint8_t all_flags = ctx->flags | extra_flags | start;
+	compress(ctx->cv, block, block_len, ctx->chunk_counter, all_flags, words);
+	cv_from_out(words, ctx->cv);
+	ctx->blocks_compressed++;
+}
+
+/* Finalize a chunk: compress the buffered bytes (zero-padded to 64) with
+   CHUNK_END and store the resulting chaining value in cv_out.  Only reads
+   from ctx. */
+static void chunk_finalize(struct uc2_blake3 *ctx, uint32_t cv_out[8])
+{
+	uint8_t padded[64] = {0};
+	uint32_t words[16];
+	uint8_t end_flags = ctx->flags | CHUNK_END;
+	if (ctx->blocks_compressed == 0)
+		end_flags |= CHUNK_START;
+	memcpy(padded, ctx->buf, ctx->buf_len);
+	compress(ctx->cv, padded, ctx->buf_len, ctx->chunk_counter, end_flags, words);
+	cv_from_out(words, cv_out);
+}
+
+/* Merge two chaining values as a parent node: the 64-byte block is
+   left || right (little-endian words), compressed from the IV with PARENT. */
+static void parent_cv(const uint32_t left[8], const uint32_t right[8],
+                      uint32_t out_cv[8])
+{
+	uint8_t block[64];
+	uint32_t words[16];
+
+	/* Both inputs are serialized before out_cv is written (push_cv aliases them). */
+	for (int w = 0; w < 16; w++) {
+		uint32_t v = w < 8 ? left[w] : right[w - 8];
+		uint8_t *dst = block + 4 * w;
+		dst[0] = (uint8_t)v;
+		dst[1] = (uint8_t)(v >> 8);
+		dst[2] = (uint8_t)(v >> 16);
+		dst[3] = (uint8_t)(v >> 24);
+	}
+
+	compress(IV, block, 64, 0, PARENT, words);
+	cv_from_out(words, out_cv);
+}
+
+static void push_cv(struct uc2_blake3 *ctx, const uint32_t cv[8])
+{
+	/* One stack entry is popped and merged for every trailing 1 bit of the
+	   chunk counter; the merged value is then pushed as the new top. */
+	uint32_t merged[8];
+	memcpy(merged, cv, sizeof merged);
+	for (uint64_t n = ctx->chunk_counter; (n & 1) != 0; n >>= 1) {
+		uint32_t *top = &ctx->cv_stack[--ctx->cv_stack_len * 8];
+		parent_cv(top, merged, merged);
+	}
+	uint32_t *slot = &ctx->cv_stack[ctx->cv_stack_len * 8];
+	memcpy(slot, merged, sizeof merged);
+	ctx->cv_stack_len++;
+}
+
+void uc2_blake3_init(struct uc2_blake3 *ctx)
+{	/* Zero every field, then seed the chaining value with the IV. */
+	memset(ctx, 0, sizeof(struct uc2_blake3));
+	memcpy(ctx->cv, IV, sizeof IV);
+}
+
+/* Absorb len more bytes of input.
+ *
+ * A full 64-byte block is kept in ctx->buf until more input arrives, because
+ * the last block of a chunk has to be compressed with CHUNK_END (and the last
+ * block of the message with ROOT), and which block is last is only known once
+ * the input continues or uc2_blake3_final() is called.  The previous code
+ * compressed the 16th block of a chunk without CHUNK_END and then hashed an
+ * extra empty block, which does not match the BLAKE3 chunk definition. */
+void uc2_blake3_update(struct uc2_blake3 *ctx, const void *data, size_t len)
+{
+	const uint8_t *p = data;
+	while (len > 0) {
+		if (ctx->buf_len == 64) {
+			if (ctx->blocks_compressed + 1 == UC2_BLAKE3_CHUNK_LEN / 64) {
+				/* Buffered block completes the chunk and more input
+				   follows: close the chunk with CHUNK_END, push its
+				   chaining value and start the next chunk. */
+				uint32_t chunk_cv[8];
+				chunk_finalize(ctx, chunk_cv);
+				push_cv(ctx, chunk_cv);
+
+				ctx->chunk_counter++;
+				memcpy(ctx->cv, IV, 32);
+				ctx->blocks_compressed = 0;
+				ctx->flags = 0;
+			} else {
+				/* Interior block of the current chunk */
+				chunk_block(ctx, ctx->buf, 64, 0);
+			}
+			ctx->buf_len = 0;
+		}
+
+		size_t take = 64 - ctx->buf_len;
+		if (take > len) take = len;
+		memcpy(ctx->buf + ctx->buf_len, p, take);
+		ctx->buf_len += (uint8_t)take;
+		p += take;
+		len -= take;
+	}
+}
+
+/* Write the digest of everything absorbed so far; ctx itself is not modified
+ * (a copy is finalized).  ROOT is applied to the compression that forms the
+ * root node: the chunk's last block for a single-chunk message, otherwise the
+ * topmost parent.  CHUNK_START is set only if no block of the chunk was
+ * compressed before.  NOTE: digests of inputs longer than 64 bytes therefore
+ * differ from those of the previous (non-conforming) implementation. */
+void uc2_blake3_final(const struct uc2_blake3 *ctx, uint8_t out[UC2_BLAKE3_OUT_LEN])
+{
+	struct uc2_blake3 tmp = *ctx;
+	uint32_t root_cv[8];	/* chaining value fed to the root compression */
+	uint8_t block[64];
+	uint8_t block_len, flags;
+
+	memset(block, 0, 64);
+	if (tmp.cv_stack_len == 0) {
+		/* Single chunk: the buffered bytes form the root block */
+		memcpy(root_cv, tmp.cv, 32);
+		memcpy(block, tmp.buf, tmp.buf_len);
+		block_len = tmp.buf_len;
+		flags = tmp.flags | CHUNK_END | ROOT;
+		if (tmp.blocks_compressed == 0) flags |= CHUNK_START;
+	} else {
+		/* Multi-chunk: finalize the current chunk ... */
+		uint32_t cv[8];
+		chunk_finalize(&tmp, cv);
+
+		/* ... merge it with every stack entry except the bottom one ... */
+		for (int i = (int)tmp.cv_stack_len - 1; i > 0; i--) {
+			uint32_t merged[8];
+			parent_cv(&tmp.cv_stack[i * 8], cv, merged);
+			memcpy(cv, merged, 32);
+		}
+		/* ... and build the root parent block: stack[0] || cv */
+		for (int i = 0; i < 8; i++) {
+			uint32_t l = tmp.cv_stack[i], r = cv[i];
+			block[i*4]      = (uint8_t)(l);
+			block[i*4+1]    = (uint8_t)(l>>8);
+			block[i*4+2]    = (uint8_t)(l>>16);
+			block[i*4+3]    = (uint8_t)(l>>24);
+			block[32+i*4]   = (uint8_t)(r);
+			block[32+i*4+1] = (uint8_t)(r>>8);
+			block[32+i*4+2] = (uint8_t)(r>>16);
+			block[32+i*4+3] = (uint8_t)(r>>24);
+		}
+		memcpy(root_cv, IV, 32);
+		block_len = 64;
+		flags = PARENT | ROOT;
+	}
+
+	/* Root compression; the digest is the first eight output words, little-endian */
+	uint32_t result[16];
+	compress(root_cv, block, block_len, 0, flags, result);
+	for (int i = 0; i < 8; i++) {
+		out[i*4]   = (uint8_t)(result[i]);
+		out[i*4+1] = (uint8_t)(result[i]>>8);
+		out[i*4+2] = (uint8_t)(result[i]>>16);
+		out[i*4+3] = (uint8_t)(result[i]>>24);
+	}
+}
+
+void uc2_blake3_hash(const void *data, size_t len, uint8_t out[UC2_BLAKE3_OUT_LEN])
+{	/* One-shot helper: init, absorb the whole buffer, write the digest. */
+	struct uc2_blake3 hasher;
+	uc2_blake3_init(&hasher);
+	uc2_blake3_update(&hasher, data, len);
+	uc2_blake3_final(&hasher, out);
+}
+
+int uc2_blake3_equal(const uint8_t a[UC2_BLAKE3_OUT_LEN],
+                     const uint8_t b[UC2_BLAKE3_OUT_LEN])
+{	/* Compares every byte (no early exit); returns 1 when equal, else 0. */
+	unsigned acc = 0;
+	for (int k = 0; k != UC2_BLAKE3_OUT_LEN; k++)
+		acc |= (unsigned)(a[k] ^ b[k]);
+	return acc == 0 ? 1 : 0;
+}
--- a/lib/src/uc2_blockstore.c
+++ b/lib/src/uc2_blockstore.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Cross-archive block store for content-addressable deduplication.
+ *
+ * Chunks are stored as individual files named by their 64-bit hash
+ * (hex encoded).  This is simple and portable  --  no database needed.
+ * For large stores, a two-level directory structure (first 2 hex chars
+ * as subdirectory) prevents filesystem performance issues. */
+
+#include "uc2/uc2_blockstore.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <errno.h>
+
+static void hash_path(const struct uc2_blockstore *bs, uint64_t hash,
+                      char *buf, size_t buf_size)
+{
+	/* Layout: <store>/<top byte, 2 hex digits>/<full hash, 16 hex digits> */
+	unsigned top = (unsigned)(hash >> 56) & 0xFF;
+	unsigned long long full = (unsigned long long)hash;
+	snprintf(buf, buf_size, "%s/%02x/%016llx", bs->path, top, full);
+}
+
+static void ensure_subdir(const struct uc2_blockstore *bs, uint64_t hash)
+{	/* Best effort: the mkdir() result is deliberately not inspected. */
+	char subdir[4096];
+	unsigned top = (unsigned)(hash >> 56) & 0xFF;
+	snprintf(subdir, sizeof subdir, "%s/%02x", bs->path, top);
+	mkdir(subdir, 0755);
+}
+
+int uc2_blockstore_open(struct uc2_blockstore *bs, const char *path)
+{
+	/* Start from a zeroed handle that owns a private copy of the path. */
+	memset(bs, 0, sizeof *bs);
+	bs->path = strdup(path);
+	if (bs->path == NULL)
+		return -1;
+	/* An already existing store directory is fine; any other error fails. */
+	if (mkdir(path, 0755) == 0 || errno == EEXIST)
+		return 0;
+	free(bs->path);
+	bs->path = NULL;
+	return -1;
+}
+
+int uc2_blockstore_has(const struct uc2_blockstore *bs, uint64_t hash)
+{	/* A block counts as present when stat() succeeds on its path. */
+	struct stat info;
+	char block_file[4096];
+	hash_path(bs, hash, block_file, sizeof block_file);
+	return stat(block_file, &info) == 0 ? 1 : 0;
+}
+
+int uc2_blockstore_ingest(struct uc2_blockstore *bs,
+                          const struct uc2_merkle *tree,
+                          const uint8_t *data, size_t len)
+{	/* Stores every chunk of tree that is not yet present; returns the number of block files written. */
+	int new_chunks = 0;
+	for (int i = 0; i < tree->nchunks; i++) {
+		uint64_t h = tree->chunks[i].hash;
+		uint32_t off = tree->chunks[i].offset;
+		uint32_t clen = tree->chunks[i].length;
+
+		/* 64-bit sum: off + clen must not wrap in 32 bits and slip past the bound */
+		if ((uint64_t)off + clen > len) continue;
+		if (uc2_blockstore_has(bs, h)) {
+			bs->saved_bytes += clen;
+			continue;
+		}
+
+		ensure_subdir(bs, h);
+		char fpath[4096];
+		hash_path(bs, h, fpath, sizeof fpath);
+		FILE *f = fopen(fpath, "wb");
+		if (!f) continue;
+		int ok = fwrite(data + off, 1, clen, f) == clen;
+		if (fclose(f) != 0) ok = 0;
+		if (!ok) { remove(fpath); continue; }	/* never leave a truncated block that would later count as present */
+		bs->nblocks++;
+		bs->total_bytes += clen;
+		new_chunks++;
+	}
+	return new_chunks;
+}
+
+int uc2_blockstore_read(const struct uc2_blockstore *bs,
+                        uint64_t hash, uint8_t *buf, size_t buf_size)
+{	/* Returns the byte count read (at most buf_size), or -1 if the block file cannot be opened. */
+	char block_file[4096];
+	hash_path(bs, hash, block_file, sizeof block_file);
+
+	FILE *in = fopen(block_file, "rb");
+	if (in == NULL) return -1;
+	size_t got = fread(buf, 1, buf_size, in);
+	fclose(in);
+	return (int)got;
+}
+
+void uc2_blockstore_close(struct uc2_blockstore *bs)
+{	/* Release the path copy, then zero the whole handle (counters included). */
+	free(bs->path);
+	memset(bs, 0, sizeof *bs);
+}
--- a/lib/src/uc2_cdc.c
+++ b/lib/src/uc2_cdc.c
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Content-defined chunking (CDC) for UC2 deduplication.
+ *
+ * Gear hash: each byte updates the hash by shifting left by one bit and
+ * adding a pre-computed random table entry.  This gives uniform
+ * distribution and O(1) per-byte cost.  A chunk boundary is detected
+ * when (hash & mask) == 0, giving an average chunk size of 2^bits.
+ *
+ * Reference: "A Framework for Analyzing and Improving Content-Based
+ * Chunking Algorithms" (Eshghi and Tang, HP Labs TR HPL-2005-30, 2005).
+ */
+
+#include "uc2/uc2_cdc.h"
+#include <string.h>
+
+/* Gear hash lookup table: 256 random 32-bit values.
+   Generated from a PRNG seeded with the string "UC2 Gear CDC". */
+static const uint32_t gear_table[256] = {
+	0x5c27b2e4, 0x8a3b9c01, 0xf7e52d9f, 0x3d14a867, 0xc6f893b2, 0x91d047e5, 0x2e6b1fa8, 0xe4a37c63,
+	0x7f582b1d, 0xb90c64f6, 0x46d1e823, 0x13a95f7b, 0xd87e24c9, 0xa5430168, 0x6c9fb3d4, 0x028e7a1f,
+	0xfb614d93, 0x3742c856, 0x84b50fea, 0xc1d6937e, 0x590a2eb1, 0xaef41c67, 0x67c385d2, 0x0dbf694a,
+	0xe2984513, 0x76ab3dc8, 0x4517e29f, 0xb86a0c54, 0x1e23f7b6, 0xd3c58e41, 0x8a71b02d, 0xf09d43e8,
+	0x2b06d175, 0x9f48a623, 0xc3e71bdf, 0x54b2f906, 0x1d65c48a, 0xe83a074b, 0x72196ed3, 0xa4de8b17,
+	0x3fac5264, 0xd10738b9, 0x6ec4a1f5, 0x8593d642, 0x4a7f1d8e, 0xf6b2e071, 0x2748bc3a, 0xc981459d,
+	0x50f37e26, 0xbe269ac3, 0x13da4587, 0x9c07b1f4, 0x614ed368, 0xa7bc2f15, 0xd4f56c89, 0x38a19047,
+	0x876cb5e2, 0xe53d48ab, 0x42801d76, 0xfc17a93c, 0x0b9e62d1, 0x7654cf08, 0xcda37b94, 0x19e80e5f,
+	0xab3c91d7, 0x6271f4a6, 0xd8bf2843, 0x3506de71, 0xf94a637b, 0x8ed5b02c, 0x471c89e5, 0x0a63d4f9,
+	0xc4982e17, 0x7db15a8c, 0x12ef4360, 0xb637c9a5, 0x5f740ed8, 0xe1a8b524, 0x28c96f13, 0x93014876,
+	0xdae27b9d, 0x3d8f15c2, 0x815ca04e, 0xf47e6d39, 0x4b93d2f7, 0xa620be81, 0x69d7014a, 0xc5b4f836,
+	0x1c486aeb, 0x70a5931d, 0xef12dc64, 0x8279b508, 0xb6c34a9f, 0x57e82173, 0x0a1f7dc6, 0xde64c952,
+	0x43b0a819, 0xad5e37e4, 0x6897cb71, 0xf1240f9c, 0x342bc6a5, 0x9d1852e8, 0xc7fa9b34, 0x586d4e07,
+	0xb2a1d3f6, 0x2536ec89, 0x7ecb1047, 0xe408a5bd, 0x0f957e62, 0xd3ca81a0, 0x917f2d14, 0xfa42b6d9,
+	0x45d968b3, 0xbbe50c37, 0x1274f1e5, 0x6a9e3db8, 0xcf538241, 0x87a0c96f, 0x5eb75423, 0x31dc0fa7,
+	0xa41b63c4, 0xd96fae58, 0x4cd2891e, 0xf5863072, 0x0b17e4a6, 0x7c60bd9d, 0xe39845c1, 0xb85e2f17,
+	0x21a37689, 0x9e4fc153, 0xd702dba4, 0x5384e96f, 0xaf51067c, 0x64c83db1, 0xc2e7f548, 0x3a198c24,
+	0xf06b47d2, 0x85d2a19e, 0x4f3e5c63, 0x19c78b07, 0xe6a402db, 0x7b59d3f4, 0xbd146ea5, 0x0e82c917,
+	0xc3f01b76, 0x5da564a9, 0x32b9f852, 0xa847201c, 0x6e9cb7e3, 0x81635d38, 0x470ad1bf, 0xfc718946,
+	0x16ce3fa2, 0x9ab045e7, 0xd52c6814, 0x43f9bc79, 0xb8e213a6, 0x2f174e51, 0x657d90cd, 0xcda4f738,
+	0x0198269b, 0x7e3cdb54, 0xe26f8013, 0x39c154e7, 0xa45db39c, 0xd792e841, 0x58067f2b, 0xb3adc466,
+	0x1b41a5d0, 0x76e83917, 0xcf250b74, 0x84b7d2a8, 0x4dc69e53, 0xf01a47bf, 0x28f361c4, 0x93758c19,
+	0xe5c24037, 0x3a8ef956, 0x7e51b682, 0xc107da4f, 0x5269031d, 0xad84c7e6, 0x6eb3589a, 0x0f4ea143,
+	0xd8356fd7, 0x417c9e2b, 0xba20d364, 0x25f745a8, 0xf6c11e79, 0x7db8a30c, 0x830f52b4, 0x49617cd9,
+	0x1cda0e63, 0xa7b23148, 0xde46c5f2, 0x63895db7, 0xb21ea481, 0x574c6f0e, 0x0a8392c5, 0xc5f7b84a,
+	0x380e41d6, 0xed72d923, 0x91c5a687, 0x4a19f054, 0xf4a83b19, 0x673d8ec2, 0xbce1470b, 0x01567da4,
+	0xd8abc196, 0x2490534e, 0x7de7bf83, 0xc3348217, 0x5f629ed5, 0xa6b70468, 0x1c43d7a9, 0x89f56b30,
+	0x4508cfe1, 0xf27a1694, 0xb81e5d47, 0x05a9c3ba, 0xdac28f62, 0x61b740d5, 0x9e3f254c, 0x37d4a8e1,
+	0x8b612c97, 0xc419f035, 0x5d8e7ba6, 0xa2f3d14c, 0x16458db9, 0xeb27c673, 0x70da0e28, 0xbf9c53e4,
+	0x42a1679f, 0xde38b102, 0x95c42f56, 0x037bd8a1, 0xfc1645ed, 0x69ea9cb3, 0xad5f0374, 0x3487e1c9,
+	0xc0b29d15, 0x5e617a48, 0x8714c6bf, 0x1da93273, 0xf2d5e804, 0x764b5f96, 0xab86031d, 0x41c8b4e2,
+	0xd53a6927, 0x0f91dc83, 0xe8450b5a, 0x72f7a1c6, 0xbc234d90, 0x2dbe7641, 0x960cf5bd, 0x5b618a49,
+};
+
+uint32_t uc2_gear_hash(const uint8_t *data, size_t len)
+{	/* Folds every byte in order: hash = (hash << 1) + gear_table[byte], starting from 0. */
+	size_t left = len;
+	uint32_t hash = 0;
+	while (left--) hash = (hash << 1) + gear_table[*data++];
+	return hash;
+}
+
+void uc2_chunker_init(struct uc2_chunker *c, int bits,
+                      size_t min_chunk, size_t max_chunk)
+{	/* bits is clamped to 8..20; a zero min/max defaults to 2^(bits-2) / 2^(bits+2). */
+	int b = bits < 8 ? 8 : (bits > 20 ? 20 : bits);
+	size_t avg = (size_t)1 << b;
+	c->mask = ((uint32_t)1 << b) - 1;
+	c->min_chunk = min_chunk != 0 ? min_chunk : avg >> 2;
+	c->max_chunk = max_chunk != 0 ? max_chunk : avg << 2;
+	c->pos = 0;
+}
+
+void uc2_chunker_reset(struct uc2_chunker *c)
+{	/* Rewind to offset 0; mask and size limits are left as configured. */
+	c->pos = 0;
+}
+
+int uc2_chunker_next(struct uc2_chunker *c,
+                     const uint8_t *data, size_t len,
+                     size_t *chunk_off, size_t *chunk_len)
+{
+	/* Emits the next chunk [*chunk_off, *chunk_off + *chunk_len) and advances
+	   c->pos.  Returns 0 without touching the outputs once pos >= len.
+	   NOTE(review): when the emitted chunk runs to the end of the data because
+	   no boundary was found, the outputs ARE written but 0 is returned; a chunk
+	   ending on a real boundary returns 1 even at the end of the data.  Confirm
+	   that callers handle both cases. */
+	const size_t first = c->pos;
+	if (first >= len)
+		return 0;
+
+	/* The chunk may not extend past first + max_chunk or the end of the data. */
+	size_t limit = first + c->max_chunk;
+	if (limit > len)
+		limit = len;
+	/* Boundaries are only tested from first + min_chunk onwards. */
+	size_t test_from = first + c->min_chunk;
+	if (test_from > limit)
+		test_from = limit;
+	size_t cut = limit;	/* default: no boundary found */
+	int found = 0;
+	uint32_t hash = 0;
+	for (size_t at = first; at < limit && !found; at++) {
+		hash = (hash << 1) + gear_table[data[at]];
+		if (at >= test_from && (hash & c->mask) == 0) {
+			cut = at + 1;
+			found = 1;
+		}
+	}
+	*chunk_off = first;
+	*chunk_len = cut - first;
+	c->pos = cut;
+	return found || cut < len;
+}
+
+uint32_t uc2_fnv1a(const uint8_t *data, size_t len)
+{
+	/* 32-bit FNV-1a: xor each byte in, then multiply by 16777619. */
+	size_t left = len;
+	uint32_t hash = 2166136261u;
+	while (left-- > 0)
+		hash = (hash ^ *data++) * 16777619u;
+	return hash;
+}
--- a/lib/src/uc2_delta.c
+++ b/lib/src/uc2_delta.c
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Delta compression for file versioning.
+ *
+ * Uses a hash-based matching approach: hash all 4-byte windows in the
+ * source, then scan the target looking for matching regions.  Matched
+ * regions become COPY instructions, unmatched regions become INSERT.
+ *
+ * This is a simplified version of the vcdiff/xdelta algorithm. */
+
+#include "uc2/uc2_delta.h"
+#include <stdlib.h>
+#include <string.h>
+
+#define HASH_SIZE 65536
+#define WINDOW 4
+#define MIN_MATCH 8
+
+static uint32_t roll_hash(const uint8_t *p)
+{	/* The four bytes at p, read as one little-endian 32-bit word. */
+	uint32_t lo = (uint32_t)p[0] | (uint32_t)p[1] << 8;
+	return lo | (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24;
+}
+
+static void put32(uint8_t *p, uint32_t v)
+{	/* Store v at p as four bytes, least-significant first. */
+	for (int k = 0; k < 4; k++)
+		p[k] = (uint8_t)(v >> (8 * k));
+}
+
+static uint32_t get32(const uint8_t *p)
+{	/* Load four bytes from p, least-significant first. */
+	uint32_t hi = (uint32_t)p[3] << 24 | (uint32_t)p[2] << 16;
+	return hi | (uint32_t)p[1] << 8 | (uint32_t)p[0];
+}
+
+/* Growable output buffer */
+struct obuf {
+	uint8_t *data;	/* storage managed with realloc() */
+	size_t len, cap;	/* bytes used / bytes allocated */
+};
+
+static int obuf_append(struct obuf *o, const void *data, size_t len)
+{	/* Append len bytes, doubling the capacity (256 minimum) as needed; -1 if realloc fails. */
+	const size_t need = o->len + len;
+	if (need > o->cap) {
+		size_t grown = o->cap != 0 ? o->cap * 2 : 256;
+		while (grown < need) grown *= 2;
+		uint8_t *bigger = realloc(o->data, grown);
+		if (bigger == NULL) return -1;
+		o->data = bigger; o->cap = grown;
+	}
+	memcpy(o->data + o->len, data, len);
+	o->len = need;
+	return 0;
+}
+
+static int emit_copy(struct obuf *o, uint32_t offset, uint32_t length)
+{	/* COPY record: opcode 0x01, then source offset and length (32-bit little-endian each). */
+	uint8_t rec[1 + 4 + 4];
+	rec[0] = 0x01;
+	put32(&rec[1], offset);
+	put32(&rec[5], length);
+	return obuf_append(o, rec, sizeof rec);
+}
+
+static int emit_insert(struct obuf *o, const uint8_t *data, uint32_t length)
+{	/* INSERT record: opcode 0x02, 32-bit little-endian length, then the literal bytes. */
+	uint8_t head[1 + 4] = { 0x02 };
+	put32(head + 1, length);
+	if (obuf_append(o, head, sizeof head) < 0)
+		return -1;
+	return obuf_append(o, data, length);
+}
+
+/* Encode tgt as a delta against src.  Returns 0 and a malloc'ed delta (caller frees) on success;
+   -1 with *out_delta == NULL if tgt_len exceeds the 32-bit header field or memory runs out. */
+int uc2_delta_encode(const uint8_t *src, size_t src_len,
+                     const uint8_t *tgt, size_t tgt_len,
+                     uint8_t **out_delta, size_t *out_delta_len)
+{
+	*out_delta = NULL;
+	*out_delta_len = 0;
+	if ((uint64_t)tgt_len > 0xFFFFFFFFu) return -1;
+
+	/* Build hash table of source positions (-1 = empty slot) */
+	int32_t *htab = malloc(HASH_SIZE * sizeof *htab);
+	if (!htab) return -1;
+	for (size_t i = 0; i < HASH_SIZE; i++) htab[i] = -1;
+
+	if (src_len >= WINDOW) {
+		for (size_t i = 0; i <= src_len - WINDOW; i++) {
+			uint32_t h = roll_hash(src + i) & (HASH_SIZE - 1);
+			htab[h] = (int32_t)i;  /* last occurrence wins */
+		}
+	}
+
+	struct obuf out = {0};
+	int err = 0;  /* sticky: non-zero once any append has failed */
+
+	/* Header: magic + target length (32-bit little-endian) */
+	uint8_t hdr[8] = {'U','C','2','D', 0,0,0,0};
+	put32(hdr + 4, (uint32_t)tgt_len);
+	err |= obuf_append(&out, hdr, 8);
+
+	/* Scan target, emit COPY or INSERT */
+	size_t tpos = 0;
+	size_t insert_start = 0;
+	int in_insert = 0;
+
+	while (!err && tpos + WINDOW <= tgt_len) {
+		uint32_t h = roll_hash(tgt + tpos) & (HASH_SIZE - 1);
+		int32_t spos = htab[h];
+
+		if (spos >= 0 && (size_t)spos + WINDOW <= src_len &&
+		    memcmp(src + spos, tgt + tpos, WINDOW) == 0) {
+			/* Extend match forward */
+			size_t match_len = WINDOW;
+			while (tpos + match_len < tgt_len &&
+			       (size_t)spos + match_len < src_len &&
+			       src[spos + match_len] == tgt[tpos + match_len])
+				match_len++;
+
+			if (match_len >= MIN_MATCH) {
+				/* Flush pending insert */
+				if (in_insert && tpos > insert_start)
+					err |= emit_insert(&out, tgt + insert_start,
+					                   (uint32_t)(tpos - insert_start));
+				in_insert = 0;
+
+				err |= emit_copy(&out, (uint32_t)spos, (uint32_t)match_len);
+				tpos += match_len;
+				insert_start = tpos;
+				continue;
+			}
+		}
+
+		if (!in_insert) {
+			insert_start = tpos;
+			in_insert = 1;
+		}
+		tpos++;
+	}
+
+	/* Trailing bytes: a pending insert, or the bytes left over after the last copy */
+	size_t ins_start = in_insert ? insert_start : tpos;
+	if (!err && tgt_len > ins_start)
+		err |= emit_insert(&out, tgt + ins_start, (uint32_t)(tgt_len - ins_start));
+	/* End marker */
+	uint8_t end = 0x00;
+	if (!err) err |= obuf_append(&out, &end, 1);
+
+	free(htab);
+	if (err) { free(out.data); return -1; }  /* discard the partial delta */
+	*out_delta = out.data;
+	*out_delta_len = out.len;
+	return 0;
+}
+
+/* Rebuild the target from src and a delta.  Returns 0 and a malloc'ed buffer
+   (caller frees); -1 if the delta is malformed or its output length differs
+   from the length announced in the header. */
+int uc2_delta_apply(const uint8_t *src, size_t src_len,
+                    const uint8_t *delta, size_t delta_len,
+                    uint8_t **out_tgt, size_t *out_tgt_len)
+{
+	*out_tgt = NULL;
+	*out_tgt_len = 0;
+	if (delta_len < 9 || memcmp(delta, "UC2D", 4) != 0)
+		return -1;
+
+	uint32_t tgt_len = get32(delta + 4);
+	/* malloc(0) is implementation-defined; always request at least one byte. */
+	uint8_t *tgt = malloc(tgt_len ? tgt_len : 1);
+	if (!tgt) return -1;
+	size_t dpos = 8;  /* after header */
+	size_t tpos = 0;
+
+	/* Bounds are tested as "length > space left" so that no sum can wrap. */
+	while (dpos < delta_len) {
+		uint8_t op = delta[dpos++];
+		if (op == 0x00) break;  /* END */
+		if (op == 0x01) {  /* COPY */
+			if (delta_len - dpos < 8) goto err;
+			uint32_t offset = get32(delta + dpos); dpos += 4;
+			uint32_t length = get32(delta + dpos); dpos += 4;
+			if (offset > src_len || length > src_len - offset) goto err;
+			if (length > tgt_len - tpos) goto err;
+			memcpy(tgt + tpos, src + offset, length);
+			tpos += length;
+		} else if (op == 0x02) {  /* INSERT */
+			if (delta_len - dpos < 4) goto err;
+			uint32_t length = get32(delta + dpos); dpos += 4;
+			if (length > delta_len - dpos) goto err;
+			if (length > tgt_len - tpos) goto err;
+			memcpy(tgt + tpos, delta + dpos, length);
+			dpos += length;
+			tpos += length;
+		} else goto err;
+	}
+	if (tpos != tgt_len) goto err;  /* output shorter than the header claims */
+	*out_tgt = tgt;
+	*out_tgt_len = tpos;
+	return 0;
+
+err:
+	free(tgt);
+	return -1;
+}
--- a/lib/src/uc2_dict.c
+++ b/lib/src/uc2_dict.c
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Dictionary management for zstd-inspired dictionary compression. */
+
+#include "uc2/uc2_dict.h"
+#include "uc2/uc2_merkle.h"
+#include "uc2/uc2_cdc.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* Serialization helpers (little-endian, alignment-safe) */
+static void put32(uint8_t *p, uint32_t v) {
+	for (int k = 0; k < 4; k++) p[k] = (uint8_t)(v >> (k*8));
+}
+static void put64(uint8_t *p, uint64_t v) {
+	for (int k = 7; k >= 0; k--) p[k] = (uint8_t)((v >> (k*8)) & 0xFF);	/* byte k holds bits 8k..8k+7 */
+}
+static uint32_t get32(const uint8_t *p) {
+	return ((uint32_t)p[3]<<24) | ((uint32_t)p[2]<<16) | ((uint32_t)p[1]<<8) | (uint32_t)p[0];	/* p[0] is the low byte */
+}
+static uint64_t get64(const uint8_t *p) {
+	uint64_t v = 0;
+	for (int k = 0; k < 8; k++) v |= (uint64_t)p[k] << (k*8);
+	return v;
+}
+
+int uc2_dict_create(struct uc2_dict *dict, const uint8_t *data, size_t size)
+{	/* Copies data into a new dictionary; 0 on success, -1 on bad arguments or allocation failure. */
+	memset(dict, 0, sizeof *dict);
+	if (!data || size == 0 || (uint64_t)size > 0xFFFFFFFFu) return -1;	/* size is stored in 32 bits: refuse rather than truncate */
+	dict->data = malloc(size);
+	if (!dict->data) return -1;
+	memcpy(dict->data, data, size);
+	dict->size = (uint32_t)size;
+	dict->id = uc2_hash64(data, size);
+	dict->checksum = uc2_fnv1a(data, size);
+	return 0;
+}
+
+int uc2_dict_verify(const struct uc2_dict *dict)
+{	/* 1 when the FNV-1a of the data equals dict->checksum; 0 otherwise or when the dictionary is empty. */
+	int empty = dict->data == NULL || dict->size == 0;
+	return empty ? 0 : uc2_fnv1a(dict->data, dict->size) == dict->checksum;
+}
+
+/* Serialized format: magic(4) + id(8) + checksum(4) + size(4) + reserved(4) = 24 bytes */
+#define HDR_SIZE 24
+
+size_t uc2_dict_serialize(const struct uc2_dict *dict, uint8_t **out)
+{	/* Header (HDR_SIZE bytes) followed by the raw data; returns the total size, or 0 with *out == NULL if malloc fails. */
+	const size_t total = HDR_SIZE + dict->size;
+	uint8_t *blob = malloc(total);
+	*out = blob;
+	if (blob == NULL) return 0;
+	put32(blob + 0, UC2_DICT_MAGIC);
+	put64(blob + 4, dict->id);
+	put32(blob + 12, dict->checksum);
+	put32(blob + 16, dict->size);
+	put32(blob + 20, 0);	/* reserved */
+	memcpy(blob + HDR_SIZE, dict->data, dict->size);
+	return total;
+}
+
+int uc2_dict_deserialize(struct uc2_dict *dict, const uint8_t *buf, size_t len)
+{	/* Parses a buffer in the uc2_dict_serialize() layout; 0 on success, -1 on error (dict is then left zeroed). */
+	memset(dict, 0, sizeof *dict);
+	if (!buf || len < HDR_SIZE) return -1;
+	if (get32(buf) != UC2_DICT_MAGIC) return -1;
+	uint32_t size = get32(buf + 16);
+	if (size > len - HDR_SIZE) return -1;	/* compare with the space left: HDR_SIZE + size can wrap in 32 bits */
+	uint8_t *copy = malloc(size ? size : 1);
+	if (!copy) return -1;
+	memcpy(copy, buf + HDR_SIZE, size);
+	dict->id = get64(buf + 4);
+	dict->checksum = get32(buf + 12);
+	dict->size = size; dict->data = copy;
+	return 0;
+}
+
+void uc2_dict_free(struct uc2_dict *dict)
+{	/* Release the data copy, then zero every field of the dictionary. */
+	free(dict->data);
+	memset(dict, 0, sizeof *dict);
+}
--- a/lib/src/uc2_ingest.c
+++ b/lib/src/uc2_ingest.c
@@ -0,0 +1,428 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+#include "uc2/uc2_ingest.h"
+#include "uc2/uc2_blockstore.h"
+#include "uc2/uc2_merkle.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static const char INGEST_MAGIC[8] = { 'U','C','2','I','N','G','S','T' };
+#define INGEST_VERSION_V1  1
+#define INGEST_VERSION_V2  2
+#define DEFAULT_CDC_BITS   13
+
+#define ENTRY_SIZE_V1   12u  /* 8B hash + 4B length */
+#define ENTRY_SIZE_V2   16u  /* 8B hash + 4B length + 4B offset */
+#define HEADER_SIZE     16u
+
+static char *make_blocks_path(const char *archive_path)
+{	/* Returns a malloc'ed "<archive_path>.blocks" (caller frees), or NULL if malloc fails. */
+	size_t base_len = strlen(archive_path);
+	char *joined = malloc(base_len + sizeof ".blocks");
+	if (joined == NULL) return NULL;
+	memcpy(joined, archive_path, base_len);
+	memcpy(joined + base_len, ".blocks", sizeof ".blocks"); /* includes trailing NUL */
+	return joined;
+}
+
+static void put_le32(uint8_t *p, uint32_t v)
+{
+	/* Least-significant byte first. */
+	for (unsigned shift = 0; shift < 32; shift += 8) {
+		*p++ = (uint8_t)(v >> shift);
+	}
+}
+
+static void put_le64(uint8_t *p, uint64_t v)
+{	/* Least-significant byte first. */
+	for (unsigned shift = 0; shift < 64; shift += 8)
+		*p++ = (uint8_t)(v >> shift);
+}
+
+static uint32_t get_le32(const uint8_t *p)
+{	/* p[0] is the least-significant byte. */
+	uint32_t v = 0;
+	for (int k = 3; k >= 0; k--)
+		v = (v << 8) | p[k];
+	return v;
+}
+
+static uint64_t get_le64(const uint8_t *p)
+{	/* p[0] is the least-significant byte. */
+	uint64_t v = 0;
+	for (int k = 7; k >= 0; k--)
+		v = (v << 8) | p[k];
+	return v;
+}
+
+/* Linear-probed open-addressing hash map: hash -> file offset.
+ * Used during write to record where each unique chunk lives so
+ * subsequent appearances of the same hash share an offset. */
+struct dedup_map {
+	uint64_t *keys;     /* 0 = empty slot */
+	uint32_t *offsets;  /* offset of chunk in archive */
+	int       cap;      /* slot count; a power of two (probing masks with cap - 1) */
+	int       len;      /* number of occupied slots */
+};
+
+static int dedup_map_init(struct dedup_map *m, int initial_cap)
+{
+	/* Smallest power of two >= max(16, initial_cap): probing masks with cap - 1. */
+	int slots = 16;
+	while (slots < initial_cap)
+		slots *= 2;
+	uint64_t *k = calloc((size_t)slots, sizeof *k);
+	uint32_t *o = calloc((size_t)slots, sizeof *o);
+	int ok = k != NULL && o != NULL;
+	if (!ok) { free(k); free(o); k = NULL; o = NULL; }
+	m->keys = k; m->offsets = o;
+	if (!ok) return -1;	/* cap and len are left unset on failure */
+	m->cap = slots;
+	m->len = 0;
+	return 0;
+}
+
+static void dedup_map_free(struct dedup_map *m)
+{
+	/* Free both arrays and clear the pointers; cap and len are not reset. */
+	free(m->offsets);
+	free(m->keys);
+	m->keys = NULL; m->offsets = NULL;
+}
+
+static int dedup_map_grow(struct dedup_map *m);
+
+/* Look up hash in map.  If present, return its offset via *out_off
+ * and return 1.  Else return 0 (caller may insert).  A hash of 0 is
+ * looked up as 1, the key under which dedup_map_put() stores it. */
+static int dedup_map_get(const struct dedup_map *m, uint64_t hash,
+                         uint32_t *out_off)
+{
+	if (m->cap == 0) return 0;
+	if (hash == 0) hash = 1;  /* 0 marks an empty slot, so put() never stores it */
+
+	uint64_t mask = (uint64_t)m->cap - 1;
+	uint64_t i = hash & mask;
+	for (int probe = 0; probe < m->cap; probe++) {
+		if (m->keys[i] == 0) return 0;
+		if (m->keys[i] == hash) { *out_off = m->offsets[i]; return 1; }
+		i = (i + 1) & mask;
+	}
+	return 0;
+}
+
+static int dedup_map_put(struct dedup_map *m, uint64_t hash, uint32_t off)
+{
+	/* Insert hash -> off unless the key already exists (the first offset is kept).
+	   The table is grown first when the insert would exceed half load; -1 if that fails. */
+	const uint64_t key = hash != 0 ? hash : 1;  /* sentinel collision: shift to 1 */
+	if ((m->len + 1) * 2 > m->cap && dedup_map_grow(m) != 0)
+		return -1;
+	const uint64_t mask = (uint64_t)m->cap - 1;
+	uint64_t slot = key & mask;
+	for (; m->keys[slot] != 0; slot = (slot + 1) & mask) {
+		if (m->keys[slot] == key) return 0;  /* already inserted */
+	}
+	m->keys[slot] = key;
+	m->offsets[slot] = off;
+	m->len++;
+	return 0;
+}
+
+static int dedup_map_grow(struct dedup_map *m)
+{
+	/* Double the table (16 slots if empty) and re-insert every key; on allocation failure the old table is kept and -1 returned. */
+	const int bigger = m->cap != 0 ? m->cap * 2 : 16;
+	uint64_t *keys2 = calloc((size_t)bigger, sizeof *keys2);
+	uint32_t *offs2 = calloc((size_t)bigger, sizeof *offs2);
+	if (keys2 == NULL || offs2 == NULL) {
+		free(keys2);
+		free(offs2);
+		return -1;
+	}
+	const uint64_t mask = (uint64_t)bigger - 1;
+	for (int old = 0; old < m->cap; old++) {
+		if (m->keys[old] == 0)
+			continue;	/* empty slot */
+		uint64_t slot = m->keys[old] & mask;
+		while (keys2[slot] != 0)
+			slot = (slot + 1) & mask;
+		keys2[slot] = m->keys[old];
+		offs2[slot] = m->offsets[old];
+	}
+	free(m->keys); free(m->offsets);
+	m->keys = keys2; m->offsets = offs2; m->cap = bigger;
+	return 0;
+}
+
+int uc2_ingest_write(const char *archive_path,
+                     const uint8_t *data, size_t len,
+                     int cdc_bits,
+                     struct uc2_ingest_stats *stats)
+{	/* Writes a v2 archive: header, manifest table, then the pool of unique chunks.  Returns 0 on success, -1 on any failure. */
+	if (!archive_path)
+		return -1;
+	if (cdc_bits <= 0)	/* non-positive selects DEFAULT_CDC_BITS */
+		cdc_bits = DEFAULT_CDC_BITS;
+
+	struct uc2_merkle tree;
+	uc2_merkle_build(&tree, data, len, cdc_bits);	/* tree.chunks[] supplies the hash, offset and length used below */
+
+	FILE *f = fopen(archive_path, "wb");
+	if (!f) {
+		uc2_merkle_free(&tree);
+		return -1;
+	}
+
+	/* Header: magic(8) version(1) cdc_bits(1) two zero bytes, chunk count (32-bit little-endian) */
+	uint8_t hdr[HEADER_SIZE];
+	memcpy(hdr, INGEST_MAGIC, 8);
+	hdr[8]  = INGEST_VERSION_V2;
+	hdr[9]  = (uint8_t)cdc_bits;
+	hdr[10] = 0;
+	hdr[11] = 0;
+	put_le32(hdr + 12, (uint32_t)tree.nchunks);
+	if (fwrite(hdr, 1, sizeof hdr, f) != sizeof hdr) {
+		fclose(f);
+		uc2_merkle_free(&tree);
+		return -1;
+	}
+
+	/* Reserve manifest entry table; we'll backfill offsets after
+	 * appending the chunk pool. */
+	long manifest_off = ftell(f);	/* NOTE(review): result is not checked for -1 */
+	size_t manifest_size = (size_t)tree.nchunks * ENTRY_SIZE_V2;
+	if (tree.nchunks > 0) {
+		uint8_t *zero = calloc(manifest_size, 1);	/* zero-filled placeholder for the table */
+		if (!zero) {
+			fclose(f);
+			uc2_merkle_free(&tree);
+			return -1;
+		}
+		size_t w = fwrite(zero, 1, manifest_size, f);
+		free(zero);
+		if (w != manifest_size) {
+			fclose(f);
+			uc2_merkle_free(&tree);
+			return -1;
+		}
+	}
+
+	/* Append unique chunks; record offset per hash. */
+	struct dedup_map dmap;
+	if (dedup_map_init(&dmap, tree.nchunks > 16 ? tree.nchunks * 2 : 16) != 0) {
+		fclose(f);
+		uc2_merkle_free(&tree);
+		return -1;
+	}
+
+	uint32_t *entry_offsets = calloc((size_t)tree.nchunks, sizeof *entry_offsets);	/* file offset chosen for each manifest entry */
+	if (tree.nchunks > 0 && !entry_offsets) {
+		dedup_map_free(&dmap);
+		fclose(f);
+		uc2_merkle_free(&tree);
+		return -1;
+	}
+
+	int new_chunks = 0;
+	uint64_t bytes_appended = 0;
+	uint64_t bytes_saved = 0;
+	for (int i = 0; i < tree.nchunks; i++) {
+		uint64_t h = tree.chunks[i].hash;
+		uint32_t clen = tree.chunks[i].length;
+		uint32_t off;
+		if (dedup_map_get(&dmap, h, &off)) {	/* hash seen before: reuse the stored offset */
+			entry_offsets[i] = off;
+			bytes_saved += clen;
+			continue;
+		}
+		long here = ftell(f);
+		if (here < 0 || (uint64_t)here > 0xFFFFFFFFu) {	/* offsets are stored in 32 bits */
+			free(entry_offsets);
+			dedup_map_free(&dmap);
+			fclose(f);
+			uc2_merkle_free(&tree);
+			return -1;
+		}
+		off = (uint32_t)here;
+		entry_offsets[i] = off;
+		if (dedup_map_put(&dmap, h, off) != 0) {
+			free(entry_offsets);
+			dedup_map_free(&dmap);
+			fclose(f);
+			uc2_merkle_free(&tree);
+			return -1;
+		}
+		size_t w = fwrite(data + tree.chunks[i].offset, 1, clen, f);
+		if (w != clen) {
+			free(entry_offsets);
+			dedup_map_free(&dmap);
+			fclose(f);
+			uc2_merkle_free(&tree);
+			return -1;
+		}
+		bytes_appended += clen;
+		new_chunks++;
+	}
+
+	/* Backfill manifest entries: hash(8) length(4) offset(4), little-endian. */
+	if (tree.nchunks > 0) {
+		if (fseek(f, manifest_off, SEEK_SET) != 0) {
+			free(entry_offsets);
+			dedup_map_free(&dmap);
+			fclose(f);
+			uc2_merkle_free(&tree);
+			return -1;
+		}
+		for (int i = 0; i < tree.nchunks; i++) {
+			uint8_t rec[ENTRY_SIZE_V2];
+			put_le64(rec, tree.chunks[i].hash);
+			put_le32(rec + 8, tree.chunks[i].length);
+			put_le32(rec + 12, entry_offsets[i]);
+			if (fwrite(rec, 1, sizeof rec, f) != sizeof rec) {
+				free(entry_offsets);
+				dedup_map_free(&dmap);
+				fclose(f);
+				uc2_merkle_free(&tree);
+				return -1;
+			}
+		}
+	}
+
+	free(entry_offsets);
+	dedup_map_free(&dmap);
+
+	if (fclose(f) != 0) {	/* a failed close is reported as a write failure */
+		uc2_merkle_free(&tree);
+		return -1;
+	}
+
+	if (stats) {	/* optional; filled only on success */
+		stats->bytes_in     = (uint64_t)len;
+		stats->chunks_total = tree.nchunks;
+		stats->chunks_new   = new_chunks;
+		stats->chunks_dedup = tree.nchunks - new_chunks;
+		stats->bytes_stored = bytes_appended;
+		stats->bytes_saved  = bytes_saved;
+	}
+
+	uc2_merkle_free(&tree);
+	return 0;
+}
+
+/* v1 restore: read manifest, fetch chunks from sidecar blockstore.
+ * Each record is an 8-byte hash plus a 4-byte length; chunks come from "<archive_path>.blocks". */
+static int restore_v1(FILE *f, uint32_t nchunks, const char *archive_path,
+                      FILE *out)
+{
+	struct uc2_blockstore store;
+	char *store_dir = make_blocks_path(archive_path);
+	if (store_dir == NULL)
+		return -1;
+	int opened = uc2_blockstore_open(&store, store_dir);
+	free(store_dir);	/* no longer needed once open has returned */
+	if (opened != 0)
+		return -1;
+
+	uint8_t *chunk = NULL;	/* scratch buffer, grown to the largest chunk seen */
+	size_t chunk_cap = 0;
+	int status = -1;	/* cleared only when every chunk was restored */
+	uint32_t done = 0;
+
+	while (done < nchunks) {
+		uint8_t entry[ENTRY_SIZE_V1];
+		if (fread(entry, 1, sizeof entry, f) != sizeof entry) break;
+		const uint64_t hash = get_le64(entry);
+		const uint32_t want = get_le32(entry + 8);
+
+		if (want > chunk_cap) {
+			uint8_t *grown = realloc(chunk, want);
+			if (grown == NULL) break;
+			chunk = grown;
+			chunk_cap = want;
+		}
+		if (uc2_blockstore_read(&store, hash, chunk, want) != (int)want) break;
+		if (fwrite(chunk, 1, want, out) != want) break;
+		done++;
+	}
+	if (done == nchunks) status = 0;
+	free(chunk);
+	uc2_blockstore_close(&store);
+	return status;
+}
+
+/* v2 restore: chunk pool is in the same file; manifest entries
+ * carry absolute offsets. */
+static int restore_v2(FILE *f, uint32_t nchunks, FILE *out)
+{
+	/* Load the whole manifest table, then seek to each chunk in turn. */
+	if (nchunks == 0)
+		return 0;
+	const size_t table_bytes = (size_t)nchunks * ENTRY_SIZE_V2;
+	uint8_t *table = malloc(table_bytes);
+	if (table == NULL)
+		return -1;
+	if (fread(table, 1, table_bytes, f) != table_bytes) {
+		free(table);
+		return -1;
+	}
+
+	uint8_t *chunk = NULL;	/* scratch buffer, grown on demand */
+	size_t chunk_cap = 0;
+	int failed = 0;
+	const uint8_t *entry = table;
+	for (uint32_t left = nchunks; left > 0 && !failed; left--, entry += ENTRY_SIZE_V2) {
+		/* Entry layout: hash(8) length(4) offset(4); the hash is not checked here. */
+		const uint32_t want  = get_le32(entry + 8);
+		const uint32_t where = get_le32(entry + 12);
+		if (want > chunk_cap) {
+			uint8_t *grown = realloc(chunk, want);
+			if (grown == NULL) { failed = 1; continue; }
+			chunk = grown;
+			chunk_cap = want;
+		}
+		if (fseek(f, (long)where, SEEK_SET) != 0 ||
+		    fread(chunk, 1, want, f) != want ||
+		    fwrite(chunk, 1, want, out) != want)
+			failed = 1;
+	}
+
+	free(chunk);
+	free(table);
+	return failed ? -1 : 0;
+}
+
+int uc2_ingest_restore(const char *archive_path, FILE *out)
+{
+	/* Reads the header (magic, version at byte 8, chunk count at byte 12,
+	   little-endian) and dispatches on the version.  Returns 0 on success,
+	   -1 on bad arguments, I/O failure, bad magic or unknown version. */
+	if (archive_path == NULL || out == NULL)
+		return -1;
+
+	FILE *in = fopen(archive_path, "rb");
+	if (in == NULL)
+		return -1;
+
+	int status = -1;
+	uint8_t header[HEADER_SIZE];
+	int header_ok = fread(header, 1, sizeof header, in) == sizeof header
+	             && memcmp(header, INGEST_MAGIC, 8) == 0;
+	if (header_ok) {
+		const uint32_t count = get_le32(header + 12);
+		switch (header[8]) {
+		case INGEST_VERSION_V2:
+			status = restore_v2(in, count, out);
+			break;
+		case INGEST_VERSION_V1:
+			status = restore_v1(in, count, archive_path, out);
+			break;
+		default:	/* unknown format version */
+			break;
+		}
+	}
+	fclose(in);
+	return status;
+}
--- a/lib/src/uc2_internal.h
+++ b/lib/src/uc2_internal.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* UC2 format constants and shared types.
+   Used by both the compressor and decompressor. */
+
+#ifndef UC2_INTERNAL_H
+#define UC2_INTERNAL_H
+
+#include <stdint.h>
+
+/* Short aliases for the fixed-width unsigned types used by the
+   declarations below. */
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+
+/* Huffman tree parameters.  The alphabet is the 256 byte values followed
+   by the distance codes and then the length codes. */
+enum {
+	MaxCodeBits  = 13,                 /* longest code length */
+	LookupSize   = 1 << MaxCodeBits,   /* 8192 */
+
+	NumByteSym   = 256,
+	NumDistSym   = 60,
+	NumLenSym    = 28,
+	NumSymbols   = NumByteSym + NumDistSym + NumLenSym,   /* 344 */
+
+	NumLoAsciiSym = 32 - 4,    /* 28: symbols 4..31 (0-3 are control) */
+	NumHiByteSym  = 256 - 128, /* 128: symbols 128..255 */
+
+	NumDeltaCodes = MaxCodeBits + 1,   /* 14 (code lengths 0..13) */
+	NumExtraCodes = 1,                 /* the repeat code */
+	NumLenCodes   = NumDeltaCodes + NumExtraCodes,   /* 15 */
+	RepeatCode    = NumDeltaCodes,     /* 14, first value after the deltas */
+	MinRepeat     = 6,
+};
+
+/* LZ77 parameters */
+enum {
+	UC2_READ_SIZE = 512,
+	UC2_MAX_DIST  = 125 * UC2_READ_SIZE,   /* 64000 */
+	UC2_EOB_MARK  = UC2_MAX_DIST + 1,      /* 64001: end-of-block distance */
+	UC2_BUF_SIZE  = 1 << 16,               /* 65536, circular buffer: u16 index wraps */
+	UC2_MIN_MATCH = 3,
+	UC2_MAX_LEN   = 200,                   /* direct match limit */
+	UC2_MAX_XLEN  = 32760,                 /* extended match limit */
+};
+
+/* Distance encoding: 60 codes in 4 tiers.
+   tier 0: dist 1..15     (15 codes, 0 extra bits)
+   tier 1: dist 16..255   (15 codes, 4 extra bits)
+   tier 2: dist 256..4095 (15 codes, 8 extra bits)
+   tier 3: dist 4096..64000 (15 codes, 12 extra bits) */
+
+/* Length encoding: 28 codes.
+   0..7:   len 3..10      (0 extra bits)
+   8..15:  len 11..26     (1 extra bit)
+   16..23: len 27..90     (3 extra bits)
+   24:     len 91..154    (6 extra bits)
+   25:     len 155..666   (9 extra bits)
+   26:     len 667..2714  (11 extra bits)
+   27:     len 2715..35482 (15 extra bits) */
+
+/* Delta-to-absolute table for tree decoding (from decompress.c).
+   vval[prev_length][delta_code] = absolute_length
+   Both indices range over the NumDeltaCodes code lengths 0..MaxCodeBits. */
+extern const u8 vval[NumDeltaCodes][NumDeltaCodes];
+
+/* Inverse: absolute-to-delta table for tree encoding.
+   ivval[prev_length][abs_length] = delta_code */
+extern const u8 ivval[NumDeltaCodes][NumDeltaCodes];
+
+/* Default Huffman code lengths for the first block.
+   d receives one code length per symbol (NumSymbols entries). */
+void uc2_default_lengths(u8 d[NumSymbols]);
+
+/* Little-endian record types: byte arrays, so records built from them
+   have no alignment or host-endianness dependence. */
+typedef struct u16le { u8 b[2]; } u16le;
+typedef struct u32le { u8 b[4]; } u32le;
+
+/* Decode a 16-bit little-endian field. */
+static inline u16 get16(u16le v)
+{
+	return (u16)(v.b[0] | (v.b[1] << 8));
+}
+
+/* Decode a 32-bit little-endian field. */
+static inline u32 get32(u32le v)
+{
+	u32 lo = (u32)(v.b[0] | (v.b[1] << 8) | (v.b[2] << 16));
+	u32 hi = (u32)v.b[3] << 24;
+	return lo | hi;
+}
+
+/* Encode a 16-bit value as a little-endian field. */
+static inline u16le put16(u16 v)
+{
+	u16le r;
+	r.b[0] = (u8)(v & 0xff);
+	r.b[1] = (u8)(v >> 8);
+	return r;
+}
+
+/* Encode a 32-bit value as a little-endian field. */
+static inline u32le put32(u32 v)
+{
+	u32le r;
+	r.b[0] = (u8)(v & 0xff);
+	r.b[1] = (u8)(v >> 8 & 0xff);
+	r.b[2] = (u8)(v >> 16 & 0xff);
+	r.b[3] = (u8)(v >> 24);
+	return r;
+}
+
+/* Fletcher checksum (XOR-based, as used by UC2).
+   The low 16 bits of `value` hold the XOR of the data taken as 16-bit
+   little-endian words, seeded with 0xA55A.  Bit 16 is a carry flag: it is
+   set when the bytes fed so far end in the middle of a word, so the next
+   update knows its first byte belongs in the high half of that word. */
+struct csum { u32 value; };
+
+/* Reset the checksum to its seed value. */
+static inline void csum_init(struct csum *cs) { cs->value = 0xA55A; }
+
+/* Fold n bytes at p into the running checksum; n == 0 is a no-op. */
+static inline void csum_update(struct csum *cs, const u8 *p, unsigned n)
+{
+	if (!n) return;
+	u32 v = cs->value;
+	/* e points at the last input byte */
+	const u8 *e = p + n - 1;
+	/* carry flag set: finish the half word left by the previous call */
+	if (v > 0xffff)
+		v ^= *p++ << 8;
+	/* whole words */
+	while (p < e) {
+		v ^= p[0] | p[1] << 8;
+		p += 2;
+	}
+	/* drop the old carry flag */
+	v &= 0xffff;
+	/* one byte left over: fold it into the low half and raise the flag */
+	if (p == e)
+		v ^= *p | 0x10000;
+	cs->value = v;
+}
+
+/* Current 16-bit checksum; the carry flag in bit 16 is cut off. */
+static inline u16 csum_get(struct csum *cs) { return (u16)cs->value; }
+
+#endif
--- a/lib/src/uc2_lz4.c
+++ b/lib/src/uc2_lz4.c
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* LZ4-compatible ultra-fast compression.
+ *
+ * Single-probe hash table with 4-byte match minimum.  No hash chains —
+ * each hash slot holds only the most recent position, giving O(1)
+ * match finding at the cost of missing some matches.  This trades
+ * compression ratio for extreme speed. */
+
+#include "uc2/uc2_lz4.h"
+#include <string.h>
+
+#define HASH_BITS 16
+#define HASH_SIZE (1 << HASH_BITS)
+#define MIN_MATCH 4
+#define ML_BITS   4
+#define ML_MASK   ((1 << ML_BITS) - 1)
+#define RUN_BITS  4
+#define RUN_MASK  ((1 << RUN_BITS) - 1)
+
+/* Hash the four bytes at p to a HASH_BITS-wide table index: load them as
+ * a little-endian 32-bit word, multiply by 2654435761 and keep the top
+ * HASH_BITS bits of the 32-bit product. */
+static uint32_t lz4_hash(const uint8_t *p)
+{
+	uint32_t word = 0;
+	for (int k = 3; k >= 0; k--)
+		word = (word << 8) | p[k];
+	return (word * 2654435761U) >> (32 - HASH_BITS);
+}
+
+/* Append the extension bytes for a length to *dst and advance *dst:
+ * one 255 byte for every full 255 in len, then the remainder (0..254). */
+static void write_len(uint8_t **dst, size_t len)
+{
+	uint8_t *w = *dst;
+	for (; len >= 255; len -= 255)
+		*w++ = 255;
+	*w++ = (uint8_t)len;
+	*dst = w;
+}
+
+/* Compress src_len bytes from src into dst (capacity dst_cap) as a stream
+ * of LZ4-style sequences: token, optional literal-length bytes, literals,
+ * 16-bit little-endian offset, optional match-length bytes.  The stream
+ * always ends with a literals-only sequence (possibly with zero literals).
+ *
+ * Returns the number of bytes written, or 0 when src_len is 0, dst_cap is
+ * 0, or the output would not fit in dst_cap.
+ *
+ * The hash table is an automatic array of HASH_SIZE 32-bit slots
+ * (256 KiB), so callers need that much stack.  Positions are stored as
+ * 32-bit offsets from src. */
+size_t uc2_lz4_compress(const uint8_t *src, size_t src_len,
+                        uint8_t *dst, size_t dst_cap)
+{
+	if (src_len == 0 || dst_cap < 1) return 0;
+
+	uint32_t htab[HASH_SIZE];
+	memset(htab, 0, sizeof htab);
+
+	const uint8_t *ip = src;
+	const uint8_t *const iend = src + src_len;
+	/* Last position at which a MIN_MATCH-byte match can start.  Only
+	 * formed when the input is long enough, so the pointer never lands
+	 * before src; for shorter inputs it is unused. */
+	const uint8_t *const mflimit =
+		src_len > MIN_MATCH ? iend - MIN_MATCH : src;
+	const uint8_t *anchor = ip;
+	uint8_t *op = dst;
+	uint8_t *const oend = dst + dst_cap;
+
+	if (src_len < MIN_MATCH + 1) goto emit_last;
+
+	ip++;  /* first byte can't be a match ref */
+
+	for (;;) {
+		/* Find a match */
+		const uint8_t *ref;
+		uint32_t h;
+
+		for (;;) {
+			if (ip > mflimit) goto emit_last;
+			h = lz4_hash(ip);
+			ref = src + htab[h];
+			htab[h] = (uint32_t)(ip - src);
+			if (ref >= src && ip - ref <= 65535 && ip - ref > 0 &&
+			    memcmp(ref, ip, MIN_MATCH) == 0)
+				break;
+			ip++;
+		}
+
+		/* Literal run before the match */
+		size_t lit_len = (size_t)(ip - anchor);
+		size_t match_len = MIN_MATCH;
+
+		/* Extend match forward */
+		while (ip + match_len < iend && ref[match_len] == ip[match_len])
+			match_len++;
+
+		/* Upper bound on the bytes this sequence needs, compared as
+		 * sizes against the room left so that no pointer is ever
+		 * formed past the end of dst. */
+		size_t ml_extra = match_len - MIN_MATCH;
+		size_t need = 1 + (lit_len > 14 ? lit_len / 255 + 1 : 0) + lit_len + 2 +
+		              (ml_extra > 14 ? ml_extra / 255 + 1 : 0);
+		if (need > (size_t)(oend - op))
+			return 0;  /* output overflow */
+
+		/* Token: literal length in the high nibble, match length
+		 * minus MIN_MATCH in the low nibble, each saturating at 15 */
+		uint8_t *token = op++;
+		size_t ll = lit_len < 15 ? lit_len : 15;
+		size_t ml = ml_extra < 15 ? ml_extra : 15;
+		*token = (uint8_t)((ll << 4) | ml);
+
+		if (lit_len >= 15)
+			write_len(&op, lit_len - 15);
+		memcpy(op, anchor, lit_len);
+		op += lit_len;
+
+		/* Offset (16-bit LE) */
+		uint16_t offset = (uint16_t)(ip - ref);
+		*op++ = (uint8_t)(offset & 0xFF);
+		*op++ = (uint8_t)(offset >> 8);
+
+		if (ml_extra >= 15)
+			write_len(&op, ml_extra - 15);
+
+		ip += match_len;
+		anchor = ip;
+
+		if (ip > mflimit) goto emit_last;
+
+		/* Hash one of the positions we skipped */
+		htab[lz4_hash(ip - 2)] = (uint32_t)(ip - 2 - src);
+	}
+
+emit_last:;
+	/* Emit final literal run */
+	size_t last_lit = (size_t)(iend - anchor);
+	size_t last_need = 1 + (last_lit > 14 ? last_lit / 255 + 1 : 0) + last_lit;
+	if (last_need > (size_t)(oend - op))
+		return 0;
+
+	uint8_t *token = op++;
+	size_t ll = last_lit < 15 ? last_lit : 15;
+	*token = (uint8_t)(ll << 4);  /* match_len = 0 (no match) */
+	if (last_lit >= 15)
+		write_len(&op, last_lit - 15);
+	memcpy(op, anchor, last_lit);
+	op += last_lit;
+
+	return (size_t)(op - dst);
+}
+
+/* Decompress a stream produced by uc2_lz4_compress.
+ *
+ * Reads src_len bytes from src and writes at most dst_cap bytes to dst.
+ * Returns the number of bytes produced, or 0 if the stream is truncated,
+ * refers to data before the start of the output, or would not fit in
+ * dst_cap (an empty input also yields 0).
+ *
+ * All lengths come from the input, so every bounds check compares them
+ * against the remaining room as sizes; adding an untrusted length to a
+ * pointer first could overflow the pointer and defeat the check. */
+size_t uc2_lz4_decompress(const uint8_t *src, size_t src_len,
+                          uint8_t *dst, size_t dst_cap)
+{
+	const uint8_t *ip = src;
+	const uint8_t *const iend = src + src_len;
+	uint8_t *op = dst;
+	uint8_t *const oend = dst + dst_cap;
+
+	while (ip < iend) {
+		uint8_t token = *ip++;
+
+		/* Literal length: high nibble, extended by 255-runs */
+		size_t lit_len = token >> 4;
+		if (lit_len == 15) {
+			uint8_t b;
+			do {
+				if (ip >= iend) return 0;
+				b = *ip++;
+				lit_len += b;
+			} while (b == 255);
+		}
+
+		/* Copy literals */
+		if (lit_len > (size_t)(iend - ip) || lit_len > (size_t)(oend - op))
+			return 0;
+		memcpy(op, ip, lit_len);
+		ip += lit_len;
+		op += lit_len;
+
+		if (ip >= iend) break;  /* end of stream (last token has no match) */
+
+		/* Match offset (16-bit LE); must point inside the output so far */
+		if ((size_t)(iend - ip) < 2) return 0;
+		uint16_t offset = ip[0] | ((uint16_t)ip[1] << 8);
+		ip += 2;
+		if (offset == 0 || (size_t)(op - dst) < offset) return 0;
+
+		/* Match length: low nibble plus MIN_MATCH, extended by 255-runs */
+		size_t match_len = (token & ML_MASK) + MIN_MATCH;
+		if ((token & ML_MASK) == ML_MASK) {
+			uint8_t b;
+			do {
+				if (ip >= iend) return 0;
+				b = *ip++;
+				match_len += b;
+			} while (b == 255);
+		}
+
+		/* Copy match */
+		if (match_len > (size_t)(oend - op)) return 0;
+		const uint8_t *ref = op - offset;
+		for (size_t i = 0; i < match_len; i++)
+			op[i] = ref[i];  /* byte-by-byte for overlapping matches */
+		op += match_len;
+	}
+
+	return (size_t)(op - dst);
+}
--- a/lib/src/uc2_merkle.c
+++ b/lib/src/uc2_merkle.c
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Merkle DAG for content-addressable deduplication.
+ *
+ * Each file is split into CDC chunks (Gear hash), each chunk hashed
+ * with FNV-1a 64-bit.  The file's root hash is computed from the
+ * concatenated chunk hashes, forming a single-level Merkle tree.
+ *
+ * Comparison operations find shared chunks between trees, enabling
+ * dedup decisions based on structural content similarity rather than
+ * simple byte-prefix matching. */
+
+#include "uc2/uc2_merkle.h"
+#include "uc2/uc2_cdc.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* FNV-1a 64-bit hash of len bytes at data: start from the offset basis,
+ * then for each byte XOR it in and multiply by the FNV prime. */
+uint64_t uc2_hash64(const uint8_t *data, size_t len)
+{
+	const uint64_t offset_basis = 14695981039346656037ULL;
+	const uint64_t prime = 1099511628211ULL;
+
+	uint64_t acc = offset_basis;
+	while (len--) {
+		acc ^= *data++;
+		acc *= prime;
+	}
+	return acc;
+}
+
+/* Build the single-level Merkle tree for data[0..len).
+ *
+ * The input is cut into chunks by the CDC chunker (configured with
+ * `bits`); each chunk records its hash, offset and length.  The root is
+ * the hash of all chunk hashes serialised as 8 little-endian bytes each.
+ *
+ * The tree is always reset first.  It stays empty (no chunks, root 0) for
+ * NULL or empty input, and is returned to that state if the chunk array
+ * cannot be grown.  If only the temporary buffer for the root hash cannot
+ * be allocated, the chunks are kept and root is left at 0. */
+void uc2_merkle_build(struct uc2_merkle *tree,
+                      const uint8_t *data, size_t len, int bits)
+{
+	tree->chunks = NULL;
+	tree->nchunks = 0;
+	tree->capacity = 0;
+	tree->root = 0;
+
+	if (!data || len == 0)
+		return;
+
+	struct uc2_chunker chunker;
+	uc2_chunker_init(&chunker, bits, 0, 0);
+
+	size_t off = 0, clen = 0;
+	int more = 1;
+	while (more) {
+		more = uc2_chunker_next(&chunker, data, len, &off, &clen);
+		if (clen == 0) break;
+
+		if (tree->nchunks >= tree->capacity) {
+			int new_cap = tree->capacity ? tree->capacity * 2 : 16;
+			/* Keep the old pointer until realloc has succeeded so a
+			 * failure neither leaks it nor leaves chunks NULL. */
+			void *grown = realloc(tree->chunks,
+			                      (size_t)new_cap * sizeof *tree->chunks);
+			if (!grown) {
+				free(tree->chunks);
+				tree->chunks = NULL;
+				tree->nchunks = 0;
+				tree->capacity = 0;
+				return;
+			}
+			tree->chunks = grown;
+			tree->capacity = new_cap;
+		}
+		struct uc2_chunk *c = &tree->chunks[tree->nchunks++];
+		c->hash = uc2_hash64(data + off, clen);
+		c->offset = (uint32_t)off;
+		c->length = (uint32_t)clen;
+	}
+
+	/* Root hash = hash of concatenated chunk hashes */
+	if (tree->nchunks > 0) {
+		size_t count = (size_t)tree->nchunks;
+		uint8_t *hashbuf = malloc(count * 8);
+		if (hashbuf) {
+			for (size_t i = 0; i < count; i++) {
+				uint64_t h = tree->chunks[i].hash;
+				for (int j = 0; j < 8; j++)
+					hashbuf[i * 8 + (size_t)j] = (uint8_t)(h >> (j * 8));
+			}
+			tree->root = uc2_hash64(hashbuf, count * 8);
+			free(hashbuf);
+		}
+	}
+}
+
+/* Count the chunks of a whose hash also occurs somewhere in b.  Each
+ * chunk of a contributes at most once, however often its hash appears
+ * in b. */
+int uc2_merkle_common(const struct uc2_merkle *a, const struct uc2_merkle *b)
+{
+	int shared = 0;
+	for (int ai = 0; ai < a->nchunks; ai++) {
+		const uint64_t wanted = a->chunks[ai].hash;
+		int bi = 0;
+		while (bi < b->nchunks && b->chunks[bi].hash != wanted)
+			bi++;
+		if (bi < b->nchunks)
+			shared++;
+	}
+	return shared;
+}
+
+/* Fraction (0.0 .. 1.0) of a's bytes that sit in chunks whose hash also
+ * occurs in b.  Returns 0.0 when a has no chunks or no bytes.
+ *
+ * Byte totals are accumulated in 64 bits: chunk lengths are 32-bit
+ * values, so their sum can exceed what a 32-bit counter holds. */
+double uc2_merkle_similarity(const struct uc2_merkle *a,
+                             const struct uc2_merkle *b)
+{
+	if (a->nchunks == 0) return 0.0;
+
+	uint64_t shared_bytes = 0;
+	uint64_t total_bytes = 0;
+	for (int i = 0; i < a->nchunks; i++) {
+		total_bytes += a->chunks[i].length;
+		for (int j = 0; j < b->nchunks; j++)
+			if (a->chunks[i].hash == b->chunks[j].hash) {
+				shared_bytes += a->chunks[i].length;
+				break;  /* count each chunk of a at most once */
+			}
+	}
+	return total_bytes > 0 ? (double)shared_bytes / (double)total_bytes : 0.0;
+}
+
+/* Release the chunk array and mark the tree as holding no chunks.  The
+ * root field is left as it was. */
+void uc2_merkle_free(struct uc2_merkle *tree)
+{
+	void *owned = tree->chunks;
+
+	tree->capacity = 0;
+	tree->nchunks = 0;
+	tree->chunks = NULL;
+	free(owned);
+}
--- a/lib/src/uc2_ots.c
+++ b/lib/src/uc2_ots.c
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* OpenTimestamps proof parser, serializer, walker, and UC2 trailer.
+ *
+ * The walker supports the calendar-path subset of opcodes (APPEND,
+ * PREPEND, SHA256) directly.  Other unary crypto ops (SHA1, RIPEMD160,
+ * KECCAK256) are accepted as structurally valid but flagged as not
+ * locally cryptographically verified; for full validation, extract
+ * the proof and run the standard `ots verify` tool. */
+
+#include "uc2/uc2_ots.h"
+#include "uc2/uc2_sha256.h"
+#include <string.h>
+
+/* Read a 32-bit little-endian value from the four bytes at p. */
+static uint32_t r32le(const uint8_t *p)
+{
+	uint32_t value = 0;
+	for (int k = 3; k >= 0; k--)
+		value = (value << 8) | p[k];
+	return value;
+}
+
+/* Store v at p as four little-endian bytes. */
+static void w32le(uint8_t *p, uint32_t v)
+{
+	for (int k = 0; k < 4; k++) {
+		p[k] = (uint8_t)v;
+		v >>= 8;
+	}
+}
+
+/* Decode a base-128 varint (7 payload bits per byte, least significant
+ * group first, bit 7 set on every byte except the last).
+ *
+ * On success stores the value in *out_value and the number of bytes read
+ * in *consumed and returns UC2_OTS_OK.  Errors:
+ *   UC2_OTS_ERR_TRUNCATED     input ended before the final byte
+ *   UC2_OTS_ERR_OVERFLOW      value does not fit in 64 bits
+ *   UC2_OTS_ERR_NONCANONICAL  multi-byte encoding ending in a 0x00 byte */
+int uc2_ots_varint_decode(const uint8_t *in, size_t in_len,
+                          uint64_t *out_value, size_t *consumed)
+{
+	uint64_t acc = 0;
+	unsigned shift = 0;
+
+	for (size_t pos = 0; pos < in_len; pos++, shift += 7) {
+		if (shift >= 64)
+			return UC2_OTS_ERR_OVERFLOW;
+
+		const uint8_t byte = in[pos];
+		const uint64_t payload = byte & 0x7f;
+
+		/* The tenth byte lands at bit 63, where only 0 or 1 still
+		 * fits; anything larger would lose its high bits. */
+		if (shift == 63 && payload > 1)
+			return UC2_OTS_ERR_OVERFLOW;
+		acc |= payload << shift;
+
+		if (byte & 0x80)
+			continue;   /* more groups follow */
+
+		/* Final byte.  A zero high group after earlier bytes means
+		 * the same value has a shorter encoding. */
+		if (pos > 0 && byte == 0)
+			return UC2_OTS_ERR_NONCANONICAL;
+		*out_value = acc;
+		*consumed = pos + 1;
+		return UC2_OTS_OK;
+	}
+	return UC2_OTS_ERR_TRUNCATED;
+}
+
+/* Encode value as a base-128 varint (least significant group first, bit 7
+ * set on every byte but the last) into out.  Returns the number of bytes
+ * written, between 1 and 10. */
+size_t uc2_ots_varint_encode(uint64_t value, uint8_t out[10])
+{
+	size_t n = 0;
+	for (; value >= 0x80; value >>= 7)
+		out[n++] = (uint8_t)(value | 0x80);
+	out[n++] = (uint8_t)value;
+	return n;
+}
+
+/* Read a "varbytes" field from p[0..len): a varint byte count followed by
+ * that many bytes.  On success *out_data points at the bytes inside p,
+ * *out_data_len is their count and *consumed covers prefix plus bytes.
+ * Returns the varint decoder's error, UC2_OTS_ERR_TOO_LARGE when the
+ * count exceeds UC2_OTS_MAX_VARBYTES, or UC2_OTS_ERR_TRUNCATED when the
+ * bytes are not all present. */
+static int read_varbytes(const uint8_t *p, size_t len,
+                         const uint8_t **out_data, size_t *out_data_len,
+                         size_t *consumed)
+{
+	uint64_t count;
+	size_t prefix_len;
+
+	const int status = uc2_ots_varint_decode(p, len, &count, &prefix_len);
+	if (status < 0)
+		return status;
+
+	if (count > UC2_OTS_MAX_VARBYTES)
+		return UC2_OTS_ERR_TOO_LARGE;
+
+	const size_t remaining = len - prefix_len;
+	if (count > remaining)
+		return UC2_OTS_ERR_TRUNCATED;
+
+	const size_t payload_len = (size_t)count;
+	*out_data = p + prefix_len;
+	*out_data_len = payload_len;
+	*consumed = prefix_len + payload_len;
+	return UC2_OTS_OK;
+}
+
+/* Split a detached timestamp file into its parts without copying.
+ *
+ * Layout: header magic, one version byte, one hash-op byte, the leaf
+ * digest (20 bytes for SHA1/RIPEMD160, 32 for SHA256/KECCAK256), then the
+ * serialized timestamp body.  On success the out pointers refer into
+ * `file` and UC2_OTS_OK is returned; otherwise one of
+ * UC2_OTS_ERR_TRUNCATED, _BAD_MAGIC, _BAD_VERSION or _BAD_HASH_OP. */
+int uc2_ots_parse_file(const uint8_t *file, size_t file_len,
+                       uint8_t *out_hash_op,
+                       const uint8_t **out_leaf_digest,
+                       size_t *out_leaf_digest_len,
+                       const uint8_t **out_body,
+                       size_t *out_body_len)
+{
+	/* Only the fixed prefix is required up front.  The digest length
+	 * depends on the hash op and is checked once that is known, so a
+	 * short file with a 20-byte digest is not rejected by assuming 32. */
+	if (file_len < UC2_OTS_HEADER_MAGIC_LEN + 1 + 1)
+		return UC2_OTS_ERR_TRUNCATED;
+	if (memcmp(file, UC2_OTS_HEADER_MAGIC, UC2_OTS_HEADER_MAGIC_LEN) != 0)
+		return UC2_OTS_ERR_BAD_MAGIC;
+	size_t off = UC2_OTS_HEADER_MAGIC_LEN;
+	if (file[off++] != UC2_OTS_VERSION)
+		return UC2_OTS_ERR_BAD_VERSION;
+	uint8_t hash_op = file[off++];
+	size_t digest_len;
+	switch (hash_op) {
+	case UC2_OTS_OP_SHA1:      digest_len = 20; break;
+	case UC2_OTS_OP_RIPEMD160: digest_len = 20; break;
+	case UC2_OTS_OP_SHA256:    digest_len = 32; break;
+	case UC2_OTS_OP_KECCAK256: digest_len = 32; break;
+	default: return UC2_OTS_ERR_BAD_HASH_OP;
+	}
+	if (file_len - off < digest_len)
+		return UC2_OTS_ERR_TRUNCATED;
+	*out_hash_op = hash_op;
+	*out_leaf_digest = file + off;
+	*out_leaf_digest_len = digest_len;
+	off += digest_len;
+	*out_body = file + off;
+	*out_body_len = file_len - off;
+	return UC2_OTS_OK;
+}
+
+/* Write a detached timestamp file into out: header magic, version byte,
+ * hash-op byte, leaf digest, body.
+ *
+ * Returns the number of bytes written.  Returns UC2_OTS_ERR_BAD_HASH_OP
+ * when hash_op is unknown or leaf_digest_len does not match its digest
+ * size, and UC2_OTS_ERR_TRUNCATED when out_cap is too small. */
+int uc2_ots_serialize_file(uint8_t hash_op,
+                           const uint8_t *leaf_digest, size_t leaf_digest_len,
+                           const uint8_t *body, size_t body_len,
+                           uint8_t *out, size_t out_cap)
+{
+	size_t digest_bytes;
+	if (hash_op == UC2_OTS_OP_SHA1 || hash_op == UC2_OTS_OP_RIPEMD160)
+		digest_bytes = 20;
+	else if (hash_op == UC2_OTS_OP_SHA256 || hash_op == UC2_OTS_OP_KECCAK256)
+		digest_bytes = 32;
+	else
+		return UC2_OTS_ERR_BAD_HASH_OP;
+
+	if (leaf_digest_len != digest_bytes)
+		return UC2_OTS_ERR_BAD_HASH_OP;
+
+	const size_t total = UC2_OTS_HEADER_MAGIC_LEN + 1 + 1 + leaf_digest_len + body_len;
+	if (total > out_cap)
+		return UC2_OTS_ERR_TRUNCATED;
+
+	size_t at = 0;
+	memcpy(out + at, UC2_OTS_HEADER_MAGIC, UC2_OTS_HEADER_MAGIC_LEN);
+	at += UC2_OTS_HEADER_MAGIC_LEN;
+	out[at++] = UC2_OTS_VERSION;
+	out[at++] = hash_op;
+	memcpy(out + at, leaf_digest, leaf_digest_len);
+	at += leaf_digest_len;
+	memcpy(out + at, body, body_len);
+	at += body_len;
+	return (int)at;
+}
+
+/* A serialized timestamp is a sequence of "items"; each item is either
+ *   (attestation)  0x00 + tag(8) + varbytes(payload)
+ *   (op)           op-byte + (varbytes operand for binary ops) + child-timestamp
+ *
+ * Within one timestamp node, items are separated by 0xff: every item
+ * except the LAST is preceded by 0xff.  Children timestamps recurse
+ * the same structure with the digest produced by their parent op. */
+
+/* State shared by the recursive walk over one timestamp body. */
+struct walker {
+	/* p: read cursor into the body; end: one past its last byte */
+	const uint8_t *p, *end;
+	/* called for every attestation found; may be NULL */
+	uc2_ots_attest_cb cb;
+	/* opaque pointer handed to cb unchanged */
+	void *ctx;
+	/* set to 1 once an op was accepted but not applied to the digest */
+	int has_unsupported_op;
+};
+
+/* Apply one op to `digest` (a buffer of UC2_OTS_MAX_DIGEST_LEN bytes,
+ * *digest_len of them in use).
+ *
+ * SHA256 replaces the digest with its hash.  APPEND and PREPEND read a
+ * varbytes operand from the walker and attach it after or before the
+ * digest, failing with UC2_OTS_ERR_TOO_LARGE if the result would not fit.
+ * SHA1, RIPEMD160, KECCAK256, REVERSE and HEXLIFY are not computed: they
+ * set has_unsupported_op and leave the digest unchanged so the structural
+ * walk can continue.  Any other op byte is UC2_OTS_ERR_BAD_OP. */
+static int apply_op(struct walker *w, uint8_t op,
+                    uint8_t *digest, size_t *digest_len)
+{
+	if (op == UC2_OTS_OP_SHA256) {
+		uint8_t hashed[UC2_SHA256_OUT_LEN];
+		uc2_sha256_hash(digest, *digest_len, hashed);
+		memcpy(digest, hashed, UC2_SHA256_OUT_LEN);
+		*digest_len = UC2_SHA256_OUT_LEN;
+		return UC2_OTS_OK;
+	}
+
+	if (op == UC2_OTS_OP_SHA1 || op == UC2_OTS_OP_RIPEMD160 ||
+	    op == UC2_OTS_OP_KECCAK256 || op == UC2_OTS_OP_REVERSE ||
+	    op == UC2_OTS_OP_HEXLIFY) {
+		w->has_unsupported_op = 1;
+		return UC2_OTS_OK;
+	}
+
+	if (op != UC2_OTS_OP_APPEND && op != UC2_OTS_OP_PREPEND)
+		return UC2_OTS_ERR_BAD_OP;
+
+	/* Binary op: fetch the operand that follows the op byte. */
+	const uint8_t *arg;
+	size_t arg_len, used;
+	const int status = read_varbytes(w->p, (size_t)(w->end - w->p),
+	                                 &arg, &arg_len, &used);
+	if (status < 0)
+		return status;
+	w->p += used;
+
+	const size_t old_len = *digest_len;
+	if (old_len + arg_len > UC2_OTS_MAX_DIGEST_LEN)
+		return UC2_OTS_ERR_TOO_LARGE;
+
+	if (op == UC2_OTS_OP_APPEND) {
+		memcpy(digest + old_len, arg, arg_len);
+	} else {
+		/* Make room at the front, then drop the operand in. */
+		memmove(digest + arg_len, digest, old_len);
+		memcpy(digest, arg, arg_len);
+	}
+	*digest_len = old_len + arg_len;
+	return UC2_OTS_OK;
+}
+
+/* Parse one attestation (its leading 0x00 byte already consumed): a tag
+ * of UC2_OTS_TAG_LEN bytes followed by a varbytes payload.  If a callback
+ * is installed it receives the tag, the payload and the digest being
+ * attested; a non-zero callback result ends the walk with
+ * UC2_OTS_ERR_OVERFLOW. */
+static int walk_attestation(struct walker *w,
+                            const uint8_t *digest, size_t digest_len)
+{
+	if (w->end - w->p < UC2_OTS_TAG_LEN)
+		return UC2_OTS_ERR_TRUNCATED;
+
+	const uint8_t *const tag = w->p;
+	w->p += UC2_OTS_TAG_LEN;
+
+	const uint8_t *body;
+	size_t body_len, used;
+	const int status = read_varbytes(w->p, (size_t)(w->end - w->p),
+	                                 &body, &body_len, &used);
+	if (status < 0)
+		return status;
+	w->p += used;
+
+	if (w->cb == NULL)
+		return UC2_OTS_OK;
+	if (w->cb(w->ctx, tag, body, body_len, digest, digest_len))
+		return UC2_OTS_ERR_OVERFLOW;
+	return UC2_OTS_OK;
+}
+
+/* Walk every item of one timestamp node whose input digest is
+ * digest_in[0..digest_in_len).  Attestations are reported against that
+ * digest; each op is applied to a private copy of it and its child
+ * timestamp is walked recursively with the result.  Recursion is limited
+ * to UC2_OTS_MAX_DEPTH levels (UC2_OTS_ERR_DEPTH beyond that). */
+static int walk_node(struct walker *w,
+                     const uint8_t *digest_in, size_t digest_in_len,
+                     int depth)
+{
+	if (depth >= UC2_OTS_MAX_DEPTH)
+		return UC2_OTS_ERR_DEPTH;
+
+	int more_items;
+	do {
+		if (w->p >= w->end)
+			return UC2_OTS_ERR_TRUNCATED;
+		uint8_t lead = *w->p++;
+
+		/* A branch marker announces that another item follows this
+		 * one; the item's own first byte comes right after it. */
+		more_items = (lead == UC2_OTS_BRANCH);
+		if (more_items) {
+			if (w->p >= w->end)
+				return UC2_OTS_ERR_TRUNCATED;
+			lead = *w->p++;
+		}
+
+		int status;
+		if (lead == UC2_OTS_ATTESTATION) {
+			status = walk_attestation(w, digest_in, digest_in_len);
+		} else {
+			/* Siblings all start from the parent digest, so the
+			 * op works on a scratch copy. */
+			uint8_t scratch[UC2_OTS_MAX_DIGEST_LEN];
+			size_t scratch_len = digest_in_len;
+			memcpy(scratch, digest_in, digest_in_len);
+			status = apply_op(w, lead, scratch, &scratch_len);
+			if (status >= 0)
+				status = walk_node(w, scratch, scratch_len, depth + 1);
+		}
+		if (status < 0)
+			return status;
+	} while (more_items);
+
+	return UC2_OTS_OK;
+}
+
+/* Walk a serialized timestamp body starting from leaf_digest, calling cb
+ * (if not NULL) for every attestation.
+ *
+ * Returns UC2_OTS_RESULT_VERIFIED when every op was applied locally,
+ * UC2_OTS_RESULT_STRUCTURAL when at least one op was only accepted
+ * structurally, or a negative error: UC2_OTS_ERR_TOO_LARGE for an
+ * oversized leaf digest, UC2_OTS_ERR_OVERFLOW for trailing bytes after
+ * the timestamp, or whatever the node walk reported. */
+int uc2_ots_walk(const uint8_t *body, size_t body_len,
+                 const uint8_t *leaf_digest, size_t leaf_digest_len,
+                 uc2_ots_attest_cb cb, void *ctx)
+{
+	if (leaf_digest_len > UC2_OTS_MAX_DIGEST_LEN)
+		return UC2_OTS_ERR_TOO_LARGE;
+
+	struct walker state = {
+		.p = body,
+		.end = body + body_len,
+		.cb = cb,
+		.ctx = ctx,
+		.has_unsupported_op = 0,
+	};
+
+	const int status = walk_node(&state, leaf_digest, leaf_digest_len, 0);
+	if (status < 0)
+		return status;
+	if (state.p != state.end)
+		return UC2_OTS_ERR_OVERFLOW;
+	if (state.has_unsupported_op)
+		return UC2_OTS_RESULT_STRUCTURAL;
+	return UC2_OTS_RESULT_VERIFIED;
+}
+
+/* Map an attestation tag to a display name: "pending", "Bitcoin" or
+ * "Litecoin".  Returns a null pointer for any other tag. */
+const char *uc2_ots_attest_name(const uint8_t tag[UC2_OTS_TAG_LEN])
+{
+	const char *label = NULL;
+
+	if (!memcmp(tag, UC2_OTS_TAG_PENDING, UC2_OTS_TAG_LEN))
+		label = "pending";
+	else if (!memcmp(tag, UC2_OTS_TAG_BITCOIN, UC2_OTS_TAG_LEN))
+		label = "Bitcoin";
+	else if (!memcmp(tag, UC2_OTS_TAG_LITECOIN, UC2_OTS_TAG_LEN))
+		label = "Litecoin";
+
+	return label;
+}
+
+/* Build the UC2 proof trailer in out:
+ *   magic | version (LE32) | archive_len (LE32) | proof_len (LE32) |
+ *   proof bytes | proof_len (LE32) | magic
+ * The length and magic are repeated at the end so the trailer can be
+ * located from the end of the file.
+ *
+ * Returns the number of bytes written, UC2_OTS_ERR_TOO_LARGE when the
+ * proof exceeds UC2_OTS_TRAILER_MAX_PROOF, or UC2_OTS_ERR_TRUNCATED when
+ * out_cap is too small. */
+int uc2_ots_trailer_build(uint32_t archive_len,
+                          const uint8_t *proof, size_t proof_len,
+                          uint8_t *out, size_t out_cap)
+{
+	if (proof_len > UC2_OTS_TRAILER_MAX_PROOF)
+		return UC2_OTS_ERR_TOO_LARGE;
+
+	const size_t needed = UC2_OTS_TRAILER_OVERHEAD + proof_len;
+	if (needed > out_cap)
+		return UC2_OTS_ERR_TRUNCATED;
+
+	size_t at = 0;
+
+	/* Front part: magic and the three fixed fields. */
+	memcpy(out + at, UC2_OTS_TRAILER_MAGIC, UC2_OTS_TRAILER_MAGIC_LEN);
+	at += UC2_OTS_TRAILER_MAGIC_LEN;
+	w32le(out + at, UC2_OTS_TRAILER_VERSION);
+	at += 4;
+	w32le(out + at, archive_len);
+	at += 4;
+	w32le(out + at, (uint32_t)proof_len);
+	at += 4;
+
+	memcpy(out + at, proof, proof_len);
+	at += proof_len;
+
+	/* Back part: length again, then the closing magic. */
+	w32le(out + at, (uint32_t)proof_len);
+	at += 4;
+	memcpy(out + at, UC2_OTS_TRAILER_MAGIC, UC2_OTS_TRAILER_MAGIC_LEN);
+	at += UC2_OTS_TRAILER_MAGIC_LEN;
+
+	return (int)at;
+}
+
+/* Locate and validate a proof trailer at the end of file[0..file_len).
+ *
+ * Returns 1 when the file is too short for a trailer tail or does not end
+ * with the trailer magic (no trailer present).  Once the closing magic is
+ * found every further inconsistency is an error: UC2_OTS_ERR_TOO_LARGE,
+ * _TRUNCATED, _BAD_MAGIC, _BAD_VERSION, _NONCANONICAL (the two length
+ * fields disagree) or _OVERFLOW (the recorded archive length is not the
+ * number of bytes in front of the trailer).  On UC2_OTS_OK the outputs
+ * give the archive length and the proof bytes inside `file`. */
+int uc2_ots_trailer_parse(const uint8_t *file, size_t file_len,
+                          uint32_t *out_archive_len,
+                          const uint8_t **out_proof, size_t *out_proof_len)
+{
+	if (file_len < UC2_OTS_TRAILER_TAIL_LEN)
+		return 1;
+
+	const uint8_t *const file_end = file + file_len;
+	if (memcmp(file_end - UC2_OTS_TRAILER_MAGIC_LEN,
+	           UC2_OTS_TRAILER_MAGIC, UC2_OTS_TRAILER_MAGIC_LEN) != 0)
+		return 1;
+
+	/* Closing magic found: from here on every check is a hard failure. */
+	const uint32_t tail_proof_len = r32le(file_end - UC2_OTS_TRAILER_TAIL_LEN);
+	if (tail_proof_len > UC2_OTS_TRAILER_MAX_PROOF)
+		return UC2_OTS_ERR_TOO_LARGE;
+
+	const size_t trailer_len = UC2_OTS_TRAILER_OVERHEAD + tail_proof_len;
+	if (trailer_len > file_len)
+		return UC2_OTS_ERR_TRUNCATED;
+
+	const uint8_t *const head = file_end - trailer_len;
+	if (memcmp(head, UC2_OTS_TRAILER_MAGIC, UC2_OTS_TRAILER_MAGIC_LEN) != 0)
+		return UC2_OTS_ERR_BAD_MAGIC;
+
+	const uint8_t *const fields = head + UC2_OTS_TRAILER_MAGIC_LEN;
+	const uint32_t version = r32le(fields);
+	const uint32_t recorded_archive_len = r32le(fields + 4);
+	const uint32_t head_proof_len = r32le(fields + 8);
+
+	if (version != UC2_OTS_TRAILER_VERSION)
+		return UC2_OTS_ERR_BAD_VERSION;
+	if (head_proof_len != tail_proof_len)
+		return UC2_OTS_ERR_NONCANONICAL;
+	if ((size_t)recorded_archive_len != (size_t)(head - file))
+		return UC2_OTS_ERR_OVERFLOW;
+
+	*out_archive_len = recorded_archive_len;
+	*out_proof = head + UC2_OTS_TRAILER_HEAD_LEN;
+	*out_proof_len = tail_proof_len;
+	return UC2_OTS_OK;
+}
--- a/lib/src/uc2_preprocess.c
+++ b/lib/src/uc2_preprocess.c
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Content-aware preprocessing filters. */
+
+#include "uc2/uc2_preprocess.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* --- BCJ (E8/E9 transform for x86) --- */
+
+/* Convert relative CALL (E8) and JMP (E9) addresses to absolute.
+ * The 4-byte displacement after E8/E9 is replaced with an absolute
+ * address relative to position 0.  This normalizes calls to the same
+ * function from different locations, improving LZ77 matching.
+ *
+ * The buffer is modified in place; an opcode byte with fewer than four
+ * bytes after it is left alone.  Always returns 0.
+ *
+ * The address arithmetic is done in uint32_t so that it wraps modulo
+ * 2^32; signed addition could overflow, which is undefined behaviour. */
+int uc2_bcj_apply(uint8_t *data, size_t len)
+{
+	if (len < 5) return 0;
+	for (size_t i = 0; i + 4 < len; i++) {
+		if (data[i] != 0xE8 && data[i] != 0xE9)
+			continue;
+		uint32_t rel = (uint32_t)data[i+1] | ((uint32_t)data[i+2] << 8) |
+		               ((uint32_t)data[i+3] << 16) | ((uint32_t)data[i+4] << 24);
+		/* target = displacement + address of the next instruction */
+		uint32_t abs_addr = rel + (uint32_t)(i + 5);
+		data[i+1] = (uint8_t)(abs_addr);
+		data[i+2] = (uint8_t)(abs_addr >> 8);
+		data[i+3] = (uint8_t)(abs_addr >> 16);
+		data[i+4] = (uint8_t)(abs_addr >> 24);
+		i += 4;  /* skip the address bytes */
+	}
+	return 0;
+}
+
+/* Undo the E8/E9 transform in place: every 4-byte absolute address that
+ * follows an E8 or E9 byte is turned back into a displacement relative to
+ * the end of that 5-byte instruction.  The scan visits the same positions
+ * as the forward transform because the opcode bytes themselves are never
+ * modified.  Always returns 0.
+ *
+ * The arithmetic is done in uint32_t so that it wraps modulo 2^32; signed
+ * subtraction could overflow, which is undefined behaviour. */
+int uc2_bcj_revert(uint8_t *data, size_t len)
+{
+	if (len < 5) return 0;
+	for (size_t i = 0; i + 4 < len; i++) {
+		if (data[i] != 0xE8 && data[i] != 0xE9)
+			continue;
+		uint32_t abs_addr = (uint32_t)data[i+1] | ((uint32_t)data[i+2] << 8) |
+		                    ((uint32_t)data[i+3] << 16) | ((uint32_t)data[i+4] << 24);
+		uint32_t rel = abs_addr - (uint32_t)(i + 5);
+		data[i+1] = (uint8_t)(rel);
+		data[i+2] = (uint8_t)(rel >> 8);
+		data[i+3] = (uint8_t)(rel >> 16);
+		data[i+4] = (uint8_t)(rel >> 24);
+		i += 4;  /* skip the address bytes */
+	}
+	return 0;
+}
+
+/* --- BWT (Burrows-Wheeler Transform) --- */
+
+/* Simple forward BWT: the cyclic rotations of the input are sorted with
+ * qsort.  One comparison may scan up to len bytes, so highly repetitive
+ * input costs O(n^2 log n) in the worst case.
+ *
+ * qsort has no context argument, so the comparator reads the input
+ * through these two file-scope variables.  uc2_bwt_apply is therefore not
+ * reentrant and must not run on two threads at the same time. */
+
+static const uint8_t *bwt_data;
+static size_t bwt_len;
+
+/* Order two rotation start indices by the rotations they denote. */
+static int bwt_cmp(const void *a, const void *b)
+{
+	uint32_t ia = *(const uint32_t *)a;
+	uint32_t ib = *(const uint32_t *)b;
+	for (size_t k = 0; k < bwt_len; k++) {
+		uint8_t ca = bwt_data[(ia + k) % bwt_len];
+		uint8_t cb = bwt_data[(ib + k) % bwt_len];
+		if (ca != cb) return (int)ca - (int)cb;
+	}
+	return 0;
+}
+
+/* Transform data[0..len).  On success *out is a malloc'd buffer of len
+ * bytes holding the last column of the sorted rotations (the caller frees
+ * it) and *primary_index is the row of the unrotated input.  Empty input
+ * yields *out == NULL and index 0.
+ *
+ * Returns 0 on success and -1, with *out set to NULL, when len does not
+ * fit the 32-bit rotation indices or memory cannot be allocated. */
+int uc2_bwt_apply(const uint8_t *data, size_t len,
+                  uint8_t **out, uint32_t *primary_index)
+{
+	*out = NULL;
+	*primary_index = 0;
+	if (len == 0) return 0;
+
+	/* Rotation indices are stored as uint32_t, and the index array must
+	 * not overflow the allocation size. */
+	if (len > UINT32_MAX || len > SIZE_MAX / sizeof(uint32_t))
+		return -1;
+
+	uint32_t *sa = malloc(len * sizeof *sa);
+	uint8_t *result = malloc(len);
+	if (!sa || !result) { free(sa); free(result); return -1; }
+
+	for (size_t i = 0; i < len; i++) sa[i] = (uint32_t)i;
+	bwt_data = data;
+	bwt_len = len;
+	qsort(sa, len, sizeof *sa, bwt_cmp);
+
+	for (size_t i = 0; i < len; i++) {
+		if (sa[i] == 0) *primary_index = (uint32_t)i;
+		/* last byte of the rotation that starts at sa[i] */
+		result[i] = data[(sa[i] + len - 1) % len];
+	}
+
+	free(sa);
+	*out = result;
+	return 0;
+}
+
+/* Invert the BWT.  data[0..len) is the transformed block and
+ * primary_index the row of the original text as reported by the forward
+ * transform.  On success *out is a malloc'd buffer of len bytes with the
+ * original data (the caller frees it); empty input yields *out == NULL.
+ *
+ * Returns 0 on success and -1, with *out set to NULL, when primary_index
+ * is not a valid row (it would be used as an index into data), when len
+ * does not fit the 32-bit mapping, or when memory cannot be allocated. */
+int uc2_bwt_revert(const uint8_t *data, size_t len,
+                   uint32_t primary_index, uint8_t **out)
+{
+	*out = NULL;
+	if (len == 0) return 0;
+
+	/* primary_index comes from stored data: reject anything that would
+	 * read outside the block.  The mapping below holds 32-bit indices. */
+	if (primary_index >= len) return -1;
+	if (len > UINT32_MAX || len > SIZE_MAX / sizeof(uint32_t))
+		return -1;
+
+	uint8_t *result = malloc(len);
+	uint32_t *T = malloc(len * sizeof *T);
+	if (!result || !T) { free(result); free(T); return -1; }
+
+	/* Build the LF-mapping (Last-to-First column mapping).
+	   T[i] = position in first column corresponding to last column position i. */
+	uint32_t count[256];
+	memset(count, 0, sizeof count);
+	for (size_t i = 0; i < len; i++) count[data[i]]++;
+
+	/* start[c] = first row of the sorted column that holds byte c */
+	uint32_t sum = 0;
+	uint32_t start[256];
+	for (int c = 0; c < 256; c++) {
+		start[c] = sum;
+		sum += count[c];
+	}
+
+	/* Reset count for building T */
+	memset(count, 0, sizeof count);
+	for (size_t i = 0; i < len; i++) {
+		T[i] = start[data[i]] + count[data[i]];
+		count[data[i]]++;
+	}
+
+	/* Reconstruct: follow T from primary_index, reading in reverse */
+	uint32_t idx = primary_index;
+	for (size_t i = len; i > 0; i--) {
+		result[i - 1] = data[idx];
+		idx = T[idx];
+	}
+
+	free(T);
+	*out = result;
+	return 0;
+}
+
+/* --- Delta filter --- */
+
+/* Replace every byte from index `stride` onwards by its difference
+ * (modulo 256) from the byte `stride` positions earlier; the first
+ * `stride` bytes stay as they are.  A stride below 1 is treated as 1. */
+void uc2_delta_filter_apply(uint8_t *data, size_t len, int stride)
+{
+	const size_t step = stride < 1 ? 1 : (size_t)stride;
+	if (len <= step)
+		return;
+
+	/* Walk backwards so each subtraction still sees the original
+	 * value of the earlier byte. */
+	for (size_t i = len - 1; i >= step; i--)
+		data[i] = (uint8_t)(data[i] - data[i - step]);
+}
+
+/* Undo the delta filter: going forwards, add to every byte from index
+ * `stride` onwards the already restored byte `stride` positions earlier
+ * (modulo 256).  A stride below 1 is treated as 1. */
+void uc2_delta_filter_revert(uint8_t *data, size_t len, int stride)
+{
+	const size_t step = stride < 1 ? 1 : (size_t)stride;
+
+	size_t i = step;
+	while (i < len) {
+		data[i] = (uint8_t)(data[i] + data[i - step]);
+		i++;
+	}
+}
+
+/* --- Content detection --- */
+
+/* Guess the content class of a buffer from its first bytes.
+ *
+ *   - fewer than 4 bytes: UC2_CONTENT_BINARY
+ *   - starts with "MZ" or "\x7fELF": UC2_CONTENT_X86
+ *   - more than 85% of the first 4096 bytes are printable ASCII, newline,
+ *     carriage return or tab: UC2_CONTENT_TEXT
+ *   - at least 64 bytes long and more than 20% of that sample is zero
+ *     bytes: UC2_CONTENT_STRUCT
+ *   - anything else: UC2_CONTENT_BINARY */
+int uc2_detect_content(const uint8_t *data, size_t len)
+{
+	if (len < 4)
+		return UC2_CONTENT_BINARY;
+
+	/* Executable signatures: DOS/PE and ELF */
+	const int is_mz = data[0] == 'M' && data[1] == 'Z';
+	const int is_elf = data[0] == 0x7F && data[1] == 'E' &&
+	                   data[2] == 'L' && data[3] == 'F';
+	if (is_mz || is_elf)
+		return UC2_CONTENT_X86;
+
+	/* One pass over the sample gathers both statistics. */
+	const size_t sample = len > 4096 ? 4096 : len;
+	size_t text_like = 0;
+	size_t nul_bytes = 0;
+	for (size_t i = 0; i < sample; i++) {
+		const uint8_t c = data[i];
+		if (c == 0)
+			nul_bytes++;
+		else if ((c >= 32 && c <= 126) ||
+		         c == '\n' || c == '\r' || c == '\t')
+			text_like++;
+	}
+
+	if (text_like * 100 / sample > 85)
+		return UC2_CONTENT_TEXT;
+
+	if (len >= 64 && nul_bytes * 100 / sample > 20)
+		return UC2_CONTENT_STRUCT;
+
+	return UC2_CONTENT_BINARY;
+}
--- a/lib/src/uc2_rans.c
+++ b/lib/src/uc2_rans.c
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* rANS (range Asymmetric Numeral Systems) entropy coder.
+ *
+ * Table-based rANS with 32-bit state.  The state represents a position
+ * in a virtual number line partitioned proportionally to symbol
+ * frequencies.  Encoding maps the state forward (growing), decoding
+ * maps it backward (shrinking).
+ *
+ * Key properties vs Huffman:
+ * - Fractional bit costs: symbols can use e.g. 2.3 bits (not rounded to 3)
+ * - 5-15% better on skewed distributions (many symbols with freq < 2^-N)
+ * - Same O(1) encode/decode per symbol with lookup tables
+ *
+ * Reference: Duda, "Asymmetric Numeral Systems" (2009). */
+
+#include "uc2/uc2_rans.h"
+#include <stdlib.h>
+#include <string.h>
+
+#define RANS_L (1u << 23)  /* lower bound of state range */
+
+/* Fill tab from raw symbol counts.
+ *
+ * At most UC2_RANS_MAX_SYMS symbols are used.  Counts are scaled so that
+ * they sum to UC2_RANS_PROB_SCALE, with every symbol that occurred keeping
+ * a frequency of at least 1; the rounding difference is added to (or
+ * taken from) the first symbol with the largest scaled frequency.
+ * cumfreq[s] is the sum of the frequencies of all symbols below s.
+ * If every count is zero, freq and cumfreq are simply cleared. */
+void uc2_rans_build_table(struct uc2_rans_table *tab,
+                          const uint32_t *raw_freq, int nsym)
+{
+	if (nsym > UC2_RANS_MAX_SYMS)
+		nsym = UC2_RANS_MAX_SYMS;
+	tab->nsym = nsym;
+
+	uint64_t raw_total = 0;
+	for (int s = 0; s < nsym; s++)
+		raw_total += raw_freq[s];
+
+	if (raw_total == 0) {
+		memset(tab->freq, 0, sizeof tab->freq);
+		memset(tab->cumfreq, 0, sizeof tab->cumfreq);
+		return;
+	}
+
+	/* Scale each count; symbols that occurred never round down to 0. */
+	uint32_t scaled_sum = 0;
+	for (int s = 0; s < nsym; s++) {
+		uint32_t scaled = 0;
+		if (raw_freq[s] != 0) {
+			scaled = (uint32_t)((uint64_t)raw_freq[s] * UC2_RANS_PROB_SCALE / raw_total);
+			if (scaled == 0)
+				scaled = 1;
+			scaled_sum += scaled;
+		}
+		tab->freq[s] = (uint16_t)scaled;
+	}
+
+	/* Push the rounding error onto the most frequent symbol. */
+	if (scaled_sum != UC2_RANS_PROB_SCALE) {
+		int top = 0;
+		for (int s = 1; s < nsym; s++) {
+			if (tab->freq[s] > tab->freq[top])
+				top = s;
+		}
+		const int32_t correction = (int32_t)UC2_RANS_PROB_SCALE - (int32_t)scaled_sum;
+		tab->freq[top] = (uint16_t)((int32_t)tab->freq[top] + correction);
+	}
+
+	/* Prefix sums */
+	tab->cumfreq[0] = 0;
+	for (int s = 1; s < nsym; s++)
+		tab->cumfreq[s] = tab->cumfreq[s - 1] + tab->freq[s - 1];
+}
+
+/* --- Encoder --- */
+
+/* Double the encoder's output buffer (4096 bytes on first use).
+ *
+ * The encoder API has no way to report failure from here, and carrying on
+ * after a failed realloc would make the caller store through a NULL
+ * pointer.  Running out of memory therefore aborts the process instead of
+ * corrupting memory; the old buffer is never overwritten by a failed
+ * realloc result. */
+static void enc_grow(struct uc2_rans_enc *enc)
+{
+	size_t newcap = enc->rev_cap ? enc->rev_cap * 2 : 4096;
+	void *grown = realloc(enc->rev_buf, newcap);
+	if (!grown)
+		abort();
+	enc->rev_buf = grown;
+	enc->rev_cap = newcap;
+}
+
+/* Append one byte to the encoder's output buffer, enlarging the buffer
+ * first when it is full. */
+static void enc_put_byte(struct uc2_rans_enc *enc, uint8_t b)
+{
+	const size_t slot = enc->rev_pos;
+
+	if (slot >= enc->rev_cap)
+		enc_grow(enc);
+	enc->rev_buf[slot] = b;
+	enc->rev_pos = slot + 1;
+}
+
+/* Prepare enc for a new stream coded with tab: the state starts at the
+ * lower bound RANS_L and the output buffer starts out empty and
+ * unallocated. */
+void uc2_rans_enc_init(struct uc2_rans_enc *enc,
+                       const struct uc2_rans_table *tab)
+{
+	/* output buffer: nothing allocated yet */
+	enc->rev_buf = NULL;
+	enc->rev_cap = 0;
+	enc->rev_pos = 0;
+
+	/* coder */
+	enc->tab = tab;
+	enc->state = RANS_L;
+}
+
+/* Encode one symbol.  A symbol whose table frequency is 0 cannot be
+ * represented and is silently skipped.
+ *
+ * Bytes are first shifted out of the state until the encoding step below
+ * keeps it inside the coder's range, then
+ *   state = (state / freq) * PROB_SCALE + cumfreq + (state % freq). */
+void uc2_rans_encode(struct uc2_rans_enc *enc, int sym)
+{
+	const uint32_t f = enc->tab->freq[sym];
+	if (f == 0)
+		return;
+
+	/* Renormalize: emit low bytes while the state is too large. */
+	const uint32_t limit = ((RANS_L >> UC2_RANS_PROB_BITS) << 8) * f;
+	for (; enc->state >= limit; enc->state >>= 8)
+		enc_put_byte(enc, (uint8_t)(enc->state & 0xFF));
+
+	const uint32_t base = enc->tab->cumfreq[sym];
+	enc->state = ((enc->state / f) << UC2_RANS_PROB_BITS) +
+	             base + (enc->state % f);
+}
+
+/* Flush the encoder: append the four state bytes (low byte first), then
+ * hand back the whole output in reversed order, which is the order the
+ * decoder reads it in.
+ *
+ * *out_data receives a malloc'd copy owned by the caller, or NULL if that
+ * allocation failed.  The return value is the stream length in either
+ * case, so callers must check *out_data. */
+size_t uc2_rans_enc_finish(struct uc2_rans_enc *enc, uint8_t **out_data)
+{
+	for (int k = 0; k < 4; k++, enc->state >>= 8)
+		enc_put_byte(enc, (uint8_t)(enc->state & 0xFF));
+
+	const size_t total = enc->rev_pos;
+	uint8_t *forward = malloc(total);
+	if (forward != NULL) {
+		/* rANS emits its output back to front */
+		size_t dst = 0;
+		for (size_t src = total; src > 0; )
+			forward[dst++] = enc->rev_buf[--src];
+	}
+
+	*out_data = forward;
+	return total;
+}
+
+/* Release the encoder's output buffer and return the buffer bookkeeping
+ * to the empty state.
+ *
+ * rev_cap is reset together with rev_buf: the append path only allocates
+ * when rev_pos reaches rev_cap, so a stale capacity would let a later
+ * append store through the NULL buffer. */
+void uc2_rans_enc_free(struct uc2_rans_enc *enc)
+{
+	free(enc->rev_buf);
+	enc->rev_buf = NULL;
+	enc->rev_pos = 0;
+	enc->rev_cap = 0;
+}
+
+/* --- Decoder --- */
+
+/* Prepare dec to decode data[0..len) with tab.
+ *
+ * Builds the slot -> symbol lookup (each symbol owns the slots
+ * cumfreq[s] .. cumfreq[s] + freq[s] - 1, clipped to the table size) and
+ * loads the initial state from the first four bytes, most significant
+ * byte first.  Bytes missing from a short input count as zero. */
+void uc2_rans_dec_init(struct uc2_rans_dec *dec,
+                       const struct uc2_rans_table *tab,
+                       const uint8_t *data, size_t len)
+{
+	dec->tab = tab;
+	dec->data = data;
+	dec->len = len;
+	dec->pos = 0;
+
+	/* Reverse lookup: slot within the probability range -> symbol */
+	memset(dec->lookup, 0, sizeof dec->lookup);
+	for (int s = 0; s < tab->nsym; s++) {
+		const uint32_t first = tab->cumfreq[s];
+		const uint32_t limit = first + tab->freq[s];
+		for (uint32_t slot = first;
+		     slot < limit && slot < UC2_RANS_PROB_SCALE; slot++)
+			dec->lookup[slot] = (uint16_t)s;
+	}
+
+	/* Initial state */
+	dec->state = 0;
+	for (int k = 0; k < 4; k++) {
+		dec->state <<= 8;
+		if (dec->pos < len)
+			dec->state |= data[dec->pos++];
+	}
+}
+
+/* Decode and return the next symbol.
+ *
+ * The low PROB_BITS bits of the state select a slot, the lookup table
+ * maps the slot to its symbol, and the state is stepped back with
+ *   state = freq * (state >> PROB_BITS) + slot - cumfreq.
+ * Input bytes are then shifted in while the state is below RANS_L and
+ * input remains. */
+int uc2_rans_decode(struct uc2_rans_dec *dec)
+{
+	const uint32_t slot = dec->state & (UC2_RANS_PROB_SCALE - 1);
+	const int sym = dec->lookup[slot];
+
+	const uint32_t f = dec->tab->freq[sym];
+	const uint32_t base = dec->tab->cumfreq[sym];
+	dec->state = f * (dec->state >> UC2_RANS_PROB_BITS) + slot - base;
+
+	/* Renormalize */
+	for (;;) {
+		if (dec->state >= RANS_L || dec->pos >= dec->len)
+			break;
+		dec->state = (dec->state << 8) | dec->data[dec->pos++];
+	}
+
+	return sym;
+}
--- a/lib/src/uc2_sha256.c
+++ b/lib/src/uc2_sha256.c
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* SHA-256 (FIPS 180-4). Reference textbook implementation. */
+
+#include "uc2/uc2_sha256.h"
+#include <string.h>
+
+static const uint32_t K[64] = {
+	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
+};
+
+static uint32_t rotr32(uint32_t x, int n) { return (x << (32 - n)) | (x >> n); } /* rotate right; n must be 1..31 */
+
+static uint32_t r32be(const uint8_t *p)
+{
+	uint32_t hi = ((uint32_t)p[0] << 8) | p[1], lo = ((uint32_t)p[2] << 8) | p[3];
+	return (hi << 16) | lo;  /* p[0] ends up as the most significant byte */
+}
+
+static void w32be(uint8_t *p, uint32_t v)
+{
+	/* Store v big-endian: peel bytes off the low end of v and write them
+	   from p[3] back to p[0], so p[0] gets the most significant byte. */
+	for (int k = 3; k >= 0; k--, v >>= 8)
+		p[k] = (uint8_t)v;
+}
+
+static void compress(uint32_t state[8], const uint8_t block[64])
+{
+	/* Message schedule: 16 big-endian words from the block, expanded to 64. */
+	uint32_t w[64];
+	for (int t = 0; t < 16; t++)
+		w[t] = r32be(&block[4 * t]);
+	for (int t = 16; t < 64; t++) {
+		uint32_t x = w[t-15], y = w[t-2];
+		w[t] = w[t-16] + w[t-7]
+		     + (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3))
+		     + (rotr32(y, 17) ^ rotr32(y, 19) ^ (y >> 10));
+	}
+	/* Working variables a..h are kept in v[0..7]. */
+	uint32_t v[8];
+	memcpy(v, state, sizeof v);
+	for (int t = 0; t < 64; t++) {
+		uint32_t S1 = rotr32(v[4], 6) ^ rotr32(v[4], 11) ^ rotr32(v[4], 25);
+		uint32_t ch = (v[4] & v[5]) ^ (~v[4] & v[6]);
+		uint32_t t1 = v[7] + S1 + ch + K[t] + w[t];
+		uint32_t S0 = rotr32(v[0], 2) ^ rotr32(v[0], 13) ^ rotr32(v[0], 22);
+		uint32_t mj = (v[0] & v[1]) ^ (v[0] & v[2]) ^ (v[1] & v[2]);
+		memmove(v + 1, v, 7 * sizeof v[0]);  /* h=g, g=f, ..., b=a */
+		v[4] += t1;                          /* e = d + t1 */
+		v[0] = t1 + S0 + mj;                 /* a = t1 + t2 */
+	}
+	for (int i = 0; i < 8; i++)
+		state[i] += v[i];
+}
+
+void uc2_sha256_init(struct uc2_sha256 *ctx)
+{
+	static const uint32_t iv[8] = {  /* SHA-256 initial hash value */
+		0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+		0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
+	};
+	memcpy(ctx->state, iv, sizeof iv);
+	ctx->bitcount = 0; ctx->buf_len = 0;  /* nothing absorbed or buffered yet */
+}
+
+void uc2_sha256_update(struct uc2_sha256 *ctx, const void *data, size_t len)
+{
+	/* Absorb len bytes of data.  An empty update is a no-op; returning early
+	   also keeps a NULL data pointer out of memcpy, which C leaves undefined. */
+	if (len == 0) return;
+	const uint8_t *p = data;
+	ctx->bitcount += (uint64_t)len * 8;
+	if (ctx->buf_len) {  /* top up the partially filled block first */
+		size_t take = UC2_SHA256_BLOCK_LEN - ctx->buf_len;
+		if (take > len) take = len;
+		memcpy(ctx->buf + ctx->buf_len, p, take);
+		ctx->buf_len += take;
+		p += take;
+		len -= take;
+		if (ctx->buf_len == UC2_SHA256_BLOCK_LEN) {
+			compress(ctx->state, ctx->buf);
+			ctx->buf_len = 0;
+		}
+	}
+	/* Whole blocks are hashed straight from the caller's buffer. */
+	for (; len >= UC2_SHA256_BLOCK_LEN; p += UC2_SHA256_BLOCK_LEN, len -= UC2_SHA256_BLOCK_LEN)
+		compress(ctx->state, p);
+	if (len) {  /* keep the tail for the next call */
+		memcpy(ctx->buf, p, len);
+		ctx->buf_len = len;
+	}
+}
+
+void uc2_sha256_final(struct uc2_sha256 *ctx, uint8_t out[UC2_SHA256_OUT_LEN])
+{
+	/* Pad with 0x80, zeros up to byte 56, then the 64-bit big-endian bit count. */
+	uint64_t nbits = ctx->bitcount;
+	ctx->buf[ctx->buf_len++] = 0x80;
+	if (ctx->buf_len > 56) {  /* no room left for the length: flush a block */
+		memset(ctx->buf + ctx->buf_len, 0, UC2_SHA256_BLOCK_LEN - ctx->buf_len);
+		compress(ctx->state, ctx->buf);
+		ctx->buf_len = 0;
+	}
+	memset(ctx->buf + ctx->buf_len, 0, 56 - ctx->buf_len);
+	for (int k = 7; k >= 0; k--, nbits >>= 8)
+		ctx->buf[56 + k] = (uint8_t)nbits;
+	compress(ctx->state, ctx->buf);
+	for (int word = 0; word < 8; word++)  /* digest = state words, big-endian */
+		w32be(out + 4 * word, ctx->state[word]);
+}
+
+void uc2_sha256_hash(const void *data, size_t len, uint8_t out[UC2_SHA256_OUT_LEN])
+{
+	struct uc2_sha256 one_shot;  /* one-shot wrapper: init, absorb, finalize */
+	uc2_sha256_init(&one_shot);
+	uc2_sha256_update(&one_shot, data, len);
+	uc2_sha256_final(&one_shot, out);
+}
--- a/lib/src/uc2_simhash.c
+++ b/lib/src/uc2_simhash.c
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* Near-duplicate detection via SimHash.
+ *
+ * Algorithm: extract overlapping 4-byte shingles from the input,
+ * hash each with FNV-1a 64-bit, then for each bit position, count
+ * how many hashes have that bit set vs clear.  The final SimHash
+ * bit is 1 if the majority of shingle hashes had 1 in that position.
+ *
+ * This gives a locality-sensitive hash: similar inputs produce
+ * fingerprints with small Hamming distance. */
+
+#include "uc2/uc2_simhash.h"
+
+static uint64_t fnv1a_64(const uint8_t *data, size_t len)
+{
+	/* FNV-1a, 64-bit: start from the offset basis, then for every byte
+	   XOR it in and multiply by the FNV prime. */
+	uint64_t acc = 14695981039346656037ULL;
+	for (size_t k = 0; k != len; k++)
+		acc = (acc ^ data[k]) * 1099511628211ULL;
+	return acc;
+}
+
+uint64_t uc2_simhash(const uint8_t *data, size_t len)
+{
+	/* Inputs shorter than one 4-byte shingle are hashed directly. */
+	if (len < 4)
+		return fnv1a_64(data, len);
+
+	/* Per-bit tally over all shingle hashes: +1 for a set bit, -1 for a
+	   clear one, so a positive total means set bits were in the majority. */
+	int32_t tally[64] = {0};
+
+	/* One shingle starts at every offset that still has 4 bytes after it. */
+	for (size_t off = 0; off + 3 < len; off++) {
+		uint64_t h = fnv1a_64(data + off, 4);
+		for (int bit = 0; bit < 64; bit++, h >>= 1) {
+			if (h & 1)
+				tally[bit]++;
+			else
+				tally[bit]--;
+		}
+	}
+
+	/* Majority vote, assembled from bit 63 downwards so that tally[b]
+	   decides bit b of the fingerprint; a tie leaves the bit clear. */
+	uint64_t fp = 0;
+	for (int bit = 63; bit >= 0; bit--) {
+		fp <<= 1;
+		if (tally[bit] > 0)
+			fp |= 1;
+	}
+
+	return fp;
+}
+
+int uc2_hamming(uint64_t a, uint64_t b)
+{
+	/* Hamming distance = number of set bits in a XOR b.  Every pass of
+	   diff &= diff - 1 removes exactly one set bit. */
+	int dist = 0;
+
+	for (uint64_t diff = a ^ b; diff != 0; diff &= diff - 1)
+		dist++;
+	return dist;
+}
--- a/lib/src/uc2_tables.c
+++ b/lib/src/uc2_tables.c
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/* UC2 shared tables: Huffman delta coding and default tree lengths. */
+
+#include "uc2_internal.h"
+
+/* Delta-to-absolute lookup: vval[prev][delta_code] = absolute_length.
+   Used by the decompressor's ht_dec(). */
+const u8 vval[NumDeltaCodes][NumDeltaCodes] = {
+	{ 0,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1},
+	{ 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0},
+	{ 2, 1, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0},
+	{ 3, 2, 4, 1, 5, 6, 7, 8, 9,10,11,12,13, 0},
+	{ 4, 3, 5, 2, 6, 1, 7, 8, 9,10,11,12,13, 0},
+	{ 5, 4, 6, 3, 7, 2, 8, 1, 9,10,11,12,13, 0},
+	{ 6, 5, 7, 4, 8, 3, 9, 2,10, 1,11,12,13, 0},
+	{ 7, 6, 8, 5, 9, 4,10, 3,11, 2,12, 1,13, 0},
+	{ 8, 7, 9, 6,10, 5,11, 4,12, 3,13, 2, 0, 1},
+	{ 9, 8,10, 7,11, 6,12, 5,13, 4, 0, 3, 2, 1},
+	{10, 9,11, 8,12, 7,13, 6, 0, 5, 4, 3, 2, 1},
+	{11,10,12, 9,13, 8, 0, 7, 6, 5, 4, 3, 2, 1},
+	{12,11,13,10, 0, 9, 8, 7, 6, 5, 4, 3, 2, 1},
+	{13,12, 0,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1}
+};
+
+/* Inverse delta table: ivval[prev][absolute] = delta_code.
+   Used by the compressor's tree encoder. */
+const u8 ivval[NumDeltaCodes][NumDeltaCodes] = {
+	{ 0,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1},
+	{13, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12},
+	{13, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12},
+	{13, 3, 1, 0, 2, 4, 5, 6, 7, 8, 9,10,11,12},
+	{13, 5, 3, 1, 0, 2, 4, 6, 7, 8, 9,10,11,12},
+	{13, 7, 5, 3, 1, 0, 2, 4, 6, 8, 9,10,11,12},
+	{13, 9, 7, 5, 3, 1, 0, 2, 4, 6, 8,10,11,12},
+	{13,11, 9, 7, 5, 3, 1, 0, 2, 4, 6, 8,10,12},
+	{12,13,11, 9, 7, 5, 3, 1, 0, 2, 4, 6, 8,10},
+	{10,13,12,11, 9, 7, 5, 3, 1, 0, 2, 4, 6, 8},
+	{ 8,13,12,11,10, 9, 7, 5, 3, 1, 0, 2, 4, 6},
+	{ 6,13,12,11,10, 9, 8, 7, 5, 3, 1, 0, 2, 4},
+	{ 4,13,12,11,10, 9, 8, 7, 6, 5, 3, 1, 0, 2},
+	{ 2,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 1, 0}
+};
+
+void uc2_default_lengths(u8 d[NumSymbols])
+{
+	/* Run-length table: (count, code length) pairs, ended by a zero count.
+	   The counts sum to 344, so d receives 344 entries. */
+	static const u8 rle[] = {
+		10,9, 1,7, 1,9, 1,7, 19,9, 1,7, 13,8, 1,7, 11,8, 1,7,
+		33,8, 1,7, 35,8, 128,10, 16,6, 12,7, 6,8, 10,9, 16,10,
+		9,4, 9,5, 10,6, 0
+	};
+
+	for (const u8 *run = rle; run[0] != 0; run += 2) {
+		u8 len = run[1];
+		for (u8 left = run[0]; left != 0; left--)
+			*d++ = len;
+	}
+}
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,168 @@
+# UC2 tests
+
+add_executable(test_identify src/test_identify.c)
+target_link_libraries(test_identify PRIVATE uc2)
+target_include_directories(test_identify PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_identify PRIVATE c_std_99)
+add_test(NAME identify COMMAND test_identify)
+
+add_executable(test_extract src/test_extract.c)
+target_link_libraries(test_extract PRIVATE uc2)
+target_include_directories(test_extract PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_extract PRIVATE c_std_99)
+add_test(NAME extract COMMAND test_extract
+    "${CMAKE_CURRENT_SOURCE_DIR}/archives"
+    "${CMAKE_CURRENT_SOURCE_DIR}/corpus"
+)
+
+add_executable(test_roundtrip src/test_roundtrip.c)
+target_link_libraries(test_roundtrip PRIVATE uc2)
+target_include_directories(test_roundtrip PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_roundtrip PRIVATE c_std_99)
+add_test(NAME roundtrip COMMAND test_roundtrip)
+
+# CLI create/extract round-trip test
+add_test(NAME cli_create
+    COMMAND ${CMAKE_COMMAND}
+        -DUC2_CLI=$<TARGET_FILE:uc2-cli>
+        -DTEST_DIR=${CMAKE_CURRENT_BINARY_DIR}/cli_test
+        -P ${CMAKE_CURRENT_SOURCE_DIR}/test_cli_create.cmake
+)
+
+# CLI master-block deduplication round-trip test
+add_test(NAME cli_master
+    COMMAND ${CMAKE_COMMAND}
+        -DUC2_CLI=$<TARGET_FILE:uc2-cli>
+        -DTEST_DIR=${CMAKE_CURRENT_BINARY_DIR}/cli_master_test
+        -P ${CMAKE_CURRENT_SOURCE_DIR}/test_cli_master.cmake
+)
+
+# CLI >64KB round-trip test (sliding-window edge regression, git-bug d747658)
+add_test(NAME cli_bigfile
+    COMMAND ${CMAKE_COMMAND}
+        -DUC2_CLI=$<TARGET_FILE:uc2-cli>
+        -DTEST_DIR=${CMAKE_CURRENT_BINARY_DIR}/cli_bigfile_test
+        -P ${CMAKE_CURRENT_SOURCE_DIR}/test_cli_bigfile.cmake
+)
+
+# CLI directory archival round-trip test
+add_test(NAME cli_dirs
+    COMMAND ${CMAKE_COMMAND}
+        -DUC2_CLI=$<TARGET_FILE:uc2-cli>
+        -DTEST_DIR=${CMAKE_CURRENT_BINARY_DIR}/cli_dirs_test
+        -P ${CMAKE_CURRENT_SOURCE_DIR}/test_cli_dirs.cmake
+)
+
+add_executable(test_cdc src/test_cdc.c)
+target_link_libraries(test_cdc PRIVATE uc2)
+target_include_directories(test_cdc PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_cdc PRIVATE c_std_99)
+add_test(NAME cdc COMMAND test_cdc)
+
+add_executable(test_merkle src/test_merkle.c)
+if(WIN32)
+    target_link_libraries(test_merkle PRIVATE uc2)
+else()
+    target_link_libraries(test_merkle PRIVATE uc2 m)
+endif()
+target_include_directories(test_merkle PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_merkle PRIVATE c_std_99)
+add_test(NAME merkle COMMAND test_merkle)
+
+add_executable(test_blockstore src/test_blockstore.c)
+target_link_libraries(test_blockstore PRIVATE uc2)
+target_include_directories(test_blockstore PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_blockstore PRIVATE c_std_99)
+add_test(NAME blockstore COMMAND test_blockstore)
+
+add_executable(test_simhash src/test_simhash.c)
+target_link_libraries(test_simhash PRIVATE uc2)
+target_include_directories(test_simhash PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_simhash PRIVATE c_std_99)
+add_test(NAME simhash COMMAND test_simhash)
+
+add_executable(test_delta src/test_delta.c)
+target_link_libraries(test_delta PRIVATE uc2)
+target_include_directories(test_delta PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_delta PRIVATE c_std_99)
+add_test(NAME delta COMMAND test_delta)
+
+add_executable(test_rans src/test_rans.c)
+if(WIN32)
+    target_link_libraries(test_rans PRIVATE uc2)
+else()
+    target_link_libraries(test_rans PRIVATE uc2 m)
+endif()
+target_include_directories(test_rans PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_rans PRIVATE c_std_99)
+add_test(NAME rans COMMAND test_rans)
+
+add_executable(test_dict src/test_dict.c)
+target_link_libraries(test_dict PRIVATE uc2)
+target_include_directories(test_dict PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_dict PRIVATE c_std_99)
+add_test(NAME dict COMMAND test_dict)
+
+add_executable(test_preprocess src/test_preprocess.c)
+target_link_libraries(test_preprocess PRIVATE uc2)
+target_include_directories(test_preprocess PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_preprocess PRIVATE c_std_99)
+add_test(NAME preprocess COMMAND test_preprocess)
+
+add_executable(test_lz4 src/test_lz4.c)
+target_link_libraries(test_lz4 PRIVATE uc2)
+target_include_directories(test_lz4 PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_lz4 PRIVATE c_std_99)
+add_test(NAME lz4 COMMAND test_lz4)
+
+add_executable(test_blake3 src/test_blake3.c)
+target_link_libraries(test_blake3 PRIVATE uc2)
+target_include_directories(test_blake3 PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_blake3 PRIVATE c_std_99)
+add_test(NAME blake3 COMMAND test_blake3)
+
+add_executable(test_sha256 src/test_sha256.c)
+target_link_libraries(test_sha256 PRIVATE uc2)
+target_include_directories(test_sha256 PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_sha256 PRIVATE c_std_99)
+add_test(NAME sha256 COMMAND test_sha256)
+
+add_executable(test_ots src/test_ots.c)
+target_link_libraries(test_ots PRIVATE uc2)
+target_include_directories(test_ots PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_ots PRIVATE c_std_99)
+add_test(NAME ots COMMAND test_ots)
+
+add_executable(test_ingest src/test_ingest.c)
+target_link_libraries(test_ingest PRIVATE uc2)
+target_include_directories(test_ingest PRIVATE "${PROJECT_BINARY_DIR}/lib")
+target_compile_features(test_ingest PRIVATE c_std_99)
+add_test(NAME ingest COMMAND test_ingest)
+
+# Optional cross-check: validates uc2 .ots output against the python-opentimestamps
+# reference parser.  Skipped (return code 77) when opentimestamps is not installed.
+find_package(Python3 COMPONENTS Interpreter)
+if(Python3_Interpreter_FOUND)
+    add_test(NAME ots_cross_check
+        COMMAND ${Python3_EXECUTABLE}
+            ${CMAKE_CURRENT_SOURCE_DIR}/scripts/cross_check_ots.py
+            $<TARGET_FILE:uc2-cli>
+            ${CMAKE_CURRENT_BINARY_DIR}/ots_cross_check
+    )
+    set_tests_properties(ots_cross_check PROPERTIES
+        SKIP_RETURN_CODE 77
+        LABELS "optional"
+    )
+endif()
+
+# Cross-tool round-trip: UC2 v3 <-> original uc2pro.exe via DOSBox-X
+add_test(NAME roundtrip_dosbox
+    COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/scripts/roundtrip_dosbox.sh
+        $<TARGET_FILE:uc2-cli>
+        ${CMAKE_CURRENT_SOURCE_DIR}/../original/UC2_source/uc2pro.exe
+        ${CMAKE_CURRENT_SOURCE_DIR}/corpus
+)
+set_tests_properties(roundtrip_dosbox PROPERTIES
+    LABELS "dosbox"
+    TIMEOUT 1200
+)
--- a/tests/archives/basic.uc2
+++ b/tests/archives/basic.uc2
--- a/tests/archives/empty.uc2
+++ b/tests/archives/empty.uc2
--- a/tests/archives/random.uc2
+++ b/tests/archives/random.uc2
--- a/tests/archives/single.uc2
+++ b/tests/archives/single.uc2
--- a/tests/archives/zeros.uc2
+++ b/tests/archives/zeros.uc2
--- a/tests/corpus/allbytes.bin
+++ b/tests/corpus/allbytes.bin
--- a/tests/corpus/empty.dat
+++ b/tests/corpus/empty.dat
--- a/tests/corpus/hello.txt
+++ b/tests/corpus/hello.txt
@@ -0,0 +1 @@
+Hello, World!
--- a/tests/corpus/random.bin
+++ b/tests/corpus/random.bin
--- a/tests/corpus/textfile.txt
+++ b/tests/corpus/textfile.txt
@@ -0,0 +1,30 @@
+UltraCompressor II was a DOS-era archiver created by Nico de Vries between
+1992 and 1996.  It was notable for its advanced deduplication system called
+"master blocks", file versioning within archives, and competitive compression
+ratios on the hardware of its day.
+
+The archiver used an LZ77 sliding-window compressor with Huffman entropy
+coding.  The algorithm operates on a 64KB circular buffer with hash-chain
+match finding.  Matches of 3 to 32760 bytes are supported, with lazy
+evaluation to find better matches at adjacent positions.
+
+Huffman trees are serialized using delta coding against the previous block's
+tree, with a nested Huffman code for the delta symbols.  This is remarkably
+efficient for typical data where consecutive blocks have similar symbol
+distributions.
+
+The deduplication system works by identifying common data blocks across files
+and storing them only once as "master blocks".  When a file's compressed data
+matches an existing master, only a reference is stored.  This was ahead of its
+time -- modern tools like borg and restic use similar content-defined chunking.
+
+UC2 v3.0.0 is a cross-platform revival of this archiver, built on Jan
+Bobrowski's clean-room portable decompressor (libunuc2).  The project brings
+UC2 back as a modern, portable C99 tool that runs on Linux, macOS, Windows,
+and even DOS via DJGPP cross-compilation.
+
+This text file serves as part of the test corpus for verifying the extraction
+pipeline.  It contains enough English prose to exercise the typical symbol
+distribution paths in the decompressor, including the Huffman tree generation
+and the LZ77 back-reference matching for repeated phrases like "master blocks"
+and "compression" which appear multiple times.
--- a/tests/corpus/zeros.bin
+++ b/tests/corpus/zeros.bin
--- a/tests/scripts/bitdump.py
+++ b/tests/scripts/bitdump.py
@@ -0,0 +1,392 @@
+#!/usr/bin/env python3
+"""Decode and annotate UC2 compressed bitstreams for comparison.
+
+Reads a UC2 archive, locates either the cdir or a file data section,
+and decodes the LZ77+Huffman bitstream symbol by symbol using the
+exact same algorithm as the Bobrowski decompressor.
+
+Usage:
+    python3 bitdump.py archive.uc2 [--cdir | --file] [--max-symbols 100]
+"""
+
+import struct
+import sys
+import argparse
+
+# UC2 constants
+MaxCodeBits = 13
+NumByteSym = 256
+NumDistSym = 60
+NumLenSym = 28
+NumSymbols = NumByteSym + NumDistSym + NumLenSym  # 344
+NumDeltaCodes = 14
+RepeatCode = 14
+MinRepeat = 6
+EOB_MARK = 64001
+
+# Default tree lengths (BasePrev from TREEENC.CPP)
+DEFAULT_LENGTHS = [0] * NumSymbols
+# Symbols 0..31: code length 9 (except 10 and 12 = 7); symbol 32 is also 7
+for i in range(32):
+    DEFAULT_LENGTHS[i] = 9
+DEFAULT_LENGTHS[10] = 7
+DEFAULT_LENGTHS[12] = 7
+DEFAULT_LENGTHS[32] = 7
+# 33..127: 8 (except 46,58,92 = 7)
+for i in range(33, 128):
+    DEFAULT_LENGTHS[i] = 8
+DEFAULT_LENGTHS[46] = 7
+DEFAULT_LENGTHS[58] = 7
+DEFAULT_LENGTHS[92] = 7
+# 128..255: 10
+for i in range(128, 256):
+    DEFAULT_LENGTHS[i] = 10
+# 256..271: 6
+for i in range(256, 272):
+    DEFAULT_LENGTHS[i] = 6
+# 272..283: 7
+for i in range(272, 284):
+    DEFAULT_LENGTHS[i] = 7
+# 284..289: 8
+for i in range(284, 290):
+    DEFAULT_LENGTHS[i] = 8
+# 290..299: 9
+for i in range(290, 300):
+    DEFAULT_LENGTHS[i] = 9
+# 300..315: 10
+for i in range(300, 316):
+    DEFAULT_LENGTHS[i] = 10
+# 316..324: 4
+for i in range(316, 325):
+    DEFAULT_LENGTHS[i] = 4
+# 325..333: 5
+for i in range(325, 334):
+    DEFAULT_LENGTHS[i] = 5
+# 334..343: 6
+for i in range(334, 344):
+    DEFAULT_LENGTHS[i] = 6
+
+# vval table (delta-to-absolute)
+VVAL = [
+    [0,13,12,11,10,9,8,7,6,5,4,3,2,1],
+    [1,2,3,4,5,6,7,8,9,10,11,12,13,0],
+    [2,1,3,4,5,6,7,8,9,10,11,12,13,0],
+    [3,2,4,1,5,6,7,8,9,10,11,12,13,0],
+    [4,3,5,2,6,1,7,8,9,10,11,12,13,0],
+    [5,4,6,3,7,2,8,1,9,10,11,12,13,0],
+    [6,5,7,4,8,3,9,2,10,1,11,12,13,0],
+    [7,6,8,5,9,4,10,3,11,2,12,1,13,0],
+    [8,7,9,6,10,5,11,4,12,3,13,2,0,1],
+    [9,8,10,7,11,6,12,5,13,4,0,3,2,1],
+    [10,9,11,8,12,7,13,6,0,5,4,3,2,1],
+    [11,10,12,9,13,8,0,7,6,5,4,3,2,1],
+    [12,11,13,10,0,9,8,7,6,5,4,3,2,1],
+    [13,12,0,11,10,9,8,7,6,5,4,3,2,1],
+]
+
+# Distance decoding tables
+DIST_CODES = []
+for i in range(15):
+    DIST_CODES.append((i + 1, 0))        # dist 1-15, 0 extra
+for i in range(15):
+    DIST_CODES.append((16 + i * 16, 4))  # dist 16-240, 4 extra
+for i in range(15):
+    DIST_CODES.append((256 + i * 256, 8))  # dist 256-3840, 8 extra
+for i in range(15):
+    DIST_CODES.append((4096 + i * 4096, 12))  # dist 4096-61440, 12 extra
+
+# Length decoding tables
+LEN_CODES = []
+for i in range(8):
+    LEN_CODES.append((3 + i, 0))
+for i in range(8):
+    LEN_CODES.append((11 + i * 2, 1))
+for i in range(8):
+    LEN_CODES.append((27 + i * 8, 3))
+LEN_CODES.append((91, 6))
+LEN_CODES.append((155, 9))
+LEN_CODES.append((667, 11))
+LEN_CODES.append((2715, 15))
+
+
+class BitReader:
+    def __init__(self, data, offset):
+        self.data = data
+        self.byte_pos = offset
+        self.bits = 0
+        self.have = 0
+        self.total_bits_read = 0
+        self.exhausted = False
+
+    def _fill(self):
+        if self.byte_pos + 1 < len(self.data):
+            lo = self.data[self.byte_pos]
+            hi = self.data[self.byte_pos + 1]
+            self.bits = (self.bits << 16) | lo | (hi << 8)
+            self.have += 16
+            self.byte_pos += 2
+        else:
+            self.exhausted = True
+
+    def peek(self, n):
+        while self.have < n:
+            if self.exhausted:
+                return (self.bits << (n - self.have)) & ((1 << n) - 1)
+            self._fill()
+        return (self.bits >> (self.have - n)) & ((1 << n) - 1)
+
+    def get(self, n):
+        v = self.peek(n)
+        self.have -= n
+        self.total_bits_read += n
+        return v
+
+
+def build_decode_table(lengths, nsym):
+    """Build 13-bit lookup table from code lengths."""
+    table = [None] * (1 << MaxCodeBits)
+    code = 0
+    for bit_len in range(1, MaxCodeBits + 1):
+        for sym in range(nsym):
+            if lengths[sym] == bit_len:
+                prefix = code << (MaxCodeBits - bit_len)
+                count = 1 << (MaxCodeBits - bit_len)
+                for j in range(count):
+                    table[prefix + j] = (sym, bit_len)
+                code += 1
+        code <<= 1
+    return table
+
+
+def huff_decode(br, table):
+    """Decode one Huffman symbol."""
+    idx = br.peek(MaxCodeBits)
+    entry = table[idx]
+    if entry is None:
+        return None, 0
+    sym, bits = entry
+    br.get(bits)
+    return sym, bits
+
+
+def decode_tree(br, symprev):
+    """Decode Huffman tree from bitstream."""
+    tree_changed = br.get(1)
+    if not tree_changed:
+        lengths = list(DEFAULT_LENGTHS)
+        for i in range(NumSymbols):
+            symprev[i] = lengths[i]
+        return lengths, "default"
+
+    t = br.get(2)
+    has_lo = t & 1
+    has_hi = (t >> 1) & 1
+
+    # Read tree-encoding tree (15 x 3 bits)
+    tlengths = [br.get(3) for _ in range(15)]
+
+    # Build meta-tree decode table
+    meta_table = build_decode_table(tlengths, 15)
+
+    # Compute stream size
+    stream_size = NumSymbols
+    if not has_lo:
+        stream_size -= 28  # skip symbols 4..31
+    if not has_hi:
+        stream_size -= 128  # skip symbols 128..255
+
+    # Decode delta stream with RLE
+    stream = []
+    val = 0
+    while len(stream) < stream_size:
+        sym, _ = huff_decode(br, meta_table)
+        if sym == RepeatCode:
+            c, _ = huff_decode(br, meta_table)
+            count = c + MinRepeat - 1
+            stream.extend([val] * count)
+        else:
+            val = sym
+            stream.append(sym)
+
+    # Convert delta to absolute lengths
+    lengths = [0] * NumSymbols
+    si = 0
+    if has_lo:
+        for i in range(32):
+            lengths[i] = VVAL[symprev[i]][stream[si]]
+            si += 1
+    else:
+        lengths[9] = VVAL[symprev[9]][stream[si]]; si += 1
+        lengths[10] = VVAL[symprev[10]][stream[si]]; si += 1
+        lengths[12] = VVAL[symprev[12]][stream[si]]; si += 1
+        lengths[13] = VVAL[symprev[13]][stream[si]]; si += 1
+
+    for i in range(32, 128):
+        lengths[i] = VVAL[symprev[i]][stream[si]]
+        si += 1
+
+    if has_hi:
+        for i in range(128, 256):
+            lengths[i] = VVAL[symprev[i]][stream[si]]
+            si += 1
+
+    for i in range(256, 344):
+        lengths[i] = VVAL[symprev[i]][stream[si]]
+        si += 1
+
+    for i in range(NumSymbols):
+        symprev[i] = lengths[i]
+
+    return lengths, f"custom(t={t})"
+
+
+def decode_block(br, bd_table, l_table, max_symbols):
+    """Decode LZ77 symbols from one block."""
+    symbols = []
+    max_bits = (len(br.data) - br.byte_pos + br.have) * 8 + 1000
+    start_bits = br.total_bits_read
+    while len(symbols) < max_symbols:
+        if br.total_bits_read - start_bits > max_bits:
+            symbols.append(("ERROR", "exceeded bit limit"))
+            break
+        sym, _ = huff_decode(br, bd_table)
+        if sym is None:
+            symbols.append(("ERROR", f"invalid Huffman code at bit {br.total_bits_read}"))
+            break
+
+        if sym < NumByteSym:
+            symbols.append(("LIT", sym))
+        else:
+            dsym = sym - NumByteSym
+            if dsym >= NumDistSym:
+                symbols.append(("ERROR", f"dist sym {dsym} out of range"))
+                break
+            base, extra_bits = DIST_CODES[dsym]
+            dist = base
+            if extra_bits:
+                dist += br.get(extra_bits)
+
+            if dist == EOB_MARK:
+                # Read length (should be 3)
+                lsym, _ = huff_decode(br, l_table)
+                lbase, lextra = LEN_CODES[lsym]
+                length = lbase + (br.get(lextra) if lextra else 0)
+                symbols.append(("EOB", f"dist={dist} len={length}"))
+                break
+
+            lsym, _ = huff_decode(br, l_table)
+            if lsym is None:
+                symbols.append(("ERROR", "invalid length Huffman code"))
+                break
+            lbase, lextra = LEN_CODES[lsym]
+            length = lbase + (br.get(lextra) if lextra else 0)
+            symbols.append(("MATCH", f"dist={dist} len={length}"))
+
+    return symbols
+
+
+def analyze_archive(path, section, max_symbols):
+    with open(path, "rb") as f:
+        data = f.read()
+    # Print the archive header, then decode and print blocks from the chosen section.
+    magic = data[0:4]
+    if magic != b'UC2\x1a':
+        print(f"Not a UC2 archive: {magic}")
+        return
+    # Little-endian header fields, read at fixed byte offsets.
+    complen = struct.unpack_from('<I', data, 4)[0]
+    cdir_off = struct.unpack_from('<I', data, 17)[0]
+    fletch = struct.unpack_from('<H', data, 21)[0]
+    ver_made = struct.unpack_from('<H', data, 24)[0]
+    ver_need = struct.unpack_from('<H', data, 26)[0]
+
+    print(f"Archive: {path} ({len(data)} bytes)")
+    print(f"  complen={complen}, total={complen+13}")
+    print(f"  cdir_offset={cdir_off}, fletcher={fletch:#06x}")
+    print(f"  versionMade={ver_made}, versionNeeded={ver_need}")
+    # cdir: a 10-byte record precedes the bitstream; file data starts at offset 29.
+    if section == 'cdir':
+        crec = data[cdir_off:cdir_off + 10]
+        csize, method, master = struct.unpack_from('<IHI', crec)
+        print(f"  cdir COMPRESS: csize={csize}, method={method}, master={master}")
+        stream_start = cdir_off + 10
+    else:
+        stream_start = 29
+        print(f"  File data starts at offset {stream_start}")
+
+    print()
+    # symprev starts as the default lengths and is updated by each decode_tree call.
+    br = BitReader(data, stream_start)
+    symprev = list(DEFAULT_LENGTHS)
+    total_decoded = 0
+
+    block_num = 0
+    while total_decoded < max_symbols:
+        bit_pos = br.total_bits_read
+        block_present = br.get(1)
+        print(f"Block {block_num} at bit {bit_pos}: present={block_present}")
+        if not block_present:
+            print("  End of stream")
+            break
+        # tree_bits excludes the 1-bit block-present flag read above.
+        lengths, tree_desc = decode_tree(br, symprev)
+        tree_bits = br.total_bits_read - bit_pos - 1
+        print(f"  Tree: {tree_desc} ({tree_bits} bits)")
+
+        nonzero = sum(1 for l in lengths if l > 0)
+        print(f"  Non-zero lengths: {nonzero}/{NumSymbols}")
+
+        bd_table = build_decode_table(lengths[:NumByteSym + NumDistSym],
+                                      NumByteSym + NumDistSym)
+        l_table = build_decode_table(lengths[NumByteSym + NumDistSym:],
+                                     NumLenSym)
+        # Table slots left as None are bit patterns that no code covers.
+        bd_none = sum(1 for x in bd_table if x is None)
+        l_none = sum(1 for x in l_table if x is None)
+        if bd_none:
+            print(f"  WARNING: {bd_none}/{len(bd_table)} BD table entries are None")
+        if l_none:
+            print(f"  WARNING: {l_none}/{len(l_table)} LEN table entries are None")
+
+        # Decode until EOB, an error, or the overall max_symbols budget runs out
+        remaining = max_symbols - total_decoded
+        symbols = decode_block(br, bd_table, l_table, remaining)
+        total_decoded += len(symbols)
+
+        truncated = len(symbols) >= remaining and symbols[-1][0] not in ("EOB", "ERROR")
+        print(f"  Decoded {len(symbols)} symbols{' (truncated)' if truncated else ''}:")
+        for i, (kind, val) in enumerate(symbols):
+            if kind == "LIT":
+                ch = chr(val) if 32 <= val < 127 else f"\\x{val:02x}"
+                print(f"    [{i:3d}] LIT {val:3d} '{ch}'")
+            elif kind == "MATCH":
+                print(f"    [{i:3d}] {val}")
+            elif kind == "EOB":
+                print(f"    [{i:3d}] EOB ({val})")
+            elif kind == "ERROR":
+                print(f"    [{i:3d}] ERROR: {val}")
+
+        data_bits = br.total_bits_read - bit_pos - 1 - tree_bits
+        print(f"  Data: {data_bits} bits")
+        print()
+
+        if truncated or (symbols and symbols[-1][0] in ("ERROR",)):
+            break
+        block_num += 1
+
+
+def main():
+    parser = argparse.ArgumentParser(description='UC2 bitstream analyzer')
+    parser.add_argument('archive', help='UC2 archive file')
+    parser.add_argument('--cdir', action='store_true', help='Analyze cdir section')
+    parser.add_argument('--file', action='store_true', help='Analyze file data section')
+    parser.add_argument('--max-symbols', type=int, default=200,
+                        help='Max symbols to decode per block')
+    args = parser.parse_args()
+
+    section = 'cdir' if args.cdir else 'file'
+    analyze_archive(args.archive, section, args.max_symbols)
+
+
+if __name__ == '__main__':
+    main()
--- a/tests/scripts/check_assert_side_effects.py
+++ b/tests/scripts/check_assert_side_effects.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+# Fails when assert(...) wraps a function call with side effects.
+#
+# Background: under -DNDEBUG (CMake's default for Release) the assert macro
+# expands to ((void)0) and the wrapped expression is not evaluated.  Any work
+# done inside assert() is silently dropped.  This has cost the project two
+# CI rounds:
+#   - dae8a50: int-truncation in test_merkle / test_dict Debug builds
+#   - 6d8087f: test_delta double-free under Release / Windows MSVC
+#
+# Rule: tests must capture the call result first, then assert on it:
+#   int rc = call(...);
+#   assert(rc == EXPECTED);
+#
+# This script detects the dangerous form by matching assert() that wraps a
+# call to a function whose name contains a side-effect verb.  Pure queries
+# (_equal, _match, _verify, _has_, _is_, _root, _id, _hash, _attest_name,
+# memcmp, strcmp, ...) are allowed.
+
+import re
+import sys
+from pathlib import Path
+
+SIDE_EFFECT_VERBS = (
+    "encode", "decode", "parse", "serialize", "deserialize",
+    "build", "init", "write", "read_file", "attach", "extract",
+    "compress", "decompress", "create", "destroy", "open", "close",
+    "flush", "push", "pop", "append", "insert", "remove", "update",
+    "store", "load", "put", "finalize", "process", "run", "step",
+    "alloc", "free", "register", "submit", "commit", "rollback",
+)
+
+SCAN_DIRS = ("tests/src", "lib/src", "cli/src", "src")
+
+assert_call_re = re.compile(
+    r"assert\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\("
+)
+
+verb_re = re.compile(
+    r"(?:^|_)(" + "|".join(SIDE_EFFECT_VERBS) + r")(?:_|$)"
+)
+
+
+def scan(root: Path) -> list[tuple[Path, int, str, str]]:
+    findings = []
+    for d in SCAN_DIRS:
+        base = root / d
+        if not base.is_dir():
+            continue
+        for path in sorted(base.rglob("*.c")):
+            for lineno, line in enumerate(path.read_text(encoding="utf-8",
+                                                         errors="replace").splitlines(),
+                                          start=1):
+                # Skip comments quickly.  Not perfect but adequate here.
+                stripped = line.lstrip()
+                if stripped.startswith("//") or stripped.startswith("*"):
+                    continue
+                m = assert_call_re.search(line)
+                if not m:
+                    continue
+                ident = m.group(1)
+                if verb_re.search(ident):
+                    findings.append((path, lineno, ident, line.rstrip()))
+    return findings
+
+
+def main() -> int:
+    repo_root = Path(__file__).resolve().parents[2]
+    findings = scan(repo_root)
+    if not findings:
+        print("OK: no side-effecting asserts found.")
+        return 0
+    print("ERROR: assert() must not wrap calls with side effects.", file=sys.stderr)
+    print("Under -DNDEBUG (Release builds) the call is dropped, leaving", file=sys.stderr)
+    print("output parameters uninitialised and the test silently no-op.", file=sys.stderr)
+    print("Convert to: int rc = call(...); assert(rc == EXPECTED);", file=sys.stderr)
+    print(file=sys.stderr)
+    for path, lineno, ident, line in findings:
+        rel = path.relative_to(repo_root)
+        print(f"{rel}:{lineno}: {ident}", file=sys.stderr)
+        print(f"    {line.strip()}", file=sys.stderr)
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/tests/scripts/create_archives.sh
+++ b/tests/scripts/create_archives.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# Create reference UC2 archives from the test corpus using the original
+# UC2 Pro (UC.EXE) in DOSBox-X.
+#
+# Run from the UC2 project root: bash tests/scripts/create_archives.sh
+#
+# uc2pro.exe is a UCEXE-compressed self-extracting archive containing the
+# UC2 Pro distribution.  We first extract it to get UC.EXE, then use
+# UC.EXE to create the reference archives.
+
+set -euo pipefail
+
+# Paths are derived from this script's own location.
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
+ARCHIVE_DIR="$PROJECT_DIR/tests/archives"
+
+# DOSBox-X flatpak only has filesystem=home access, so the scratch directory
+# lives under $HOME.  mktemp -d does not create missing parent directories,
+# so make sure ~/.cache exists first (it may not on a fresh account).
+mkdir -p "$HOME/.cache"
+WORK_DIR="$(mktemp -d "$HOME/.cache/uc2-create-archives.XXXXXX")"
+trap 'rm -rf "$WORK_DIR"' EXIT
+
+echo "Working in: $WORK_DIR"
+
+# Stage the SFX and the corpus inside the directory that gets mounted as C:.
+cp "$PROJECT_DIR/original/UC2_source/uc2pro.exe" "$WORK_DIR/"
+mkdir -p "$WORK_DIR/corpus" "$WORK_DIR/out"
+cp "$PROJECT_DIR/tests/corpus/"* "$WORK_DIR/corpus/"
+
+# Session 1: Extract UC2 Pro distribution from SFX
+echo "Extracting UC2 Pro tools from uc2pro.exe (this takes ~60s in DOSBox)..."
+cat > "$WORK_DIR/dosbox.conf" <<DOSBOXCFG
+[sdl]
+output=none
+fullscreen=false
+[dosbox]
+memsize=16
+machine=svga_s3
+[cpu]
+cycles=max
+[autoexec]
+mount c: $WORK_DIR
+c:
+uc2pro UC2DIST
+exit
+DOSBOXCFG
+
+timeout 180 flatpak run com.dosbox_x.DOSBox-X \
+    -conf "$WORK_DIR/dosbox.conf" -nopromptfolder 2>/dev/null || true
+
+if [ ! -f "$WORK_DIR/UC2DIST/UC.EXE" ]; then
+    echo "ERROR: SFX extraction failed (UC.EXE not found)"
+    exit 1
+fi
+
+# Session 2: Create reference archives
+echo "Creating reference archives..."
+cat > "$WORK_DIR/dosbox.conf" <<DOSBOXCFG
+[sdl]
+output=none
+fullscreen=false
+[dosbox]
+memsize=16
+machine=svga_s3
+[cpu]
+cycles=max
+[autoexec]
+mount c: $WORK_DIR
+c:
+cd C:\\UC2DIST
+
+rem Basic archive: all corpus files, Method 4 (Ultra, default)
+UC a C:\\OUT\\BASIC C:\\CORPUS\\*.*
+
+rem Empty file only
+UC a C:\\OUT\\EMPTY C:\\CORPUS\\EMPTY.DAT
+
+rem Single text file
+UC a C:\\OUT\\SINGLE C:\\CORPUS\\HELLO.TXT
+
+rem Large compressible file
+UC a C:\\OUT\\ZEROS C:\\CORPUS\\ZEROS.BIN
+
+rem Incompressible data
+UC a C:\\OUT\\RANDOM C:\\CORPUS\\RANDOM.BIN
+
+echo DONE > C:\\DONE.TXT
+exit
+DOSBOXCFG
+
+timeout 600 flatpak run com.dosbox_x.DOSBox-X \
+    -conf "$WORK_DIR/dosbox.conf" -nopromptfolder 2>/dev/null || true
+
+# Copy generated archives to the project.
+#
+# The autoexec section writes DONE.TXT as its very last step, so a missing
+# marker means DOSBox timed out or died part-way and the archive set may be
+# incomplete.  Refuse to install a partial set.
+if [ ! -f "$WORK_DIR/DONE.TXT" ] && [ ! -f "$WORK_DIR/done.txt" ]; then
+    echo "ERROR: DOSBox session did not complete (DONE.TXT missing). Check DOSBox output."
+    exit 1
+fi
+
+# Accept either case for the extension; the installed copies are always
+# lower-cased.
+shopt -s nullglob
+generated=("$WORK_DIR/out/"*.UC2 "$WORK_DIR/out/"*.uc2)
+shopt -u nullglob
+
+if [ "${#generated[@]}" -gt 0 ]; then
+    mkdir -p "$ARCHIVE_DIR"
+    for f in "${generated[@]}"; do
+        base=$(basename "$f")
+        lower=$(echo "$base" | tr '[:upper:]' '[:lower:]')
+        cp "$f" "$ARCHIVE_DIR/$lower"
+    done
+    echo "Archives created in $ARCHIVE_DIR:"
+    ls -la "$ARCHIVE_DIR/"*.uc2
+else
+    echo "ERROR: No archives were generated. Check DOSBox output."
+    exit 1
+fi
--- a/tests/scripts/cross_check_ots.py
+++ b/tests/scripts/cross_check_ots.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+"""Cross-check uc2 OTS output against the python-opentimestamps reference.
+
+Usage: cross_check_ots.py <uc2-binary> <work-dir>
+
+Builds a tiny archive, attaches a hand-crafted OTS proof, then:
+  1. Extracts via `uc2 --ots-extract`
+  2. Round-trips the .ots through python-opentimestamps
+  3. Confirms the proof's leaf digest equals SHA-256 of the attested archive prefix
+
+Exits 0 on success, 1 on mismatch, 77 (autotools "skip" code) if the
+opentimestamps library isn't installed.
+"""
+
+import hashlib
+import os
+import struct
+import subprocess
+import sys
+from io import BytesIO
+
+try:
+    from opentimestamps.core.timestamp import DetachedTimestampFile
+    from opentimestamps.core.serialize import StreamDeserializationContext
+except ModuleNotFoundError:
+    print("opentimestamps library not installed; skipping cross-check.")
+    sys.exit(77)
+
+
+HEADER_MAGIC = (b"\x00OpenTimestamps\x00\x00Proof\x00"
+                b"\xbf\x89\xe2\xe8\x84\xe8\x92\x94")
+PENDING_TAG = b"\x83\xdf\xe3\x0d\x2e\xf9\x0c\x8e"
+TRAILER_MAGIC = b"UC2-OTS\x00"
+
+
+def varint(n):
+    out = b""
+    while n >= 0x80:
+        out += bytes([n & 0x7f | 0x80])
+        n >>= 7
+    return out + bytes([n])
+
+
+def varbytes(b):
+    return varint(len(b)) + b
+
+
+def build_proof(leaf):
+    # Pending attestation payload is itself varbytes(uri) per the OTS spec,
+    # wrapped in the outer varbytes(serialized_attestation) layer.
+    pending_payload = varbytes(b"https://example.com/digest")
+    body = b"\x00" + PENDING_TAG + varbytes(pending_payload)
+    return HEADER_MAGIC + b"\x01" + b"\x08" + leaf + body
+
+
+def main():
+    if len(sys.argv) != 3:
+        print("usage: cross_check_ots.py <uc2-binary> <work-dir>", file=sys.stderr)
+        return 1
+    uc2 = sys.argv[1]
+    work = sys.argv[2]
+    os.makedirs(work, exist_ok=True)
+
+    a = os.path.join(work, "a.txt")
+    b = os.path.join(work, "b.txt")
+    with open(a, "w") as f: f.write("hello uc2 ots cross-check\n")
+    with open(b, "w") as f: f.write("second file\n")
+
+    arc = os.path.join(work, "test.uc2")
+    subprocess.check_call([uc2, "-w", "-q", arc, a, b])
+
+    archive_size = os.path.getsize(arc)
+    with open(arc, "rb") as f:
+        archive_bytes = f.read()
+    leaf = hashlib.sha256(archive_bytes).digest()
+
+    proof_path = os.path.join(work, "proof.ots")
+    with open(proof_path, "wb") as f:
+        f.write(build_proof(leaf))
+
+    subprocess.check_call([uc2, "--ots-attach", proof_path, arc])
+
+    extracted = os.path.join(work, "extracted.ots")
+    subprocess.check_call([uc2, "--ots-extract", arc, extracted])
+
+    with open(extracted, "rb") as f:
+        ots_bytes = f.read()
+    ctx = StreamDeserializationContext(BytesIO(ots_bytes))
+    detached = DetachedTimestampFile.deserialize(ctx)
+
+    py_leaf = bytes(detached.timestamp.msg)
+    if py_leaf != leaf:
+        print("LEAF MISMATCH", file=sys.stderr)
+        print(f"  hand-computed: {leaf.hex()}", file=sys.stderr)
+        print(f"  python-ots:    {py_leaf.hex()}", file=sys.stderr)
+        return 1
+
+    attestations = list(detached.timestamp.all_attestations())
+    if not attestations:
+        print("no attestations parsed by python-opentimestamps", file=sys.stderr)
+        return 1
+
+    info = subprocess.check_output(
+        [uc2, "--ots-info", arc], stderr=subprocess.STDOUT, text=True)
+    if "leaf matches archive: yes" not in info:
+        print("uc2 --ots-info reports leaf mismatch:", file=sys.stderr)
+        print(info, file=sys.stderr)
+        return 1
+
+    if archive_size + len(ots_bytes) >= os.path.getsize(arc):
+        pass  # archive grew by at least proof_len; trailer is present
+
+    print(f"cross-check OK: archive_size={archive_size}, proof_len={len(ots_bytes)}, "
+          f"attestations={len(attestations)}")
+    return 0
+
+
+if __name__ == "__main__":  # entry point when run as a script
+    sys.exit(main())  # exit status is main()'s return value (0 success, 1 mismatch or usage error)
--- a/tests/scripts/dos_smoke.sh
+++ b/tests/scripts/dos_smoke.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+# Smoke test for the DJGPP-built uc2.exe via DOSBox-X.
+#
+# Verifies the cross-compiled DOS binary actually loads under a real
+# DPMI host and produces expected output for `uc2 -h` and `uc2 -l`.  A missing uc2.exe, CWSDPMI.EXE or DOSBox-X is reported as SKIP with exit status 0.
+#
+# Usage:
+#   tests/scripts/dos_smoke.sh <uc2.exe> <CWSDPMI.EXE> [<list-archive>]
+#
+# Where:
+#   <uc2.exe>          DJGPP-built DOS binary
+#                      (e.g. build-djgpp/cli/uc2.exe)
+#   <CWSDPMI.EXE>      DPMI extender from csdpmi7b.zip
+#                      (http://www.delorie.com/pub/djgpp/current/v2misc/csdpmi7b.zip)
+#   <list-archive>     Optional: small UC2 archive to test 'uc2 -l' against
+#                      (e.g. tests/archives/basic.uc2)
+
+set -euo pipefail
+
+UC2_EXE="${1:?usage: dos_smoke.sh <uc2.exe> <CWSDPMI.EXE> [<list-archive>]}"  # ${N:?msg} aborts with the usage text when the argument is missing
+CWSDPMI="${2:?usage: dos_smoke.sh <uc2.exe> <CWSDPMI.EXE> [<list-archive>]}"
+LIST_ARCHIVE="${3:-}"  # optional; empty disables the uc2 -l check
+
+if [ ! -f "$UC2_EXE" ]; then
+    echo "SKIP: uc2.exe not found at $UC2_EXE (run the DJGPP build first)"
+    exit 0  # a missing prerequisite is a skip, not a failure
+fi
+if [ ! -f "$CWSDPMI" ]; then
+    echo "SKIP: CWSDPMI.EXE not found at $CWSDPMI"
+    exit 0
+fi
+if ! flatpak info com.dosbox_x.DOSBox-X &>/dev/null; then
+    echo "SKIP: DOSBox-X not installed (flatpak com.dosbox_x.DOSBox-X)"
+    exit 0
+fi
+
+# Scratch directory that DOSBox mounts as C:.  mktemp -d does not create
+# missing parents, so make sure ~/.cache exists first.
+mkdir -p "$HOME/.cache"
+WORK="$(mktemp -d "$HOME/.cache/uc2-dos-smoke.XXXXXX")"
+trap 'rm -rf "$WORK"' EXIT
+
+cp "$UC2_EXE" "$WORK/UC2.EXE"
+cp "$CWSDPMI" "$WORK/CWSDPMI.EXE"
+
+# Stage the optional list archive under an upper-cased name.  A path that
+# was given but does not exist used to disable the `uc2 -l` check without a
+# word; say so, so that a typo cannot pass unnoticed.
+ARCHIVE_NAME=""
+if [ -n "$LIST_ARCHIVE" ]; then
+    if [ -f "$LIST_ARCHIVE" ]; then
+        ARCHIVE_NAME="$(basename "$LIST_ARCHIVE" | tr '[:lower:]' '[:upper:]')"
+        cp "$LIST_ARCHIVE" "$WORK/$ARCHIVE_NAME"
+    else
+        echo "SKIP: list archive not found at $LIST_ARCHIVE (uc2 -l check disabled)"
+    fi
+fi
+
+cat > "$WORK/dosbox.conf" <<DOSBOXCFG  # unquoted delimiter: $WORK and the ARCHIVE_NAME lines are expanded while the file is written
+[sdl]
+output=none
+fullscreen=false
+[dosbox]
+memsize=16
+machine=svga_s3
+[cpu]
+cycles=max
+[autoexec]
+mount c: $WORK
+c:
+UC2 -h > HELP.TXT
+echo HELPDONE > HELPMRK.TXT
+${ARCHIVE_NAME:+UC2 -l $ARCHIVE_NAME > LIST.TXT}
+${ARCHIVE_NAME:+echo LISTDONE > LISTMRK.TXT}
+exit
+DOSBOXCFG
+
+echo "=== Running uc2.exe under DOSBox-X ==="
+timeout 60 flatpak run com.dosbox_x.DOSBox-X \
+    -conf "$WORK/dosbox.conf" -nopromptfolder 2>/dev/null || true  # exit status ignored; the marker files checked below decide success
+
+FAIL=0
+
+# --- Validate uc2 -h ---
+if [ ! -f "$WORK/HELPMRK.TXT" ]; then
+    echo "  FAIL: DOSBox session did not complete (no HELPMRK.TXT)"
+    FAIL=1
+elif [ ! -f "$WORK/HELP.TXT" ]; then
+    echo "  FAIL: uc2 -h produced no output"
+    FAIL=1
+elif ! grep -qi "UltraCompressor\|UC2" "$WORK/HELP.TXT"; then
+    echo "  FAIL: uc2 -h output missing expected text"
+    head -20 "$WORK/HELP.TXT"
+    FAIL=1
+else
+    echo "  OK: uc2 -h"
+fi
+
+# --- Optional: validate uc2 -l ---
+if [ -n "$ARCHIVE_NAME" ]; then
+    if [ ! -f "$WORK/LISTMRK.TXT" ]; then
+        echo "  FAIL: uc2 -l did not complete"
+        FAIL=1
+    elif [ ! -s "$WORK/LIST.TXT" ]; then
+        echo "  FAIL: uc2 -l produced empty output"
+        FAIL=1
+    else
+        echo "  OK: uc2 -l $ARCHIVE_NAME"
+    fi
+fi
+
+if [ $FAIL -ne 0 ]; then  # any failed check above fails the whole smoke test
+    echo "FAILED: DOS smoke test"
+    echo "Work directory preserved at: $WORK"
+    trap - EXIT  # cancel the cleanup trap so the work directory survives for inspection
+    exit 1
+fi
+
+echo "PASSED: DJGPP-built uc2.exe runs under DOSBox-X"
--- a/tests/scripts/roundtrip_dosbox.sh
+++ b/tests/scripts/roundtrip_dosbox.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+# Cross-tool round-trip test: original UC2 Pro <-> UC2 v3 via DOSBox-X
+#
+# Tests both directions: UC2 v3 creates a multi-file archive that the
+# original extracts (Direction 1), and the original creates an archive
+# that UC2 v3 extracts (Direction 2).  Multi-file Direction 1 has worked
+# since the custom-Huffman-tree fix; an earlier version of this comment
+# documented a hang that no longer reproduces.
+#
+# Usage: roundtrip_dosbox.sh <uc2-cli> <uc2pro.exe> <corpus-dir>
+
+set -euo pipefail
+
+# Print the usage text for a missing argument instead of the bare
+# "unbound variable" error that `set -u` would otherwise produce.
+USAGE="usage: roundtrip_dosbox.sh <uc2-cli> <uc2pro.exe> <corpus-dir>"
+UC2_CLI="${1:?$USAGE}"
+UC2PRO="${2:?$USAGE}"
+CORPUS="${3:?$USAGE}"
+
+# Corpus files used for Direction 2.
+FILES=(hello.txt textfile.txt allbytes.bin random.bin zeros.bin)
+
+if ! flatpak info com.dosbox_x.DOSBox-X &>/dev/null; then
+    echo "SKIP: DOSBox-X not installed (flatpak com.dosbox_x.DOSBox-X)"
+    exit 0
+fi
+
+# Scratch directory that DOSBox mounts as C:.  mktemp -d does not create
+# missing parents, so make sure ~/.cache exists first.
+mkdir -p "$HOME/.cache"
+WORK="$(mktemp -d "$HOME/.cache/uc2-dosbox-test.XXXXXX")"
+trap 'rm -rf "$WORK"' EXIT
+
+mkdir -p "$WORK/corpus" "$WORK/out" "$WORK/output"
+for f in "${FILES[@]}"; do
+    cp "$CORPUS/$f" "$WORK/corpus/"
+done
+cp "$UC2PRO" "$WORK/uc2pro.exe"
+
+# --- Session 1: Extract UC2 Pro distribution from SFX ---
+echo "=== Session 1: Extracting UC2 Pro tools from SFX ==="
+cat > "$WORK/dosbox.conf" <<DOSBOXCFG
+[sdl]
+output=none
+fullscreen=false
+[dosbox]
+memsize=16
+machine=svga_s3
+[cpu]
+cycles=max
+[autoexec]
+mount c: $WORK
+c:
+uc2pro UC2DIST
+exit
+DOSBOXCFG
+
+# SFX decompression takes 3-8 minutes depending on host CPU speed
+timeout 600 flatpak run com.dosbox_x.DOSBox-X \
+    -conf "$WORK/dosbox.conf" -nopromptfolder 2>/dev/null || true
+
+# Count the extracted files.  With `set -e -o pipefail` a failing `ls` (the
+# directory is missing when extraction never started) would make the whole
+# assignment fail and abort the script before the FAIL message below could
+# be printed, so the ls failure is neutralised inside the pipeline.
+UC2DIST_COUNT=$( { ls "$WORK/UC2DIST/" 2>/dev/null || true; } | wc -l)
+if [ ! -f "$WORK/UC2DIST/UC.EXE" ] || [ "$UC2DIST_COUNT" -lt 22 ]; then
+    echo "FAIL: UC2 Pro SFX extraction incomplete ($UC2DIST_COUNT/22 files)"
+    exit 1
+fi
+echo "  UC.EXE extracted ($(wc -c < "$WORK/UC2DIST/UC.EXE") bytes, $UC2DIST_COUNT files)"
+
+# --- Direction 1: UC2 v3 creates, original extracts (multi-file); the extracted files are compared in the Direction 1 verification step further down ---
+echo "=== Direction 1: UC2 v3 creates -> original extracts ==="
+DIR1_FILES=(hello.txt textfile.txt allbytes.bin random.bin)  # must list the same files as the uc2 -w command below
+"$UC2_CLI" -w "$WORK/v3multi.uc2" \
+    "$WORK/corpus/hello.txt" "$WORK/corpus/textfile.txt" \
+    "$WORK/corpus/allbytes.bin" "$WORK/corpus/random.bin"
+mkdir -p "$WORK/dir1_out"
+cat > "$WORK/dosbox.conf" <<DOSBOXCFG
+[sdl]
+output=none
+fullscreen=false
+[dosbox]
+memsize=16
+machine=svga_s3
+[cpu]
+cycles=max
+[autoexec]
+mount c: $WORK
+c:
+cd C:\\DIR1_OUT
+C:\\UC2DIST\\UC eF C:\\V3MULTI *.*
+echo DIR1 > C:\\DIR1.TXT
+exit
+DOSBOXCFG
+
+timeout 120 flatpak run com.dosbox_x.DOSBox-X \
+    -conf "$WORK/dosbox.conf" -nopromptfolder 2>/dev/null || true  # exit status ignored; an incomplete session shows up later as a missing DIR1.TXT marker
+
+# --- Session 2: original creates archive ---
+echo "=== Session 2 (Direction 2): UC2 Pro creates archive ==="
+cat > "$WORK/dosbox.conf" <<DOSBOXCFG
+[sdl]
+output=none
+fullscreen=false
+[dosbox]
+memsize=16
+machine=svga_s3
+[cpu]
+cycles=max
+[autoexec]
+mount c: $WORK
+c:
+cd C:\\UC2DIST
+UC a C:\\OUT\\DOSTEST C:\\CORPUS\\*.*
+echo DONE > C:\\MARKER.TXT
+exit
+DOSBOXCFG
+
+timeout 300 flatpak run com.dosbox_x.DOSBox-X \
+    -conf "$WORK/dosbox.conf" -nopromptfolder 2>/dev/null || true
+
+if [ ! -f "$WORK/MARKER.TXT" ]; then
+    echo "FAIL: DOSBox session did not complete"
+    exit 1
+fi
+
+DOS_ARCHIVE=""
+for candidate in "$WORK/out/DOSTEST.UC2" "$WORK/out/dostest.uc2"; do
+    [ -f "$candidate" ] && DOS_ARCHIVE="$candidate" && break
+done
+if [ -z "$DOS_ARCHIVE" ]; then
+    echo "FAIL: UC2 Pro did not create DOSTEST.UC2"
+    exit 1
+fi
+echo "  Archive created: $(wc -c < "$DOS_ARCHIVE") bytes"
+
+# --- Extract with UC2 v3 and verify each file in FILES byte-for-byte against the corpus (Direction 2) ---
+echo "=== Extracting with UC2 v3 ==="
+"$UC2_CLI" -d "$WORK/output" "$DOS_ARCHIVE"
+
+FAIL=0
+for f in "${FILES[@]}"; do
+    upper=$(echo "$f" | tr '[:lower:]' '[:upper:]')
+    extracted=""
+    for candidate in "$WORK/output/$f" "$WORK/output/$upper"; do  # the extracted name may be lower- or upper-case
+        [ -f "$candidate" ] && extracted="$candidate" && break
+    done
+    if [ -z "$extracted" ]; then
+        echo "  FAIL: $f not extracted"
+        FAIL=1
+        continue
+    fi
+    if cmp -s "$CORPUS/$f" "$extracted"; then  # -s: silent, only the exit status is used
+        echo "  OK: $f"
+    else
+        echo "  FAIL: $f content mismatch"
+        FAIL=1
+    fi
+done
+
+# --- Verify Direction 1 (multi-file): compare what the original extracted into dir1_out with the corpus ---
+echo "--- Verifying Direction 1 (UC2 v3 -> original) ---"
+if [ -f "$WORK/DIR1.TXT" ]; then  # DIR1.TXT is written by the Direction 1 autoexec after the extract command
+    for f in "${DIR1_FILES[@]}"; do
+        upper=$(echo "$f" | tr '[:lower:]' '[:upper:]')
+        extracted=""
+        for candidate in "$WORK/dir1_out/$upper" "$WORK/dir1_out/$f"; do  # upper-case name is tried first here
+            [ -f "$candidate" ] && extracted="$candidate" && break
+        done
+        if [ -z "$extracted" ]; then
+            echo "  FAIL: $f not extracted by original (Direction 1)"
+            FAIL=1
+        elif cmp -s "$CORPUS/$f" "$extracted"; then
+            echo "  OK: $f (Direction 1)"
+        else
+            echo "  FAIL: $f content mismatch (Direction 1)"
+            FAIL=1
+        fi
+    done
+else
+    echo "  FAIL: Direction 1 DOSBox session incomplete"
+    FAIL=1
+fi
+
+if [ $FAIL -ne 0 ]; then  # FAIL collects failures from both directions
+    echo "FAILED: some files did not survive cross-tool round-trip"
+    echo "Work directory preserved at: $WORK"
+    trap - EXIT  # cancel the cleanup trap so the work directory survives for inspection
+    exit 1
+fi
+
+echo "PASSED: all files verified (both directions)"
--- a/tests/src/test_blake3.c
+++ b/tests/src/test_blake3.c
@@ -0,0 +1,127 @@
+/* Tests for BLAKE3 cryptographic hashing. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <uc2/uc2_blake3.h>
+
+static int tests_run = 0, tests_passed = 0;  /* updated by TEST(), reported by main() */
+#define TEST(name) do { tests_run++; printf("  %s: ", #name); name(); tests_passed++; printf("OK\n"); } while (0)  /* prints the test name, runs it, then counts it as passed and prints OK once it returns */
+
+/* Write the n bytes at h as 2*n lowercase hex digits into out and
+   NUL-terminate the result.  out must have room for 2*n + 1 chars.
+   For n <= 0 the result is the empty string; previously out was left
+   untouched (and so possibly unterminated) in that case. */
+static void hex(const uint8_t *h, int n, char *out)
+{
+	out[0] = '\0';
+	for (int i = 0; i < n; i++)
+		snprintf(out + i * 2, 3, "%02x", h[i]);  /* 2 digits + NUL per byte */
+}
+
+/* Hash of the empty input: printed for inspection, then checked to be
+   non-zero and reproducible. */
+static void test_empty(void)
+{
+	uint8_t hash[32];
+	uc2_blake3_hash("", 0, hash);
+	/* BLAKE3("") is a known constant; print it so it can be compared by eye */
+	char h[65]; hex(hash, 32, h); h[64] = 0;
+	printf("(%s) ", h);
+	/* The hash should be non-zero ... (this was stated but never checked) */
+	uint8_t any_bits = 0;
+	for (int i = 0; i < 32; i++)
+		any_bits |= hash[i];
+	(void)any_bits;  /* keeps -DNDEBUG builds free of unused warnings */
+	assert(any_bits != 0);
+	/* ... and deterministic */
+	uint8_t hash2[32];
+	uc2_blake3_hash("", 0, hash2);
+	assert(uc2_blake3_equal(hash, hash2));
+}
+
+/* Hashing the same message twice must give equal digests. */
+static void test_deterministic(void)
+{
+	uint8_t msg[] = "Hello, BLAKE3!";
+	const size_t msg_len = sizeof msg - 1;  /* exclude the terminating NUL */
+	uint8_t first[32];
+	uint8_t second[32];
+
+	uc2_blake3_hash(msg, msg_len, first);
+	uc2_blake3_hash(msg, msg_len, second);
+	assert(uc2_blake3_equal(first, second));
+}
+
+/* Two different 3-byte inputs must not hash to the same digest. */
+static void test_differs(void)
+{
+	uint8_t digest_a[32];
+	uint8_t digest_b[32];
+
+	uc2_blake3_hash("AAA", 3, digest_a);
+	uc2_blake3_hash("BBB", 3, digest_b);
+	assert(!uc2_blake3_equal(digest_a, digest_b));
+}
+
+/* Feeding the message in two pieces must reproduce the one-shot digest. */
+static void test_incremental(void)
+{
+	uint8_t msg[] = "The quick brown fox jumps over the lazy dog";
+	const size_t total = sizeof msg - 1;
+	const size_t split = 10;  /* size of the first piece */
+	uint8_t expected[32];
+	uint8_t actual[32];
+	struct uc2_blake3 state;
+
+	uc2_blake3_hash(msg, total, expected);
+
+	uc2_blake3_init(&state);
+	uc2_blake3_update(&state, msg, split);
+	uc2_blake3_update(&state, msg + split, total - split);
+	uc2_blake3_final(&state, actual);
+
+	assert(uc2_blake3_equal(expected, actual));
+}
+
+/* Feeding the message one byte per update must reproduce the one-shot digest. */
+static void test_single_byte_updates(void)
+{
+	uint8_t msg[] = "ABCDEFGH";
+	const size_t total = sizeof msg - 1;  /* 8 bytes, NUL excluded */
+	uint8_t expected[32];
+	uint8_t actual[32];
+	struct uc2_blake3 state;
+
+	uc2_blake3_hash(msg, total, expected);
+
+	uc2_blake3_init(&state);
+	for (const uint8_t *p = msg; p != msg + total; p++)
+		uc2_blake3_update(&state, p, 1);
+	uc2_blake3_final(&state, actual);
+
+	assert(uc2_blake3_equal(expected, actual));
+}
+
+/* Two 64-byte inputs that differ in a single bit should differ in roughly
+   half of the 256 digest bits. */
+static void test_avalanche(void)
+{
+	uint8_t zeros[64] = {0};
+	uint8_t flipped[64] = {0};
+	uint8_t digest_zeros[32];
+	uint8_t digest_flipped[32];
+	int diff_bits = 0;
+
+	flipped[0] = 1;  /* flip one bit */
+
+	uc2_blake3_hash(zeros, 64, digest_zeros);
+	uc2_blake3_hash(flipped, 64, digest_flipped);
+
+	/* Count the set bits of the XOR of the two digests. */
+	for (int i = 0; i < 32; i++) {
+		for (uint8_t x = digest_zeros[i] ^ digest_flipped[i]; x != 0; x >>= 1)
+			diff_bits += x & 1;
+	}
+	printf("(%d/256 bits differ) ", diff_bits);
+	assert(diff_bits > 80 && diff_bits < 176);  /* ~50% ± 30% */
+}
+
+/* uc2_blake3_equal accepts identical buffers and rejects buffers that
+   differ only in the lowest bit of the last byte. */
+static void test_equal_constant_time(void)
+{
+	uint8_t lhs[32];
+	uint8_t rhs[32];
+
+	for (int i = 0; i < 32; i++)
+		lhs[i] = rhs[i] = 0xAA;
+	assert(uc2_blake3_equal(lhs, rhs));
+
+	rhs[31] ^= 1;
+	assert(!uc2_blake3_equal(lhs, rhs));
+}
+
+/* Runs every BLAKE3 test; exit status is 0 only when all of them passed. */
+int main(void)
+{
+	printf("BLAKE3 tests:\n");
+
+	TEST(test_empty);
+	TEST(test_deterministic);
+	TEST(test_differs);
+	TEST(test_incremental);
+	TEST(test_single_byte_updates);
+	TEST(test_avalanche);
+	TEST(test_equal_constant_time);
+
+	printf("%d/%d tests passed\n", tests_passed, tests_run);
+	if (tests_passed != tests_run)
+		return 1;
+	return 0;
+}
--- a/tests/src/test_blockstore.c
+++ b/tests/src/test_blockstore.c
@@ -0,0 +1,204 @@
+/* Tests for cross-archive block store. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#ifdef _MSC_VER
+#include <process.h>
+#define getpid _getpid
+#else
+#include <unistd.h>
+#endif
+#include <uc2/uc2_blockstore.h>
+#include <uc2/uc2_merkle.h>
+
+static int tests_run = 0, tests_passed = 0;  /* updated by TEST(), reported by main() */
+#define TEST(name) do { tests_run++; printf("  %s: ", #name); name(); tests_passed++; printf("OK\n"); } while (0)  /* prints the test name, runs it, then counts it as passed and prints OK once it returns */
+
+static char store_path[256];  /* block store directory shared by all tests; filled in by main() */
+
+/* Fill buf[0..len) with reproducible pseudo-random bytes: a linear
+   congruential generator is stepped once per byte and bits 16..23 of its
+   state become the output byte. */
+static void fill_random(uint8_t *buf, size_t len, uint32_t seed)
+{
+	uint32_t state = seed;
+	uint8_t *const end = buf + len;
+
+	for (uint8_t *p = buf; p != end; p++) {
+		state = state * 1103515245 + 12345;
+		*p = (uint8_t)(state >> 16);
+	}
+}
+
+/* Test cleanup helper: removes path and everything below it by running
+   `rm -rf` through the shell.  The path is wrapped in single quotes. */
+static void rmrf(const char *path)
+{
+	char command[512];
+
+	snprintf(command, sizeof command, "rm -rf '%s'", path);
+	system(command);
+}
+
+/* A store opened at a fresh path must report zero blocks and zero byte counters. */
+static void test_open_close(void)
+{
+	struct uc2_blockstore bs;
+	int rc = uc2_blockstore_open(&bs, store_path);
+
+	(void)rc;  /* rc is otherwise unused when asserts are compiled out */
+	assert(rc == 0);
+	assert(bs.nblocks == 0);
+	assert(bs.total_bytes == 0);
+	assert(bs.saved_bytes == 0);
+
+	uc2_blockstore_close(&bs);
+}
+
+/* Ingesting one buffer into an empty store must store every chunk of its
+   Merkle tree and save nothing. */
+static void test_ingest_single(void)
+{
+	uint8_t data[4096];
+	fill_random(data, sizeof data, 0xABCD);
+
+	struct uc2_merkle tree;
+	uc2_merkle_build(&tree, data, sizeof data, 12);
+
+	struct uc2_blockstore bs;
+	/* Check the open result, as test_open_close does, so a failed open is
+	   reported here instead of through an unopened store. */
+	{ int _r = uc2_blockstore_open(&bs, store_path); (void)_r; assert(_r == 0); }
+	int new_chunks = uc2_blockstore_ingest(&bs, &tree, data, sizeof data);
+	(void)new_chunks;
+	assert(new_chunks == tree.nchunks);
+	assert(bs.nblocks == tree.nchunks);
+	assert(bs.total_bytes == sizeof data);
+	assert(bs.saved_bytes == 0);
+	uc2_blockstore_close(&bs);
+	uc2_merkle_free(&tree);
+}
+
+/* Ingesting the same buffer twice: the second pass must add no chunks and
+   account the whole buffer as saved bytes. */
+static void test_dedup_identical(void)
+{
+	/* Ingest same data twice: second ingest should store 0 new chunks */
+	uint8_t data[8192];
+	fill_random(data, sizeof data, 0x1234);
+
+	struct uc2_merkle tree;
+	uc2_merkle_build(&tree, data, sizeof data, 12);
+
+	struct uc2_blockstore bs;
+	/* Check the open result, as test_open_close does, so a failed open is
+	   reported here instead of through an unopened store. */
+	{ int _r = uc2_blockstore_open(&bs, store_path); (void)_r; assert(_r == 0); }
+
+	int n1 = uc2_blockstore_ingest(&bs, &tree, data, sizeof data);
+	(void)n1;
+	assert(n1 == tree.nchunks);
+
+	int n2 = uc2_blockstore_ingest(&bs, &tree, data, sizeof data);
+	(void)n2;
+	assert(n2 == 0);  /* fully deduplicated */
+	assert(bs.saved_bytes == sizeof data);
+
+	printf("(%d chunks, %lld saved) ", tree.nchunks, (long long)bs.saved_bytes);
+	uc2_blockstore_close(&bs);
+	uc2_merkle_free(&tree);
+}
+
+/* Every chunk written by ingest must read back with its recorded length
+   and the original bytes. */
+static void test_read_back(void)
+{
+	uint8_t data[2048];
+	fill_random(data, sizeof data, 0x5678);
+
+	struct uc2_merkle tree;
+	uc2_merkle_build(&tree, data, sizeof data, 12);
+
+	struct uc2_blockstore bs;
+	/* Check the open result, as test_open_close does, so a failed open is
+	   reported here instead of through an unopened store. */
+	{ int _r = uc2_blockstore_open(&bs, store_path); (void)_r; assert(_r == 0); }
+	uc2_blockstore_ingest(&bs, &tree, data, sizeof data);
+
+	/* Read each chunk back and verify */
+	for (int i = 0; i < tree.nchunks; i++) {
+		uint8_t buf[65536];
+		int n = uc2_blockstore_read(&bs, tree.chunks[i].hash, buf, sizeof buf);
+		assert(n == (int)tree.chunks[i].length);
+		{ int _r = memcmp(buf, data + tree.chunks[i].offset, n); (void)_r; assert(_r == 0); }
+	}
+
+	uc2_blockstore_close(&bs);
+	uc2_merkle_free(&tree);
+}
+
+/* Two inputs sharing a 32 KB prefix: ingesting the second one after the
+   first must store fewer chunks than it has and report saved bytes. */
+static void test_cross_archive_dedup(void)
+{
+	/* Simulate two archives with shared content */
+	size_t shared_len = 32 * 1024;
+	uint8_t *shared = malloc(shared_len);
+	assert(shared);  /* allocation is checked the same way in test_cdc.c */
+	fill_random(shared, shared_len, 0xFEED);
+
+	/* Archive 1: [shared] */
+	struct uc2_merkle t1;
+	uc2_merkle_build(&t1, shared, shared_len, 12);
+
+	/* Archive 2: [shared + unique(8KB)] */
+	size_t f2_len = shared_len + 8192;
+	uint8_t *f2 = malloc(f2_len);
+	assert(f2);
+	memcpy(f2, shared, shared_len);
+	fill_random(f2 + shared_len, 8192, 0xBEEF);
+	struct uc2_merkle t2;
+	uc2_merkle_build(&t2, f2, f2_len, 12);
+
+	struct uc2_blockstore bs;
+	/* Check the open result, as test_open_close does, so a failed open is
+	   reported here instead of through an unopened store. */
+	{ int _r = uc2_blockstore_open(&bs, store_path); (void)_r; assert(_r == 0); }
+
+	/* Ingest archive 1 */
+	int n1 = uc2_blockstore_ingest(&bs, &t1, shared, shared_len);
+
+	/* Ingest archive 2: shared chunks should dedup */
+	int n2 = uc2_blockstore_ingest(&bs, &t2, f2, f2_len);
+	int64_t saved = bs.saved_bytes;
+
+	printf("(a1=%d new, a2=%d new, saved=%lld) ", n1, n2, (long long)saved);
+	assert(n2 < t2.nchunks);  /* some chunks deduplicated */
+	assert(saved > 0);        /* bytes saved */
+
+	uc2_blockstore_close(&bs);
+	uc2_merkle_free(&t1);
+	uc2_merkle_free(&t2);
+	free(shared);
+	free(f2);
+}
+
+/* uc2_blockstore_has must report a chunk only after it has been ingested,
+   and must not report an arbitrary hash. */
+static void test_has(void)
+{
+	uint8_t data[1024];
+	fill_random(data, sizeof data, 0x9999);
+
+	struct uc2_merkle tree;
+	uc2_merkle_build(&tree, data, sizeof data, 12);
+
+	struct uc2_blockstore bs;
+	/* Check the open result, as test_open_close does, so a failed open is
+	   reported here instead of through an unopened store. */
+	{ int _r = uc2_blockstore_open(&bs, store_path); (void)_r; assert(_r == 0); }
+
+	/* Before ingest: chunk should not exist */
+	assert(!uc2_blockstore_has(&bs, tree.chunks[0].hash));
+
+	uc2_blockstore_ingest(&bs, &tree, data, sizeof data);
+
+	/* After ingest: chunk should exist */
+	assert(uc2_blockstore_has(&bs, tree.chunks[0].hash));
+
+	/* Random hash: should not exist */
+	assert(!uc2_blockstore_has(&bs, 0x1234567890ABCDEFULL));
+
+	uc2_blockstore_close(&bs);
+	uc2_merkle_free(&tree);
+}
+
+/* Runs every block store test against a per-process directory under /tmp,
+   wiping that directory before the first test and after each one. */
+int main(void)
+{
+	const int pid = (int)getpid();
+
+	snprintf(store_path, sizeof store_path, "/tmp/uc2_blockstore_test_%d", pid);
+
+	printf("Block store tests:\n");
+	rmrf(store_path);  /* clean start */
+
+	TEST(test_open_close);          rmrf(store_path);
+	TEST(test_ingest_single);       rmrf(store_path);
+	TEST(test_dedup_identical);     rmrf(store_path);
+	TEST(test_read_back);           rmrf(store_path);
+	TEST(test_cross_archive_dedup); rmrf(store_path);
+	TEST(test_has);                 rmrf(store_path);
+
+	printf("%d/%d tests passed\n", tests_passed, tests_run);
+	if (tests_passed != tests_run)
+		return 1;
+	return 0;
+}
--- a/tests/src/test_cdc.c
+++ b/tests/src/test_cdc.c
@@ -0,0 +1,242 @@
+/* Tests for content-defined chunking (CDC). */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <uc2/uc2_cdc.h>
+
+static int tests_run = 0;  /* incremented by TEST() before each test starts */
+static int tests_passed = 0;  /* incremented by TEST() after the test function returns */
+
+#define TEST(name) do { \
+	tests_run++; \
+	printf("  %s: ", #name); \
+	name(); \
+	tests_passed++; \
+	printf("OK\n"); \
+} while (0)  /* prints the test name, runs it, then counts it as passed and prints OK once it returns */
+
+/* The Gear hash of a fixed message is reproducible and non-zero. */
+static void test_gear_hash_deterministic(void)
+{
+	uint8_t msg[] = "Hello, World!";
+	const size_t msg_len = sizeof msg - 1;  /* exclude the terminating NUL */
+	uint32_t first = uc2_gear_hash(msg, msg_len);
+	uint32_t second = uc2_gear_hash(msg, msg_len);
+
+	assert(first == second);
+	assert(first != 0);
+}
+
+/* Two different 4-byte inputs must not share a Gear hash. */
+static void test_gear_hash_differs(void)
+{
+	uint8_t all_a[] = "AAAA";
+	uint8_t all_b[] = "BBBB";
+	uint32_t hash_a = uc2_gear_hash(all_a, 4);
+	uint32_t hash_b = uc2_gear_hash(all_b, 4);
+
+	assert(hash_a != hash_b);
+}
+
+/* uc2_fnv1a of a fixed input is non-zero and reproducible. */
+static void test_fnv1a(void)
+{
+	uint8_t msg[] = "test";
+	uint32_t first = uc2_fnv1a(msg, 4);
+
+	assert(first != 0);
+
+	uint32_t second = uc2_fnv1a(msg, 4);
+	assert(first == second);
+}
+
+/* A 100-byte input must come back as one final chunk that covers all of it. */
+static void test_chunker_single_small(void)
+{
+	/* Data smaller than min_chunk: one chunk */
+	uint8_t input[100];
+	struct uc2_chunker chunker;
+	size_t chunk_off, chunk_len;
+
+	memset(input, 'A', sizeof input);
+	uc2_chunker_init(&chunker, 13, 0, 0);  /* avg 8KB, min ~2KB */
+
+	int more = uc2_chunker_next(&chunker, input, sizeof input, &chunk_off, &chunk_len);
+	assert(more == 0);  /* final chunk */
+	assert(chunk_off == 0);
+	assert(chunk_len == sizeof input);
+}
+
+static void test_chunker_covers_all_data(void)  /* non-final chunks must be contiguous from offset 0, and all chunk lengths must sum to the input length */
+{
+	/* Generate pseudo-random data to force boundary detection */
+	size_t total = 256 * 1024;  /* 256 KB */
+	uint8_t *data = malloc(total);
+	assert(data);
+	uint32_t rng = 0xDEADBEEF;
+	for (size_t i = 0; i < total; i++) {
+		rng = rng * 1103515245 + 12345;
+		data[i] = (uint8_t)(rng >> 16);
+	}
+
+	struct uc2_chunker c;
+	uc2_chunker_init(&c, 13, 0, 0);
+
+	size_t total_chunked = 0;
+	int chunks = 0;
+	size_t off, len;
+	while (uc2_chunker_next(&c, data, total, &off, &len)) {  /* as used in this file, a zero return marks the final chunk */
+		assert(off == total_chunked);
+		assert(len > 0);
+		total_chunked += len;
+		chunks++;
+	}
+	/* Handle the final chunk: off/len still describe it after the loop ends (its offset is not asserted) */
+	total_chunked += len;
+	chunks++;
+
+	assert(total_chunked == total);
+	assert(chunks > 1);  /* 256KB should produce multiple 8KB-ish chunks */
+
+	free(data);
+}
+
+static void test_chunker_respects_min_max(void)  /* chunk lengths must stay within the min/max passed to uc2_chunker_init */
+{
+	size_t total = 128 * 1024;
+	uint8_t *data = malloc(total);
+	assert(data);
+	uint32_t rng = 0x12345678;
+	for (size_t i = 0; i < total; i++) {
+		rng = rng * 1103515245 + 12345;
+		data[i] = (uint8_t)(rng >> 16);
+	}
+
+	size_t min_chunk = 2048;
+	size_t max_chunk = 32768;
+	struct uc2_chunker c;
+	uc2_chunker_init(&c, 13, min_chunk, max_chunk);
+
+	size_t off, len;
+	while (uc2_chunker_next(&c, data, total, &off, &len)) {
+		assert(len >= min_chunk || off + len == total);  /* only a chunk that ends at the end of the input may be shorter than min_chunk */
+		assert(len <= max_chunk);
+	}
+	/* Final chunk can be smaller than min, so only the upper bound is checked for it */
+	assert(len <= max_chunk);
+
+	free(data);
+}
+
+static void test_chunker_content_defined(void)  /* chunks 64 KB of data, then the same data behind 1000 bytes of padding */
+{
+	/* Same data inserted at different offsets should produce
+	   the same chunk boundaries (shifted by the offset).  NOTE(review): the assertions below only check that the chunk lengths sum to the input length and that some non-final chunk starts at or after the padding; boundary alignment itself is not asserted. */
+	size_t base_len = 64 * 1024;
+	uint8_t *base = malloc(base_len);
+	assert(base);
+	uint32_t rng = 0xCAFEBABE;
+	for (size_t i = 0; i < base_len; i++) {
+		rng = rng * 1103515245 + 12345;
+		base[i] = (uint8_t)(rng >> 16);
+	}
+
+	/* Chunk the base data */
+	struct uc2_chunker c;
+	uc2_chunker_init(&c, 12, 0, 0);
+
+	int base_n = 0;  /* chunk count for the unshifted data, capped at 100; not compared with anything below */
+	size_t off, len;
+	while (uc2_chunker_next(&c, base, base_len, &off, &len) && base_n < 99)
+		base_n++;
+	base_n++;
+
+	/* Prepend 1000 bytes of garbage, then the same data */
+	size_t pad = 1000;
+	uint8_t *shifted = malloc(pad + base_len);
+	assert(shifted);
+	memset(shifted, 0xFF, pad);
+	memcpy(shifted + pad, base, base_len);
+
+	uc2_chunker_reset(&c);  /* reuse the same chunker for the second input */
+	/* Skip the padded portion's chunks */
+	size_t total = 0;
+	int found_base = 0;
+	while (uc2_chunker_next(&c, shifted, pad + base_len, &off, &len)) {
+		total += len;
+		if (off >= pad && !found_base) {
+			found_base = 1;
+			/* After the padding chunk(s), subsequent chunks of the
+			   base data should eventually align */
+		}
+	}
+	total += len;  /* add the final chunk, which the loop body does not see */
+	assert(total == pad + base_len);
+	assert(found_base);
+
+	free(base);
+	free(shifted);
+}
+
+static void test_chunker_dedup_detection(void)  /* two inputs sharing a 256 KB region must yield at least one identical chunk hash */
+{
+	/* Two files with a shared 256KB block: CDC should find matching chunks.
+	   The shared region is large enough that after the Gear hash state
+	   resets (~32 bytes), boundaries align between both files. */
+	size_t shared_len = 256 * 1024;
+	size_t unique_a = 4096;
+	size_t unique_b = 8192;
+
+	uint8_t *shared = malloc(shared_len);
+	uint8_t *file_a = malloc(unique_a + shared_len);
+	uint8_t *file_b = malloc(shared_len + unique_b);
+	assert(shared && file_a && file_b);
+
+	uint32_t rng = 0xFEEDFACE;
+	for (size_t i = 0; i < shared_len; i++) {
+		rng = rng * 1103515245 + 12345;
+		shared[i] = (uint8_t)(rng >> 16);
+	}
+	for (size_t i = 0; i < unique_a; i++) file_a[i] = (uint8_t)i;  /* file_a = [unique prefix][shared] */
+	memcpy(file_a + unique_a, shared, shared_len);
+	memcpy(file_b, shared, shared_len);  /* file_b = [shared][unique suffix] */
+	for (size_t i = 0; i < unique_b; i++) file_b[shared_len + i] = (uint8_t)(i ^ 0xAA);
+
+	struct uc2_chunker c;
+	uc2_chunker_init(&c, 13, 0, 0);
+
+	/* Hash all chunks from file_a (at most 200; the n_a < 199 guard leaves one slot for the store after the loop) */
+	uint32_t hashes_a[200];
+	int n_a = 0;
+	size_t off, len;
+	while (uc2_chunker_next(&c, file_a, unique_a + shared_len, &off, &len) && n_a < 199)
+		hashes_a[n_a++] = uc2_fnv1a(file_a + off, len);
+	hashes_a[n_a++] = uc2_fnv1a(file_a + off, len);  /* the chunk that ended the loop */
+
+	/* Hash all chunks from file_b, same scheme as for file_a */
+	uc2_chunker_reset(&c);
+	uint32_t hashes_b[200];
+	int n_b = 0;
+	while (uc2_chunker_next(&c, file_b, shared_len + unique_b, &off, &len) && n_b < 199)
+		hashes_b[n_b++] = uc2_fnv1a(file_b + off, len);
+	hashes_b[n_b++] = uc2_fnv1a(file_b + off, len);
+
+	/* At least one chunk hash should appear in both files */
+	int matches = 0;
+	for (int i = 0; i < n_a; i++)
+		for (int j = 0; j < n_b; j++)
+			if (hashes_a[i] == hashes_b[j])
+				matches++;  /* counts matching (i, j) pairs, so repeated hashes are counted more than once */
+
+	assert(matches > 0);
+	printf("(%d chunks A, %d chunks B, %d shared) ", n_a, n_b, matches);
+
+	free(shared);
+	free(file_a);
+	free(file_b);
+}
+
+/* Runs every CDC test; exit status is 0 only when all of them passed. */
+int main(void)
+{
+	printf("CDC tests:\n");
+
+	/* hash primitives */
+	TEST(test_gear_hash_deterministic);
+	TEST(test_gear_hash_differs);
+	TEST(test_fnv1a);
+
+	/* chunker */
+	TEST(test_chunker_single_small);
+	TEST(test_chunker_covers_all_data);
+	TEST(test_chunker_respects_min_max);
+	TEST(test_chunker_content_defined);
+	TEST(test_chunker_dedup_detection);
+
+	printf("%d/%d tests passed\n", tests_passed, tests_run);
+	if (tests_passed != tests_run)
+		return 1;
+	return 0;
+}
--- a/Show More
+++ b/Show More