2018-05-27 17:20:54 -04:00
|
|
|
;;; GNU Guix --- Functional package management for GNU
|
|
|
|
;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org>
|
2022-12-10 04:56:48 -05:00
|
|
|
;;; Copyright © 2018-2022 Ludovic Courtès <ludo@gnu.org>
|
2018-05-27 17:20:54 -04:00
|
|
|
;;;
|
|
|
|
;;; This file is part of GNU Guix.
|
|
|
|
;;;
|
|
|
|
;;; GNU Guix is free software; you can redistribute it and/or modify it
|
|
|
|
;;; under the terms of the GNU General Public License as published by
|
|
|
|
;;; the Free Software Foundation; either version 3 of the License, or (at
|
|
|
|
;;; your option) any later version.
|
|
|
|
;;;
|
|
|
|
;;; GNU Guix is distributed in the hope that it will be useful, but
|
|
|
|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
;;; GNU General Public License for more details.
|
|
|
|
;;;
|
|
|
|
;;; You should have received a copy of the GNU General Public License
|
|
|
|
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
;;; This houses stuff we do to files when they arrive at the store - resetting
|
|
|
|
;;; timestamps, deduplicating, etc.
|
|
|
|
|
|
|
|
(define-module (guix store deduplication)
|
Switch to Guile-Gcrypt.
This removes (guix hash) and (guix pk-crypto), which now live as part of
Guile-Gcrypt (version 0.1.0.)
* guix/gcrypt.scm, guix/hash.scm, guix/pk-crypto.scm,
tests/hash.scm, tests/pk-crypto.scm: Remove.
* configure.ac: Test for Guile-Gcrypt. Remove LIBGCRYPT and
LIBGCRYPT_LIBDIR assignments.
* m4/guix.m4 (GUIX_ASSERT_LIBGCRYPT_USABLE): Remove.
* README: Add Guile-Gcrypt to the dependencies; move libgcrypt as
"required unless --disable-daemon".
* doc/guix.texi (Requirements): Likewise.
* gnu/packages/bash.scm, guix/derivations.scm, guix/docker.scm,
guix/git.scm, guix/http-client.scm, guix/import/cpan.scm,
guix/import/cran.scm, guix/import/crate.scm, guix/import/elpa.scm,
guix/import/gnu.scm, guix/import/hackage.scm,
guix/import/texlive.scm, guix/import/utils.scm, guix/nar.scm,
guix/pki.scm, guix/scripts/archive.scm,
guix/scripts/authenticate.scm, guix/scripts/download.scm,
guix/scripts/hash.scm, guix/scripts/pack.scm,
guix/scripts/publish.scm, guix/scripts/refresh.scm,
guix/scripts/substitute.scm, guix/store.scm,
guix/store/deduplication.scm, guix/tests.scm, tests/base32.scm,
tests/builders.scm, tests/challenge.scm, tests/cpan.scm,
tests/crate.scm, tests/derivations.scm, tests/gem.scm,
tests/nar.scm, tests/opam.scm, tests/pki.scm,
tests/publish.scm, tests/pypi.scm, tests/store-deduplication.scm,
tests/store.scm, tests/substitute.scm: Adjust imports.
* gnu/system/vm.scm: Likewise.
(guile-sqlite3&co): Rename to...
(gcrypt-sqlite3&co): ... this. Add GUILE-GCRYPT.
(expression->derivation-in-linux-vm)[config]: Remove.
(iso9660-image)[config]: Remove.
(qemu-image)[config]: Remove.
(system-docker-image)[config]: Remove.
* guix/scripts/pack.scm: Adjust imports.
(guile-sqlite3&co): Rename to...
(gcrypt-sqlite3&co): ... this. Add GUILE-GCRYPT.
(self-contained-tarball)[build]: Call 'make-config.scm' without
#:libgcrypt argument.
(squashfs-image)[libgcrypt]: Remove.
[build]: Call 'make-config.scm' without #:libgcrypt.
(docker-image)[config, json]: Remove.
[build]: Add GUILE-GCRYPT to the extensions Remove (guix config) from
the imported modules.
* guix/self.scm (specification->package): Remove "libgcrypt", add
"guile-gcrypt".
(compiled-guix): Remove #:libgcrypt.
[guile-gcrypt]: New variable.
[dependencies]: Add it.
[*core-modules*]: Remove #:libgcrypt from 'make-config.scm' call.
Add #:extensions.
[*config*]: Remove #:libgcrypt from 'make-config.scm' call.
(%dependency-variables): Remove %libgcrypt.
(make-config.scm): Remove #:libgcrypt.
* build-aux/build-self.scm (guile-gcrypt): New variable.
(make-config.scm): Remove #:libgcrypt.
(build-program)[fake-gcrypt-hash]: New variable.
Add (gcrypt hash) to the imported modules. Adjust load path
assignments.
* gnu/packages/package-management.scm (guix)[propagated-inputs]: Add
GUILE-GCRYPT.
[arguments]: In 'wrap-program' phase, add GUILE-GCRYPT to the search
path.
2018-08-31 11:07:07 -04:00
|
|
|
#:use-module (gcrypt hash)
|
2022-05-20 14:39:48 -04:00
|
|
|
#:use-module (guix build utils)
|
2020-06-22 06:29:15 -04:00
|
|
|
#:use-module (guix build syscalls)
|
2020-02-21 18:40:30 -05:00
|
|
|
#:use-module (guix base32)
|
2018-05-27 17:20:54 -04:00
|
|
|
#:use-module (srfi srfi-11)
|
2020-12-10 05:21:14 -05:00
|
|
|
#:use-module (srfi srfi-34)
|
|
|
|
#:use-module (srfi srfi-35)
|
2018-05-27 17:20:54 -04:00
|
|
|
#:use-module (rnrs io ports)
|
2020-06-22 06:29:15 -04:00
|
|
|
#:use-module (ice-9 match)
|
2018-05-27 17:20:54 -04:00
|
|
|
#:use-module (guix serialization)
|
|
|
|
#:export (nar-sha256
|
2020-12-10 05:21:14 -05:00
|
|
|
deduplicate
|
2020-12-10 09:12:34 -05:00
|
|
|
dump-file/deduplicate
|
|
|
|
copy-file/deduplicate))
|
2018-05-27 17:20:54 -04:00
|
|
|
|
|
|
|
(define (nar-sha256 file)
  "Serialize FILE in nar form into a SHA256 hashing port.  Return two values:
the hash obtained from that port, and the port's position once the whole nar
has been written, i.e., the size of FILE in nar form."
  (call-with-values open-sha256-port
    (lambda (sha256-port finish)
      (write-file file sha256-port)
      ;; Flush buffered data so that both the digest and the position account
      ;; for every byte of the nar.
      (force-output sha256-port)
      (let* ((digest   (finish))
             (nar-size (port-position sha256-port)))
        (close-port sha256-port)
        (values digest nar-size)))))
|
2018-05-27 17:20:54 -04:00
|
|
|
|
|
|
|
(define (tempname-in directory)
  "Return a file name under DIRECTORY, of the form .tmp-link-PID-HEX, that did
not exist when it was checked.  Nothing is created, so the name may be taken
by someone else before the caller gets to use it."
  (define prefix
    ;; Part of the name shared by all the candidates of this process.
    (string-append directory "/.tmp-link-"
                   (number->string (getpid))))

  (define (candidate)
    ;; Return PREFIX followed by a fresh random hexadecimal suffix.
    (string-append prefix "-"
                   (number->string (random most-positive-fixnum) 16)))

  (let loop ((name (candidate)))
    (if (file-exists? name)
        (loop (candidate))
        name)))
|
|
|
|
|
|
|
|
(define* (get-temp-link target #:optional (link-prefix (dirname target)))
  "Create a hard link to TARGET under a fresh temporary name and return that
name; this is the hard-link counterpart of mkstemp!.  Hard links cannot cross
file systems, so the temporary link has to live on the same file system as
TARGET; LINK-PREFIX selects the directory in which it is created."
  (define (attempt name)
    ;; Try to hard-link TARGET as NAME.  Return NAME on success, #f when NAME
    ;; already exists; any other error is re-thrown.
    (catch 'system-error
      (lambda ()
        (link target name)
        name)
      (lambda args
        (if (= EEXIST (system-error-errno args))
            #f
            (apply throw args)))))

  ;; Keep picking new names until one of them can be created.
  (let loop ()
    (or (attempt (tempname-in link-prefix))
        (loop))))
|
2018-05-27 17:20:54 -04:00
|
|
|
|
2020-08-08 11:05:22 -04:00
|
|
|
(define (call-with-writable-file file store thunk)
  "Call THUNK with FILE made writable, then restore FILE's timestamps and
permission bits as they were before the call.  When FILE is STORE itself,
simply call THUNK without touching any permissions."
  (cond ((string=? file store)
         ;; Leave the store's own permissions alone.
         (thunk))
        (else
         (let ((original (lstat file)))
           (dynamic-wind
             (lambda ()
               (make-file-writable file))
             thunk
             (lambda ()
               ;; Put back what 'make-file-writable' and THUNK changed.
               (set-file-time file original)
               (chmod file (stat:mode original))))))))
|
2020-06-25 04:10:09 -04:00
|
|
|
|
2020-08-08 11:05:22 -04:00
|
|
|
(define-syntax-rule (with-writable-file file store body ...)
  "Evaluate BODY... in a dynamic extent where FILE is writable, unless FILE is
the store, in which case its permissions are left alone."
  (call-with-writable-file file store
                           (lambda ()
                             body ...)))
|
2020-06-25 04:10:09 -04:00
|
|
|
|
2018-05-27 17:20:54 -04:00
|
|
|
;; There are 3 main kinds of errors we can get from hardlinking: "Too many
|
|
|
|
;; things link to this" (EMLINK), "this link already exists" (EEXIST), and
|
|
|
|
;; "can't fit more stuff in this directory" (ENOSPC).
|
|
|
|
|
2018-07-02 18:26:59 -04:00
|
|
|
(define* (replace-with-link target to-replace
                            #:key (swap-directory (dirname target))
                            (store (%store-directory)))
  "Atomically replace the file TO-REPLACE with a link to TARGET.  Temporary
hard links are created in SWAP-DIRECTORY.  Upon ENOSPC and EMLINK, TO-REPLACE
is left unchanged.

Note: TARGET, TO-REPLACE, and SWAP-DIRECTORY must be on the same file system."
  (define (make-swap-link)
    ;; Return the name of a temporary link to TARGET, or #f if it could not be
    ;; created for a benign reason.
    (catch 'system-error
      (lambda ()
        (get-temp-link target swap-directory))
      (lambda args
        (let ((errno (system-error-errno args)))
          ;; ENOSPC: no room for one more entry in SWAP-DIRECTORY.
          ;; EMLINK: TARGET already has its maximum number of links.
          (if (or (= errno ENOSPC) (= errno EMLINK))
              #f
              (apply throw args))))))

  (define (install! swap-link)
    ;; Rename SWAP-LINK over TO-REPLACE.  On failure, remove SWAP-LINK and
    ;; re-throw unless the error is EMLINK.
    (with-writable-file (dirname to-replace) store
      (catch 'system-error
        (lambda ()
          (rename-file swap-link to-replace))
        (lambda args
          (delete-file swap-link)
          (unless (= EMLINK (system-error-errno args))
            (apply throw args))))))

  ;; Failing to create the temporary link is fine: TO-REPLACE is then simply
  ;; not deduplicated.
  (let ((swap-link (make-swap-link)))
    (when swap-link
      (install! swap-link))))
|
2018-05-27 17:20:54 -04:00
|
|
|
|
2021-11-13 15:47:15 -05:00
|
|
|
(define %deduplication-minimum-size
  ;; Files smaller than this many bytes are not deduplicated.  Doing so would
  ;; add many entries to '.links', slowing down 'removeUnusedLinks', for very
  ;; little space saved.  Keep in sync with optimize-store.cc.
  (* 8 1024))
|
|
|
|
|
2020-06-25 04:15:38 -04:00
|
|
|
(define* (deduplicate path hash #:key (store (%store-directory)))
  "Check if a store item with sha256 hash HASH already exists.  If so,
replace PATH with a hardlink to the already-existing one.  If not, register
PATH so that future duplicates can hardlink to it.  PATH is assumed to be
under STORE.

When PATH is a directory, HASH is not used: the directory is traversed
recursively and each regular file of at least %DEDUPLICATION-MINIMUM-SIZE
bytes found below it is deduplicated under its own nar hash."
  ;; Lightweight promises.  EXP is re-evaluated if it returns #f, which is
  ;; fine for the sole use below since 'lstat' never returns #f.
  (define-syntax-rule (delay exp)
    (let ((value #f))
      (lambda ()
        (unless value
          (set! value exp))
        value)))
  (define-syntax-rule (force promise)
    (promise))

  (define links-directory
    (string-append store "/.links"))

  (let loop ((path path)
             (type (stat:type (lstat path)))
             (hash hash))
    (if (eq? 'directory type)
        ;; Can't hardlink directories, so hardlink their atoms.
        (for-each (match-lambda
                    ((file . properties)
                     (unless (member file '("." ".."))
                       (let* ((file (string-append path "/" file))
                              ;; Only call 'lstat' when the directory entry
                              ;; does not tell us the type, or when we need
                              ;; the size.
                              (st   (delay (lstat file)))
                              (type (match (assoc-ref properties 'type)
                                      ((or 'unknown #f)
                                       (stat:type (force st)))
                                      (type type))))
                         (when (or (eq? 'directory type)
                                   (and (eq? 'regular type)
                                        (>= (stat:size (force st))
                                            %deduplication-minimum-size)))
                           (loop file type
                                 (and (not (eq? 'directory type))
                                      (nar-sha256 file))))))))
                  (scandir* path))
        (let ((link-file (string-append links-directory "/"
                                        (bytevector->nix-base32-string hash))))
          (if (file-exists? link-file)
              (replace-with-link link-file path
                                 #:swap-directory links-directory
                                 #:store store)
              (catch 'system-error
                (lambda ()
                  (link path link-file))
                (lambda args
                  (let ((errno (system-error-errno args)))
                    (cond ((= errno EEXIST)
                           ;; Someone else put an entry for HASH in
                           ;; LINKS-DIRECTORY before we could.  Let's use it:
                           ;; replace PATH with a link to that entry, exactly
                           ;; as in the 'file-exists?' case above.
                           (replace-with-link link-file path
                                              #:swap-directory
                                              links-directory
                                              #:store store))
                          ((= errno ENOENT)
                           ;; This most likely means that LINKS-DIRECTORY does
                           ;; not exist.  Attempt to create it and try again.
                           (mkdir-p links-directory)
                           (loop path type hash))
                          ((= errno ENOSPC)
                           ;; There's not enough room in the directory index for
                           ;; more entries in .links, but that's fine: we can
                           ;; just stop.
                           #f)
                          ((= errno EMLINK)
                           ;; PATH has reached the maximum number of links, but
                           ;; that's OK: we just can't deduplicate it more.
                           #f)
                          (else (apply throw args)))))))))))
|
2020-12-10 05:21:14 -05:00
|
|
|
|
|
|
|
(define (tee input len output)
  "Return an input port that yields at most LEN bytes read from INPUT and
copies each of them to OUTPUT as it is read.  Raise a '&nar-error' condition
if INPUT reaches end-of-file before LEN bytes have been read."
  (define consumed 0)                     ;bytes read from INPUT so far

  (define (premature-eof)
    ;; INPUT ended before LEN bytes could be read from it.
    (raise (condition
            (&nar-error (port input)
                        (file (port-filename output))))))

  (define (read! bv start count)
    ;; Never read past LEN bytes in total.
    (define wanted
      (min count (- len consumed)))

    (let retry ()
      (let ((n (get-bytevector-n! input bv start wanted)))
        (cond ((eof-object? n)
               (if (= consumed len)
                   0                      ;EOF
                   (premature-eof)))
              ((and (zero? n) (> wanted 0))
               ;; Returning zero would signal EOF to our reader, so ask
               ;; INPUT again instead.
               (retry))
              (else
               (put-bytevector output bv start n)
               (set! consumed (+ consumed n))
               n)))))

  (make-custom-binary-input-port "tee input port" read! #f #f #f))
|
|
|
|
|
|
|
|
(define* (dump-file/deduplicate file input size type
                                #:key (store (%store-directory)))
  "Write SIZE bytes read from INPUT to FILE.  TYPE is a symbol, either
'regular or 'executable.

This procedure can be passed as the #:dump-file argument of 'restore-file'.
In that case files are deduplicated on the fly while they are being restored,
so no later deduplication pass that would re-read every file is needed."
  (define (copy-while-hashing output)
    ;; Copy INPUT to OUTPUT through a 'tee' port while feeding the nar
    ;; serialization of FILE to a SHA256 port; return the resulting hash.
    (let-values (((hash-port get-hash)
                  (open-hash-port (hash-algorithm sha256))))
      (write-file-tree file hash-port
                       #:file-type+size (lambda (_) (values type size))
                       #:file-port (const (tee input size output)))
      (close-port hash-port)
      (get-hash)))

  (define (plain-copy output)
    ;; Copy SIZE bytes from INPUT to OUTPUT without hashing them.
    ;; NOTE(review): the explicit offset 0 presumably makes 'sendfile' read
    ;; INPUT from its beginning rather than from its current position --
    ;; confirm that file ports passed here are always positioned at 0.
    (if (file-port? input)
        (sendfile output input size 0)
        (dump-port input output size
                   #:buffer-size %deduplication-minimum-size)))

  (cond ((>= size %deduplication-minimum-size)
         (deduplicate file
                      (call-with-output-file file copy-while-hashing)
                      #:store store))
        (else
         ;; Too small to be worth deduplicating: just copy it.
         (call-with-output-file file plain-copy))))
|
2020-12-10 09:12:34 -05:00
|
|
|
|
|
|
|
(define* (copy-file/deduplicate source target
                                #:key (store (%store-directory)))
  "Copy SOURCE to TARGET as 'copy-file' would, and additionally deduplicate
TARGET in STORE."
  (define (owner-executable? st)
    ;; Is the owner-execute permission bit set in ST?
    (not (zero? (logand (stat:mode st) #o100))))

  (call-with-input-file source
    (lambda (input)
      (let* ((st   (stat input))
             (kind (if (owner-executable? st)
                       'executable
                       'regular)))
        (dump-file/deduplicate target input (stat:size st) kind
                               #:store store)))))
|