d52e16d3b6
Suggested by Caleb Ristvedt <caleb.ristvedt@cune.org>. * guix/store/deduplication.scm (call-with-writable-file): New procedure. (with-writable-file): New macro. (replace-with-link): Use it.
195 lines
8.4 KiB
Scheme
195 lines
8.4 KiB
Scheme
;;; GNU Guix --- Functional package management for GNU
|
|
;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org>
|
|
;;; Copyright © 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
|
|
;;;
|
|
;;; This file is part of GNU Guix.
|
|
;;;
|
|
;;; GNU Guix is free software; you can redistribute it and/or modify it
|
|
;;; under the terms of the GNU General Public License as published by
|
|
;;; the Free Software Foundation; either version 3 of the License, or (at
|
|
;;; your option) any later version.
|
|
;;;
|
|
;;; GNU Guix is distributed in the hope that it will be useful, but
|
|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
;;; GNU General Public License for more details.
|
|
;;;
|
|
;;; You should have received a copy of the GNU General Public License
|
|
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
;;; This houses stuff we do to files when they arrive at the store - resetting
|
|
;;; timestamps, deduplicating, etc.
|
|
|
|
(define-module (guix store deduplication)
|
|
#:use-module (gcrypt hash)
|
|
#:use-module (guix build utils)
|
|
#:use-module (guix build syscalls)
|
|
#:use-module (guix base32)
|
|
#:use-module (srfi srfi-11)
|
|
#:use-module (rnrs io ports)
|
|
#:use-module (ice-9 ftw)
|
|
#:use-module (ice-9 match)
|
|
#:use-module (guix serialization)
|
|
#:export (nar-sha256
|
|
deduplicate))
|
|
|
|
;; XXX: This port is used as a workaround on Guile <= 2.2.4 where
|
|
;; 'port-position' throws to 'out-of-range' when the offset is great than or
|
|
;; equal to 2^32: <https://bugs.gnu.org/32161>.
|
|
(define (counting-wrapper-port output-port)
|
|
"Return two values: an output port that wraps OUTPUT-PORT, and a thunk to
|
|
retrieve the number of bytes written to OUTPUT-PORT."
|
|
(let ((byte-count 0))
|
|
(values (make-custom-binary-output-port "counting-wrapper"
|
|
(lambda (bytes offset count)
|
|
(put-bytevector output-port bytes
|
|
offset count)
|
|
(set! byte-count
|
|
(+ byte-count count))
|
|
count)
|
|
(lambda ()
|
|
byte-count)
|
|
#f
|
|
(lambda ()
|
|
(close-port output-port)))
|
|
(lambda ()
|
|
byte-count))))
|
|
|
|
(define (nar-sha256 file)
|
|
"Gives the sha256 hash of a file and the size of the file in nar form."
|
|
(let*-values (((port get-hash) (open-sha256-port))
|
|
((wrapper get-size) (counting-wrapper-port port)))
|
|
(write-file file wrapper)
|
|
(force-output wrapper)
|
|
(force-output port)
|
|
(let ((hash (get-hash))
|
|
(size (get-size)))
|
|
(close-port wrapper)
|
|
(values hash size))))
|
|
|
|
(define (tempname-in directory)
|
|
"Gives an unused temporary name under DIRECTORY. Not guaranteed to still be
|
|
unused by the time you create anything with that name, but a good shot."
|
|
(let ((const-part (string-append directory "/.tmp-link-"
|
|
(number->string (getpid)))))
|
|
(let try ((guess-part
|
|
(number->string (random most-positive-fixnum) 16)))
|
|
(if (file-exists? (string-append const-part "-" guess-part))
|
|
(try (number->string (random most-positive-fixnum) 16))
|
|
(string-append const-part "-" guess-part)))))
|
|
|
|
(define* (get-temp-link target #:optional (link-prefix (dirname target)))
|
|
"Like mkstemp!, but instead of creating a new file and giving you the name,
|
|
it creates a new hardlink to TARGET and gives you the name. Since
|
|
cross-file-system hardlinks don't work, the temp link must be created on the
|
|
same file system - where in that file system it is can be controlled by
|
|
LINK-PREFIX."
|
|
(let try ((tempname (tempname-in link-prefix)))
|
|
(catch 'system-error
|
|
(lambda ()
|
|
(link target tempname)
|
|
tempname)
|
|
(lambda args
|
|
(if (= (system-error-errno args) EEXIST)
|
|
(try (tempname-in link-prefix))
|
|
(apply throw args))))))
|
|
|
|
(define (call-with-writable-file file thunk)
|
|
(let ((stat (lstat file)))
|
|
(dynamic-wind
|
|
(lambda ()
|
|
(make-file-writable file))
|
|
thunk
|
|
(lambda ()
|
|
(set-file-time file stat)
|
|
(chmod file (stat:mode stat))))))
|
|
|
|
(define-syntax-rule (with-writable-file file exp ...)
|
|
"Make FILE writable for the dynamic extent of EXP..."
|
|
(call-with-writable-file file (lambda () exp ...)))
|
|
|
|
;; There are 3 main kinds of errors we can get from hardlinking: "Too many
|
|
;; things link to this" (EMLINK), "this link already exists" (EEXIST), and
|
|
;; "can't fit more stuff in this directory" (ENOSPC).
|
|
|
|
(define* (replace-with-link target to-replace
|
|
#:key (swap-directory (dirname target)))
|
|
"Atomically replace the file TO-REPLACE with a link to TARGET. Use
|
|
SWAP-DIRECTORY as the directory to store temporary hard links. Upon ENOSPC
|
|
and EMLINK, TO-REPLACE is left unchanged.
|
|
|
|
Note: TARGET, TO-REPLACE, and SWAP-DIRECTORY must be on the same file system."
|
|
(define temp-link
|
|
(catch 'system-error
|
|
(lambda ()
|
|
(get-temp-link target swap-directory))
|
|
(lambda args
|
|
;; We get ENOSPC when we can't fit an additional entry in
|
|
;; SWAP-DIRECTORY. If it's EMLINK, then TARGET has reached its
|
|
;; maximum number of links.
|
|
(if (memv (system-error-errno args) `(,ENOSPC ,EMLINK))
|
|
#f
|
|
(apply throw args)))))
|
|
|
|
;; If we couldn't create TEMP-LINK, that's OK: just don't do the
|
|
;; replacement, which means TO-REPLACE won't be deduplicated.
|
|
(when temp-link
|
|
(with-writable-file (dirname to-replace)
|
|
(catch 'system-error
|
|
(lambda ()
|
|
(rename-file temp-link to-replace))
|
|
(lambda args
|
|
(delete-file temp-link)
|
|
(unless (= EMLINK (system-error-errno args))
|
|
(apply throw args)))))))
|
|
|
|
(define* (deduplicate path hash #:key (store %store-directory))
|
|
"Check if a store item with sha256 hash HASH already exists. If so,
|
|
replace PATH with a hardlink to the already-existing one. If not, register
|
|
PATH so that future duplicates can hardlink to it. PATH is assumed to be
|
|
under STORE."
|
|
(define links-directory
|
|
(string-append store "/.links"))
|
|
|
|
(mkdir-p links-directory)
|
|
(let loop ((path path)
|
|
(type (stat:type (lstat path)))
|
|
(hash hash))
|
|
(if (eq? 'directory type)
|
|
;; Can't hardlink directories, so hardlink their atoms.
|
|
(for-each (match-lambda
|
|
((file . properties)
|
|
(unless (member file '("." ".."))
|
|
(let* ((file (string-append path "/" file))
|
|
(type (or (assq-ref properties 'type)
|
|
(stat:type (lstat file)))))
|
|
(loop file type
|
|
(and (not (eq? 'directory type))
|
|
(nar-sha256 file)))))))
|
|
(scandir* path))
|
|
(let ((link-file (string-append links-directory "/"
|
|
(bytevector->nix-base32-string hash))))
|
|
(if (file-exists? link-file)
|
|
(replace-with-link link-file path
|
|
#:swap-directory links-directory)
|
|
(catch 'system-error
|
|
(lambda ()
|
|
(link path link-file))
|
|
(lambda args
|
|
(let ((errno (system-error-errno args)))
|
|
(cond ((= errno EEXIST)
|
|
;; Someone else put an entry for PATH in
|
|
;; LINKS-DIRECTORY before we could. Let's use it.
|
|
(replace-with-link path link-file
|
|
#:swap-directory links-directory))
|
|
((= errno ENOSPC)
|
|
;; There's not enough room in the directory index for
|
|
;; more entries in .links, but that's fine: we can
|
|
;; just stop.
|
|
#f)
|
|
((= errno EMLINK)
|
|
;; PATH has reached the maximum number of links, but
|
|
;; that's OK: we just can't deduplicate it more.
|
|
#f)
|
|
(else (apply throw args)))))))))))
|