2014-01-23 17:48:34 -05:00
|
|
|
|
;;; GNU Guix --- Functional package management for GNU
|
2016-05-31 09:08:42 -04:00
|
|
|
|
;;; Copyright © 2014, 2015, 2016 Ludovic Courtès <ludo@gnu.org>
|
2014-01-23 17:48:34 -05:00
|
|
|
|
;;;
|
|
|
|
|
;;; This file is part of GNU Guix.
|
|
|
|
|
;;;
|
|
|
|
|
;;; GNU Guix is free software; you can redistribute it and/or modify it
|
|
|
|
|
;;; under the terms of the GNU General Public License as published by
|
|
|
|
|
;;; the Free Software Foundation; either version 3 of the License, or (at
|
|
|
|
|
;;; your option) any later version.
|
|
|
|
|
;;;
|
|
|
|
|
;;; GNU Guix is distributed in the hope that it will be useful, but
|
|
|
|
|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
;;; GNU General Public License for more details.
|
|
|
|
|
;;;
|
|
|
|
|
;;; You should have received a copy of the GNU General Public License
|
|
|
|
|
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
|
|
(define-module (guix scripts offload)
|
2014-03-02 16:39:48 -05:00
|
|
|
|
#:use-module (ssh key)
|
|
|
|
|
#:use-module (ssh auth)
|
|
|
|
|
#:use-module (ssh session)
|
|
|
|
|
#:use-module (ssh channel)
|
2016-11-04 19:47:34 -04:00
|
|
|
|
#:use-module (ssh popen)
|
|
|
|
|
#:use-module (ssh dist)
|
|
|
|
|
#:use-module (ssh dist node)
|
2016-12-01 15:49:16 -05:00
|
|
|
|
#:use-module (ssh version)
|
2014-01-23 17:48:34 -05:00
|
|
|
|
#:use-module (guix config)
|
|
|
|
|
#:use-module (guix records)
|
|
|
|
|
#:use-module (guix store)
|
|
|
|
|
#:use-module (guix derivations)
|
2016-10-19 08:28:56 -04:00
|
|
|
|
#:use-module ((guix serialization)
|
|
|
|
|
#:select (nar-error? nar-error-file))
|
2014-01-23 17:48:34 -05:00
|
|
|
|
#:use-module (guix nar)
|
|
|
|
|
#:use-module (guix utils)
|
2016-05-31 09:08:42 -04:00
|
|
|
|
#:use-module ((guix build syscalls) #:select (fcntl-flock))
|
2014-03-06 15:38:45 -05:00
|
|
|
|
#:use-module ((guix build utils) #:select (which mkdir-p))
|
2014-01-23 17:48:34 -05:00
|
|
|
|
#:use-module (guix ui)
|
|
|
|
|
#:use-module (srfi srfi-1)
|
2014-03-26 11:22:41 -04:00
|
|
|
|
#:use-module (srfi srfi-11)
|
2014-01-23 17:48:34 -05:00
|
|
|
|
#:use-module (srfi srfi-26)
|
|
|
|
|
#:use-module (srfi srfi-34)
|
|
|
|
|
#:use-module (srfi srfi-35)
|
|
|
|
|
#:use-module (ice-9 popen)
|
|
|
|
|
#:use-module (ice-9 rdelim)
|
|
|
|
|
#:use-module (ice-9 match)
|
|
|
|
|
#:use-module (ice-9 regex)
|
|
|
|
|
#:use-module (ice-9 format)
|
2016-10-19 08:28:56 -04:00
|
|
|
|
#:use-module (ice-9 binary-ports)
|
2014-01-23 17:48:34 -05:00
|
|
|
|
#:export (build-machine
|
|
|
|
|
build-requirements
|
|
|
|
|
guix-offload))
|
|
|
|
|
|
|
|
|
|
;;; Commentary:
|
|
|
|
|
;;;
|
|
|
|
|
;;; Attempt to offload builds to the machines listed in
|
|
|
|
|
;;; /etc/guix/machines.scm, transferring missing dependencies over SSH, and
|
|
|
|
|
;;; retrieving the build output(s) over SSH upon success.
|
|
|
|
|
;;;
|
|
|
|
|
;;; This command should not be used directly; instead, it is called on-demand
|
|
|
|
|
;;; by the daemon, unless it was started with '--no-build-hook' or a client
|
|
|
|
|
;;; inhibited build hooks.
|
|
|
|
|
;;;
|
|
|
|
|
;;; Code:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define-record-type* <build-machine>
|
|
|
|
|
build-machine make-build-machine
|
|
|
|
|
build-machine?
|
|
|
|
|
(name build-machine-name) ; string
|
2014-03-13 16:58:04 -04:00
|
|
|
|
(port build-machine-port ; number
|
|
|
|
|
(default 22))
|
2014-01-23 17:48:34 -05:00
|
|
|
|
(system build-machine-system) ; string
|
|
|
|
|
(user build-machine-user) ; string
|
|
|
|
|
(private-key build-machine-private-key ; file name
|
2014-03-02 16:39:48 -05:00
|
|
|
|
(default (user-openssh-private-key)))
|
|
|
|
|
(host-key build-machine-host-key) ; string
|
2016-11-04 19:47:34 -04:00
|
|
|
|
(daemon-socket build-machine-daemon-socket ; string
|
|
|
|
|
(default "/var/guix/daemon-socket/socket"))
|
2014-01-23 17:48:34 -05:00
|
|
|
|
(parallel-builds build-machine-parallel-builds ; number
|
|
|
|
|
(default 1))
|
|
|
|
|
(speed build-machine-speed ; inexact real
|
|
|
|
|
(default 1.0))
|
|
|
|
|
(features build-machine-features ; list of strings
|
|
|
|
|
(default '())))
|
|
|
|
|
|
|
|
|
|
(define-record-type* <build-requirements>
|
|
|
|
|
build-requirements make-build-requirements
|
|
|
|
|
build-requirements?
|
|
|
|
|
(system build-requirements-system) ; string
|
|
|
|
|
(features build-requirements-features ; list of strings
|
|
|
|
|
(default '())))
|
|
|
|
|
|
|
|
|
|
(define %machine-file
|
|
|
|
|
;; File that lists machines available as build slaves.
|
|
|
|
|
(string-append %config-directory "/machines.scm"))
|
|
|
|
|
|
2014-03-02 16:39:48 -05:00
|
|
|
|
(define (user-openssh-private-key)
|
|
|
|
|
"Return the user's default SSH private key, or #f if it could not be
|
2014-01-23 17:48:34 -05:00
|
|
|
|
determined."
|
|
|
|
|
(and=> (getenv "HOME")
|
2014-03-02 16:39:48 -05:00
|
|
|
|
(cut string-append <> "/.ssh/id_rsa")))
|
2014-01-23 17:48:34 -05:00
|
|
|
|
|
|
|
|
|
(define %user-module
|
|
|
|
|
;; Module in which the machine description file is loaded.
|
|
|
|
|
(let ((module (make-fresh-user-module)))
|
|
|
|
|
(module-use! module (resolve-interface '(guix scripts offload)))
|
|
|
|
|
module))
|
|
|
|
|
|
|
|
|
|
(define* (build-machines #:optional (file %machine-file))
|
|
|
|
|
"Read the list of build machines from FILE and return it."
|
|
|
|
|
(catch #t
|
|
|
|
|
(lambda ()
|
|
|
|
|
;; Avoid ABI incompatibility with the <build-machine> record.
|
|
|
|
|
(set! %fresh-auto-compile #t)
|
|
|
|
|
|
|
|
|
|
(save-module-excursion
|
|
|
|
|
(lambda ()
|
|
|
|
|
(set-current-module %user-module)
|
2014-02-01 19:32:50 -05:00
|
|
|
|
(primitive-load file))))
|
2014-01-23 17:48:34 -05:00
|
|
|
|
(lambda args
|
|
|
|
|
(match args
|
2016-09-20 04:51:39 -04:00
|
|
|
|
(('system-error . rest)
|
2014-01-23 17:48:34 -05:00
|
|
|
|
(let ((err (system-error-errno args)))
|
|
|
|
|
;; Silently ignore missing file since this is a common case.
|
|
|
|
|
(if (= ENOENT err)
|
|
|
|
|
'()
|
|
|
|
|
(leave (_ "failed to open machine file '~a': ~a~%")
|
2014-02-01 19:32:50 -05:00
|
|
|
|
file (strerror err)))))
|
2014-06-26 16:23:36 -04:00
|
|
|
|
(('syntax-error proc message properties form . rest)
|
|
|
|
|
(let ((loc (source-properties->location properties)))
|
|
|
|
|
(leave (_ "~a: ~a~%")
|
|
|
|
|
(location->string loc) message)))
|
2016-09-20 04:51:39 -04:00
|
|
|
|
(x
|
2014-01-23 17:48:34 -05:00
|
|
|
|
(leave (_ "failed to load machine file '~a': ~s~%")
|
2014-02-01 19:32:50 -05:00
|
|
|
|
file args))))))
|
2014-01-23 17:48:34 -05:00
|
|
|
|
|
2014-03-02 16:39:48 -05:00
|
|
|
|
(define (host-key->type+key host-key)
|
|
|
|
|
"Destructure HOST-KEY, an OpenSSH host key string, and return two values:
|
|
|
|
|
its key type as a symbol, and the actual base64-encoded string."
|
|
|
|
|
(define (type->symbol type)
|
|
|
|
|
(and (string-prefix? "ssh-" type)
|
|
|
|
|
(string->symbol (string-drop type 4))))
|
|
|
|
|
|
|
|
|
|
(match (string-tokenize host-key)
|
|
|
|
|
((type key _)
|
|
|
|
|
(values (type->symbol type) key))
|
|
|
|
|
((type key)
|
|
|
|
|
(values (type->symbol type) key))))
|
|
|
|
|
|
|
|
|
|
(define (private-key-from-file* file)
|
|
|
|
|
"Like 'private-key-from-file', but raise an error that 'with-error-handling'
|
|
|
|
|
can interpret meaningfully."
|
|
|
|
|
(catch 'guile-ssh-error
|
|
|
|
|
(lambda ()
|
|
|
|
|
(private-key-from-file file))
|
|
|
|
|
(lambda (key proc str . rest)
|
|
|
|
|
(raise (condition
|
|
|
|
|
(&message (message (format #f (_ "failed to load SSH \
|
|
|
|
|
private key from '~a': ~a")
|
|
|
|
|
file str))))))))
|
|
|
|
|
|
|
|
|
|
(define (open-ssh-session machine)
|
|
|
|
|
"Open an SSH session for MACHINE and return it. Throw an error on failure."
|
|
|
|
|
(let ((private (private-key-from-file* (build-machine-private-key machine)))
|
|
|
|
|
(public (public-key-from-file
|
|
|
|
|
(string-append (build-machine-private-key machine)
|
|
|
|
|
".pub")))
|
|
|
|
|
(session (make-session #:user (build-machine-user machine)
|
|
|
|
|
#:host (build-machine-name machine)
|
|
|
|
|
#:port (build-machine-port machine)
|
|
|
|
|
#:timeout 5 ;seconds
|
|
|
|
|
;; #:log-verbosity 'protocol
|
|
|
|
|
#:identity (build-machine-private-key machine)
|
|
|
|
|
|
|
|
|
|
;; We need lightweight compression when
|
|
|
|
|
;; exchanging full archives.
|
|
|
|
|
#:compression "zlib"
|
|
|
|
|
#:compression-level 3)))
|
2016-12-01 17:20:18 -05:00
|
|
|
|
(match (connect! session)
|
|
|
|
|
('ok
|
|
|
|
|
;; Authenticate the server. XXX: Guile-SSH 0.10.1 doesn't know about
|
|
|
|
|
;; ed25519 keys and 'get-key-type' returns #f in that case.
|
|
|
|
|
(let-values (((server) (get-server-public-key session))
|
|
|
|
|
((type key) (host-key->type+key
|
|
|
|
|
(build-machine-host-key machine))))
|
|
|
|
|
(unless (and (or (not (get-key-type server))
|
|
|
|
|
(eq? (get-key-type server) type))
|
|
|
|
|
(string=? (public-key->string server) key))
|
|
|
|
|
;; Key mismatch: something's wrong. XXX: It could be that the server
|
|
|
|
|
;; provided its Ed25519 key when we where expecting its RSA key.
|
|
|
|
|
(leave (_ "server at '~a' returned host key '~a' of type '~a' \
|
2014-03-02 16:39:48 -05:00
|
|
|
|
instead of '~a' of type '~a'~%")
|
2016-12-01 17:20:18 -05:00
|
|
|
|
(build-machine-name machine)
|
|
|
|
|
(public-key->string server) (get-key-type server)
|
|
|
|
|
key type)))
|
|
|
|
|
|
|
|
|
|
(let ((auth (userauth-public-key! session private)))
|
|
|
|
|
(unless (eq? 'success auth)
|
|
|
|
|
(disconnect! session)
|
|
|
|
|
(leave (_ "SSH public key authentication failed for '~a': ~a~%")
|
|
|
|
|
(build-machine-name machine) (get-error session))))
|
|
|
|
|
|
|
|
|
|
session)
|
|
|
|
|
(x
|
|
|
|
|
;; Connection failed or timeout expired.
|
|
|
|
|
(leave (_ "failed to connect to '~a': ~a~%")
|
|
|
|
|
(build-machine-name machine) (get-error session))))))
|
2014-03-02 16:39:48 -05:00
|
|
|
|
|
2016-11-04 19:47:34 -04:00
|
|
|
|
(define* (connect-to-remote-daemon session
|
|
|
|
|
#:optional
|
|
|
|
|
(socket-name "/var/guix/daemon-socket/socket"))
|
|
|
|
|
"Connect to the remote build daemon listening on SOCKET-NAME over SESSION,
|
|
|
|
|
an SSH session. Return a <nix-server> object."
|
|
|
|
|
(define redirect
|
|
|
|
|
;; Code run in SESSION to redirect the remote process' stdin/stdout to the
|
|
|
|
|
;; daemon's socket, à la socat. The SSH protocol supports forwarding to
|
|
|
|
|
;; Unix-domain sockets but libssh doesn't have an API for that, hence this
|
|
|
|
|
;; hack.
|
|
|
|
|
`(begin
|
|
|
|
|
(use-modules (ice-9 match) (rnrs io ports))
|
|
|
|
|
|
|
|
|
|
(let ((sock (socket AF_UNIX SOCK_STREAM 0))
|
|
|
|
|
(stdin (current-input-port))
|
|
|
|
|
(stdout (current-output-port)))
|
|
|
|
|
(setvbuf stdin _IONBF)
|
|
|
|
|
(setvbuf stdout _IONBF)
|
|
|
|
|
(connect sock AF_UNIX ,socket-name)
|
|
|
|
|
|
|
|
|
|
(let loop ()
|
|
|
|
|
(match (select (list stdin sock) '() (list stdin stdout sock))
|
|
|
|
|
((reads writes ())
|
|
|
|
|
(when (memq stdin reads)
|
|
|
|
|
(match (get-bytevector-some stdin)
|
|
|
|
|
((? eof-object?)
|
|
|
|
|
(primitive-exit 0))
|
|
|
|
|
(bv
|
|
|
|
|
(put-bytevector sock bv))))
|
|
|
|
|
(when (memq sock reads)
|
|
|
|
|
(match (get-bytevector-some sock)
|
|
|
|
|
((? eof-object?)
|
|
|
|
|
(primitive-exit 0))
|
|
|
|
|
(bv
|
|
|
|
|
(put-bytevector stdout bv))))
|
|
|
|
|
(loop))
|
|
|
|
|
(_
|
|
|
|
|
(primitive-exit 1)))))))
|
|
|
|
|
|
|
|
|
|
(let ((channel
|
|
|
|
|
(open-remote-pipe* session OPEN_BOTH
|
|
|
|
|
;; Sort-of shell-quote REDIRECT.
|
|
|
|
|
"guile" "-c"
|
|
|
|
|
(object->string
|
|
|
|
|
(object->string redirect)))))
|
|
|
|
|
(open-connection #:port channel)))
|
|
|
|
|
|
2014-03-09 13:08:21 -04:00
|
|
|
|
|
|
|
|
|
;;;
|
|
|
|
|
;;; Synchronization.
|
|
|
|
|
;;;
|
|
|
|
|
|
|
|
|
|
(define (lock-file file)
|
|
|
|
|
"Wait and acquire an exclusive lock on FILE. Return an open port."
|
|
|
|
|
(mkdir-p (dirname file))
|
|
|
|
|
(let ((port (open-file file "w0")))
|
|
|
|
|
(fcntl-flock port 'write-lock)
|
|
|
|
|
port))
|
|
|
|
|
|
|
|
|
|
(define (unlock-file lock)
|
|
|
|
|
"Unlock LOCK."
|
|
|
|
|
(fcntl-flock lock 'unlock)
|
|
|
|
|
(close-port lock)
|
|
|
|
|
#t)
|
|
|
|
|
|
|
|
|
|
(define-syntax-rule (with-file-lock file exp ...)
|
|
|
|
|
"Wait to acquire a lock on FILE and evaluate EXP in that context."
|
|
|
|
|
(let ((port (lock-file file)))
|
|
|
|
|
(dynamic-wind
|
|
|
|
|
(lambda ()
|
|
|
|
|
#t)
|
|
|
|
|
(lambda ()
|
|
|
|
|
exp ...)
|
|
|
|
|
(lambda ()
|
|
|
|
|
(unlock-file port)))))
|
|
|
|
|
|
|
|
|
|
(define-syntax-rule (with-machine-lock machine hint exp ...)
|
|
|
|
|
"Wait to acquire MACHINE's exclusive lock for HINT, and evaluate EXP in that
|
|
|
|
|
context."
|
|
|
|
|
(with-file-lock (machine-lock-file machine hint)
|
|
|
|
|
exp ...))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define (machine-slot-file machine slot)
|
|
|
|
|
"Return the file name of MACHINE's file for SLOT."
|
|
|
|
|
;; For each machine we have a bunch of files representing each build slot.
|
|
|
|
|
;; When choosing a build machine, we attempt to get an exclusive lock on one
|
|
|
|
|
;; of these; if we fail, that means all the build slots are already taken.
|
|
|
|
|
;; Inspired by Nix's build-remote.pl.
|
|
|
|
|
(string-append (string-append %state-directory "/offload/"
|
|
|
|
|
(build-machine-name machine)
|
|
|
|
|
"/" (number->string slot))))
|
|
|
|
|
|
|
|
|
|
(define (acquire-build-slot machine)
|
|
|
|
|
"Attempt to acquire a build slot on MACHINE. Return the port representing
|
|
|
|
|
the slot, or #f if none is available.
|
|
|
|
|
|
|
|
|
|
This mechanism allows us to set a hard limit on the number of simultaneous
|
|
|
|
|
connections allowed to MACHINE."
|
|
|
|
|
(mkdir-p (dirname (machine-slot-file machine 0)))
|
|
|
|
|
(with-machine-lock machine 'slots
|
|
|
|
|
(any (lambda (slot)
|
|
|
|
|
(let ((port (open-file (machine-slot-file machine slot)
|
|
|
|
|
"w0")))
|
|
|
|
|
(catch 'flock-error
|
|
|
|
|
(lambda ()
|
|
|
|
|
(fcntl-flock port 'write-lock #:wait? #f)
|
|
|
|
|
;; Got it!
|
|
|
|
|
(format (current-error-port)
|
|
|
|
|
"process ~a acquired build slot '~a'~%"
|
|
|
|
|
(getpid) (port-filename port))
|
|
|
|
|
port)
|
|
|
|
|
(lambda args
|
|
|
|
|
;; PORT is already locked by another process.
|
|
|
|
|
(close-port port)
|
|
|
|
|
#f))))
|
|
|
|
|
(iota (build-machine-parallel-builds machine)))))
|
|
|
|
|
|
|
|
|
|
(define (release-build-slot slot)
|
|
|
|
|
"Release SLOT, a build slot as returned as by 'acquire-build-slot'."
|
|
|
|
|
(close-port slot))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;;
|
|
|
|
|
;;; Offloading.
|
|
|
|
|
;;;
|
|
|
|
|
|
2014-03-19 18:12:06 -04:00
|
|
|
|
(define (build-log-port)
|
|
|
|
|
"Return the default port where build logs should be sent. The default is
|
|
|
|
|
file descriptor 4, which is open by the daemon before running the offload
|
|
|
|
|
hook."
|
|
|
|
|
(let ((port (fdopen 4 "w0")))
|
|
|
|
|
;; Make sure file descriptor 4 isn't closed when PORT is GC'd.
|
|
|
|
|
(set-port-revealed! port 1)
|
|
|
|
|
port))
|
|
|
|
|
|
2014-03-09 09:05:30 -04:00
|
|
|
|
(define* (transfer-and-offload drv machine
|
|
|
|
|
#:key
|
|
|
|
|
(inputs '())
|
|
|
|
|
(outputs '())
|
|
|
|
|
(max-silent-time 3600)
|
2014-03-09 18:13:53 -04:00
|
|
|
|
build-timeout
|
2014-03-09 09:05:30 -04:00
|
|
|
|
print-build-trace?)
|
|
|
|
|
"Offload DRV to MACHINE. Prior to the actual offloading, transfer all of
|
|
|
|
|
INPUTS to MACHINE; if building DRV succeeds, retrieve all of OUTPUTS from
|
|
|
|
|
MACHINE."
|
2016-11-02 07:00:47 -04:00
|
|
|
|
(define session
|
|
|
|
|
(open-ssh-session machine))
|
|
|
|
|
|
2016-11-04 19:47:34 -04:00
|
|
|
|
(define store
|
|
|
|
|
(connect-to-remote-daemon session
|
|
|
|
|
(build-machine-daemon-socket machine)))
|
|
|
|
|
|
|
|
|
|
(set-build-options store
|
|
|
|
|
#:print-build-trace print-build-trace?
|
|
|
|
|
#:max-silent-time max-silent-time
|
|
|
|
|
#:timeout build-timeout)
|
|
|
|
|
|
|
|
|
|
;; Protect DRV from garbage collection.
|
|
|
|
|
(add-temp-root store (derivation-file-name drv))
|
|
|
|
|
|
|
|
|
|
(send-files (cons (derivation-file-name drv) inputs)
|
|
|
|
|
store)
|
|
|
|
|
(format (current-error-port) "offloading '~a' to '~a'...~%"
|
|
|
|
|
(derivation-file-name drv) (build-machine-name machine))
|
|
|
|
|
(format (current-error-port) "@ build-remote ~a ~a~%"
|
|
|
|
|
(derivation-file-name drv) (build-machine-name machine))
|
|
|
|
|
|
|
|
|
|
(guard (c ((nix-protocol-error? c)
|
|
|
|
|
(format (current-error-port)
|
|
|
|
|
(_ "derivation '~a' offloaded to '~a' failed: ~a~%")
|
|
|
|
|
(derivation-file-name drv)
|
|
|
|
|
(build-machine-name machine)
|
|
|
|
|
(nix-protocol-error-message c))
|
|
|
|
|
;; Use exit code 100 for a permanent build failure. The daemon
|
|
|
|
|
;; interprets other non-zero codes as transient build failures.
|
|
|
|
|
(primitive-exit 100)))
|
|
|
|
|
(build-derivations store (list drv)))
|
|
|
|
|
|
|
|
|
|
(retrieve-files outputs store)
|
|
|
|
|
(format (current-error-port) "done with offloaded '~a'~%"
|
|
|
|
|
(derivation-file-name drv)))
|
|
|
|
|
|
|
|
|
|
(define (store-import-channel session)
|
|
|
|
|
"Return an output port to which archives to be exported to SESSION's store
|
|
|
|
|
can be written."
|
|
|
|
|
;; Using the 'import-paths' RPC on a remote store would be slow because it
|
|
|
|
|
;; makes a round trip every time 32 KiB have been transferred. This
|
|
|
|
|
;; procedure instead opens a separate channel to use the remote
|
|
|
|
|
;; 'import-paths' procedure, which consumes all the data in a single round
|
|
|
|
|
;; trip.
|
|
|
|
|
(define import
|
|
|
|
|
`(begin
|
|
|
|
|
(use-modules (guix))
|
|
|
|
|
|
|
|
|
|
(with-store store
|
|
|
|
|
(setvbuf (current-input-port) _IONBF)
|
|
|
|
|
(import-paths store (current-input-port)))))
|
|
|
|
|
|
|
|
|
|
(open-remote-output-pipe session
|
|
|
|
|
(string-join
|
|
|
|
|
`("guile" "-c"
|
|
|
|
|
,(object->string
|
|
|
|
|
(object->string import))))))
|
|
|
|
|
|
|
|
|
|
(define (store-export-channel session files)
|
|
|
|
|
"Return an input port from which an export of FILES from SESSION's store can
|
|
|
|
|
be read."
|
|
|
|
|
;; Same as above: this is more efficient than calling 'export-paths' on a
|
|
|
|
|
;; remote store.
|
|
|
|
|
(define export
|
|
|
|
|
`(begin
|
|
|
|
|
(use-modules (guix))
|
|
|
|
|
|
|
|
|
|
(with-store store
|
|
|
|
|
(setvbuf (current-output-port) _IONBF)
|
|
|
|
|
(export-paths store ',files (current-output-port)))))
|
|
|
|
|
|
|
|
|
|
(open-remote-input-pipe session
|
|
|
|
|
(string-join
|
|
|
|
|
`("guile" "-c"
|
|
|
|
|
,(object->string
|
|
|
|
|
(object->string export))))))
|
2014-01-23 17:48:34 -05:00
|
|
|
|
|
2016-11-04 19:47:34 -04:00
|
|
|
|
(define (send-files files remote)
|
|
|
|
|
"Send the subset of FILES that's missing to REMOTE, a remote store."
|
2014-01-23 17:48:34 -05:00
|
|
|
|
(with-store store
|
2016-12-01 15:48:20 -05:00
|
|
|
|
;; Compute the subset of FILES missing on SESSION and send them.
|
|
|
|
|
(let* ((session (channel-get-session (nix-server-socket remote)))
|
2016-11-04 19:47:34 -04:00
|
|
|
|
(node (make-node session))
|
|
|
|
|
(missing (node-eval node
|
|
|
|
|
`(begin
|
|
|
|
|
(use-modules (guix)
|
|
|
|
|
(srfi srfi-1) (srfi srfi-26))
|
|
|
|
|
|
|
|
|
|
(with-store store
|
|
|
|
|
(remove (cut valid-path? store <>)
|
2016-12-01 15:48:20 -05:00
|
|
|
|
',files)))))
|
2016-11-04 19:47:34 -04:00
|
|
|
|
(port (store-import-channel session)))
|
|
|
|
|
(format #t (_ "sending ~a store files to '~a'...~%")
|
|
|
|
|
(length missing) (session-get session 'host))
|
|
|
|
|
|
2016-12-01 15:48:20 -05:00
|
|
|
|
;; Send MISSING in topological order.
|
2016-11-04 19:47:34 -04:00
|
|
|
|
(export-paths store missing port)
|
|
|
|
|
|
|
|
|
|
;; Tell the remote process that we're done. (In theory the
|
|
|
|
|
;; end-of-archive mark of 'export-paths' would be enough, but in
|
|
|
|
|
;; practice it's not.)
|
|
|
|
|
(channel-send-eof port)
|
|
|
|
|
|
|
|
|
|
;; Wait for completion of the remote process.
|
|
|
|
|
(let ((result (zero? (channel-get-exit-status port))))
|
|
|
|
|
(close-port port)
|
|
|
|
|
result))))
|
|
|
|
|
|
|
|
|
|
(define (retrieve-files files remote)
|
2016-11-02 07:00:47 -04:00
|
|
|
|
"Retrieve FILES from SESSION's store, and import them."
|
2016-11-04 19:47:34 -04:00
|
|
|
|
(let* ((session (channel-get-session (nix-server-socket remote)))
|
|
|
|
|
(host (session-get session 'host))
|
|
|
|
|
(port (store-export-channel session files)))
|
|
|
|
|
(format #t (_ "retrieving ~a files from '~a'...~%")
|
|
|
|
|
(length files) host)
|
|
|
|
|
|
|
|
|
|
;; We cannot use the 'import-paths' RPC here because we already
|
|
|
|
|
;; hold the locks for FILES.
|
|
|
|
|
(let ((result (restore-file-set port
|
|
|
|
|
#:log-port (current-error-port)
|
|
|
|
|
#:lock? #f)))
|
|
|
|
|
(close-port port)
|
|
|
|
|
result)))
|
2014-01-23 17:48:34 -05:00
|
|
|
|
|
2014-03-09 13:08:21 -04:00
|
|
|
|
|
|
|
|
|
;;;
|
|
|
|
|
;;; Scheduling.
|
|
|
|
|
;;;
|
|
|
|
|
|
2014-01-23 17:48:34 -05:00
|
|
|
|
(define (machine-matches? machine requirements)
|
|
|
|
|
"Return #t if MACHINE matches REQUIREMENTS."
|
|
|
|
|
(and (string=? (build-requirements-system requirements)
|
|
|
|
|
(build-machine-system machine))
|
|
|
|
|
(lset<= string=?
|
|
|
|
|
(build-requirements-features requirements)
|
|
|
|
|
(build-machine-features machine))))
|
|
|
|
|
|
2014-02-28 19:31:18 -05:00
|
|
|
|
(define (machine-load machine)
|
|
|
|
|
"Return the load of MACHINE, divided by the number of parallel builds
|
2016-12-01 17:21:15 -05:00
|
|
|
|
allowed on MACHINE. Return +∞ if MACHINE is unreachable."
|
2016-11-26 17:00:36 -05:00
|
|
|
|
;; Note: This procedure is costly since it creates a new SSH session.
|
2016-12-01 17:21:15 -05:00
|
|
|
|
(match (false-if-exception (open-ssh-session machine))
|
|
|
|
|
((? session? session)
|
|
|
|
|
(let* ((pipe (open-remote-pipe* session OPEN_READ
|
2016-11-25 16:47:37 -05:00
|
|
|
|
"cat" "/proc/loadavg"))
|
2016-12-01 17:21:15 -05:00
|
|
|
|
(line (read-line pipe)))
|
|
|
|
|
(close-port pipe)
|
|
|
|
|
|
|
|
|
|
(if (eof-object? line)
|
|
|
|
|
+inf.0 ;MACHINE does not respond, so assume it is infinitely loaded
|
|
|
|
|
(match (string-tokenize line)
|
|
|
|
|
((one five fifteen . _)
|
|
|
|
|
(let* ((raw (string->number five))
|
|
|
|
|
(jobs (build-machine-parallel-builds machine))
|
|
|
|
|
(normalized (/ raw jobs)))
|
|
|
|
|
(format (current-error-port) "load on machine '~a' is ~s\
|
2014-02-28 19:31:18 -05:00
|
|
|
|
(normalized: ~s)~%"
|
2016-12-01 17:21:15 -05:00
|
|
|
|
(build-machine-name machine) raw normalized)
|
|
|
|
|
normalized))
|
|
|
|
|
(_
|
|
|
|
|
+inf.0))))) ;something's fishy about MACHINE, so avoid it
|
|
|
|
|
(_
|
|
|
|
|
+inf.0))) ;failed to connect to MACHINE, so avoid it
|
2014-02-28 19:31:18 -05:00
|
|
|
|
|
2014-03-08 05:29:52 -05:00
|
|
|
|
(define (machine-lock-file machine hint)
|
|
|
|
|
"Return the name of MACHINE's lock file for HINT."
|
2014-03-06 15:38:45 -05:00
|
|
|
|
(string-append %state-directory "/offload/"
|
2014-03-08 05:29:52 -05:00
|
|
|
|
(build-machine-name machine)
|
|
|
|
|
"." (symbol->string hint) ".lock"))
|
2014-03-06 15:38:45 -05:00
|
|
|
|
|
2014-03-08 06:15:38 -05:00
|
|
|
|
(define (machine-choice-lock-file)
|
|
|
|
|
"Return the name of the file used as a lock when choosing a build machine."
|
|
|
|
|
(string-append %state-directory "/offload/machine-choice.lock"))
|
|
|
|
|
|
|
|
|
|
|
2014-03-08 15:23:12 -05:00
|
|
|
|
(define %slots
|
|
|
|
|
;; List of acquired build slots (open ports).
|
|
|
|
|
'())
|
|
|
|
|
|
2014-03-09 09:05:30 -04:00
|
|
|
|
(define (choose-build-machine machines)
|
|
|
|
|
"Return the best machine among MACHINES, or #f."
|
2014-03-08 06:15:38 -05:00
|
|
|
|
|
|
|
|
|
;; Proceed like this:
|
|
|
|
|
;; 1. Acquire the global machine-choice lock.
|
|
|
|
|
;; 2. For all MACHINES, attempt to acquire a build slot, and filter out
|
|
|
|
|
;; those machines for which we failed.
|
|
|
|
|
;; 3. Choose the best machine among those that are left.
|
|
|
|
|
;; 4. Release the previously-acquired build slots of the other machines.
|
|
|
|
|
;; 5. Release the global machine-choice lock.
|
|
|
|
|
|
|
|
|
|
(with-file-lock (machine-choice-lock-file)
|
2016-11-26 17:00:36 -05:00
|
|
|
|
(define machines+slots+loads
|
2014-03-08 06:22:49 -05:00
|
|
|
|
(filter-map (lambda (machine)
|
2016-11-26 17:00:36 -05:00
|
|
|
|
;; Call 'machine-load' from here to make sure it is called
|
|
|
|
|
;; only once per machine (it is expensive).
|
2014-03-08 06:22:49 -05:00
|
|
|
|
(let ((slot (acquire-build-slot machine)))
|
2016-11-26 17:00:36 -05:00
|
|
|
|
(and slot
|
|
|
|
|
(list machine slot (machine-load machine)))))
|
2014-03-08 06:22:49 -05:00
|
|
|
|
machines))
|
2014-03-08 06:15:38 -05:00
|
|
|
|
|
|
|
|
|
(define (undecorate pred)
|
2014-03-13 17:57:21 -04:00
|
|
|
|
(lambda (a b)
|
|
|
|
|
(match a
|
2016-11-26 17:00:36 -05:00
|
|
|
|
((machine1 slot1 load1)
|
2014-03-13 17:57:21 -04:00
|
|
|
|
(match b
|
2016-11-26 17:00:36 -05:00
|
|
|
|
((machine2 slot2 load2)
|
|
|
|
|
(pred machine1 load1 machine2 load2)))))))
|
|
|
|
|
|
|
|
|
|
(define (machine-less-loaded-or-faster? m1 l1 m2 l2)
|
|
|
|
|
;; Return #t if M1 is either less loaded or faster than M2, with L1
|
|
|
|
|
;; being the load of M1 and L2 the load of M2. (This relation defines a
|
|
|
|
|
;; total order on machines.)
|
|
|
|
|
(> (/ (build-machine-speed m1) (+ 1 l1))
|
|
|
|
|
(/ (build-machine-speed m2) (+ 1 l2))))
|
|
|
|
|
|
|
|
|
|
(let loop ((machines+slots+loads
|
|
|
|
|
(sort machines+slots+loads
|
2014-09-20 06:10:28 -04:00
|
|
|
|
(undecorate machine-less-loaded-or-faster?))))
|
2016-11-26 17:00:36 -05:00
|
|
|
|
(match machines+slots+loads
|
|
|
|
|
(((best slot load) others ...)
|
2014-03-08 06:15:38 -05:00
|
|
|
|
;; Return the best machine unless it's already overloaded.
|
2016-11-26 17:00:36 -05:00
|
|
|
|
(if (< load 2.)
|
2014-09-20 06:10:28 -04:00
|
|
|
|
(match others
|
2016-11-26 17:00:36 -05:00
|
|
|
|
(((machines slots loads) ...)
|
2014-09-20 06:10:28 -04:00
|
|
|
|
;; Release slots from the uninteresting machines.
|
|
|
|
|
(for-each release-build-slot slots)
|
|
|
|
|
|
|
|
|
|
;; Prevent SLOT from being GC'd.
|
|
|
|
|
(set! %slots (cons slot %slots))
|
|
|
|
|
best))
|
2014-03-08 15:23:12 -05:00
|
|
|
|
(begin
|
2014-09-20 06:10:28 -04:00
|
|
|
|
;; BEST is overloaded, so try the next one.
|
2014-03-08 06:15:38 -05:00
|
|
|
|
(release-build-slot slot)
|
2014-09-20 06:10:28 -04:00
|
|
|
|
(loop others))))
|
2014-03-08 06:15:38 -05:00
|
|
|
|
(() #f)))))
|
2014-01-23 17:48:34 -05:00
|
|
|
|
|
|
|
|
|
(define* (process-request wants-local? system drv features
|
|
|
|
|
#:key
|
|
|
|
|
print-build-trace? (max-silent-time 3600)
|
2014-03-09 18:13:53 -04:00
|
|
|
|
build-timeout)
|
2014-01-23 17:48:34 -05:00
|
|
|
|
"Process a request to build DRV."
|
2014-03-09 09:05:30 -04:00
|
|
|
|
(let* ((local? (and wants-local? (string=? system (%current-system))))
|
|
|
|
|
(reqs (build-requirements
|
|
|
|
|
(system system)
|
|
|
|
|
(features features)))
|
|
|
|
|
(candidates (filter (cut machine-matches? <> reqs)
|
|
|
|
|
(build-machines))))
|
|
|
|
|
(match candidates
|
|
|
|
|
(()
|
|
|
|
|
;; We'll never be able to match REQS.
|
|
|
|
|
(display "# decline\n"))
|
|
|
|
|
((_ ...)
|
|
|
|
|
(let ((machine (choose-build-machine candidates)))
|
|
|
|
|
(if machine
|
|
|
|
|
(begin
|
|
|
|
|
;; Offload DRV to MACHINE.
|
|
|
|
|
(display "# accept\n")
|
|
|
|
|
(let ((inputs (string-tokenize (read-line)))
|
|
|
|
|
(outputs (string-tokenize (read-line))))
|
|
|
|
|
(transfer-and-offload drv machine
|
|
|
|
|
#:inputs inputs
|
|
|
|
|
#:outputs outputs
|
|
|
|
|
#:max-silent-time max-silent-time
|
|
|
|
|
#:build-timeout build-timeout
|
|
|
|
|
#:print-build-trace? print-build-trace?)))
|
|
|
|
|
|
|
|
|
|
;; Not now, all the machines are busy.
|
|
|
|
|
(display "# postpone\n")))))))
|
2014-01-23 17:48:34 -05:00
|
|
|
|
|
2016-12-05 12:16:04 -05:00
|
|
|
|
|
|
|
|
|
;;;
|
|
|
|
|
;;; Installation tests.
|
|
|
|
|
;;;
|
|
|
|
|
|
|
|
|
|
(define (assert-node-repl node name)
|
|
|
|
|
"Bail out if NODE is not running Guile."
|
|
|
|
|
(match (node-guile-version node)
|
|
|
|
|
(#f
|
|
|
|
|
(leave (_ "Guile could not be started on '~a'~%")
|
|
|
|
|
name))
|
|
|
|
|
((? string? version)
|
|
|
|
|
;; Note: The version string already contains the word "Guile".
|
|
|
|
|
(info (_ "'~a' is running ~a~%")
|
|
|
|
|
name (node-guile-version node)))))
|
|
|
|
|
|
|
|
|
|
(define (assert-node-has-guix node name)
|
|
|
|
|
"Bail out if NODE lacks the (guix) module, or if its daemon is not running."
|
|
|
|
|
(match (node-eval node
|
|
|
|
|
'(begin
|
|
|
|
|
(use-modules (guix))
|
|
|
|
|
(with-store store
|
|
|
|
|
(add-text-to-store store "test"
|
|
|
|
|
"Hello, build machine!"))))
|
|
|
|
|
((? string? str)
|
|
|
|
|
(info (_ "Guix is usable on '~a' (test returned ~s)~%")
|
|
|
|
|
name str))
|
|
|
|
|
(x
|
|
|
|
|
(leave (_ "failed to use Guix module on '~a' (test returned ~s)~%")
|
|
|
|
|
name x))))
|
|
|
|
|
|
|
|
|
|
(define %random-state
|
|
|
|
|
(delay
|
|
|
|
|
(seed->random-state (logxor (getpid) (car (gettimeofday))))))
|
|
|
|
|
|
|
|
|
|
(define (nonce)
|
|
|
|
|
(string-append (gethostname) "-"
|
|
|
|
|
(number->string (random 1000000 (force %random-state)))))
|
|
|
|
|
|
|
|
|
|
(define (assert-node-can-import node name daemon-socket)
|
|
|
|
|
"Bail out if NODE refuses to import our archives."
|
|
|
|
|
(let ((session (node-session node)))
|
|
|
|
|
(with-store store
|
|
|
|
|
(let* ((item (add-text-to-store store "export-test" (nonce)))
|
|
|
|
|
(remote (connect-to-remote-daemon session daemon-socket)))
|
|
|
|
|
(send-files (list item) remote)
|
|
|
|
|
(if (valid-path? remote item)
|
|
|
|
|
(info (_ "'~a' successfully imported '~a'~%")
|
|
|
|
|
name item)
|
|
|
|
|
(leave (_ "'~a' was not properly imported on '~a'~%")
|
|
|
|
|
item name))))))
|
|
|
|
|
|
|
|
|
|
(define (assert-node-can-export node name daemon-socket)
|
|
|
|
|
"Bail out if we cannot import signed archives from NODE."
|
|
|
|
|
(let* ((session (node-session node))
|
|
|
|
|
(remote (connect-to-remote-daemon session daemon-socket))
|
|
|
|
|
(item (add-text-to-store remote "import-test" (nonce)))
|
|
|
|
|
(port (store-export-channel session (list item))))
|
|
|
|
|
(with-store store
|
|
|
|
|
(if (and (import-paths store port)
|
|
|
|
|
(valid-path? store item))
|
|
|
|
|
(info (_ "successfully imported '~a' from '~a'~%")
|
|
|
|
|
item name)
|
|
|
|
|
(leave (_ "failed to import '~a' from '~a'~%")
|
|
|
|
|
item name)))))
|
|
|
|
|
|
|
|
|
|
(define (check-machine-availability machine-file)
|
|
|
|
|
"Check that each machine in MACHINE-FILE is usable as a build machine."
|
|
|
|
|
(let ((machines (build-machines machine-file)))
|
|
|
|
|
(info (_ "testing ~a build machines defined in '~a'...~%")
|
|
|
|
|
(length machines) machine-file)
|
|
|
|
|
(let* ((names (map build-machine-name machines))
|
|
|
|
|
(sockets (map build-machine-daemon-socket machines))
|
|
|
|
|
(sessions (map open-ssh-session machines))
|
|
|
|
|
(nodes (map make-node sessions)))
|
|
|
|
|
(for-each assert-node-repl nodes names)
|
|
|
|
|
(for-each assert-node-has-guix nodes names)
|
|
|
|
|
(for-each assert-node-can-import nodes names sockets)
|
|
|
|
|
(for-each assert-node-can-export nodes names sockets))))
|
|
|
|
|
|
2014-01-23 17:48:34 -05:00
|
|
|
|
|
|
|
|
|
;;;
|
|
|
|
|
;;; Entry point.
|
|
|
|
|
;;;
|
|
|
|
|
|
|
|
|
|
(define (guix-offload . args)
|
|
|
|
|
(define request-line-rx
|
|
|
|
|
;; The request format. See 'tryBuildHook' method in build.cc.
|
|
|
|
|
(make-regexp "([01]) ([a-z0-9_-]+) (/[[:graph:]]+.drv) ([[:graph:]]*)"))
|
|
|
|
|
|
|
|
|
|
(define not-coma
|
|
|
|
|
(char-set-complement (char-set #\,)))
|
|
|
|
|
|
|
|
|
|
;; Make sure $HOME really corresponds to the current user. This is
|
|
|
|
|
;; necessary since lsh uses that to determine the location of the yarrow
|
|
|
|
|
;; seed file, and fails if it's owned by someone else.
|
|
|
|
|
(and=> (passwd:dir (getpw (getuid)))
|
|
|
|
|
(cut setenv "HOME" <>))
|
|
|
|
|
|
2016-12-01 15:49:16 -05:00
|
|
|
|
;; We rely on protocol-level compression from libssh to optimize large data
|
|
|
|
|
;; transfers. Warn if it's missing.
|
|
|
|
|
(unless (zlib-support?)
|
|
|
|
|
(warning (_ "Guile-SSH lacks zlib support"))
|
|
|
|
|
(warning (_ "data transfers will *not* be compressed!")))
|
|
|
|
|
|
2014-01-23 17:48:34 -05:00
|
|
|
|
(match args
|
|
|
|
|
((system max-silent-time print-build-trace? build-timeout)
|
|
|
|
|
(let ((max-silent-time (string->number max-silent-time))
|
|
|
|
|
(build-timeout (string->number build-timeout))
|
|
|
|
|
(print-build-trace? (string=? print-build-trace? "1")))
|
|
|
|
|
(parameterize ((%current-system system))
|
|
|
|
|
(let loop ((line (read-line)))
|
|
|
|
|
(unless (eof-object? line)
|
|
|
|
|
(cond ((regexp-exec request-line-rx line)
|
|
|
|
|
=>
|
|
|
|
|
(lambda (match)
|
2016-11-04 19:46:04 -04:00
|
|
|
|
(with-error-handling
|
2014-01-23 17:48:34 -05:00
|
|
|
|
(process-request (equal? (match:substring match 1) "1")
|
|
|
|
|
(match:substring match 2) ; system
|
|
|
|
|
(call-with-input-file
|
|
|
|
|
(match:substring match 3)
|
|
|
|
|
read-derivation)
|
|
|
|
|
(string-tokenize
|
|
|
|
|
(match:substring match 4) not-coma)
|
|
|
|
|
#:print-build-trace? print-build-trace?
|
|
|
|
|
#:max-silent-time max-silent-time
|
|
|
|
|
#:build-timeout build-timeout))))
|
|
|
|
|
(else
|
|
|
|
|
(leave (_ "invalid request line: ~s~%") line)))
|
|
|
|
|
(loop (read-line)))))))
|
2016-12-05 12:16:04 -05:00
|
|
|
|
(("test" rest ...)
|
|
|
|
|
(with-error-handling
|
|
|
|
|
(let ((file (match rest
|
|
|
|
|
((file) file)
|
|
|
|
|
(() %machine-file)
|
|
|
|
|
(_ (leave (_ "wrong number of arguments~%"))))))
|
|
|
|
|
(check-machine-availability (or file %machine-file)))))
|
2014-01-23 17:48:34 -05:00
|
|
|
|
(("--version")
|
|
|
|
|
(show-version-and-exit "guix offload"))
|
|
|
|
|
(("--help")
|
|
|
|
|
(format #t (_ "Usage: guix offload SYSTEM PRINT-BUILD-TRACE
|
|
|
|
|
Process build offload requests written on the standard input, possibly
|
|
|
|
|
offloading builds to the machines listed in '~a'.~%")
|
|
|
|
|
%machine-file)
|
|
|
|
|
(display (_ "
|
|
|
|
|
This tool is meant to be used internally by 'guix-daemon'.\n"))
|
|
|
|
|
(show-bug-report-information))
|
|
|
|
|
(x
|
|
|
|
|
(leave (_ "invalid arguments: ~{~s ~}~%") x))))
|
|
|
|
|
|
2014-03-06 15:38:45 -05:00
|
|
|
|
;;; Local Variables:
|
2014-03-08 05:29:52 -05:00
|
|
|
|
;;; eval: (put 'with-machine-lock 'scheme-indent-function 2)
|
2014-03-08 06:07:57 -05:00
|
|
|
|
;;; eval: (put 'with-file-lock 'scheme-indent-function 1)
|
2014-03-19 18:12:06 -04:00
|
|
|
|
;;; eval: (put 'with-error-to-port 'scheme-indent-function 1)
|
2014-03-06 15:38:45 -05:00
|
|
|
|
;;; End:
|
|
|
|
|
|
2014-01-23 17:48:34 -05:00
|
|
|
|
;;; offload.scm ends here
|