2020-06-05 16:47:39 -04:00
package archiver
import (
"archive/tar"
"bytes"
"fmt"
"io"
"log"
"os"
"path"
"path/filepath"
"strconv"
"strings"
)
// Tar provides facilities for operating TAR archives.
// See http://www.gnu.org/software/tar/manual/html_node/Standard.html.
type Tar struct {
	// Whether to overwrite existing files; if false,
	// an error is returned if the file exists.
	OverwriteExisting bool

	// Whether to make all the directories necessary
	// to create a tar archive in the desired path.
	MkdirAll bool

	// A single top-level folder can be implicitly
	// created by the Archive or Unarchive methods
	// if the files to be added to the archive
	// or the files to be extracted from the archive
	// do not all have a common root. This roughly
	// mimics the behavior of archival tools integrated
	// into OS file browsers which create a subfolder
	// to avoid unexpectedly littering the destination
	// folder with potentially many files, causing a
	// problematic cleanup/organization situation.
	// This feature is available for both creation
	// and extraction of archives, but may be slightly
	// inefficient with lots and lots of files,
	// especially on extraction.
	ImplicitTopLevelFolder bool

	// Strip number of leading paths. This feature is available
	// only during unpacking of the entire archive.
	StripComponents int

	// If true, errors encountered during reading
	// or writing a single file will be logged and
	// the operation will continue on remaining files.
	ContinueOnError bool

	// tw is the active tar writer; it is set by Create and reset by Close.
	tw *tar.Writer
	// tr is the active tar reader; it is set by Open and reset by Close.
	tr *tar.Reader

	// readerWrapFn, when non-nil, wraps the input stream before it is
	// handed to the tar reader.
	readerWrapFn func(io.Reader) (io.Reader, error)
	// writerWrapFn, when non-nil, wraps the output stream before it is
	// handed to the tar writer.
	writerWrapFn func(io.Writer) (io.Writer, error)
	// cleanupWrapFn, when non-nil, is invoked by Close after the tar
	// writer has been closed, and after scanning in addTopLevelFolder.
	cleanupWrapFn func()
}
// CheckExt verifies that filename carries the ".tar" suffix expected for
// this format; it returns nil when it does and an error otherwise.
func (*Tar) CheckExt(filename string) error {
	if strings.HasSuffix(filename, ".tar") {
		return nil
	}
	return fmt.Errorf("filename must have a .tar extension")
}
2020-10-16 01:06:27 -04:00
// CheckPath ensures that the filename has not been crafted to perform path traversal attacks
func ( * Tar ) CheckPath ( to , filename string ) error {
to , _ = filepath . Abs ( to ) //explicit the destination folder to prevent that 'string.HasPrefix' check can be 'bypassed' when no destination folder is supplied in input
dest := filepath . Join ( to , filename )
//prevent path traversal attacks
if ! strings . HasPrefix ( dest , to ) {
2020-11-06 13:41:42 -05:00
return & IllegalPathError { AbsolutePath : dest , Filename : filename }
2020-10-16 01:06:27 -04:00
}
return nil
}
2020-06-05 16:47:39 -04:00
// Archive creates a tarball file at destination containing
// the files listed in sources. The destination must end with
// ".tar". File paths can be those of regular files or
// directories; directories will be recursively added.
//
// An error is returned if destination already exists and
// OverwriteExisting is false. When MkdirAll is set, the folder that
// will contain destination is created if missing. Failures to finalize
// the tar stream or to close the output file are reported unless an
// earlier error is already being returned.
func (t *Tar) Archive(sources []string, destination string) (err error) {
	// The extension is only enforced when no writer wrapper is installed.
	if extErr := t.CheckExt(destination); extErr != nil && t.writerWrapFn == nil {
		return fmt.Errorf("checking extension: %v", extErr)
	}
	if !t.OverwriteExisting && fileExists(destination) {
		return fmt.Errorf("file already exists: %s", destination)
	}

	// make the folder to contain the resulting archive
	// if it does not already exist
	destDir := filepath.Dir(destination)
	if t.MkdirAll && !fileExists(destDir) {
		if mkErr := mkdir(destDir, 0755); mkErr != nil {
			return fmt.Errorf("making folder for destination: %v", mkErr)
		}
	}

	out, err := os.Create(destination)
	if err != nil {
		return fmt.Errorf("creating %s: %v", destination, err)
	}
	// Deferred calls run last-in-first-out, so the tar writer below is
	// closed before the file it writes to.
	defer func() {
		if closeErr := out.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("closing %s: %v", destination, closeErr)
		}
	}()

	if err = t.Create(out); err != nil {
		return fmt.Errorf("creating tar: %v", err)
	}
	defer func() {
		// Closing the tar writer emits the end of the archive; a failure
		// here means the archive is incomplete and must not be ignored.
		if closeErr := t.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("closing tar: %v", closeErr)
		}
	}()

	var topLevelFolder string
	if t.ImplicitTopLevelFolder && multipleTopLevels(sources) {
		topLevelFolder = folderNameFromFileName(destination)
	}

	for _, source := range sources {
		if walkErr := t.writeWalk(source, topLevelFolder, destination); walkErr != nil {
			return fmt.Errorf("walking %s: %v", source, walkErr)
		}
	}
	return nil
}
// Unarchive unpacks the .tar file at source to destination.
// Destination will be treated as a folder name.
func ( t * Tar ) Unarchive ( source , destination string ) error {
if ! fileExists ( destination ) && t . MkdirAll {
err := mkdir ( destination , 0755 )
if err != nil {
return fmt . Errorf ( "preparing destination: %v" , err )
}
}
// if the files in the archive do not all share a common
// root, then make sure we extract to a single subfolder
// rather than potentially littering the destination...
if t . ImplicitTopLevelFolder {
var err error
destination , err = t . addTopLevelFolder ( source , destination )
if err != nil {
return fmt . Errorf ( "scanning source archive: %v" , err )
}
}
file , err := os . Open ( source )
if err != nil {
return fmt . Errorf ( "opening source archive: %v" , err )
}
defer file . Close ( )
err = t . Open ( file , 0 )
if err != nil {
return fmt . Errorf ( "opening tar archive for reading: %v" , err )
}
defer t . Close ( )
for {
err := t . untarNext ( destination )
if err == io . EOF {
break
}
if err != nil {
2020-11-06 13:41:42 -05:00
if t . ContinueOnError || IsIllegalPathError ( err ) {
2020-06-05 16:47:39 -04:00
log . Printf ( "[ERROR] Reading file in tar archive: %v" , err )
continue
}
return fmt . Errorf ( "reading file in tar archive: %v" , err )
}
}
return nil
}
// addTopLevelFolder lists the entries of the tarball named sourceArchive
// and, when they do not all live under one top-level folder, returns
// destination extended with a folder name derived from sourceArchive.
// Otherwise destination is returned unchanged.
func (t *Tar) addTopLevelFolder(sourceArchive, destination string) (string, error) {
	archiveFile, err := os.Open(sourceArchive)
	if err != nil {
		return "", fmt.Errorf("opening source archive: %v", err)
	}
	defer archiveFile.Close()

	// The stream has to go through the reader wrapper (if any) before
	// the tar reader can make sense of it.
	var src io.Reader = archiveFile
	if t.readerWrapFn != nil {
		if src, err = t.readerWrapFn(src); err != nil {
			return "", fmt.Errorf("wrapping reader: %v", err)
		}
	}
	if t.cleanupWrapFn != nil {
		defer t.cleanupWrapFn()
	}

	// Collect every entry name in the archive.
	var names []string
	listing := tar.NewReader(src)
	for hdr, err := listing.Next(); err != io.EOF; hdr, err = listing.Next() {
		if err != nil {
			return "", fmt.Errorf("scanning tarball's file listing: %v", err)
		}
		names = append(names, hdr.Name)
	}

	if !multipleTopLevels(names) {
		return destination, nil
	}
	return filepath.Join(destination, folderNameFromFileName(sourceArchive)), nil
}
2020-10-16 01:06:27 -04:00
func ( t * Tar ) untarNext ( destination string ) error {
2020-06-05 16:47:39 -04:00
f , err := t . Read ( )
if err != nil {
return err // don't wrap error; calling loop must break on io.EOF
}
2020-10-16 01:06:27 -04:00
defer f . Close ( )
2020-06-05 16:47:39 -04:00
header , ok := f . Header . ( * tar . Header )
if ! ok {
return fmt . Errorf ( "expected header to be *tar.Header but was %T" , f . Header )
}
2020-10-16 01:06:27 -04:00
errPath := t . CheckPath ( destination , header . Name )
if errPath != nil {
return fmt . Errorf ( "checking path traversal attempt: %v" , errPath )
}
2020-11-06 13:41:42 -05:00
if t . StripComponents > 0 {
if strings . Count ( header . Name , "/" ) < t . StripComponents {
return nil // skip path with fewer components
}
for i := 0 ; i < t . StripComponents ; i ++ {
slash := strings . Index ( header . Name , "/" )
header . Name = header . Name [ slash + 1 : ]
}
}
2020-10-16 01:06:27 -04:00
return t . untarFile ( f , destination , header )
2020-06-05 16:47:39 -04:00
}
2020-10-16 01:06:27 -04:00
func ( t * Tar ) untarFile ( f File , destination string , hdr * tar . Header ) error {
to := filepath . Join ( destination , hdr . Name )
2020-06-05 16:47:39 -04:00
// do not overwrite existing files, if configured
if ! f . IsDir ( ) && ! t . OverwriteExisting && fileExists ( to ) {
return fmt . Errorf ( "file already exists: %s" , to )
}
switch hdr . Typeflag {
case tar . TypeDir :
return mkdir ( to , f . Mode ( ) )
case tar . TypeReg , tar . TypeRegA , tar . TypeChar , tar . TypeBlock , tar . TypeFifo , tar . TypeGNUSparse :
return writeNewFile ( to , f , f . Mode ( ) )
case tar . TypeSymlink :
return writeNewSymbolicLink ( to , hdr . Linkname )
case tar . TypeLink :
2020-10-16 01:06:27 -04:00
return writeNewHardLink ( to , filepath . Join ( destination , hdr . Linkname ) )
2020-06-05 16:47:39 -04:00
case tar . TypeXGlobalHeader :
return nil // ignore the pax global header from git-generated tarballs
default :
return fmt . Errorf ( "%s: unknown type flag: %c" , hdr . Name , hdr . Typeflag )
}
}
// writeWalk walks the file tree rooted at source and writes every visited
// item into the open archive. topLevelFolder is passed on when building
// each item's name inside the archive; destination is the archive file
// being produced, which is skipped so it is never added to itself.
// When ContinueOnError is set, per-item failures are logged and the walk
// goes on; otherwise the first failure ends the walk.
func (t *Tar) writeWalk(source, topLevelFolder, destination string) error {
	rootInfo, err := os.Stat(source)
	if err != nil {
		return fmt.Errorf("%s: stat: %v", source, err)
	}
	absDest, err := filepath.Abs(destination)
	if err != nil {
		return fmt.Errorf("%s: getting absolute path of destination %s: %v", source, destination, err)
	}

	visit := func(fpath string, info os.FileInfo, walkErr error) error {
		// report either swallows (after logging) or propagates a failure,
		// depending on ContinueOnError.
		report := func(e error) error {
			if !t.ContinueOnError {
				return e
			}
			log.Printf("[ERROR] Walking %s: %v", fpath, e)
			return nil
		}

		switch {
		case walkErr != nil:
			return report(fmt.Errorf("traversing %s: %v", fpath, walkErr))
		case info == nil:
			return report(fmt.Errorf("no file info"))
		}

		// never copy the output file into itself
		absPath, err := filepath.Abs(fpath)
		if err != nil {
			return report(fmt.Errorf("%s: getting absolute path: %v", fpath, err))
		}
		if within(absPath, absDest) {
			return nil
		}

		// work out the name this item gets inside the archive
		nameInArchive, err := makeNameInArchive(rootInfo, source, topLevelFolder, fpath)
		if err != nil {
			return report(err)
		}

		// only regular files have contents to stream
		var contents io.ReadCloser
		if info.Mode().IsRegular() {
			opened, err := os.Open(fpath)
			if err != nil {
				return report(fmt.Errorf("%s: opening: %v", fpath, err))
			}
			defer opened.Close()
			contents = opened
		}

		entry := File{
			FileInfo: FileInfo{
				FileInfo:   info,
				CustomName: nameInArchive,
			},
			ReadCloser: contents,
		}
		if err := t.Write(entry); err != nil {
			return report(fmt.Errorf("%s: writing: %s", fpath, err))
		}
		return nil
	}

	return filepath.Walk(source, visit)
}
// Create prepares t for writing a tar archive to out. It fails if t
// already has an archive open for writing, or if the configured writer
// wrapper returns an error.
func (t *Tar) Create(out io.Writer) error {
	if t.tw != nil {
		return fmt.Errorf("tar archive is already created for writing")
	}

	// A writer wrapper lets the tar stream pass through another layer
	// (compression, for example) on its way to out.
	sink := out
	if wrap := t.writerWrapFn; wrap != nil {
		wrapped, err := wrap(sink)
		if err != nil {
			return fmt.Errorf("wrapping writer: %v", err)
		}
		sink = wrapped
	}

	t.tw = tar.NewWriter(sink)
	return nil
}
// Write adds f to the archive; t must have been prepared with Create.
// A header is written for every entry, and the contents are copied only
// for entries whose header type is a regular file. Symbolic links have
// their target read with os.Readlink and recorded in the header.
func (t *Tar) Write(f File) error {
	switch {
	case t.tw == nil:
		return fmt.Errorf("tar archive was not created for writing first")
	case f.FileInfo == nil:
		return fmt.Errorf("no file info")
	case f.FileInfo.Name() == "":
		return fmt.Errorf("missing file name")
	}

	var linkTarget string
	if isSymlink(f) {
		target, err := os.Readlink(f.Name())
		if err != nil {
			return fmt.Errorf("%s: readlink: %v", f.Name(), err)
		}
		linkTarget = target
	}

	hdr, err := tar.FileInfoHeader(f, filepath.ToSlash(linkTarget))
	if err != nil {
		return fmt.Errorf("%s: making header: %v", f.Name(), err)
	}
	if err = t.tw.WriteHeader(hdr); err != nil {
		return fmt.Errorf("%s: writing header: %v", hdr.Name, err)
	}

	// Directories and non-regular entries carry no contents.
	if f.IsDir() || hdr.Typeflag != tar.TypeReg {
		return nil
	}

	if f.ReadCloser == nil {
		return fmt.Errorf("%s: no way to read file contents", f.Name())
	}
	if _, err := io.Copy(t.tw, f); err != nil {
		return fmt.Errorf("%s: copying contents: %v", f.Name(), err)
	}
	return nil
}
// Open prepares t for reading a tar archive from in. The size argument
// is accepted but not used. It fails if t already has an archive open
// for reading, or if the configured reader wrapper returns an error.
func (t *Tar) Open(in io.Reader, size int64) error {
	if t.tr != nil {
		return fmt.Errorf("tar archive is already open for reading")
	}

	// A reader wrapper lets the input pass through another layer
	// (decompression, for example) before the tar reader sees it.
	src := in
	if wrap := t.readerWrapFn; wrap != nil {
		wrapped, err := wrap(src)
		if err != nil {
			return fmt.Errorf("wrapping file reader: %v", err)
		}
		src = wrapped
	}

	t.tr = tar.NewReader(src)
	return nil
}
// Read returns the next entry of the archive opened with Open. The
// error from the underlying tar reader is passed through untouched, so
// callers see io.EOF once there are no more entries. The returned File
// must be closed when the caller is done reading from it.
func (t *Tar) Read() (File, error) {
	var none File
	if t.tr == nil {
		return none, fmt.Errorf("tar archive is not open")
	}

	hdr, err := t.tr.Next()
	if err != nil {
		return none, err // deliberately unwrapped so io.EOF stays recognizable
	}

	next := File{}
	next.FileInfo = hdr.FileInfo()
	next.Header = hdr
	next.ReadCloser = ReadFakeCloser{t.tr}
	return next, nil
}
// Close releases the reader and writer set up by Open and Create. The
// tar writer, if any, is closed and its error is the one returned. The
// wrapper cleanup function, if configured, runs afterwards on every call.
func (t *Tar) Close() error {
	t.tr = nil

	var closeErr error
	if w := t.tw; w != nil {
		t.tw = nil
		closeErr = w.Close()
	}

	// Cleanup of the reader/writer wrapper must come after the
	// underlying tar stream has been closed.
	if t.cleanupWrapFn != nil {
		t.cleanupWrapFn()
	}
	return closeErr
}
// Walk calls walkFn for each visited item in archive.
//
// Returning ErrStopWalk from walkFn ends the walk without an error. Any
// other error from walkFn is logged and skipped when ContinueOnError is
// set, and otherwise returned. An error while advancing to the next entry
// always ends the walk, regardless of ContinueOnError.
func (t *Tar) Walk(archive string, walkFn WalkFunc) error {
	file, err := os.Open(archive)
	if err != nil {
		return fmt.Errorf("opening archive file: %v", err)
	}
	defer file.Close()

	err = t.Open(file, 0)
	if err != nil {
		return fmt.Errorf("opening archive: %v", err)
	}
	defer t.Close()

	for {
		f, err := t.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			// A failure to advance the tar reader is not specific to one
			// file: once the reader has failed it keeps returning the same
			// error, so retrying here would spin forever.
			return fmt.Errorf("opening next file: %v", err)
		}
		err = walkFn(f)
		if err != nil {
			if err == ErrStopWalk {
				break
			}
			if t.ContinueOnError {
				log.Printf("[ERROR] Walking %s: %v", f.Name(), err)
				continue
			}
			return fmt.Errorf("walking %s: %v", f.Name(), err)
		}
	}

	return nil
}
// Extract extracts a single file from the tar archive.
// If the target is a directory, the entire folder will
// be extracted into destination.
func ( t * Tar ) Extract ( source , target , destination string ) error {
// target refers to a path inside the archive, which should be clean also
target = path . Clean ( target )
// if the target ends up being a directory, then
// we will continue walking and extracting files
// until we are no longer within that directory
var targetDirPath string
return t . Walk ( source , func ( f File ) error {
th , ok := f . Header . ( * tar . Header )
if ! ok {
return fmt . Errorf ( "expected header to be *tar.Header but was %T" , f . Header )
}
// importantly, cleaning the path strips tailing slash,
// which must be appended to folders within the archive
name := path . Clean ( th . Name )
if f . IsDir ( ) && target == name {
targetDirPath = path . Dir ( name )
}
if within ( target , th . Name ) {
// either this is the exact file we want, or is
// in the directory we want to extract
// build the filename we will extract to
end , err := filepath . Rel ( targetDirPath , th . Name )
if err != nil {
return fmt . Errorf ( "relativizing paths: %v" , err )
}
2020-10-16 01:06:27 -04:00
th . Name = end
// relativize any hardlink names
if th . Typeflag == tar . TypeLink {
th . Linkname = filepath . Join ( filepath . Base ( filepath . Dir ( th . Linkname ) ) , filepath . Base ( th . Linkname ) )
}
2020-06-05 16:47:39 -04:00
2020-10-16 01:06:27 -04:00
err = t . untarFile ( f , destination , th )
2020-06-05 16:47:39 -04:00
if err != nil {
return fmt . Errorf ( "extracting file %s: %v" , th . Name , err )
}
// if our target was not a directory, stop walk
if targetDirPath == "" {
return ErrStopWalk
}
} else if targetDirPath != "" {
// finished walking the entire directory
return ErrStopWalk
}
return nil
} )
}
// Match returns true if the format of file matches this
// type's format. It should not affect reader position.
func ( * Tar ) Match ( file io . ReadSeeker ) ( bool , error ) {
currentPos , err := file . Seek ( 0 , io . SeekCurrent )
if err != nil {
return false , err
}
_ , err = file . Seek ( 0 , 0 )
if err != nil {
return false , err
}
2020-10-16 01:06:27 -04:00
defer func ( ) {
_ , _ = file . Seek ( currentPos , io . SeekStart )
} ( )
2020-06-05 16:47:39 -04:00
buf := make ( [ ] byte , tarBlockSize )
if _ , err = io . ReadFull ( file , buf ) ; err != nil {
return false , nil
}
return hasTarHeader ( buf ) , nil
}
// hasTarHeader reports whether buf starts with a block whose stored
// checksum matches its contents. buf must hold at least tarBlockSize
// bytes; shorter input always yields false, as does an empty or
// non-octal checksum field.
func hasTarHeader(buf []byte) bool {
	if len(buf) < tarBlockSize {
		return false
	}

	// byte range of the checksum field within the header
	const chksumStart, chksumEnd = 148, 156

	// strip the padding (spaces and NUL bytes) around the octal digits
	field := bytes.Trim(buf[chksumStart:chksumEnd], " \x00")
	if len(field) == 0 {
		return false // unknown format
	}
	want, err := strconv.ParseUint(string(field), 8, 64)
	if err != nil {
		return false
	}

	// According to the official Go archive/tar package, Sun tar sums
	// signed byte values, so both the signed and unsigned totals are
	// computed and either one may match.
	var unsigned uint64
	var signed int64
	for i := range buf {
		c := buf[i]
		if i >= chksumStart && i < chksumEnd {
			c = ' ' // the checksum field itself is counted as blanks
		}
		unsigned += uint64(c)
		signed += int64(int8(c))
	}

	return want == unsigned || int64(want) == signed
}
// String returns the name of this format, "tar".
func (*Tar) String() string {
	return "tar"
}
// NewTar returns a fresh Tar with MkdirAll enabled and every other
// option at its zero value, ready to be customized and used.
func NewTar() *Tar {
	t := new(Tar)
	t.MkdirAll = true
	return t
}
// tarBlockSize is the number of bytes Match reads from the start of a file,
// and the minimum buffer length hasTarHeader needs to inspect a header.
const tarBlockSize = 512
// Compile-time checks to ensure type implements desired interfaces.
var (
_ = Reader ( new ( Tar ) )
_ = Writer ( new ( Tar ) )
_ = Archiver ( new ( Tar ) )
_ = Unarchiver ( new ( Tar ) )
_ = Walker ( new ( Tar ) )
_ = Extractor ( new ( Tar ) )
_ = Matcher ( new ( Tar ) )
_ = ExtensionChecker ( new ( Tar ) )
2020-10-16 01:06:27 -04:00
_ = FilenameChecker ( new ( Tar ) )
2020-06-05 16:47:39 -04:00
)
// DefaultTar is a package-level Tar built with NewTar, conveniently
// ready to use.
var DefaultTar = NewTar()