biology/checkm: Update 1.0.18 -> 1.1.8

Reported by: portscout
2022-04-11 12:53:21 -07:00 · 2022-04-11 12:53:21 -07:00 · 350d96226d
commit 350d96226d
parent 848e360f0d
3 changed files with 8 additions and 656 deletions
--- a/biology/checkm/Makefile
+++ b/biology/checkm/Makefile
@@ -1,6 +1,6 @@
 PORTNAME=	CheckM
 DISTVERSIONPREFIX=	v
-DISTVERSION=	1.0.18
+DISTVERSION=	1.1.8
 CATEGORIES=	biology python

 MAINTAINER=	yuri@FreeBSD.org
@@ -10,10 +10,10 @@ LICENSE=	GPLv3
 LICENSE_FILE=	${WRKSRC}/LICENSE

 RUN_DEPENDS=	${PYNUMPY} \
-		${PYTHON_PKGNAMEPREFIX}DendroPy>=4.0.0:science/py-DendroPy@${PY_FLAVOR} \
-		${PYTHON_PKGNAMEPREFIX}matplotlib>=1.3.1:math/py-matplotlib@${PY_FLAVOR} \
-		${PYTHON_PKGNAMEPREFIX}pysam>=0.8.3:biology/py-pysam@${PY_FLAVOR} \
-		${PYTHON_PKGNAMEPREFIX}scipy>=0.9.0:science/py-scipy@${PY_FLAVOR}
+		${PYTHON_PKGNAMEPREFIX}DendroPy>=4.4.0:science/py-DendroPy@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}matplotlib>=2.1.0:math/py-matplotlib@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}pysam>=0.12.0.1:biology/py-pysam@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}scipy>=0.19.1:science/py-scipy@${PY_FLAVOR}

 USES=		dos2unix python:3.7+
 USE_GITHUB=	yes
--- a/biology/checkm/distinfo
+++ b/biology/checkm/distinfo
@@ -1,3 +1,3 @@
-TIMESTAMP = 1566202999
-SHA256 (Ecogenomics-CheckM-v1.0.18_GH0.tar.gz) = 240184bd7c708cd041d0fc14f81b22af5cb69cb96ae75177aee32effa578ca4e
-SIZE (Ecogenomics-CheckM-v1.0.18_GH0.tar.gz) = 212064
+TIMESTAMP = 1649697676
+SHA256 (Ecogenomics-CheckM-v1.1.8_GH0.tar.gz) = c6e9d007622808ae3312de73d54866292a83857837119380a036036e799c1f38
+SIZE (Ecogenomics-CheckM-v1.1.8_GH0.tar.gz) = 1016432
--- a/biology/checkm/files/patch-2to3
+++ b/biology/checkm/files/patch-2to3
@@ -1,648 +0,0 @@
--- checkm/binTools.py.orig	2022-03-15 18:25:01 UTC
-+++ checkm/binTools.py
-@@ -26,7 +26,7 @@ import gzip
- 
- import numpy as np
- 
-from common import binIdFromFilename, checkFileExists, readDistribution, findNearest
-+from .common import binIdFromFilename, checkFileExists, readDistribution, findNearest
- from checkm.util.seqUtils import readFasta, writeFasta, baseCount
- from checkm.genomicSignatures import GenomicSignatures
- from checkm.prodigal import ProdigalGeneFeatureParser
-@@ -123,34 +123,34 @@ class BinTools():
-                     seqId = line[1:].split(None, 1)[0]
- 
-                     if seqId in seqIds:
-                        print '  [Warning] Sequence %s found multiple times in bin %s.' % (seqId, binId)
-+                        print('  [Warning] Sequence %s found multiple times in bin %s.' % (seqId, binId))
-                     seqIds.add(seqId)
- 
-             binSeqs[binId] = seqIds
- 
-         # check for sequences assigned to multiple bins
-         bDuplicates = False
-        binIds = binSeqs.keys()
-        for i in xrange(0, len(binIds)):
-            for j in xrange(i + 1, len(binIds)):
-+        binIds = list(binSeqs.keys())
-+        for i in range(0, len(binIds)):
-+            for j in range(i + 1, len(binIds)):
-                 seqInter = set(binSeqs[binIds[i]]).intersection(set(binSeqs[binIds[j]]))
- 
-                 if len(seqInter) > 0:
-                     bDuplicates = True
-                    print '  Sequences shared between %s and %s: ' % (binIds[i], binIds[j])
-+                    print('  Sequences shared between %s and %s: ' % (binIds[i], binIds[j]))
-                     for seqId in seqInter:
-                        print '    ' + seqId
-                    print ''
-+                        print('    ' + seqId)
-+                    print('')
- 
-         if not bDuplicates:
-            print '  No sequences assigned to multiple bins.'
-+            print('  No sequences assigned to multiple bins.')
- 
-     def gcDist(self, seqs):
-         """GC statistics for bin."""
-         GCs = []
-         gcTotal = 0
-         basesTotal = 0
-        for _, seq in seqs.iteritems():
-+        for _, seq in seqs.items():
-             a, c, g, t = baseCount(seq)
-             gc = g + c
-             bases = a + c + g + t
-@@ -171,7 +171,7 @@ class BinTools():
- 
-         codingBasesTotal = 0
-         basesTotal = 0
-        for seqId, seq in seqs.iteritems():
-+        for seqId, seq in seqs.items():
-             codingBases = prodigalParser.codingBases(seqId)
- 
-             CDs.append(float(codingBases) / len(seq))
-@@ -186,11 +186,11 @@ class BinTools():
-     def binTetraSig(self, seqs, tetraSigs):
-         """Tetranucleotide signature for bin. """
-         binSize = 0
-        for _, seq in seqs.iteritems():
-+        for _, seq in seqs.items():
-             binSize += len(seq)
- 
-         bInit = True
-        for seqId, seq in seqs.iteritems():
-+        for seqId, seq in seqs.items():
-             weightedTetraSig = tetraSigs[seqId] * (float(len(seq)) / binSize)
-             if bInit:
-                 binSig = weightedTetraSig
-@@ -247,32 +247,32 @@ class BinTools():
-             meanCD, deltaCDs, CDs = self.codingDensityDist(seqs, prodigalParser)
- 
-             # find keys into GC and CD distributions
-            closestGC = findNearest(np.array(gcBounds.keys()), meanGC)
-            sampleSeqLen = gcBounds[closestGC].keys()[0]
-+            closestGC = findNearest(np.array(list(gcBounds.keys())), meanGC)
-+            sampleSeqLen = list(gcBounds[closestGC].keys())[0]
-             d = gcBounds[closestGC][sampleSeqLen]
-            gcLowerBoundKey = findNearest(d.keys(), (100 - distribution) / 2.0)
-            gcUpperBoundKey = findNearest(d.keys(), (100 + distribution) / 2.0)
-+            gcLowerBoundKey = findNearest(list(d.keys()), (100 - distribution) / 2.0)
-+            gcUpperBoundKey = findNearest(list(d.keys()), (100 + distribution) / 2.0)
- 
-            closestCD = findNearest(np.array(cdBounds.keys()), meanCD)
-            sampleSeqLen = cdBounds[closestCD].keys()[0]
-+            closestCD = findNearest(np.array(list(cdBounds.keys())), meanCD)
-+            sampleSeqLen = list(cdBounds[closestCD].keys())[0]
-             d = cdBounds[closestCD][sampleSeqLen]
-            cdLowerBoundKey = findNearest(d.keys(), (100 - distribution) / 2.0)
-+            cdLowerBoundKey = findNearest(list(d.keys()), (100 - distribution) / 2.0)
- 
-            tdBoundKey = findNearest(tdBounds[tdBounds.keys()[0]].keys(), distribution)
-+            tdBoundKey = findNearest(list(tdBounds[list(tdBounds.keys())[0]].keys()), distribution)
- 
-             index = 0
-            for seqId, seq in seqs.iteritems():
-+            for seqId, seq in seqs.items():
-                 seqLen = len(seq)
- 
-                 # find GC, CD, and TD bounds
-                closestSeqLen = findNearest(gcBounds[closestGC].keys(), seqLen)
-+                closestSeqLen = findNearest(list(gcBounds[closestGC].keys()), seqLen)
-                 gcLowerBound = gcBounds[closestGC][closestSeqLen][gcLowerBoundKey]
-                 gcUpperBound = gcBounds[closestGC][closestSeqLen][gcUpperBoundKey]
- 
-                closestSeqLen = findNearest(cdBounds[closestCD].keys(), seqLen)
-+                closestSeqLen = findNearest(list(cdBounds[closestCD].keys()), seqLen)
-                 cdLowerBound = cdBounds[closestCD][closestSeqLen][cdLowerBoundKey]
- 
-                closestSeqLen = findNearest(tdBounds.keys(), seqLen)
-+                closestSeqLen = findNearest(list(tdBounds.keys()), seqLen)
-                 tdBound = tdBounds[closestSeqLen][tdBoundKey]
- 
-                 outlyingDists = []
--- checkm/checkmData.py.orig	2022-03-15 18:25:01 UTC
-+++ checkm/checkmData.py
-@@ -85,11 +85,11 @@ class DBConfig(object):
-         """Work out if we have permission to write to the CheckM config before attempting to make changes"""
-         try:
-             open(self.configFile, 'a')
-        except IOError, e:
-            print "You do not seem to have permission to edit the checkm config file"
-            print "located at %s" % self.configFile
-            print "Please try again with updated privileges. Error was:\n"
-            print e
-+        except IOError as e:
-+            print("You do not seem to have permission to edit the checkm config file")
-+            print("located at %s" % self.configFile)
-+            print("Please try again with updated privileges. Error was:\n")
-+            print(e)
-             return False
-         return True
- 
-@@ -167,28 +167,28 @@ class DBManager(mm.ManifestManager):
-             else:
-                 path = os.path.abspath(os.path.expanduser(path))
- 
-            print ""
-+            print("")
-             if os.path.exists(path):
-                 # path exists
-                 if os.access(path, os.W_OK):
-                     # path is writable
-                     path_set = True
-                    print "Path [%s] exists and you have permission to write to this folder." % path
-+                    print("Path [%s] exists and you have permission to write to this folder." % path)
-                 else:
-                    print "Path [%s] exists but you do not have permission to write to this folder." % path
-+                    print("Path [%s] exists but you do not have permission to write to this folder." % path)
-             else:
-                 # path does not exist, try to make it
-                 "Path [%s] does not exist so I will attempt to create it" % path
-                 try:
-                     self.makeSurePathExists(path)
-                    print "Path [%s] has been created and you have permission to write to this folder." % path
-+                    print("Path [%s] has been created and you have permission to write to this folder." % path)
-                     path_set = True
-                 except Exception:
-                    print "Unable to make the folder, Error was: %s" % sys.exc_info()[0]
-+                    print("Unable to make the folder, Error was: %s" % sys.exc_info()[0])
-                 minimal = True
- 
-         # (re)make the manifest file
-        print "(re) creating manifest file (please be patient)."
-+        print("(re) creating manifest file (please be patient).")
-         self.createManifest(path, self.config.values["localManifestName"])
- 
-         return path
-@@ -196,8 +196,8 @@ class DBManager(mm.ManifestManager):
-     def checkPermissions(self):
-         """See if the user has permission to write to the data directory"""
-         if not os.access(self.config.values["dataRoot"], os.W_OK):
-            print "You do not seem to have permission to edit the CheckM data folder"
-            print "located at %s" % self.config.values["dataRoot"]
-+            print("You do not seem to have permission to edit the CheckM data folder")
-+            print("located at %s" % self.config.values["dataRoot"])
-             return False
- 
-         return True
--- checkm/coverage.py.orig	2022-03-15 18:25:01 UTC
-+++ checkm/coverage.py
-@@ -62,7 +62,7 @@ class Coverage():
-             binId = binIdFromFilename(binFile)
- 
-             seqs = readFasta(binFile)
-            for seqId, seq in seqs.iteritems():
-+            for seqId, seq in seqs.items():
-                 seqIdToBinId[seqId] = binId
-                 seqIdToSeqLen[seqId] = len(seq)
- 
-@@ -97,12 +97,12 @@ class Coverage():
-         print(header)
- 
-         # get length of all seqs
-        for bamFile, seqIds in coverageInfo.iteritems():
-            for seqId in seqIds.keys():
-+        for bamFile, seqIds in coverageInfo.items():
-+            for seqId in list(seqIds.keys()):
-                 seqIdToSeqLen[seqId] = seqIds[seqId].seqLen
- 
-         # write coverage stats for all scaffolds to file
-        for seqId, seqLen in seqIdToSeqLen.iteritems():
-+        for seqId, seqLen in seqIdToSeqLen.items():
-             rowStr = seqId + '\t' + seqIdToBinId.get(seqId, DefaultValues.UNBINNED) + '\t' + str(seqLen)
-             for bamFile in bamFiles:
-                 bamId = binIdFromFilename(bamFile)
-@@ -171,7 +171,7 @@ class Coverage():
-             writeProc.join()
-         except:
-             # make sure all processes are terminated
-            print traceback.format_exc()
-+            print(traceback.format_exc())
-             for p in workerProc:
-                 p.terminate()
- 
-@@ -271,16 +271,16 @@ class Coverage():
-         if self.logger.getEffectiveLevel() <= logging.INFO:
-             sys.stderr.write('\n')
- 
-            print ''
-            print '    # total reads: %d' % totalReads
-            print '      # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads)
-            print '      # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads)
-            print '      # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads)
-            print '      # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads)
-            print '      # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads)
-            print '      # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads)
-            print '      # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads)
-            print ''
-+            print('')
-+            print('    # total reads: %d' % totalReads)
-+            print('      # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads))
-+            print('      # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads))
-+            print('      # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads))
-+            print('      # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads))
-+            print('      # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads))
-+            print('      # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads))
-+            print('      # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads))
-+            print('')
- 
-     def parseCoverage(self, coverageFile):
-         """Read coverage information from file."""
-@@ -301,7 +301,7 @@ class Coverage():
-             if seqId not in coverageStats[binId]:
-                 coverageStats[binId][seqId] = {}
- 
-            for i in xrange(3, len(lineSplit), 3):
-+            for i in range(3, len(lineSplit), 3):
-                 bamId = lineSplit[i]
-                 coverage = float(lineSplit[i + 1])
-                 coverageStats[binId][seqId][bamId] = coverage
-@@ -325,7 +325,7 @@ class Coverage():
- 
-             # calculate mean coverage (weighted by scaffold length)
-             # for each bin under each BAM file
-            for i in xrange(3, len(lineSplit), 3):
-+            for i in range(3, len(lineSplit), 3):
-                 bamId = lineSplit[i]
-                 coverage = float(lineSplit[i + 1])
-                 binCoverages[binId][bamId].append(coverage)
-@@ -341,13 +341,13 @@ class Coverage():
- 
-         profiles = defaultdict(dict)
-         for binId in binStats:
-            for bamId, stats in binStats[binId].iteritems():
-+            for bamId, stats in binStats[binId].items():
-                 binLength, meanBinCoverage = stats
-                 coverages = binCoverages[binId][bamId]
- 
-                 varCoverage = 0
-                 if len(coverages) > 1:
-                    varCoverage = mean(map(lambda x: (x - meanBinCoverage) ** 2, coverages))
-+                    varCoverage = mean([(x - meanBinCoverage) ** 2 for x in coverages])
- 
-                 profiles[binId][bamId] = [meanBinCoverage, sqrt(varCoverage)]
- 
--- checkm/coverageWindows.py.orig	2022-03-15 18:25:01 UTC
-+++ checkm/coverageWindows.py
-@@ -188,10 +188,10 @@ class CoverageWindows():
-                     try:
-                         end += windowSize
-                     except:
-                        print '*****************'
-                        print end
-                        print windowSize
-                        print '******************'
-+                        print('*****************')
-+                        print(end)
-+                        print(windowSize)
-+                        print('******************')
- 
-                 coverage = float(sum(readLoader.coverage)) / seqLen
- 
-@@ -239,13 +239,13 @@ class CoverageWindows():
-         if self.logger.getEffectiveLevel() <= logging.INFO:
-             sys.stderr.write('\n')
- 
-            print ''
-            print '    # total reads: %d' % totalReads
-            print '      # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads)
-            print '      # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads)
-            print '      # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads)
-            print '      # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads)
-            print '      # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads)
-            print '      # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads)
-            print '      # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads)
-            print ''
-+            print('')
-+            print('    # total reads: %d' % totalReads)
-+            print('      # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads))
-+            print('      # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads))
-+            print('      # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads))
-+            print('      # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads))
-+            print('      # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads))
-+            print('      # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads))
-+            print('      # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads))
-+            print('')
--- checkm/manifestManager.py.orig	2022-03-15 18:25:01 UTC
-+++ checkm/manifestManager.py
-@@ -47,8 +47,8 @@ __MANIFEST__ = ".dmanifest"
- # system includes
- import os
- import hashlib
-import urllib2
-import urllib
-+import urllib.request, urllib.error, urllib.parse
-+import urllib.request, urllib.parse, urllib.error
- import shutil
- import errno
- 
-@@ -121,15 +121,15 @@ class ManifestManager(object):
-         source = ""
-         # first we assume it is remote
-         try:
-            s_man = urllib2.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout)
-+            s_man = urllib.request.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout)
-             source = sourceManifestLocation + "/"
-         except ValueError:
-             # then it is probably a file
-             s_man = open(os.path.join(sourceManifestLocation, sourceManifestName))
-             source = os.path.join(sourceManifestLocation) + os.path.sep
-        except urllib2.URLError:
-+        except urllib.error.URLError:
-             # problems connecting to server, perhaps user is behind a proxy or firewall
-            print "Error: failed to connect to server."
-+            print("Error: failed to connect to server.")
-             return (None, None, None, None, None)
- 
-         first_line = True
-@@ -140,11 +140,11 @@ class ManifestManager(object):
-                     # get the type of the manifest
-                     s_type = self.getManType(line)
-                     if s_type != l_type:
-                        print "Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type)
-+                        print("Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type))
-                         return (None, None, None, None, None)
-                 else:
-                     # no type specified
-                    print "Error: type of source manifest is not specified. Is this a valid manifest file?"
-+                    print("Error: type of source manifest is not specified. Is this a valid manifest file?")
-                     return (None, None, None, None, None)
- 
-                 self.type = l_type
-@@ -174,7 +174,7 @@ class ManifestManager(object):
-                         deleted.append(fields[0])
- 
-         # check for new files
-        for f in source_man.keys():
-+        for f in list(source_man.keys()):
-             if source_man[f][2] == False:
-                 if source_man[f][0] == '-':
-                     addedDirs.append(f)
-@@ -190,28 +190,28 @@ class ManifestManager(object):
-                 modified_size += int(source_man[f][1])
- 
-             if len(addedFiles) > 0:
-                print "#------------------------------------------------------"
-                print "# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size))
-+                print("#------------------------------------------------------")
-+                print("# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size)))
-                 for f in addedFiles:
-                    print "\t".join([self.formatData(int(source_man[f][1])), f])
-+                    print("\t".join([self.formatData(int(source_man[f][1])), f]))
- 
-             if len(addedDirs) > 0:
-                print "#------------------------------------------------------"
-                print "# Source contains %d new folders(s)" % (len(addedDirs))
-+                print("#------------------------------------------------------")
-+                print("# Source contains %d new folders(s)" % (len(addedDirs)))
-                 for f in addedDirs:
-                    print f
-+                    print(f)
- 
-             if len(modified) > 0:
-                print "#------------------------------------------------------"
-                print "# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size))
-+                print("#------------------------------------------------------")
-+                print("# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size)))
-                 for f in modified:
-                    print f
-+                    print(f)
- 
-             if len(deleted) > 0:
-                print "#------------------------------------------------------"
-                print "# %d files have been deleted in the source:" % len(deleted)
-+                print("#------------------------------------------------------")
-+                print("# %d files have been deleted in the source:" % len(deleted))
-                 for f in deleted:
-                    print f
-+                    print(f)
-         else:
-             return (source,
-                     [(a, source_man[a]) for a in addedFiles],
-@@ -245,13 +245,13 @@ class ManifestManager(object):
-             for f in modified:
-                 total_size += int(f[1][1])
-             if total_size != 0:
-                print "****************************************************************"
-                print "%d new file(s) to be downloaded from source" % len(added_files)
-                print "%d existing file(s) to be updated" % len(modified)
-                print "%s will need to be downloaded" % self.formatData(total_size)
-+                print("****************************************************************")
-+                print("%d new file(s) to be downloaded from source" % len(added_files))
-+                print("%d existing file(s) to be updated" % len(modified))
-+                print("%s will need to be downloaded" % self.formatData(total_size))
-                 do_down = self.promptUserDownload()
-                 if not do_down:
-                    print "Download aborted"
-+                    print("Download aborted")
- 
-         update_manifest = False
-         if do_down:
-@@ -262,13 +262,13 @@ class ManifestManager(object):
-                 self.makeSurePathExists(full_path)
-             for add in added_files:
-                 full_path = os.path.abspath(os.path.join(localManifestLocation, add[0]))
-                urllib.urlretrieve(source+add[0], full_path)
-+                urllib.request.urlretrieve(source+add[0], full_path)
-             for modify in modified:
-                 full_path = os.path.abspath(os.path.join(localManifestLocation, modify[0]))
-                urllib.urlretrieve(source+modify[0], full_path)
-+                urllib.request.urlretrieve(source+modify[0], full_path)
- 
-         if update_manifest:
-            print "(re) creating manifest file (please be patient)"
-+            print("(re) creating manifest file (please be patient)")
-             self.createManifest(localManifestLocation, manifestName=localManifestName)
-             
-         return True
-@@ -303,19 +303,19 @@ class ManifestManager(object):
-         input_not_ok = True
-         minimal=False
-         valid_responses = {'Y':True,'N':False}
-        vrs = ",".join([x.lower() for x in valid_responses.keys()])
-+        vrs = ",".join([x.lower() for x in list(valid_responses.keys())])
-         while(input_not_ok):
-             if(minimal):
-                option = raw_input("Download? ("+vrs+") : ").upper()
-+                option = input("Download? ("+vrs+") : ").upper()
-             else:
-                option = raw_input("Confirm you want to download this data\n" \
-+                option = input("Confirm you want to download this data\n" \
-                                    "Changes *WILL* be permanent\n" \
-                                    "Continue? ("+vrs+") : ").upper()
-             if(option in valid_responses):
-                print "****************************************************************"
-+                print("****************************************************************")
-                 return valid_responses[option]
-             else:
-                print "ERROR: unrecognised choice '"+option+"'"
-+                print("ERROR: unrecognised choice '"+option+"'")
-                 minimal = True
- 
-     def walk(self, parents, full_path, rel_path, dirs, files, skipFile=__MANIFEST__):
--- checkm/taxonParser.py.orig	2022-03-15 18:25:01 UTC
-+++ checkm/taxonParser.py
-@@ -73,8 +73,8 @@ class TaxonParser():
-                     numMarkers, numMarkerSets = markerSet.size()
-                     pTable.add_row([rank, taxon, markerSet.numGenomes, numMarkers, numMarkerSets])
- 
-        print ''
-        print pTable.get_string()
-+        print('')
-+        print(pTable.get_string())
- 
-     def markerSet(self, rank, taxon, markerFile):
-         """Obtain specified taxonomic-specific marker set."""
--- checkm/uniqueMarkers.py.orig	2022-03-15 18:25:01 UTC
-+++ checkm/uniqueMarkers.py
-@@ -51,7 +51,7 @@ def getOppositeRankSpecificTaxonId(cursor, *args):
-     query.append(' %s != \'%s\' ' % (ranks[len(args) - 1], args[-1]))
-     query.append(' %s IS NULL' % ranks[len(args)])
-     query_string = 'AND'.join(query)
-    print query_string
-+    print(query_string)
-     result = cursor.execute('SELECT Id, "Count" FROM taxons WHERE %s' % query_string)
-     return result.fetchall()
- 
-@@ -121,7 +121,7 @@ def doWork(args):
-                 markers_from_others[Id] += count
- 
-         descriptive_markers = []
-        for marker_id, _ in marker_in_taxon_mapping.items():
-+        for marker_id, _ in list(marker_in_taxon_mapping.items()):
-             if marker_id in markers_from_others:
-                 fraction_in_others = float(markers_from_others[marker_id]) / float(others_total_count)
-                 if fraction_in_others <= args.exclude:
-@@ -135,7 +135,7 @@ def doWork(args):
-             des_markers.append(getDescriptiveMarkers(cur, i))
- 
-         for des_acc, des_name in des_markers:
-            print des_acc, des_name
-+            print(des_acc, des_name)
- 
- if __name__ == '__main__':
- 
--- checkm/util/img.py.orig	2022-03-15 18:25:01 UTC
-+++ checkm/util/img.py
-@@ -195,7 +195,7 @@ class IMG(object):
-         genomeIdsOfInterest = set()
-         for genomeId in metadata:
-             bKeep = True
-            for r in xrange(0, len(searchTaxa)):
-+            for r in range(0, len(searchTaxa)):
-                 if taxonStr == 'universal':
-                     bKeep = True
-                 elif taxonStr == 'prokaryotes' and (metadata[genomeId]['taxonomy'][0] == 'Bacteria' or metadata[genomeId]['taxonomy'][0] == 'Archaea'):
-@@ -222,8 +222,8 @@ class IMG(object):
- 
-     def lineageStats(self, metadata, mostSpecificRank):
-         stats = {}
-        for r in xrange(0, mostSpecificRank + 1):
-            for _, data in metadata.iteritems():
-+        for r in range(0, mostSpecificRank + 1):
-+            for _, data in metadata.items():
-                 taxaStr = ';'.join(data['taxonomy'][0:r + 1])
-                 stats[taxaStr] = stats.get(taxaStr, 0) + 1
- 
-@@ -231,9 +231,9 @@ class IMG(object):
- 
-     def lineagesSorted(self, metadata, mostSpecificRank=6):
-         lineages = []
-        for r in xrange(0, mostSpecificRank + 1):
-+        for r in range(0, mostSpecificRank + 1):
-             taxa = set()
-            for _, data in metadata.iteritems():
-+            for _, data in metadata.items():
-                 if 'unclassified' not in data['taxonomy'][0:r + 1]:
-                     taxa.add(';'.join(data['taxonomy'][0:r + 1]))
- 
-@@ -274,7 +274,7 @@ class IMG(object):
-                     geneIdToFamilyIds[geneId].add(clusterId)
-                     count[clusterId] = count.get(clusterId, 0) + 1
- 
-            for clusterId, c in count.iteritems():
-+            for clusterId, c in count.items():
-                 if clusterId not in table:
-                     table[clusterId] = {}
-                 table[clusterId][genomeId] = c
-@@ -288,7 +288,7 @@ class IMG(object):
- 
-     def filterGeneCountTable(self, genomeIds, table, ubiquityThreshold=0.9, singleCopyThreshold=0.9):
-         idsToFilter = []
-        for pfamId, genomeCounts in table.iteritems():
-+        for pfamId, genomeCounts in table.items():
-             ubiquity = 0
-             singleCopy = 0
-             for genomeId in genomeIds:
-@@ -342,7 +342,7 @@ class IMG(object):
-             # are a few cases where this isn't tree (?) so only PFAMs/TIGRFAMs
-             # with GFF entries are considered.
-             familyIdToScaffoldIds = {}
-            for pfamId, geneIds in pfamIdToGeneIds.iteritems():
-+            for pfamId, geneIds in pfamIdToGeneIds.items():
-                 scaffolds = []
-                 for geneId in geneIds:
-                     scaffold = genePosition.get(geneId, None)
-@@ -352,7 +352,7 @@ class IMG(object):
-                 if scaffolds:
-                     familyIdToScaffoldIds[pfamId] = scaffolds
- 
-            for tigrId, geneIds in tigrIdToGeneIds.iteritems():
-+            for tigrId, geneIds in tigrIdToGeneIds.items():
-                 scaffolds = []
-                 for geneId in geneIds:
-                     scaffold = genePosition.get(geneId, None)
-@@ -362,9 +362,9 @@ class IMG(object):
-                 if scaffold:
-                     familyIdToScaffoldIds[tigrId] = scaffolds
-         except:
-            print '[BUG]: __genomeIdToClusterScaffold'
-            print sys.exc_info()[0]
-            print genomeId, geneId, tigrId, pfamId
-+            print('[BUG]: __genomeIdToClusterScaffold')
-+            print(sys.exc_info()[0])
-+            print(genomeId, geneId, tigrId, pfamId)
-             sys.exit()
- 
-         return familyIdToScaffoldIds
-@@ -400,7 +400,7 @@ class IMG(object):
-         seqs = readFasta(genomeFile)
- 
-         seqLens = {}
-        for seqId, seq in seqs.iteritems():
-+        for seqId, seq in seqs.items():
-             seqLens[seqId] = len(seq)
- 
-         return seqLens
-@@ -462,7 +462,7 @@ class IMG(object):
-             # are a few cases where this isn't tree (?) so only PFAMs/TIGRFAMs
-             # with GFF entries are considered.
-             familyIdToGenomePositions = {}
-            for pfamId, geneIds in pfamIdToGeneIds.iteritems():
-+            for pfamId, geneIds in pfamIdToGeneIds.items():
-                 positions = []
-                 for geneId in geneIds:
-                     position = genePosition.get(geneId, None)
-@@ -472,7 +472,7 @@ class IMG(object):
-                 if positions:
-                     familyIdToGenomePositions[pfamId] = positions
- 
-            for tigrId, geneIds in tigrIdToGeneIds.iteritems():
-+            for tigrId, geneIds in tigrIdToGeneIds.items():
-                 positions = []
-                 for geneId in geneIds:
-                     position = genePosition.get(geneId, None)
-@@ -482,9 +482,9 @@ class IMG(object):
-                 if positions:
-                     familyIdToGenomePositions[tigrId] = positions
-         except:
-            print '[BUG]: __genomeFamilyPositions'
-            print sys.exc_info()[0]
-            print genomeId, geneId, tigrId, pfamId
-+            print('[BUG]: __genomeFamilyPositions')
-+            print(sys.exc_info()[0])
-+            print(genomeId, geneId, tigrId, pfamId)
-             sys.exit()
- 
-         return familyIdToGenomePositions