biology/checkm: Update 1.0.18 -> 1.1.8
Reported by: portscout
This commit is contained in:
parent 848e360f0d
commit 350d96226d
@@ -1,6 +1,6 @@
|
||||
PORTNAME= CheckM
|
||||
DISTVERSIONPREFIX= v
|
||||
DISTVERSION= 1.0.18
|
||||
DISTVERSION= 1.1.8
|
||||
CATEGORIES= biology python
|
||||
|
||||
MAINTAINER= yuri@FreeBSD.org
|
||||
@@ -10,10 +10,10 @@ LICENSE= GPLv3
|
||||
LICENSE_FILE= ${WRKSRC}/LICENSE
|
||||
|
||||
RUN_DEPENDS= ${PYNUMPY} \
|
||||
${PYTHON_PKGNAMEPREFIX}DendroPy>=4.0.0:science/py-DendroPy@${PY_FLAVOR} \
|
||||
${PYTHON_PKGNAMEPREFIX}matplotlib>=1.3.1:math/py-matplotlib@${PY_FLAVOR} \
|
||||
${PYTHON_PKGNAMEPREFIX}pysam>=0.8.3:biology/py-pysam@${PY_FLAVOR} \
|
||||
${PYTHON_PKGNAMEPREFIX}scipy>=0.9.0:science/py-scipy@${PY_FLAVOR}
|
||||
${PYTHON_PKGNAMEPREFIX}DendroPy>=4.4.0:science/py-DendroPy@${PY_FLAVOR} \
|
||||
${PYTHON_PKGNAMEPREFIX}matplotlib>=2.1.0:math/py-matplotlib@${PY_FLAVOR} \
|
||||
${PYTHON_PKGNAMEPREFIX}pysam>=0.12.0.1:biology/py-pysam@${PY_FLAVOR} \
|
||||
${PYTHON_PKGNAMEPREFIX}scipy>=0.19.1:science/py-scipy@${PY_FLAVOR}
|
||||
|
||||
USES= dos2unix python:3.7+
|
||||
USE_GITHUB= yes
|
||||
|
@@ -1,3 +1,3 @@
|
||||
TIMESTAMP = 1566202999
|
||||
SHA256 (Ecogenomics-CheckM-v1.0.18_GH0.tar.gz) = 240184bd7c708cd041d0fc14f81b22af5cb69cb96ae75177aee32effa578ca4e
|
||||
SIZE (Ecogenomics-CheckM-v1.0.18_GH0.tar.gz) = 212064
|
||||
TIMESTAMP = 1649697676
|
||||
SHA256 (Ecogenomics-CheckM-v1.1.8_GH0.tar.gz) = c6e9d007622808ae3312de73d54866292a83857837119380a036036e799c1f38
|
||||
SIZE (Ecogenomics-CheckM-v1.1.8_GH0.tar.gz) = 1016432
|
||||
|
@@ -1,648 +0,0 @@
|
||||
--- checkm/binTools.py.orig 2022-03-15 18:25:01 UTC
|
||||
+++ checkm/binTools.py
|
||||
@@ -26,7 +26,7 @@ import gzip
|
||||
|
||||
import numpy as np
|
||||
|
||||
-from common import binIdFromFilename, checkFileExists, readDistribution, findNearest
|
||||
+from .common import binIdFromFilename, checkFileExists, readDistribution, findNearest
|
||||
from checkm.util.seqUtils import readFasta, writeFasta, baseCount
|
||||
from checkm.genomicSignatures import GenomicSignatures
|
||||
from checkm.prodigal import ProdigalGeneFeatureParser
|
||||
@@ -123,34 +123,34 @@ class BinTools():
|
||||
seqId = line[1:].split(None, 1)[0]
|
||||
|
||||
if seqId in seqIds:
|
||||
- print ' [Warning] Sequence %s found multiple times in bin %s.' % (seqId, binId)
|
||||
+ print(' [Warning] Sequence %s found multiple times in bin %s.' % (seqId, binId))
|
||||
seqIds.add(seqId)
|
||||
|
||||
binSeqs[binId] = seqIds
|
||||
|
||||
# check for sequences assigned to multiple bins
|
||||
bDuplicates = False
|
||||
- binIds = binSeqs.keys()
|
||||
- for i in xrange(0, len(binIds)):
|
||||
- for j in xrange(i + 1, len(binIds)):
|
||||
+ binIds = list(binSeqs.keys())
|
||||
+ for i in range(0, len(binIds)):
|
||||
+ for j in range(i + 1, len(binIds)):
|
||||
seqInter = set(binSeqs[binIds[i]]).intersection(set(binSeqs[binIds[j]]))
|
||||
|
||||
if len(seqInter) > 0:
|
||||
bDuplicates = True
|
||||
- print ' Sequences shared between %s and %s: ' % (binIds[i], binIds[j])
|
||||
+ print(' Sequences shared between %s and %s: ' % (binIds[i], binIds[j]))
|
||||
for seqId in seqInter:
|
||||
- print ' ' + seqId
|
||||
- print ''
|
||||
+ print(' ' + seqId)
|
||||
+ print('')
|
||||
|
||||
if not bDuplicates:
|
||||
- print ' No sequences assigned to multiple bins.'
|
||||
+ print(' No sequences assigned to multiple bins.')
|
||||
|
||||
def gcDist(self, seqs):
|
||||
"""GC statistics for bin."""
|
||||
GCs = []
|
||||
gcTotal = 0
|
||||
basesTotal = 0
|
||||
- for _, seq in seqs.iteritems():
|
||||
+ for _, seq in seqs.items():
|
||||
a, c, g, t = baseCount(seq)
|
||||
gc = g + c
|
||||
bases = a + c + g + t
|
||||
@@ -171,7 +171,7 @@ class BinTools():
|
||||
|
||||
codingBasesTotal = 0
|
||||
basesTotal = 0
|
||||
- for seqId, seq in seqs.iteritems():
|
||||
+ for seqId, seq in seqs.items():
|
||||
codingBases = prodigalParser.codingBases(seqId)
|
||||
|
||||
CDs.append(float(codingBases) / len(seq))
|
||||
@@ -186,11 +186,11 @@ class BinTools():
|
||||
def binTetraSig(self, seqs, tetraSigs):
|
||||
"""Tetranucleotide signature for bin. """
|
||||
binSize = 0
|
||||
- for _, seq in seqs.iteritems():
|
||||
+ for _, seq in seqs.items():
|
||||
binSize += len(seq)
|
||||
|
||||
bInit = True
|
||||
- for seqId, seq in seqs.iteritems():
|
||||
+ for seqId, seq in seqs.items():
|
||||
weightedTetraSig = tetraSigs[seqId] * (float(len(seq)) / binSize)
|
||||
if bInit:
|
||||
binSig = weightedTetraSig
|
||||
@@ -247,32 +247,32 @@ class BinTools():
|
||||
meanCD, deltaCDs, CDs = self.codingDensityDist(seqs, prodigalParser)
|
||||
|
||||
# find keys into GC and CD distributions
|
||||
- closestGC = findNearest(np.array(gcBounds.keys()), meanGC)
|
||||
- sampleSeqLen = gcBounds[closestGC].keys()[0]
|
||||
+ closestGC = findNearest(np.array(list(gcBounds.keys())), meanGC)
|
||||
+ sampleSeqLen = list(gcBounds[closestGC].keys())[0]
|
||||
d = gcBounds[closestGC][sampleSeqLen]
|
||||
- gcLowerBoundKey = findNearest(d.keys(), (100 - distribution) / 2.0)
|
||||
- gcUpperBoundKey = findNearest(d.keys(), (100 + distribution) / 2.0)
|
||||
+ gcLowerBoundKey = findNearest(list(d.keys()), (100 - distribution) / 2.0)
|
||||
+ gcUpperBoundKey = findNearest(list(d.keys()), (100 + distribution) / 2.0)
|
||||
|
||||
- closestCD = findNearest(np.array(cdBounds.keys()), meanCD)
|
||||
- sampleSeqLen = cdBounds[closestCD].keys()[0]
|
||||
+ closestCD = findNearest(np.array(list(cdBounds.keys())), meanCD)
|
||||
+ sampleSeqLen = list(cdBounds[closestCD].keys())[0]
|
||||
d = cdBounds[closestCD][sampleSeqLen]
|
||||
- cdLowerBoundKey = findNearest(d.keys(), (100 - distribution) / 2.0)
|
||||
+ cdLowerBoundKey = findNearest(list(d.keys()), (100 - distribution) / 2.0)
|
||||
|
||||
- tdBoundKey = findNearest(tdBounds[tdBounds.keys()[0]].keys(), distribution)
|
||||
+ tdBoundKey = findNearest(list(tdBounds[list(tdBounds.keys())[0]].keys()), distribution)
|
||||
|
||||
index = 0
|
||||
- for seqId, seq in seqs.iteritems():
|
||||
+ for seqId, seq in seqs.items():
|
||||
seqLen = len(seq)
|
||||
|
||||
# find GC, CD, and TD bounds
|
||||
- closestSeqLen = findNearest(gcBounds[closestGC].keys(), seqLen)
|
||||
+ closestSeqLen = findNearest(list(gcBounds[closestGC].keys()), seqLen)
|
||||
gcLowerBound = gcBounds[closestGC][closestSeqLen][gcLowerBoundKey]
|
||||
gcUpperBound = gcBounds[closestGC][closestSeqLen][gcUpperBoundKey]
|
||||
|
||||
- closestSeqLen = findNearest(cdBounds[closestCD].keys(), seqLen)
|
||||
+ closestSeqLen = findNearest(list(cdBounds[closestCD].keys()), seqLen)
|
||||
cdLowerBound = cdBounds[closestCD][closestSeqLen][cdLowerBoundKey]
|
||||
|
||||
- closestSeqLen = findNearest(tdBounds.keys(), seqLen)
|
||||
+ closestSeqLen = findNearest(list(tdBounds.keys()), seqLen)
|
||||
tdBound = tdBounds[closestSeqLen][tdBoundKey]
|
||||
|
||||
outlyingDists = []
|
||||
--- checkm/checkmData.py.orig 2022-03-15 18:25:01 UTC
|
||||
+++ checkm/checkmData.py
|
||||
@@ -85,11 +85,11 @@ class DBConfig(object):
|
||||
"""Work out if we have permission to write to the CheckM config before attempting to make changes"""
|
||||
try:
|
||||
open(self.configFile, 'a')
|
||||
- except IOError, e:
|
||||
- print "You do not seem to have permission to edit the checkm config file"
|
||||
- print "located at %s" % self.configFile
|
||||
- print "Please try again with updated privileges. Error was:\n"
|
||||
- print e
|
||||
+ except IOError as e:
|
||||
+ print("You do not seem to have permission to edit the checkm config file")
|
||||
+ print("located at %s" % self.configFile)
|
||||
+ print("Please try again with updated privileges. Error was:\n")
|
||||
+ print(e)
|
||||
return False
|
||||
return True
|
||||
|
||||
@@ -167,28 +167,28 @@ class DBManager(mm.ManifestManager):
|
||||
else:
|
||||
path = os.path.abspath(os.path.expanduser(path))
|
||||
|
||||
- print ""
|
||||
+ print("")
|
||||
if os.path.exists(path):
|
||||
# path exists
|
||||
if os.access(path, os.W_OK):
|
||||
# path is writable
|
||||
path_set = True
|
||||
- print "Path [%s] exists and you have permission to write to this folder." % path
|
||||
+ print("Path [%s] exists and you have permission to write to this folder." % path)
|
||||
else:
|
||||
- print "Path [%s] exists but you do not have permission to write to this folder." % path
|
||||
+ print("Path [%s] exists but you do not have permission to write to this folder." % path)
|
||||
else:
|
||||
# path does not exist, try to make it
|
||||
"Path [%s] does not exist so I will attempt to create it" % path
|
||||
try:
|
||||
self.makeSurePathExists(path)
|
||||
- print "Path [%s] has been created and you have permission to write to this folder." % path
|
||||
+ print("Path [%s] has been created and you have permission to write to this folder." % path)
|
||||
path_set = True
|
||||
except Exception:
|
||||
- print "Unable to make the folder, Error was: %s" % sys.exc_info()[0]
|
||||
+ print("Unable to make the folder, Error was: %s" % sys.exc_info()[0])
|
||||
minimal = True
|
||||
|
||||
# (re)make the manifest file
|
||||
- print "(re) creating manifest file (please be patient)."
|
||||
+ print("(re) creating manifest file (please be patient).")
|
||||
self.createManifest(path, self.config.values["localManifestName"])
|
||||
|
||||
return path
|
||||
@@ -196,8 +196,8 @@ class DBManager(mm.ManifestManager):
|
||||
def checkPermissions(self):
|
||||
"""See if the user has permission to write to the data directory"""
|
||||
if not os.access(self.config.values["dataRoot"], os.W_OK):
|
||||
- print "You do not seem to have permission to edit the CheckM data folder"
|
||||
- print "located at %s" % self.config.values["dataRoot"]
|
||||
+ print("You do not seem to have permission to edit the CheckM data folder")
|
||||
+ print("located at %s" % self.config.values["dataRoot"])
|
||||
return False
|
||||
|
||||
return True
|
||||
--- checkm/coverage.py.orig 2022-03-15 18:25:01 UTC
|
||||
+++ checkm/coverage.py
|
||||
@@ -62,7 +62,7 @@ class Coverage():
|
||||
binId = binIdFromFilename(binFile)
|
||||
|
||||
seqs = readFasta(binFile)
|
||||
- for seqId, seq in seqs.iteritems():
|
||||
+ for seqId, seq in seqs.items():
|
||||
seqIdToBinId[seqId] = binId
|
||||
seqIdToSeqLen[seqId] = len(seq)
|
||||
|
||||
@@ -97,12 +97,12 @@ class Coverage():
|
||||
print(header)
|
||||
|
||||
# get length of all seqs
|
||||
- for bamFile, seqIds in coverageInfo.iteritems():
|
||||
- for seqId in seqIds.keys():
|
||||
+ for bamFile, seqIds in coverageInfo.items():
|
||||
+ for seqId in list(seqIds.keys()):
|
||||
seqIdToSeqLen[seqId] = seqIds[seqId].seqLen
|
||||
|
||||
# write coverage stats for all scaffolds to file
|
||||
- for seqId, seqLen in seqIdToSeqLen.iteritems():
|
||||
+ for seqId, seqLen in seqIdToSeqLen.items():
|
||||
rowStr = seqId + '\t' + seqIdToBinId.get(seqId, DefaultValues.UNBINNED) + '\t' + str(seqLen)
|
||||
for bamFile in bamFiles:
|
||||
bamId = binIdFromFilename(bamFile)
|
||||
@@ -171,7 +171,7 @@ class Coverage():
|
||||
writeProc.join()
|
||||
except:
|
||||
# make sure all processes are terminated
|
||||
- print traceback.format_exc()
|
||||
+ print(traceback.format_exc())
|
||||
for p in workerProc:
|
||||
p.terminate()
|
||||
|
||||
@@ -271,16 +271,16 @@ class Coverage():
|
||||
if self.logger.getEffectiveLevel() <= logging.INFO:
|
||||
sys.stderr.write('\n')
|
||||
|
||||
- print ''
|
||||
- print ' # total reads: %d' % totalReads
|
||||
- print ' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads)
|
||||
- print ' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads)
|
||||
- print ' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads)
|
||||
- print ' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads)
|
||||
- print ' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads)
|
||||
- print ' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads)
|
||||
- print ' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads)
|
||||
- print ''
|
||||
+ print('')
|
||||
+ print(' # total reads: %d' % totalReads)
|
||||
+ print(' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads))
|
||||
+ print(' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads))
|
||||
+ print(' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads))
|
||||
+ print(' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads))
|
||||
+ print(' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads))
|
||||
+ print(' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads))
|
||||
+ print(' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads))
|
||||
+ print('')
|
||||
|
||||
def parseCoverage(self, coverageFile):
|
||||
"""Read coverage information from file."""
|
||||
@@ -301,7 +301,7 @@ class Coverage():
|
||||
if seqId not in coverageStats[binId]:
|
||||
coverageStats[binId][seqId] = {}
|
||||
|
||||
- for i in xrange(3, len(lineSplit), 3):
|
||||
+ for i in range(3, len(lineSplit), 3):
|
||||
bamId = lineSplit[i]
|
||||
coverage = float(lineSplit[i + 1])
|
||||
coverageStats[binId][seqId][bamId] = coverage
|
||||
@@ -325,7 +325,7 @@ class Coverage():
|
||||
|
||||
# calculate mean coverage (weighted by scaffold length)
|
||||
# for each bin under each BAM file
|
||||
- for i in xrange(3, len(lineSplit), 3):
|
||||
+ for i in range(3, len(lineSplit), 3):
|
||||
bamId = lineSplit[i]
|
||||
coverage = float(lineSplit[i + 1])
|
||||
binCoverages[binId][bamId].append(coverage)
|
||||
@@ -341,13 +341,13 @@ class Coverage():
|
||||
|
||||
profiles = defaultdict(dict)
|
||||
for binId in binStats:
|
||||
- for bamId, stats in binStats[binId].iteritems():
|
||||
+ for bamId, stats in binStats[binId].items():
|
||||
binLength, meanBinCoverage = stats
|
||||
coverages = binCoverages[binId][bamId]
|
||||
|
||||
varCoverage = 0
|
||||
if len(coverages) > 1:
|
||||
- varCoverage = mean(map(lambda x: (x - meanBinCoverage) ** 2, coverages))
|
||||
+ varCoverage = mean([(x - meanBinCoverage) ** 2 for x in coverages])
|
||||
|
||||
profiles[binId][bamId] = [meanBinCoverage, sqrt(varCoverage)]
|
||||
|
||||
--- checkm/coverageWindows.py.orig 2022-03-15 18:25:01 UTC
|
||||
+++ checkm/coverageWindows.py
|
||||
@@ -188,10 +188,10 @@ class CoverageWindows():
|
||||
try:
|
||||
end += windowSize
|
||||
except:
|
||||
- print '*****************'
|
||||
- print end
|
||||
- print windowSize
|
||||
- print '******************'
|
||||
+ print('*****************')
|
||||
+ print(end)
|
||||
+ print(windowSize)
|
||||
+ print('******************')
|
||||
|
||||
coverage = float(sum(readLoader.coverage)) / seqLen
|
||||
|
||||
@@ -239,13 +239,13 @@ class CoverageWindows():
|
||||
if self.logger.getEffectiveLevel() <= logging.INFO:
|
||||
sys.stderr.write('\n')
|
||||
|
||||
- print ''
|
||||
- print ' # total reads: %d' % totalReads
|
||||
- print ' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads)
|
||||
- print ' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads)
|
||||
- print ' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads)
|
||||
- print ' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads)
|
||||
- print ' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads)
|
||||
- print ' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads)
|
||||
- print ' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads)
|
||||
- print ''
|
||||
+ print('')
|
||||
+ print(' # total reads: %d' % totalReads)
|
||||
+ print(' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads))
|
||||
+ print(' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads))
|
||||
+ print(' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads))
|
||||
+ print(' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads))
|
||||
+ print(' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads))
|
||||
+ print(' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads))
|
||||
+ print(' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads))
|
||||
+ print('')
|
||||
--- checkm/manifestManager.py.orig 2022-03-15 18:25:01 UTC
|
||||
+++ checkm/manifestManager.py
|
||||
@@ -47,8 +47,8 @@ __MANIFEST__ = ".dmanifest"
|
||||
# system includes
|
||||
import os
|
||||
import hashlib
|
||||
-import urllib2
|
||||
-import urllib
|
||||
+import urllib.request, urllib.error, urllib.parse
|
||||
+import urllib.request, urllib.parse, urllib.error
|
||||
import shutil
|
||||
import errno
|
||||
|
||||
@@ -121,15 +121,15 @@ class ManifestManager(object):
|
||||
source = ""
|
||||
# first we assume it is remote
|
||||
try:
|
||||
- s_man = urllib2.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout)
|
||||
+ s_man = urllib.request.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout)
|
||||
source = sourceManifestLocation + "/"
|
||||
except ValueError:
|
||||
# then it is probably a file
|
||||
s_man = open(os.path.join(sourceManifestLocation, sourceManifestName))
|
||||
source = os.path.join(sourceManifestLocation) + os.path.sep
|
||||
- except urllib2.URLError:
|
||||
+ except urllib.error.URLError:
|
||||
# problems connecting to server, perhaps user is behind a proxy or firewall
|
||||
- print "Error: failed to connect to server."
|
||||
+ print("Error: failed to connect to server.")
|
||||
return (None, None, None, None, None)
|
||||
|
||||
first_line = True
|
||||
@@ -140,11 +140,11 @@ class ManifestManager(object):
|
||||
# get the type of the manifest
|
||||
s_type = self.getManType(line)
|
||||
if s_type != l_type:
|
||||
- print "Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type)
|
||||
+ print("Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type))
|
||||
return (None, None, None, None, None)
|
||||
else:
|
||||
# no type specified
|
||||
- print "Error: type of source manifest is not specified. Is this a valid manifest file?"
|
||||
+ print("Error: type of source manifest is not specified. Is this a valid manifest file?")
|
||||
return (None, None, None, None, None)
|
||||
|
||||
self.type = l_type
|
||||
@@ -174,7 +174,7 @@ class ManifestManager(object):
|
||||
deleted.append(fields[0])
|
||||
|
||||
# check for new files
|
||||
- for f in source_man.keys():
|
||||
+ for f in list(source_man.keys()):
|
||||
if source_man[f][2] == False:
|
||||
if source_man[f][0] == '-':
|
||||
addedDirs.append(f)
|
||||
@@ -190,28 +190,28 @@ class ManifestManager(object):
|
||||
modified_size += int(source_man[f][1])
|
||||
|
||||
if len(addedFiles) > 0:
|
||||
- print "#------------------------------------------------------"
|
||||
- print "# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size))
|
||||
+ print("#------------------------------------------------------")
|
||||
+ print("# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size)))
|
||||
for f in addedFiles:
|
||||
- print "\t".join([self.formatData(int(source_man[f][1])), f])
|
||||
+ print("\t".join([self.formatData(int(source_man[f][1])), f]))
|
||||
|
||||
if len(addedDirs) > 0:
|
||||
- print "#------------------------------------------------------"
|
||||
- print "# Source contains %d new folders(s)" % (len(addedDirs))
|
||||
+ print("#------------------------------------------------------")
|
||||
+ print("# Source contains %d new folders(s)" % (len(addedDirs)))
|
||||
for f in addedDirs:
|
||||
- print f
|
||||
+ print(f)
|
||||
|
||||
if len(modified) > 0:
|
||||
- print "#------------------------------------------------------"
|
||||
- print "# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size))
|
||||
+ print("#------------------------------------------------------")
|
||||
+ print("# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size)))
|
||||
for f in modified:
|
||||
- print f
|
||||
+ print(f)
|
||||
|
||||
if len(deleted) > 0:
|
||||
- print "#------------------------------------------------------"
|
||||
- print "# %d files have been deleted in the source:" % len(deleted)
|
||||
+ print("#------------------------------------------------------")
|
||||
+ print("# %d files have been deleted in the source:" % len(deleted))
|
||||
for f in deleted:
|
||||
- print f
|
||||
+ print(f)
|
||||
else:
|
||||
return (source,
|
||||
[(a, source_man[a]) for a in addedFiles],
|
||||
@@ -245,13 +245,13 @@ class ManifestManager(object):
|
||||
for f in modified:
|
||||
total_size += int(f[1][1])
|
||||
if total_size != 0:
|
||||
- print "****************************************************************"
|
||||
- print "%d new file(s) to be downloaded from source" % len(added_files)
|
||||
- print "%d existing file(s) to be updated" % len(modified)
|
||||
- print "%s will need to be downloaded" % self.formatData(total_size)
|
||||
+ print("****************************************************************")
|
||||
+ print("%d new file(s) to be downloaded from source" % len(added_files))
|
||||
+ print("%d existing file(s) to be updated" % len(modified))
|
||||
+ print("%s will need to be downloaded" % self.formatData(total_size))
|
||||
do_down = self.promptUserDownload()
|
||||
if not do_down:
|
||||
- print "Download aborted"
|
||||
+ print("Download aborted")
|
||||
|
||||
update_manifest = False
|
||||
if do_down:
|
||||
@@ -262,13 +262,13 @@ class ManifestManager(object):
|
||||
self.makeSurePathExists(full_path)
|
||||
for add in added_files:
|
||||
full_path = os.path.abspath(os.path.join(localManifestLocation, add[0]))
|
||||
- urllib.urlretrieve(source+add[0], full_path)
|
||||
+ urllib.request.urlretrieve(source+add[0], full_path)
|
||||
for modify in modified:
|
||||
full_path = os.path.abspath(os.path.join(localManifestLocation, modify[0]))
|
||||
- urllib.urlretrieve(source+modify[0], full_path)
|
||||
+ urllib.request.urlretrieve(source+modify[0], full_path)
|
||||
|
||||
if update_manifest:
|
||||
- print "(re) creating manifest file (please be patient)"
|
||||
+ print("(re) creating manifest file (please be patient)")
|
||||
self.createManifest(localManifestLocation, manifestName=localManifestName)
|
||||
|
||||
return True
|
||||
@@ -303,19 +303,19 @@ class ManifestManager(object):
|
||||
input_not_ok = True
|
||||
minimal=False
|
||||
valid_responses = {'Y':True,'N':False}
|
||||
- vrs = ",".join([x.lower() for x in valid_responses.keys()])
|
||||
+ vrs = ",".join([x.lower() for x in list(valid_responses.keys())])
|
||||
while(input_not_ok):
|
||||
if(minimal):
|
||||
- option = raw_input("Download? ("+vrs+") : ").upper()
|
||||
+ option = input("Download? ("+vrs+") : ").upper()
|
||||
else:
|
||||
- option = raw_input("Confirm you want to download this data\n" \
|
||||
+ option = input("Confirm you want to download this data\n" \
|
||||
"Changes *WILL* be permanent\n" \
|
||||
"Continue? ("+vrs+") : ").upper()
|
||||
if(option in valid_responses):
|
||||
- print "****************************************************************"
|
||||
+ print("****************************************************************")
|
||||
return valid_responses[option]
|
||||
else:
|
||||
- print "ERROR: unrecognised choice '"+option+"'"
|
||||
+ print("ERROR: unrecognised choice '"+option+"'")
|
||||
minimal = True
|
||||
|
||||
def walk(self, parents, full_path, rel_path, dirs, files, skipFile=__MANIFEST__):
|
||||
--- checkm/taxonParser.py.orig 2022-03-15 18:25:01 UTC
|
||||
+++ checkm/taxonParser.py
|
||||
@@ -73,8 +73,8 @@ class TaxonParser():
|
||||
numMarkers, numMarkerSets = markerSet.size()
|
||||
pTable.add_row([rank, taxon, markerSet.numGenomes, numMarkers, numMarkerSets])
|
||||
|
||||
- print ''
|
||||
- print pTable.get_string()
|
||||
+ print('')
|
||||
+ print(pTable.get_string())
|
||||
|
||||
def markerSet(self, rank, taxon, markerFile):
|
||||
"""Obtain specified taxonomic-specific marker set."""
|
||||
--- checkm/uniqueMarkers.py.orig 2022-03-15 18:25:01 UTC
|
||||
+++ checkm/uniqueMarkers.py
|
||||
@@ -51,7 +51,7 @@ def getOppositeRankSpecificTaxonId(cursor, *args):
|
||||
query.append(' %s != \'%s\' ' % (ranks[len(args) - 1], args[-1]))
|
||||
query.append(' %s IS NULL' % ranks[len(args)])
|
||||
query_string = 'AND'.join(query)
|
||||
- print query_string
|
||||
+ print(query_string)
|
||||
result = cursor.execute('SELECT Id, "Count" FROM taxons WHERE %s' % query_string)
|
||||
return result.fetchall()
|
||||
|
||||
@@ -121,7 +121,7 @@ def doWork(args):
|
||||
markers_from_others[Id] += count
|
||||
|
||||
descriptive_markers = []
|
||||
- for marker_id, _ in marker_in_taxon_mapping.items():
|
||||
+ for marker_id, _ in list(marker_in_taxon_mapping.items()):
|
||||
if marker_id in markers_from_others:
|
||||
fraction_in_others = float(markers_from_others[marker_id]) / float(others_total_count)
|
||||
if fraction_in_others <= args.exclude:
|
||||
@@ -135,7 +135,7 @@ def doWork(args):
|
||||
des_markers.append(getDescriptiveMarkers(cur, i))
|
||||
|
||||
for des_acc, des_name in des_markers:
|
||||
- print des_acc, des_name
|
||||
+ print(des_acc, des_name)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
--- checkm/util/img.py.orig 2022-03-15 18:25:01 UTC
|
||||
+++ checkm/util/img.py
|
||||
@@ -195,7 +195,7 @@ class IMG(object):
|
||||
genomeIdsOfInterest = set()
|
||||
for genomeId in metadata:
|
||||
bKeep = True
|
||||
- for r in xrange(0, len(searchTaxa)):
|
||||
+ for r in range(0, len(searchTaxa)):
|
||||
if taxonStr == 'universal':
|
||||
bKeep = True
|
||||
elif taxonStr == 'prokaryotes' and (metadata[genomeId]['taxonomy'][0] == 'Bacteria' or metadata[genomeId]['taxonomy'][0] == 'Archaea'):
|
||||
@@ -222,8 +222,8 @@ class IMG(object):
|
||||
|
||||
def lineageStats(self, metadata, mostSpecificRank):
|
||||
stats = {}
|
||||
- for r in xrange(0, mostSpecificRank + 1):
|
||||
- for _, data in metadata.iteritems():
|
||||
+ for r in range(0, mostSpecificRank + 1):
|
||||
+ for _, data in metadata.items():
|
||||
taxaStr = ';'.join(data['taxonomy'][0:r + 1])
|
||||
stats[taxaStr] = stats.get(taxaStr, 0) + 1
|
||||
|
||||
@@ -231,9 +231,9 @@ class IMG(object):
|
||||
|
||||
def lineagesSorted(self, metadata, mostSpecificRank=6):
|
||||
lineages = []
|
||||
- for r in xrange(0, mostSpecificRank + 1):
|
||||
+ for r in range(0, mostSpecificRank + 1):
|
||||
taxa = set()
|
||||
- for _, data in metadata.iteritems():
|
||||
+ for _, data in metadata.items():
|
||||
if 'unclassified' not in data['taxonomy'][0:r + 1]:
|
||||
taxa.add(';'.join(data['taxonomy'][0:r + 1]))
|
||||
|
||||
@@ -274,7 +274,7 @@ class IMG(object):
|
||||
geneIdToFamilyIds[geneId].add(clusterId)
|
||||
count[clusterId] = count.get(clusterId, 0) + 1
|
||||
|
||||
- for clusterId, c in count.iteritems():
|
||||
+ for clusterId, c in count.items():
|
||||
if clusterId not in table:
|
||||
table[clusterId] = {}
|
||||
table[clusterId][genomeId] = c
|
||||
@@ -288,7 +288,7 @@ class IMG(object):
|
||||
|
||||
def filterGeneCountTable(self, genomeIds, table, ubiquityThreshold=0.9, singleCopyThreshold=0.9):
|
||||
idsToFilter = []
|
||||
- for pfamId, genomeCounts in table.iteritems():
|
||||
+ for pfamId, genomeCounts in table.items():
|
||||
ubiquity = 0
|
||||
singleCopy = 0
|
||||
for genomeId in genomeIds:
|
||||
@@ -342,7 +342,7 @@ class IMG(object):
|
||||
# are a few cases where this isn't tree (?) so only PFAMs/TIGRFAMs
|
||||
# with GFF entries are considered.
|
||||
familyIdToScaffoldIds = {}
|
||||
- for pfamId, geneIds in pfamIdToGeneIds.iteritems():
|
||||
+ for pfamId, geneIds in pfamIdToGeneIds.items():
|
||||
scaffolds = []
|
||||
for geneId in geneIds:
|
||||
scaffold = genePosition.get(geneId, None)
|
||||
@@ -352,7 +352,7 @@ class IMG(object):
|
||||
if scaffolds:
|
||||
familyIdToScaffoldIds[pfamId] = scaffolds
|
||||
|
||||
- for tigrId, geneIds in tigrIdToGeneIds.iteritems():
|
||||
+ for tigrId, geneIds in tigrIdToGeneIds.items():
|
||||
scaffolds = []
|
||||
for geneId in geneIds:
|
||||
scaffold = genePosition.get(geneId, None)
|
||||
@@ -362,9 +362,9 @@ class IMG(object):
|
||||
if scaffold:
|
||||
familyIdToScaffoldIds[tigrId] = scaffolds
|
||||
except:
|
||||
- print '[BUG]: __genomeIdToClusterScaffold'
|
||||
- print sys.exc_info()[0]
|
||||
- print genomeId, geneId, tigrId, pfamId
|
||||
+ print('[BUG]: __genomeIdToClusterScaffold')
|
||||
+ print(sys.exc_info()[0])
|
||||
+ print(genomeId, geneId, tigrId, pfamId)
|
||||
sys.exit()
|
||||
|
||||
return familyIdToScaffoldIds
|
||||
@@ -400,7 +400,7 @@ class IMG(object):
|
||||
seqs = readFasta(genomeFile)
|
||||
|
||||
seqLens = {}
|
||||
- for seqId, seq in seqs.iteritems():
|
||||
+ for seqId, seq in seqs.items():
|
||||
seqLens[seqId] = len(seq)
|
||||
|
||||
return seqLens
|
||||
@@ -462,7 +462,7 @@ class IMG(object):
|
||||
# are a few cases where this isn't tree (?) so only PFAMs/TIGRFAMs
|
||||
# with GFF entries are considered.
|
||||
familyIdToGenomePositions = {}
|
||||
- for pfamId, geneIds in pfamIdToGeneIds.iteritems():
|
||||
+ for pfamId, geneIds in pfamIdToGeneIds.items():
|
||||
positions = []
|
||||
for geneId in geneIds:
|
||||
position = genePosition.get(geneId, None)
|
||||
@@ -472,7 +472,7 @@ class IMG(object):
|
||||
if positions:
|
||||
familyIdToGenomePositions[pfamId] = positions
|
||||
|
||||
- for tigrId, geneIds in tigrIdToGeneIds.iteritems():
|
||||
+ for tigrId, geneIds in tigrIdToGeneIds.items():
|
||||
positions = []
|
||||
for geneId in geneIds:
|
||||
position = genePosition.get(geneId, None)
|
||||
@@ -482,9 +482,9 @@ class IMG(object):
|
||||
if positions:
|
||||
familyIdToGenomePositions[tigrId] = positions
|
||||
except:
|
||||
- print '[BUG]: __genomeFamilyPositions'
|
||||
- print sys.exc_info()[0]
|
||||
- print genomeId, geneId, tigrId, pfamId
|
||||
+ print('[BUG]: __genomeFamilyPositions')
|
||||
+ print(sys.exc_info()[0])
|
||||
+ print(genomeId, geneId, tigrId, pfamId)
|
||||
sys.exit()
|
||||
|
||||
return familyIdToGenomePositions
|
Loading…
Reference in New Issue
Block a user