biology/checkm: Update 1.0.18 -> 1.1.8

Reported by:	portscout
This commit is contained in:
Yuri Victorovich 2022-04-11 12:53:21 -07:00
parent 848e360f0d
commit 350d96226d
3 changed files with 8 additions and 656 deletions

View File

@ -1,6 +1,6 @@
PORTNAME= CheckM
DISTVERSIONPREFIX= v
-DISTVERSION= 1.0.18
+DISTVERSION= 1.1.8
CATEGORIES= biology python
MAINTAINER= yuri@FreeBSD.org
@ -10,10 +10,10 @@ LICENSE= GPLv3
LICENSE_FILE= ${WRKSRC}/LICENSE
RUN_DEPENDS= ${PYNUMPY} \
-${PYTHON_PKGNAMEPREFIX}DendroPy>=4.0.0:science/py-DendroPy@${PY_FLAVOR} \
-${PYTHON_PKGNAMEPREFIX}matplotlib>=1.3.1:math/py-matplotlib@${PY_FLAVOR} \
-${PYTHON_PKGNAMEPREFIX}pysam>=0.8.3:biology/py-pysam@${PY_FLAVOR} \
-${PYTHON_PKGNAMEPREFIX}scipy>=0.9.0:science/py-scipy@${PY_FLAVOR}
+${PYTHON_PKGNAMEPREFIX}DendroPy>=4.4.0:science/py-DendroPy@${PY_FLAVOR} \
+${PYTHON_PKGNAMEPREFIX}matplotlib>=2.1.0:math/py-matplotlib@${PY_FLAVOR} \
+${PYTHON_PKGNAMEPREFIX}pysam>=0.12.0.1:biology/py-pysam@${PY_FLAVOR} \
+${PYTHON_PKGNAMEPREFIX}scipy>=0.19.1:science/py-scipy@${PY_FLAVOR}
USES= dos2unix python:3.7+
USE_GITHUB= yes

View File

@ -1,3 +1,3 @@
-TIMESTAMP = 1566202999
-SHA256 (Ecogenomics-CheckM-v1.0.18_GH0.tar.gz) = 240184bd7c708cd041d0fc14f81b22af5cb69cb96ae75177aee32effa578ca4e
-SIZE (Ecogenomics-CheckM-v1.0.18_GH0.tar.gz) = 212064
+TIMESTAMP = 1649697676
+SHA256 (Ecogenomics-CheckM-v1.1.8_GH0.tar.gz) = c6e9d007622808ae3312de73d54866292a83857837119380a036036e799c1f38
+SIZE (Ecogenomics-CheckM-v1.1.8_GH0.tar.gz) = 1016432

View File

@ -1,648 +0,0 @@
--- checkm/binTools.py.orig 2022-03-15 18:25:01 UTC
+++ checkm/binTools.py
@@ -26,7 +26,7 @@ import gzip
import numpy as np
-from common import binIdFromFilename, checkFileExists, readDistribution, findNearest
+from .common import binIdFromFilename, checkFileExists, readDistribution, findNearest
from checkm.util.seqUtils import readFasta, writeFasta, baseCount
from checkm.genomicSignatures import GenomicSignatures
from checkm.prodigal import ProdigalGeneFeatureParser
@@ -123,34 +123,34 @@ class BinTools():
seqId = line[1:].split(None, 1)[0]
if seqId in seqIds:
- print ' [Warning] Sequence %s found multiple times in bin %s.' % (seqId, binId)
+ print(' [Warning] Sequence %s found multiple times in bin %s.' % (seqId, binId))
seqIds.add(seqId)
binSeqs[binId] = seqIds
# check for sequences assigned to multiple bins
bDuplicates = False
- binIds = binSeqs.keys()
- for i in xrange(0, len(binIds)):
- for j in xrange(i + 1, len(binIds)):
+ binIds = list(binSeqs.keys())
+ for i in range(0, len(binIds)):
+ for j in range(i + 1, len(binIds)):
seqInter = set(binSeqs[binIds[i]]).intersection(set(binSeqs[binIds[j]]))
if len(seqInter) > 0:
bDuplicates = True
- print ' Sequences shared between %s and %s: ' % (binIds[i], binIds[j])
+ print(' Sequences shared between %s and %s: ' % (binIds[i], binIds[j]))
for seqId in seqInter:
- print ' ' + seqId
- print ''
+ print(' ' + seqId)
+ print('')
if not bDuplicates:
- print ' No sequences assigned to multiple bins.'
+ print(' No sequences assigned to multiple bins.')
def gcDist(self, seqs):
"""GC statistics for bin."""
GCs = []
gcTotal = 0
basesTotal = 0
- for _, seq in seqs.iteritems():
+ for _, seq in seqs.items():
a, c, g, t = baseCount(seq)
gc = g + c
bases = a + c + g + t
@@ -171,7 +171,7 @@ class BinTools():
codingBasesTotal = 0
basesTotal = 0
- for seqId, seq in seqs.iteritems():
+ for seqId, seq in seqs.items():
codingBases = prodigalParser.codingBases(seqId)
CDs.append(float(codingBases) / len(seq))
@@ -186,11 +186,11 @@ class BinTools():
def binTetraSig(self, seqs, tetraSigs):
"""Tetranucleotide signature for bin. """
binSize = 0
- for _, seq in seqs.iteritems():
+ for _, seq in seqs.items():
binSize += len(seq)
bInit = True
- for seqId, seq in seqs.iteritems():
+ for seqId, seq in seqs.items():
weightedTetraSig = tetraSigs[seqId] * (float(len(seq)) / binSize)
if bInit:
binSig = weightedTetraSig
@@ -247,32 +247,32 @@ class BinTools():
meanCD, deltaCDs, CDs = self.codingDensityDist(seqs, prodigalParser)
# find keys into GC and CD distributions
- closestGC = findNearest(np.array(gcBounds.keys()), meanGC)
- sampleSeqLen = gcBounds[closestGC].keys()[0]
+ closestGC = findNearest(np.array(list(gcBounds.keys())), meanGC)
+ sampleSeqLen = list(gcBounds[closestGC].keys())[0]
d = gcBounds[closestGC][sampleSeqLen]
- gcLowerBoundKey = findNearest(d.keys(), (100 - distribution) / 2.0)
- gcUpperBoundKey = findNearest(d.keys(), (100 + distribution) / 2.0)
+ gcLowerBoundKey = findNearest(list(d.keys()), (100 - distribution) / 2.0)
+ gcUpperBoundKey = findNearest(list(d.keys()), (100 + distribution) / 2.0)
- closestCD = findNearest(np.array(cdBounds.keys()), meanCD)
- sampleSeqLen = cdBounds[closestCD].keys()[0]
+ closestCD = findNearest(np.array(list(cdBounds.keys())), meanCD)
+ sampleSeqLen = list(cdBounds[closestCD].keys())[0]
d = cdBounds[closestCD][sampleSeqLen]
- cdLowerBoundKey = findNearest(d.keys(), (100 - distribution) / 2.0)
+ cdLowerBoundKey = findNearest(list(d.keys()), (100 - distribution) / 2.0)
- tdBoundKey = findNearest(tdBounds[tdBounds.keys()[0]].keys(), distribution)
+ tdBoundKey = findNearest(list(tdBounds[list(tdBounds.keys())[0]].keys()), distribution)
index = 0
- for seqId, seq in seqs.iteritems():
+ for seqId, seq in seqs.items():
seqLen = len(seq)
# find GC, CD, and TD bounds
- closestSeqLen = findNearest(gcBounds[closestGC].keys(), seqLen)
+ closestSeqLen = findNearest(list(gcBounds[closestGC].keys()), seqLen)
gcLowerBound = gcBounds[closestGC][closestSeqLen][gcLowerBoundKey]
gcUpperBound = gcBounds[closestGC][closestSeqLen][gcUpperBoundKey]
- closestSeqLen = findNearest(cdBounds[closestCD].keys(), seqLen)
+ closestSeqLen = findNearest(list(cdBounds[closestCD].keys()), seqLen)
cdLowerBound = cdBounds[closestCD][closestSeqLen][cdLowerBoundKey]
- closestSeqLen = findNearest(tdBounds.keys(), seqLen)
+ closestSeqLen = findNearest(list(tdBounds.keys()), seqLen)
tdBound = tdBounds[closestSeqLen][tdBoundKey]
outlyingDists = []
--- checkm/checkmData.py.orig 2022-03-15 18:25:01 UTC
+++ checkm/checkmData.py
@@ -85,11 +85,11 @@ class DBConfig(object):
"""Work out if we have permission to write to the CheckM config before attempting to make changes"""
try:
open(self.configFile, 'a')
- except IOError, e:
- print "You do not seem to have permission to edit the checkm config file"
- print "located at %s" % self.configFile
- print "Please try again with updated privileges. Error was:\n"
- print e
+ except IOError as e:
+ print("You do not seem to have permission to edit the checkm config file")
+ print("located at %s" % self.configFile)
+ print("Please try again with updated privileges. Error was:\n")
+ print(e)
return False
return True
@@ -167,28 +167,28 @@ class DBManager(mm.ManifestManager):
else:
path = os.path.abspath(os.path.expanduser(path))
- print ""
+ print("")
if os.path.exists(path):
# path exists
if os.access(path, os.W_OK):
# path is writable
path_set = True
- print "Path [%s] exists and you have permission to write to this folder." % path
+ print("Path [%s] exists and you have permission to write to this folder." % path)
else:
- print "Path [%s] exists but you do not have permission to write to this folder." % path
+ print("Path [%s] exists but you do not have permission to write to this folder." % path)
else:
# path does not exist, try to make it
"Path [%s] does not exist so I will attempt to create it" % path
try:
self.makeSurePathExists(path)
- print "Path [%s] has been created and you have permission to write to this folder." % path
+ print("Path [%s] has been created and you have permission to write to this folder." % path)
path_set = True
except Exception:
- print "Unable to make the folder, Error was: %s" % sys.exc_info()[0]
+ print("Unable to make the folder, Error was: %s" % sys.exc_info()[0])
minimal = True
# (re)make the manifest file
- print "(re) creating manifest file (please be patient)."
+ print("(re) creating manifest file (please be patient).")
self.createManifest(path, self.config.values["localManifestName"])
return path
@@ -196,8 +196,8 @@ class DBManager(mm.ManifestManager):
def checkPermissions(self):
"""See if the user has permission to write to the data directory"""
if not os.access(self.config.values["dataRoot"], os.W_OK):
- print "You do not seem to have permission to edit the CheckM data folder"
- print "located at %s" % self.config.values["dataRoot"]
+ print("You do not seem to have permission to edit the CheckM data folder")
+ print("located at %s" % self.config.values["dataRoot"])
return False
return True
--- checkm/coverage.py.orig 2022-03-15 18:25:01 UTC
+++ checkm/coverage.py
@@ -62,7 +62,7 @@ class Coverage():
binId = binIdFromFilename(binFile)
seqs = readFasta(binFile)
- for seqId, seq in seqs.iteritems():
+ for seqId, seq in seqs.items():
seqIdToBinId[seqId] = binId
seqIdToSeqLen[seqId] = len(seq)
@@ -97,12 +97,12 @@ class Coverage():
print(header)
# get length of all seqs
- for bamFile, seqIds in coverageInfo.iteritems():
- for seqId in seqIds.keys():
+ for bamFile, seqIds in coverageInfo.items():
+ for seqId in list(seqIds.keys()):
seqIdToSeqLen[seqId] = seqIds[seqId].seqLen
# write coverage stats for all scaffolds to file
- for seqId, seqLen in seqIdToSeqLen.iteritems():
+ for seqId, seqLen in seqIdToSeqLen.items():
rowStr = seqId + '\t' + seqIdToBinId.get(seqId, DefaultValues.UNBINNED) + '\t' + str(seqLen)
for bamFile in bamFiles:
bamId = binIdFromFilename(bamFile)
@@ -171,7 +171,7 @@ class Coverage():
writeProc.join()
except:
# make sure all processes are terminated
- print traceback.format_exc()
+ print(traceback.format_exc())
for p in workerProc:
p.terminate()
@@ -271,16 +271,16 @@ class Coverage():
if self.logger.getEffectiveLevel() <= logging.INFO:
sys.stderr.write('\n')
- print ''
- print ' # total reads: %d' % totalReads
- print ' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads)
- print ' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads)
- print ' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads)
- print ' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads)
- print ' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads)
- print ' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads)
- print ' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads)
- print ''
+ print('')
+ print(' # total reads: %d' % totalReads)
+ print(' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads))
+ print(' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads))
+ print(' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads))
+ print(' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads))
+ print(' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads))
+ print(' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads))
+ print(' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads))
+ print('')
def parseCoverage(self, coverageFile):
"""Read coverage information from file."""
@@ -301,7 +301,7 @@ class Coverage():
if seqId not in coverageStats[binId]:
coverageStats[binId][seqId] = {}
- for i in xrange(3, len(lineSplit), 3):
+ for i in range(3, len(lineSplit), 3):
bamId = lineSplit[i]
coverage = float(lineSplit[i + 1])
coverageStats[binId][seqId][bamId] = coverage
@@ -325,7 +325,7 @@ class Coverage():
# calculate mean coverage (weighted by scaffold length)
# for each bin under each BAM file
- for i in xrange(3, len(lineSplit), 3):
+ for i in range(3, len(lineSplit), 3):
bamId = lineSplit[i]
coverage = float(lineSplit[i + 1])
binCoverages[binId][bamId].append(coverage)
@@ -341,13 +341,13 @@ class Coverage():
profiles = defaultdict(dict)
for binId in binStats:
- for bamId, stats in binStats[binId].iteritems():
+ for bamId, stats in binStats[binId].items():
binLength, meanBinCoverage = stats
coverages = binCoverages[binId][bamId]
varCoverage = 0
if len(coverages) > 1:
- varCoverage = mean(map(lambda x: (x - meanBinCoverage) ** 2, coverages))
+ varCoverage = mean([(x - meanBinCoverage) ** 2 for x in coverages])
profiles[binId][bamId] = [meanBinCoverage, sqrt(varCoverage)]
--- checkm/coverageWindows.py.orig 2022-03-15 18:25:01 UTC
+++ checkm/coverageWindows.py
@@ -188,10 +188,10 @@ class CoverageWindows():
try:
end += windowSize
except:
- print '*****************'
- print end
- print windowSize
- print '******************'
+ print('*****************')
+ print(end)
+ print(windowSize)
+ print('******************')
coverage = float(sum(readLoader.coverage)) / seqLen
@@ -239,13 +239,13 @@ class CoverageWindows():
if self.logger.getEffectiveLevel() <= logging.INFO:
sys.stderr.write('\n')
- print ''
- print ' # total reads: %d' % totalReads
- print ' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads)
- print ' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads)
- print ' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads)
- print ' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads)
- print ' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads)
- print ' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads)
- print ' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads)
- print ''
+ print('')
+ print(' # total reads: %d' % totalReads)
+ print(' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads))
+ print(' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads))
+ print(' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads))
+ print(' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads))
+ print(' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads))
+ print(' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads))
+ print(' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads))
+ print('')
--- checkm/manifestManager.py.orig 2022-03-15 18:25:01 UTC
+++ checkm/manifestManager.py
@@ -47,8 +47,8 @@ __MANIFEST__ = ".dmanifest"
# system includes
import os
import hashlib
-import urllib2
-import urllib
+import urllib.request, urllib.error, urllib.parse
+import urllib.request, urllib.parse, urllib.error
import shutil
import errno
@@ -121,15 +121,15 @@ class ManifestManager(object):
source = ""
# first we assume it is remote
try:
- s_man = urllib2.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout)
+ s_man = urllib.request.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout)
source = sourceManifestLocation + "/"
except ValueError:
# then it is probably a file
s_man = open(os.path.join(sourceManifestLocation, sourceManifestName))
source = os.path.join(sourceManifestLocation) + os.path.sep
- except urllib2.URLError:
+ except urllib.error.URLError:
# problems connecting to server, perhaps user is behind a proxy or firewall
- print "Error: failed to connect to server."
+ print("Error: failed to connect to server.")
return (None, None, None, None, None)
first_line = True
@@ -140,11 +140,11 @@ class ManifestManager(object):
# get the type of the manifest
s_type = self.getManType(line)
if s_type != l_type:
- print "Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type)
+ print("Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type))
return (None, None, None, None, None)
else:
# no type specified
- print "Error: type of source manifest is not specified. Is this a valid manifest file?"
+ print("Error: type of source manifest is not specified. Is this a valid manifest file?")
return (None, None, None, None, None)
self.type = l_type
@@ -174,7 +174,7 @@ class ManifestManager(object):
deleted.append(fields[0])
# check for new files
- for f in source_man.keys():
+ for f in list(source_man.keys()):
if source_man[f][2] == False:
if source_man[f][0] == '-':
addedDirs.append(f)
@@ -190,28 +190,28 @@ class ManifestManager(object):
modified_size += int(source_man[f][1])
if len(addedFiles) > 0:
- print "#------------------------------------------------------"
- print "# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size))
+ print("#------------------------------------------------------")
+ print("# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size)))
for f in addedFiles:
- print "\t".join([self.formatData(int(source_man[f][1])), f])
+ print("\t".join([self.formatData(int(source_man[f][1])), f]))
if len(addedDirs) > 0:
- print "#------------------------------------------------------"
- print "# Source contains %d new folders(s)" % (len(addedDirs))
+ print("#------------------------------------------------------")
+ print("# Source contains %d new folders(s)" % (len(addedDirs)))
for f in addedDirs:
- print f
+ print(f)
if len(modified) > 0:
- print "#------------------------------------------------------"
- print "# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size))
+ print("#------------------------------------------------------")
+ print("# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size)))
for f in modified:
- print f
+ print(f)
if len(deleted) > 0:
- print "#------------------------------------------------------"
- print "# %d files have been deleted in the source:" % len(deleted)
+ print("#------------------------------------------------------")
+ print("# %d files have been deleted in the source:" % len(deleted))
for f in deleted:
- print f
+ print(f)
else:
return (source,
[(a, source_man[a]) for a in addedFiles],
@@ -245,13 +245,13 @@ class ManifestManager(object):
for f in modified:
total_size += int(f[1][1])
if total_size != 0:
- print "****************************************************************"
- print "%d new file(s) to be downloaded from source" % len(added_files)
- print "%d existing file(s) to be updated" % len(modified)
- print "%s will need to be downloaded" % self.formatData(total_size)
+ print("****************************************************************")
+ print("%d new file(s) to be downloaded from source" % len(added_files))
+ print("%d existing file(s) to be updated" % len(modified))
+ print("%s will need to be downloaded" % self.formatData(total_size))
do_down = self.promptUserDownload()
if not do_down:
- print "Download aborted"
+ print("Download aborted")
update_manifest = False
if do_down:
@@ -262,13 +262,13 @@ class ManifestManager(object):
self.makeSurePathExists(full_path)
for add in added_files:
full_path = os.path.abspath(os.path.join(localManifestLocation, add[0]))
- urllib.urlretrieve(source+add[0], full_path)
+ urllib.request.urlretrieve(source+add[0], full_path)
for modify in modified:
full_path = os.path.abspath(os.path.join(localManifestLocation, modify[0]))
- urllib.urlretrieve(source+modify[0], full_path)
+ urllib.request.urlretrieve(source+modify[0], full_path)
if update_manifest:
- print "(re) creating manifest file (please be patient)"
+ print("(re) creating manifest file (please be patient)")
self.createManifest(localManifestLocation, manifestName=localManifestName)
return True
@@ -303,19 +303,19 @@ class ManifestManager(object):
input_not_ok = True
minimal=False
valid_responses = {'Y':True,'N':False}
- vrs = ",".join([x.lower() for x in valid_responses.keys()])
+ vrs = ",".join([x.lower() for x in list(valid_responses.keys())])
while(input_not_ok):
if(minimal):
- option = raw_input("Download? ("+vrs+") : ").upper()
+ option = input("Download? ("+vrs+") : ").upper()
else:
- option = raw_input("Confirm you want to download this data\n" \
+ option = input("Confirm you want to download this data\n" \
"Changes *WILL* be permanent\n" \
"Continue? ("+vrs+") : ").upper()
if(option in valid_responses):
- print "****************************************************************"
+ print("****************************************************************")
return valid_responses[option]
else:
- print "ERROR: unrecognised choice '"+option+"'"
+ print("ERROR: unrecognised choice '"+option+"'")
minimal = True
def walk(self, parents, full_path, rel_path, dirs, files, skipFile=__MANIFEST__):
--- checkm/taxonParser.py.orig 2022-03-15 18:25:01 UTC
+++ checkm/taxonParser.py
@@ -73,8 +73,8 @@ class TaxonParser():
numMarkers, numMarkerSets = markerSet.size()
pTable.add_row([rank, taxon, markerSet.numGenomes, numMarkers, numMarkerSets])
- print ''
- print pTable.get_string()
+ print('')
+ print(pTable.get_string())
def markerSet(self, rank, taxon, markerFile):
"""Obtain specified taxonomic-specific marker set."""
--- checkm/uniqueMarkers.py.orig 2022-03-15 18:25:01 UTC
+++ checkm/uniqueMarkers.py
@@ -51,7 +51,7 @@ def getOppositeRankSpecificTaxonId(cursor, *args):
query.append(' %s != \'%s\' ' % (ranks[len(args) - 1], args[-1]))
query.append(' %s IS NULL' % ranks[len(args)])
query_string = 'AND'.join(query)
- print query_string
+ print(query_string)
result = cursor.execute('SELECT Id, "Count" FROM taxons WHERE %s' % query_string)
return result.fetchall()
@@ -121,7 +121,7 @@ def doWork(args):
markers_from_others[Id] += count
descriptive_markers = []
- for marker_id, _ in marker_in_taxon_mapping.items():
+ for marker_id, _ in list(marker_in_taxon_mapping.items()):
if marker_id in markers_from_others:
fraction_in_others = float(markers_from_others[marker_id]) / float(others_total_count)
if fraction_in_others <= args.exclude:
@@ -135,7 +135,7 @@ def doWork(args):
des_markers.append(getDescriptiveMarkers(cur, i))
for des_acc, des_name in des_markers:
- print des_acc, des_name
+ print(des_acc, des_name)
if __name__ == '__main__':
--- checkm/util/img.py.orig 2022-03-15 18:25:01 UTC
+++ checkm/util/img.py
@@ -195,7 +195,7 @@ class IMG(object):
genomeIdsOfInterest = set()
for genomeId in metadata:
bKeep = True
- for r in xrange(0, len(searchTaxa)):
+ for r in range(0, len(searchTaxa)):
if taxonStr == 'universal':
bKeep = True
elif taxonStr == 'prokaryotes' and (metadata[genomeId]['taxonomy'][0] == 'Bacteria' or metadata[genomeId]['taxonomy'][0] == 'Archaea'):
@@ -222,8 +222,8 @@ class IMG(object):
def lineageStats(self, metadata, mostSpecificRank):
stats = {}
- for r in xrange(0, mostSpecificRank + 1):
- for _, data in metadata.iteritems():
+ for r in range(0, mostSpecificRank + 1):
+ for _, data in metadata.items():
taxaStr = ';'.join(data['taxonomy'][0:r + 1])
stats[taxaStr] = stats.get(taxaStr, 0) + 1
@@ -231,9 +231,9 @@ class IMG(object):
def lineagesSorted(self, metadata, mostSpecificRank=6):
lineages = []
- for r in xrange(0, mostSpecificRank + 1):
+ for r in range(0, mostSpecificRank + 1):
taxa = set()
- for _, data in metadata.iteritems():
+ for _, data in metadata.items():
if 'unclassified' not in data['taxonomy'][0:r + 1]:
taxa.add(';'.join(data['taxonomy'][0:r + 1]))
@@ -274,7 +274,7 @@ class IMG(object):
geneIdToFamilyIds[geneId].add(clusterId)
count[clusterId] = count.get(clusterId, 0) + 1
- for clusterId, c in count.iteritems():
+ for clusterId, c in count.items():
if clusterId not in table:
table[clusterId] = {}
table[clusterId][genomeId] = c
@@ -288,7 +288,7 @@ class IMG(object):
def filterGeneCountTable(self, genomeIds, table, ubiquityThreshold=0.9, singleCopyThreshold=0.9):
idsToFilter = []
- for pfamId, genomeCounts in table.iteritems():
+ for pfamId, genomeCounts in table.items():
ubiquity = 0
singleCopy = 0
for genomeId in genomeIds:
@@ -342,7 +342,7 @@ class IMG(object):
# are a few cases where this isn't tree (?) so only PFAMs/TIGRFAMs
# with GFF entries are considered.
familyIdToScaffoldIds = {}
- for pfamId, geneIds in pfamIdToGeneIds.iteritems():
+ for pfamId, geneIds in pfamIdToGeneIds.items():
scaffolds = []
for geneId in geneIds:
scaffold = genePosition.get(geneId, None)
@@ -352,7 +352,7 @@ class IMG(object):
if scaffolds:
familyIdToScaffoldIds[pfamId] = scaffolds
- for tigrId, geneIds in tigrIdToGeneIds.iteritems():
+ for tigrId, geneIds in tigrIdToGeneIds.items():
scaffolds = []
for geneId in geneIds:
scaffold = genePosition.get(geneId, None)
@@ -362,9 +362,9 @@ class IMG(object):
if scaffold:
familyIdToScaffoldIds[tigrId] = scaffolds
except:
- print '[BUG]: __genomeIdToClusterScaffold'
- print sys.exc_info()[0]
- print genomeId, geneId, tigrId, pfamId
+ print('[BUG]: __genomeIdToClusterScaffold')
+ print(sys.exc_info()[0])
+ print(genomeId, geneId, tigrId, pfamId)
sys.exit()
return familyIdToScaffoldIds
@@ -400,7 +400,7 @@ class IMG(object):
seqs = readFasta(genomeFile)
seqLens = {}
- for seqId, seq in seqs.iteritems():
+ for seqId, seq in seqs.items():
seqLens[seqId] = len(seq)
return seqLens
@@ -462,7 +462,7 @@ class IMG(object):
# are a few cases where this isn't tree (?) so only PFAMs/TIGRFAMs
# with GFF entries are considered.
familyIdToGenomePositions = {}
- for pfamId, geneIds in pfamIdToGeneIds.iteritems():
+ for pfamId, geneIds in pfamIdToGeneIds.items():
positions = []
for geneId in geneIds:
position = genePosition.get(geneId, None)
@@ -472,7 +472,7 @@ class IMG(object):
if positions:
familyIdToGenomePositions[pfamId] = positions
- for tigrId, geneIds in tigrIdToGeneIds.iteritems():
+ for tigrId, geneIds in tigrIdToGeneIds.items():
positions = []
for geneId in geneIds:
position = genePosition.get(geneId, None)
@@ -482,9 +482,9 @@ class IMG(object):
if positions:
familyIdToGenomePositions[tigrId] = positions
except:
- print '[BUG]: __genomeFamilyPositions'
- print sys.exc_info()[0]
- print genomeId, geneId, tigrId, pfamId
+ print('[BUG]: __genomeFamilyPositions')
+ print(sys.exc_info()[0])
+ print(genomeId, geneId, tigrId, pfamId)
sys.exit()
return familyIdToGenomePositions