1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
--- a/centrifuge_evaluate_mason.py 2023-04-14 21:29:29.482568396 +0530
+++ b/centrifuge_evaluate_mason.py 2023-04-14 22:05:44.988504275 +0530
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
import sys, os, subprocess, inspect
import platform, multiprocessing
@@ -27,7 +27,7 @@
higher_ranked = {}
ancestors = set()
- for tax_id in taxonomy_tree.keys():
+ for tax_id in list(taxonomy_tree.keys()):
if tax_id in ancestors:
continue
while True:
@@ -82,7 +82,7 @@
fields = line.strip().split('\t')
if len(fields) != 3:
- print >> sys.stderr, "Warning: %s missing" % (line.strip())
+ print("Warning: %s missing" % (line.strip()), file=sys.stderr)
continue
read_name, tax_id = fields[1:3]
# Traverse up taxonomy tree to match the given rank parameter
@@ -117,7 +117,7 @@
# print read_name
raw_unique_classified = 0
- for read_name, maps in db_dic.items():
+ for read_name, maps in list(db_dic.items()):
if len(maps) == 1 and read_name not in higher_ranked:
raw_unique_classified += 1
return classified, unique_classified, unclassified, len(db_dic), raw_unique_classified
@@ -184,7 +184,7 @@
read_fname]
if verbose:
- print >> sys.stderr, ' '.join(centrifuge_cmd)
+ print(' '.join(centrifuge_cmd), file=sys.stderr)
out_fname = "centrifuge.output"
proc = subprocess.Popen(centrifuge_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
@@ -208,12 +208,12 @@
# if rank == "strain":
# assert num_cases == num_fragment
- print >> sys.stderr, "\t\t%s" % rank
- print >> sys.stderr, "\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(classified, num_cases, float(classified) / num_cases)
- print >> sys.stderr, "\t\t\tprecision : {:,} / {:,} ({:.2%})".format(classified, raw_classified, float(classified) / raw_classified)
- print >> sys.stderr, "\n\t\t\tfor uniquely classified "
- print >> sys.stderr, "\t\t\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(unique_classified, num_cases, float(unique_classified) / num_cases)
- print >> sys.stderr, "\t\t\t\t\tprecision : {:,} / {:,} ({:.2%})".format(unique_classified, raw_unique_classified, float(unique_classified) / raw_unique_classified)
+ print("\t\t%s" % rank, file=sys.stderr)
+ print("\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(classified, num_cases, float(classified) / num_cases), file=sys.stderr)
+ print("\t\t\tprecision : {:,} / {:,} ({:.2%})".format(classified, raw_classified, float(classified) / raw_classified), file=sys.stderr)
+ print("\n\t\t\tfor uniquely classified ", file=sys.stderr)
+ print("\t\t\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(unique_classified, num_cases, float(unique_classified) / num_cases), file=sys.stderr)
+ print("\t\t\t\t\tprecision : {:,} / {:,} ({:.2%})".format(unique_classified, raw_unique_classified, float(unique_classified) / raw_unique_classified), file=sys.stderr)
# Calculate sum of squared residuals in abundance
"""
@@ -252,12 +252,12 @@
if rank_taxID not in true_abundance:
true_abundance[rank_taxID] = 0.0
true_abundance[rank_taxID] += (reads / float(genomeSize))
- for taxID, reads in true_abundance.items():
+ for taxID, reads in list(true_abundance.items()):
true_abundance[taxID] /= total_sum
- print >> sys.stderr, "number of genomes:", num_genomes
- print >> sys.stderr, "number of species:", num_species
- print >> sys.stderr, "number of uniq species:", len(true_abundance)
+ print("number of genomes:", num_genomes, file=sys.stderr)
+ print("number of species:", num_species, file=sys.stderr)
+ print("number of uniq species:", len(true_abundance), file=sys.stderr)
read_fname = "centrifuge_data/bacteria_sim10M/bacteria_sim10M.fa"
summary_fname = "centrifuge.summary"
@@ -271,14 +271,14 @@
read_fname]
if verbose:
- print >> sys.stderr, ' '.join(centrifuge_cmd)
+ print(' '.join(centrifuge_cmd), file=sys.stderr)
out_fname = "centrifuge.output"
proc = subprocess.Popen(centrifuge_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
proc.communicate()
calc_abundance = {}
- for taxID in true_abundance.keys():
+ for taxID in list(true_abundance.keys()):
calc_abundance[taxID] = 0.0
first = True
for line in open(summary_fname):
@@ -296,12 +296,12 @@
"""
abundance_file = open("abundance.cmp", 'w')
- print >> abundance_file, "taxID\ttrue\tcalc\trank"
+ print("taxID\ttrue\tcalc\trank", file=abundance_file)
for rank in ranks:
if rank == "strain":
continue
true_abundance_rank, calc_abundance_rank = {}, {}
- for taxID in true_abundance.keys():
+ for taxID in list(true_abundance.keys()):
assert taxID in calc_abundance
rank_taxID = taxID
while True:
@@ -322,11 +322,11 @@
calc_abundance_rank[rank_taxID] += calc_abundance[taxID]
ssr = 0.0 # Sum of Squared Residuals
- for taxID in true_abundance_rank.keys():
+ for taxID in list(true_abundance_rank.keys()):
assert taxID in calc_abundance_rank
ssr += (true_abundance_rank[taxID] - calc_abundance_rank[taxID]) ** 2
- print >> abundance_file, "%s\t%.6f\t%.6f\t%s" % (taxID, true_abundance_rank[taxID], calc_abundance_rank[taxID], rank)
- print >> sys.stderr, "%s) Sum of squared residuals: %.6f" % (rank, ssr)
+ print("%s\t%.6f\t%.6f\t%s" % (taxID, true_abundance_rank[taxID], calc_abundance_rank[taxID], rank), file=abundance_file)
+ print("%s) Sum of squared residuals: %.6f" % (rank, ssr), file=sys.stderr)
abundance_file.close()
|