centrifuge_evaluate_mason.py.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127

--- a/centrifuge_evaluate_mason.py	2023-04-14 21:29:29.482568396 +0530
+++ b/centrifuge_evaluate_mason.py	2023-04-14 22:05:44.988504275 +0530
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os, subprocess, inspect
 import platform, multiprocessing
@@ -27,7 +27,7 @@
     higher_ranked = {}
         
     ancestors = set()
-    for tax_id in taxonomy_tree.keys():
+    for tax_id in list(taxonomy_tree.keys()):
         if tax_id in ancestors:
             continue
         while True:
@@ -82,7 +82,7 @@
 
         fields = line.strip().split('\t')
         if len(fields) != 3:
-            print >> sys.stderr, "Warning: %s missing" % (line.strip())
+            print("Warning: %s missing" % (line.strip()), file=sys.stderr)
             continue
         read_name, tax_id = fields[1:3] 
         # Traverse up taxonomy tree to match the given rank parameter
@@ -117,7 +117,7 @@
             # print read_name
 
     raw_unique_classified = 0
-    for read_name, maps in db_dic.items():
+    for read_name, maps in list(db_dic.items()):
         if len(maps) == 1 and read_name not in higher_ranked:
             raw_unique_classified += 1
     return classified, unique_classified, unclassified, len(db_dic), raw_unique_classified
@@ -184,7 +184,7 @@
                       read_fname]
 
     if verbose:
-        print >> sys.stderr, ' '.join(centrifuge_cmd)
+        print(' '.join(centrifuge_cmd), file=sys.stderr)
 
     out_fname = "centrifuge.output"
     proc = subprocess.Popen(centrifuge_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
@@ -208,12 +208,12 @@
         # if rank == "strain":
         #    assert num_cases == num_fragment
 
-        print >> sys.stderr, "\t\t%s" % rank
-        print >> sys.stderr, "\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(classified, num_cases, float(classified) / num_cases)
-        print >> sys.stderr, "\t\t\tprecision  : {:,} / {:,} ({:.2%})".format(classified, raw_classified, float(classified) / raw_classified)
-        print >> sys.stderr, "\n\t\t\tfor uniquely classified "
-        print >> sys.stderr, "\t\t\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(unique_classified, num_cases, float(unique_classified) / num_cases)
-        print >> sys.stderr, "\t\t\t\t\tprecision  : {:,} / {:,} ({:.2%})".format(unique_classified, raw_unique_classified, float(unique_classified) / raw_unique_classified)
+        print("\t\t%s" % rank, file=sys.stderr)
+        print("\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(classified, num_cases, float(classified) / num_cases), file=sys.stderr)
+        print("\t\t\tprecision  : {:,} / {:,} ({:.2%})".format(classified, raw_classified, float(classified) / raw_classified), file=sys.stderr)
+        print("\n\t\t\tfor uniquely classified ", file=sys.stderr)
+        print("\t\t\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(unique_classified, num_cases, float(unique_classified) / num_cases), file=sys.stderr)
+        print("\t\t\t\t\tprecision  : {:,} / {:,} ({:.2%})".format(unique_classified, raw_unique_classified, float(unique_classified) / raw_unique_classified), file=sys.stderr)
 
         # Calculate sum of squared residuals in abundance
         """
@@ -252,12 +252,12 @@
         if rank_taxID not in true_abundance:
             true_abundance[rank_taxID] = 0.0
         true_abundance[rank_taxID] += (reads / float(genomeSize))
-    for taxID, reads in true_abundance.items():
+    for taxID, reads in list(true_abundance.items()):
         true_abundance[taxID] /= total_sum
 
-    print >> sys.stderr, "number of genomes:", num_genomes
-    print >> sys.stderr, "number of species:", num_species
-    print >> sys.stderr, "number of uniq species:", len(true_abundance)
+    print("number of genomes:", num_genomes, file=sys.stderr)
+    print("number of species:", num_species, file=sys.stderr)
+    print("number of uniq species:", len(true_abundance), file=sys.stderr)
 
     read_fname = "centrifuge_data/bacteria_sim10M/bacteria_sim10M.fa"
     summary_fname = "centrifuge.summary"
@@ -271,14 +271,14 @@
                       read_fname]
 
     if verbose:
-        print >> sys.stderr, ' '.join(centrifuge_cmd)
+        print(' '.join(centrifuge_cmd), file=sys.stderr)
 
     out_fname = "centrifuge.output"
     proc = subprocess.Popen(centrifuge_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
     proc.communicate()
 
     calc_abundance = {}
-    for taxID in true_abundance.keys():
+    for taxID in list(true_abundance.keys()):
         calc_abundance[taxID] = 0.0
     first = True
     for line in open(summary_fname):
@@ -296,12 +296,12 @@
         """
 
     abundance_file = open("abundance.cmp", 'w')
-    print >> abundance_file, "taxID\ttrue\tcalc\trank"
+    print("taxID\ttrue\tcalc\trank", file=abundance_file)
     for rank in ranks:
         if rank == "strain":
             continue
         true_abundance_rank, calc_abundance_rank = {}, {}
-        for taxID in true_abundance.keys():
+        for taxID in list(true_abundance.keys()):
             assert taxID in calc_abundance
             rank_taxID = taxID
             while True:
@@ -322,11 +322,11 @@
             calc_abundance_rank[rank_taxID] += calc_abundance[taxID]
 
         ssr = 0.0 # Sum of Squared Residuals
-        for taxID in true_abundance_rank.keys():
+        for taxID in list(true_abundance_rank.keys()):
             assert taxID in calc_abundance_rank
             ssr += (true_abundance_rank[taxID] - calc_abundance_rank[taxID]) ** 2
-            print >> abundance_file, "%s\t%.6f\t%.6f\t%s" % (taxID, true_abundance_rank[taxID], calc_abundance_rank[taxID], rank)
-        print >> sys.stderr, "%s) Sum of squared residuals: %.6f" % (rank, ssr)
+            print("%s\t%.6f\t%.6f\t%s" % (taxID, true_abundance_rank[taxID], calc_abundance_rank[taxID], rank), file=abundance_file)
+        print("%s) Sum of squared residuals: %.6f" % (rank, ssr), file=sys.stderr)
     abundance_file.close()