Commit c4e3e55

python 3 compatibility, flake8 setup, "quotes" -> 'quotes'

wojdyr committed Nov 19, 2018
1 parent 966ce7f
Showing 12 changed files with 689 additions and 654 deletions.
4 changes: 2 additions & 2 deletions __init__.py
@@ -1,5 +1,5 @@

 import sys
-if sys.version_info[:2] != (2, 7):
-    sys.stderr.write("Error. Python 2.7 is required.\n")
+if sys.version_info[:2] != (2, 7) and sys.version_info[:2] < (3, 5):
+    sys.stderr.write('Error. Python 2.7 or 3.5+ is required.\n')
     sys.exit(1)
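The widened guard relies on tuple comparison: sys.version_info[:2] is a pair such as (3, 4), and Python compares tuples element by element, so 2.7 and any 3.5+ release pass while 3.0-3.4 are rejected. A standalone sketch of the logic (illustrative, not part of the commit):

    # Tuple comparison is lexicographic, which is what the new check relies on.
    for ver in [(2, 7), (3, 4), (3, 5), (3, 7)]:
        rejected = ver != (2, 7) and ver < (3, 5)
        print('%s %s' % (ver, 'rejected' if rejected else 'accepted'))
    # -> (2, 7) accepted, (3, 4) rejected, (3, 5) accepted, (3, 7) accepted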
4 changes: 2 additions & 2 deletions ccp4i-dimple.py
@@ -36,9 +36,9 @@ def add_blob(n):
 opt = parse_dimple_commands(sys.argv[1:])
 jobdir = os.path.abspath(opt.output_dir)
 if not os.path.exists(jobdir):
-    os.makedirs(jobdir) # it would be created by dimple, but too late
+    os.makedirs(jobdir)  # it would be created by dimple, but too late

-print "_JOB_DIRECTORY:", jobdir
+print("_JOB_DIRECTORY: %s" % jobdir)
 sys.stdout.flush()

 input_files = qtrapi.Files()
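Two portable print styles appear in this commit: here the arguments are folded into one %-formatted string, which Python 2 still parses as a print statement (the parentheses wrap a single expression) and Python 3 parses as a call, while prepare.py below adds a __future__ import instead. A minimal sketch of the difference, with a hypothetical jobdir value:

    from __future__ import print_function  # must come first in a real module

    jobdir = '/path/to/job'  # hypothetical value for illustration

    # Single %-formatted argument: safe on 2 and 3 even without the import.
    print('_JOB_DIRECTORY: %s' % jobdir)

    # Multi-argument form: only safe once print is a function; without the
    # import, Python 2 prints the tuple ('_JOB_DIRECTORY:', '/path/to/job').
    print('_JOB_DIRECTORY:', jobdir)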
25 changes: 11 additions & 14 deletions cell.py
@@ -31,8 +31,8 @@ def angle(x):
             if x == 90.: return '90'
             else: return '%.2f' % x
         return '%.2f, %.2f, %.2f, %s, %s, %s' % (
-                self.a, self.b, self.c,
-                angle(self.alpha), angle(self.beta), angle(self.gamma))
+            self.a, self.b, self.c,
+            angle(self.alpha), angle(self.beta), angle(self.gamma))

     def __str__(self):
         return '%-12s (%s)' % (self.symmetry, self.parameters_as_str())
@@ -105,7 +105,7 @@ def unscrewed_symmetry(self):
         return ' '.join(a[0] for a in self.symmetry.split())

 class Mat3(object):
-    "Matrix 3x3"
+    'Matrix 3x3'
     def __init__(self, *args):
         if len(args) == 1:
             self.m = tuple(args[0])
@@ -116,16 +116,16 @@ def __init__(self, *args):
     def __getitem__(self, index):
         return self.m[index]
     def __str__(self):
-        return "[%g %g %g; %g %g %g; %g %g %g]" % self.m
+        return '[%g %g %g; %g %g %g; %g %g %g]' % self.m
     def __repr__(self):
-        return "Mat3" + str(self.m)
+        return 'Mat3' + str(self.m)

     def __add__(self, other):
         assert isinstance(other, Mat3)
-        return Mat3(a+b for a,b in zip(self.m, other.m))
+        return Mat3(a+b for a, b in zip(self.m, other.m))
     def __sub__(self, other):
         assert isinstance(other, Mat3)
-        return Mat3(a-b for a,b in zip(self.m, other.m))
+        return Mat3(a-b for a, b in zip(self.m, other.m))
     # scalar must be float
     def __mul__(self, scalar):
         assert isinstance(scalar, float)
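A quick usage sketch of the Mat3 class touched above; it assumes the nine-argument constructor path not shown in this hunk (the identity() factory in the next hunk calls it that way) and that __mul__ scales every element:

    from dimple.cell import Mat3  # import path as used by prepare.py below

    m = Mat3(1, 0, 0,
             0, 1, 0,
             0, 0, 1)
    print(m + m)         # -> [2 0 0; 0 2 0; 0 0 2]  (__add__, then __str__)
    print(repr(m - m))   # -> Mat3(0, 0, 0, 0, 0, 0, 0, 0, 0)
    print(m * 2.0)       # scalar must be float: m * 2 would trip the assert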
@@ -137,7 +137,6 @@ def identity():
                     0, 1, 0,
                     0, 0, 1)

-
     def transpose(self):
         m = self.m
         return Mat3(m[0], m[3], m[6],
@@ -164,7 +163,7 @@ def trace(self):
     def inverse(self):
         d = self.det()
         if d == 0:
-            raise ValueError("Matrix is not invertible")
+            raise ValueError('Matrix is not invertible')
         m = self.m
         return Mat3(( m[4] * m[8] - m[5] * m[7]) / d,
                     (-m[1] * m[8] + m[2] * m[7]) / d,
@@ -212,7 +211,6 @@ def calculate_difference(meta1, meta2):
         return sys.float_info.max
     if match is False:
         return sys.float_info.max / 2
-    #return sum(abs(a-b) for a,b in zip(meta1.cell, meta2.cell))
     return meta1.to_standard().max_shift_in_mapping(meta2.to_standard())

 # space group utils
@@ -223,7 +221,7 @@ def match_symmetry(meta1, meta2):
     def sig(sym):
         first_chars = [a[0] for a in sym.split()]
         s = first_chars[0] + ''.join(sorted(first_chars[1:]))
-        if s == 'I112': # I2 is equivalent to C2
+        if s == 'I112':  # I2 is equivalent to C2
             return 'C112'
         # note: I see names such as 'H 3' used in ccp4, never 'R 3'
         if s[0] == 'R':
@@ -246,7 +244,7 @@ def sig(sym):
             '622': 12,
             '23': 12,
             '432': 24,
-}
+            }


 # the list of space group names extracted from symop.lib with this script:
@@ -261,7 +259,7 @@ def sig(sym):
 # sg = sgtbx.space_group(sgs.hall())
 # if sg.is_chiral():
 #     print '"%s": "%s",' % (fields[3], spacegroups[0])
-_short_spg_names = {
+_short_spg_names = {  # noqa: E122
 "P1": "P 1", "P2": "P 1 2 1", "P21": "P 1 21 1", "C2": "C 1 2 1",
 "P222": "P 2 2 2", "P2221": "P 2 2 21", "P21212": "P 21 21 2",
 "P212121": "P 21 21 21", "C2221": "C 2 2 21", "C222": "C 2 2 2",
@@ -290,4 +288,3 @@ def sig(sym):
 def calculate_z_order(hm):
     pg = ''.join(a[0] for a in hm.split()[1:])
     return _centering_n[hm[0]] * _pg_symop[pg]
-
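For reference, calculate_z_order reduces the Hermann-Mauguin symbol to a point group (first character of each part after the lattice letter) and multiplies the centering multiplicity by the point-group order. A worked example; the 'P', 'C' and '222' entries below are assumed, since only '622', '23' and '432' are visible in the hunk above:

    _centering_n = {'P': 1, 'C': 2}  # assumed: primitive = 1, C-centred = 2
    _pg_symop = {'222': 4}           # assumed: point group 222 has 4 symops

    def calculate_z_order(hm):
        pg = ''.join(a[0] for a in hm.split()[1:])
        return _centering_n[hm[0]] * _pg_symop[pg]

    print(calculate_z_order('P 21 21 21'))  # '21 21 21' -> '222'; 1 * 4 = 4
    print(calculate_z_order('C 2 2 21'))    # also '222'; 2 * 4 = 8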
92 changes: 47 additions & 45 deletions contaminants/prepare.py
@@ -1,9 +1,10 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Script that generates data.py - list of unit cells of contaminants
 """

+from __future__ import print_function
 import csv
 import hashlib
 import itertools
@@ -12,7 +13,10 @@
 import os
 import re
 import sys
-import urllib2
+try:
+    from urllib2 import urlopen  # Python 2
+except ImportError:
+    from urllib.request import urlopen  # Python 3
 from collections import OrderedDict
 import xml.etree.ElementTree as ET
 import numpy
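The try/except import is the dependency-free way to bind one urlopen name on both interpreters; every later call site, such as cached_urlopen below, stays unconditional, and writing the response with open(path, 'wb') round-trips the bytes that the Python 3 urlopen returns. The shim in isolation (example.com is a placeholder URL):

    try:
        from urllib2 import urlopen             # Python 2
    except ImportError:
        from urllib.request import urlopen      # Python 3

    data = urlopen('http://example.com').read()  # str on 2, bytes on 3
    with open('page.html', 'wb') as f:           # binary mode suits both
        f.write(data)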
@@ -25,8 +29,8 @@

 if __name__ == '__main__' and __package__ is None:
     assert os.path.basename(os.getcwd()) == 'contaminants'
-    sys.path.insert(1,
-        os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
+    sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__),
+                                                    '..', '..')))
 import dimple.cell

 WIKI_URL = ('https://raw.githubusercontent.com/wiki/'
@@ -52,8 +56,8 @@ def cached_urlopen(url, cache_name):
     if not os.path.exists(path):
         if not os.path.isdir(CACHE_DIR):
             os.mkdir(CACHE_DIR)
-        print '--> %s' % path
-        response = urllib2.urlopen(url)
+        print('--> %s' % path)
+        response = urlopen(url)
         with open(path, 'wb') as f:
             f.write(response.read())
     return open(path)
@@ -113,7 +117,7 @@ def fetch_pdb_info_from_ebi(pdb_id):
     if not any(m in single_crystal_methods for m in exper_method):
         return None
     forms = set(a['form'] for a in summary['assemblies'])
-    assert forms.issubset({'homo', 'hetero'}) # some are both (2EKS)
+    assert forms.issubset({'homo', 'hetero'})  # some are both (2EKS)
     if forms != {'homo'}:
         return None
     entities = summary['number_of_entities']
@@ -132,11 +136,11 @@ def fetch_pdb_info_from_ebi(pdb_id):
     for alt_sum in summaries[1:]:
         assert [alt_sum['cell'][x] for x in par_names] == parameters
         assert alt_sum['spacegroup'] == sg
-    if sg == 'A 1': # pesky 1LKS
+    if sg == 'A 1':  # pesky 1LKS
         return None
     # Correct spacegroup, but in non-standard settings.
     # We could convert it to the reference/standard settings.
-    if sg in ['P 1 1 21']: # only this one for now
+    if sg in ['P 1 1 21']:  # only this one for now
         return None

     # Standarize unit cell settings (e.g. 3wai, 1y6e).
@@ -164,13 +168,12 @@ def dump_uniprot_tab(pdb_ids):
                  '?query=database:(type:pdb%%20%s)&sort=score&format=tab')
     pdb_ids = [x.strip(',') for x in sys.stdin.read().split()]
     assert all(len(x) == 4 for x in pdb_ids)
-    print >>sys.stderr, 'Read %d IDs' % len(pdb_ids)
+    print('Read %d IDs' % len(pdb_ids), file=sys.stderr)
     for pdb_id in pdb_ids:
         response = cached_urlopen(query_url % pdb_id, 'up-pdb-%s.tab' % pdb_id)
         text = response.read()
-        print pdb_id
-        print text
-        print
+        print(pdb_id)
+        print(text + '\n')

 def uniprot_names_to_acs(names):
     query_url = ('http://www.uniprot.org/uniprot/'
@@ -207,13 +210,13 @@ def fetch_uniref_clusters(acs, verbose=False):
                 continue
             #if key != 'UniRef100_P02931': continue
             if verbose:
-                print '%s -> %s (%s) - %d members, %d residues' % (
-                        ac, key, name, len(members), int(d['Length']))
+                print('%s -> %s (%s) - %d members, %d residues' % (
+                    ac, key, name, len(members), int(d['Length'])))
             if key in clusters:
                 sys.exit('Duplicated cluster: ' + key)
             clusters[key] = members
         if reader.line_num == 0:
-            print 'Not found in UniRef:', ac
+            print('Not found in UniRef:', ac)
             clusters['_' + ac] = [ac]  # we still want to keep this AC
     return clusters

@@ -222,7 +225,7 @@ def read_current_pdb_entries():
                         'current-rcsb.xml')
     root = ET.parse(f).getroot()
     entries = set(child.attrib['structureId'] for child in root)
-    print 'Current PDB entries: %d' % len(entries)
+    print('Current PDB entries:', len(entries))
     return entries

 def read_sifts_mapping():
@@ -264,8 +267,8 @@ def cluster_by_cell_size_scipy(cells):
     # matrix like from scipy.spatial.distance.pdist(..., metric=func)
     dist_matrix = numpy.zeros(n * (n - 1) // 2, dtype=numpy.double)
     k = 0
-    for i in xrange(0, n-1):
-        for j in xrange(i+1, n):
+    for i in range(0, n-1):
+        for j in range(i+1, n):
             dist_matrix[k] = cells[i].max_shift_in_mapping(cells[j])
             k += 1
     assert k == len(dist_matrix)
@@ -291,7 +294,6 @@ def get_representative_unit_cell(cells):
     def med_metric(c):
         return max(cell_distance(c, other) for other in cells)
     best = min(cells[:cutoff], key=med_metric)
-    #print '===>', med_metric(best)
     return best

 def write_output_file(representants):
@@ -319,7 +321,7 @@ def write_json_file(uniprot_acs, extra_info, representants):
         leaf = {'name': leaf_name}
         group.setdefault(ref.symmetry, []).append(leaf)
     data = {'name': '', 'children': []}
-    for ac, v in uniprot_acs.iteritems():
+    for ac, v in uniprot_acs.items():
         item = {'name': v, 'ac': ac, 'uniref': extra_info[v][0],
                 'desc': extra_info[v][1]}
         if v in groups:
@@ -331,48 +333,48 @@

 def main(verbose=False):
     page = cached_urlopen(WIKI_URL, -1).readlines()
-    parsed_page = parse_wiki_page(page) # { UniProt name: [some PDB IDs] }
-    uniprot_acs = uniprot_names_to_acs(parsed_page) # { AC: UniProt name }
+    parsed_page = parse_wiki_page(page)  # { UniProt name: [some PDB IDs] }
+    uniprot_acs = uniprot_names_to_acs(parsed_page)  # { AC: UniProt name }
     pdb2up = read_sifts_mapping()
     descriptions_from_wiki = {}

     # check for missing PDB IDs that were given explicitely in the wiki
-    for u, (desc, pp) in parsed_page.iteritems():
+    for u, (desc, pp) in parsed_page.items():
         descriptions_from_wiki[u] = desc
         for p in pp:
             if p not in pdb2up:
-                print 'Missing in SIFTS: %s' % p
-                pdb2up[p] = [k for k, v in uniprot_acs.iteritems() if v == u]
+                print('Missing in SIFTS:', p)
+                pdb2up[p] = [k for k, v in uniprot_acs.items() if v == u]

     # check for deprecated/removed PDB entries
     current_pdb_entries = read_current_pdb_entries()
     obsolete = [k for k in pdb2up if k not in current_pdb_entries]
-    print 'Obsolete PDB entries in SIFTS:', len(obsolete)
+    print('Obsolete PDB entries in SIFTS:', len(obsolete))
     for k in obsolete:
         del pdb2up[k]

     # reverse the PDB->UniProt mapping
     ac2pdb = {}
-    for p, acs in pdb2up.iteritems():
-        if len(acs) == 1: # don't care about heteromers
+    for p, acs in pdb2up.items():
+        if len(acs) == 1:  # don't care about heteromers
             ac2pdb.setdefault(acs[0], []).append(p)

     uniref_clusters = fetch_uniref_clusters(uniprot_acs, verbose)

     # mapping back uniref id to uniprot name that was used an input
     uniref_sources = {}
-    for uniref_name, uclust in uniref_clusters.iteritems():
+    for uniref_name, uclust in uniref_clusters.items():
         sources = [uniprot_acs[ac] for ac in uclust if ac in uniprot_acs]
         assert len(sources) == 1, sources
         uniref_sources[uniref_name] = sources[0]

     representants = []
     empty_cnt = 0
-    for uniref_name, uclust in uniref_clusters.iteritems():
+    for uniref_name, uclust in uniref_clusters.items():
         pdb_set = sum([ac2pdb[ac] for ac in uclust if ac in ac2pdb], [])
-        print '%s -> %s (%d entries) -> %d PDBs' % (
-                uniref_sources[uniref_name], uniref_name,
-                len(uclust), len(pdb_set))
+        print('%s -> %s (%d entries) -> %d PDBs' % (
+            uniref_sources[uniref_name], uniref_name,
+            len(uclust), len(pdb_set)))
         cells = []
         for pdb_id in pdb_set:
             cell = fetch_pdb_info_from_ebi(pdb_id)
@@ -391,35 +393,35 @@ def main(verbose=False):
             r.uniprot_name = uniref_sources[uniref_name]
             r.comment = uniref_name
             if verbose:
-                print ' '.join(p.pdb_id for p in pdb_cluster), '=>', r.pdb_id
+                print(' '.join(p.pdb_id for p in pdb_cluster), '=>', r.pdb_id)
                 print('\t%s %-9s (%6.2f %6.2f %6.2f %5.1f %5.1f %5.1f) %8.2f'
                       % ((r.pdb_id, r.symmetry) + r.cell + (r.quality,)))
             representants.append(r)
         if len(representants) == prev_len:
             empty_cnt += 1
         if not verbose:
-            print '\t-> %d' % (len(representants) - prev_len)
+            print('\t-> %d' % (len(representants) - prev_len))
     representants.sort(key=lambda x: x.a)
     write_output_file(representants)
-    print '%d (incl. %d absent) proteins -> %d unique unit cells -> %s' % (
-            len(uniprot_acs), empty_cnt, len(representants), OUTPUT_PY_FILE)
+    print('%d (incl. %d absent) proteins -> %d unique unit cells -> %s' % (
+        len(uniprot_acs), empty_cnt, len(representants), OUTPUT_PY_FILE))
     write_json_file(uniprot_acs,
                     dict((v, (k, descriptions_from_wiki[v]))
                          for k, v in uniref_sources.items()),
                     representants)
     a, b = min(itertools.combinations(representants, 2),
                key=lambda x: cell_distance(*x))
-    print 'Min. dist: %.2f%% between %s and %s' % (100*cell_distance(a, b),
-                                                   a.pdb_id, b.pdb_id)
+    print('Min. dist: %.2f%% between %s and %s' % (
+        100*cell_distance(a, b), a.pdb_id, b.pdb_id))
     contaminer_acs = fetch_contaminer_protein_acs()
     for ac in contaminer_acs:
         if not any(ac in uclust for uclust in uniref_clusters.values()):
-            print 'Only in ContaBase: %s %s' % (ac, contaminer_acs[ac])
-    for uniref_name, uclust in uniref_clusters.iteritems():
-        if len(set(uclust).intersection(contaminer_acs.viewkeys())) == 0:
+            print('Only in ContaBase:', ac, contaminer_acs[ac])
+    for uniref_name, uclust in uniref_clusters.items():
+        if len(set(uclust).intersection(contaminer_acs.keys())) == 0:
             src = uniref_sources[uniref_name]
-            if src not in NOT_IN_CONTABASE: # these are not news
-                print 'Not in ContaBase:', src
+            if src not in NOT_IN_CONTABASE:  # these are not news
+                print('Not in ContaBase:', src)

 if __name__ == '__main__':
     verbose = ('-v' in sys.argv[1:])
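Beyond print(), the substitutions recurring in this file are iteritems() -> items(), viewkeys() -> keys() and xrange() -> range(). The Python 3 spellings also exist on Python 2, where they return lists rather than views or lazy ranges, so call sites that only iterate keep working unchanged on both interpreters. A small sketch of why this is safe (values are illustrative):

    from __future__ import print_function

    d = {'P1': 1, 'P21': 2}
    for k, v in d.items():      # list of pairs on 2, a view on 3; both iterate
        print(k, v)
    # set operations accept keys() on both versions
    print(set(['P1', 'C2']).intersection(d.keys()))  # a set containing 'P1'
    for i in range(3):          # list on 2, lazy range on 3; same loop
        print(i)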