From 1d3f40f90737bfc6fc19f034c02669acb562bbdd Mon Sep 17 00:00:00 2001 From: Matthew Pitkin Date: Tue, 21 Nov 2017 16:49:15 +0000 Subject: [PATCH 1/6] Various updates in particular adding to the Pulsar and Pulsars class - still needs testing though, refs #2 --- psrqpy/__init__.py | 7 +- psrqpy/config.py | 16 +-- psrqpy/search.py | 303 +++++++++++++++++++++++---------------------- psrqpy/utils.py | 2 +- 4 files changed, 168 insertions(+), 160 deletions(-) diff --git a/psrqpy/__init__.py b/psrqpy/__init__.py index 0d4d6e1a..cb9a828c 100644 --- a/psrqpy/__init__.py +++ b/psrqpy/__init__.py @@ -2,7 +2,8 @@ """ A Python tool for interacting with the ATNF pulsar catalogue """ -__version__ = "0.1.2" +__version__ = "0.2.1" -from .search import QueryATNF, Pulsar -from .utils import * \ No newline at end of file +from .search import QueryATNF +from .pulsar import Pulsar, Pulsars +from .utils import * diff --git a/psrqpy/config.py b/psrqpy/config.py index 077f2d76..49caa884 100644 --- a/psrqpy/config.py +++ b/psrqpy/config.py @@ -8,13 +8,13 @@ ATNF_BASE_URL = r'http://www.atnf.csiro.au/people/pulsar/psrcat/' ATNF_URL = ATNF_BASE_URL + r'proc_form.php?version={version}' -PARAMS_QUERY = '{params}' -USERDEFINED_QUERY = '&startUserDefined=true&c1_val=&c2_val=&c3_val=&c4_val=' -CONDITION_QUERY = '&condition={condition}' -PSRNAMES_QUERY = '&pulsar_names={psrnames}' -SORT_QUERY = '&sort_attr={sortattr}&sort_order={sortorder}' -EPHEMERIS_QUERY = '&submit_ephemeris={getephemeris}' -QUERY_FLUFF = '&ephemeris=long&coords_unit=raj%2Fdecj&radius=&coords_1=&coords_2=&style=Long+with+errors&no_value=*&nohead=nohead&state=query&table_bottom.x=30&table_bottom.y=22' +PARAMS_QUERY = r'{params}' +USERDEFINED_QUERY = r'&startUserDefined=true&c1_val=&c2_val=&c3_val=&c4_val=' +CONDITION_QUERY = r'&condition={condition}' +PSRNAMES_QUERY = r'&pulsar_names={psrnames}' +SORT_QUERY = r'&sort_attr={sortattr}&sort_order={sortorder}' +EPHEMERIS_QUERY = r'&submit_ephemeris={getephemeris}' +QUERY_FLUFF = 
r'&ephemeris=long&coords_unit=raj%2Fdecj&radius=&coords_1=&coords_2=&style=Long+with+errors&no_value=*&nohead=nohead&state=query&table_bottom.x=30&table_bottom.y=22' QUERY_URL = ATNF_URL + PARAMS_QUERY + USERDEFINED_QUERY + SORT_QUERY + CONDITION_QUERY + PSRNAMES_QUERY + EPHEMERIS_QUERY + QUERY_FLUFF @@ -33,7 +33,7 @@ 'PMRA': {'ref': True, 'err': True, 'units': 'mas/yr', 'format': 'f8'}, # Proper motion in the right ascension direction (mas/yr) 'PMDEC': {'ref': True, 'err': True, 'units': 'mas/yr', 'format': 'f8'}, # Proper motion in declination (mas/yr) 'PX': {'ref': True, 'err': True, 'units': 'mas', 'format': 'f8'}, # Annual parallax (mas) - 'POSEPOCH': {'ref': True, 'err': False, 'units': 'd', 'format': 'f8'}, # Epoch of position, defaults to PEpoch (MJD) + 'POSEPOCH': {'ref': True, 'err': False, 'units': 'd', 'format': 'f8'}, # Epoch of position, defaults to PEpoch (MJD) 'ELONG': {'ref': True, 'err': True, 'units': 'deg', 'format': 'f8'}, # Ecliptic longitude (degrees) 'ELAT': {'ref': True, 'err': True, 'units': 'deg', 'format': 'f8'}, # Ecliptic latitude (degrees) 'PMELONG': {'ref': True, 'err': True, 'units': 'mas/yr', 'format': 'f8'}, # Proper motion in the ecliptic longitude direction (mas/yr) diff --git a/psrqpy/search.py b/psrqpy/search.py index 0cf9c250..6aadbd27 100644 --- a/psrqpy/search.py +++ b/psrqpy/search.py @@ -2,7 +2,7 @@ Search query """ -from __future__ import print_function +from __future__ import print_function, division import warnings from collections import OrderedDict @@ -16,85 +16,6 @@ from .config import * from .utils import * - -class Pulsar(object): - """ - An object to hold a single pulsar - """ - - def __init__(self, psrname, version=None, **kwargs): - """ - Set object attributes from kwargs - """ - - self._name = psrname - self._raw = kwargs - self._version = version if not version else get_version() - - for key, value in six.iteritems(kwargs): - setattr(self, key, value) - - def keys(self): - return self._raw.keys() - - def 
items(self): - return self._raw.items() - - @property - def name(self): - """ - Return the pulsar name - """ - - return self._name - - def __getitem__(self, key): - """ - If the class has a attribute given by the key then return it, otherwise generate a - query for that key to set it - """ - - ukey = key.upper() - pulsarname = self.name - - if hasattr(self, ukey): - param = getattr(self, ukey) - else: - if ukey[-4:] == '_ERR': # an error parameter - tkey = ukey[:-4] # parameter name without error - else: - tkey = ukey - - if tkey not in PSR_ALL_PARS: - raise Exception('"{}" is not a recognised pulsar parameter'.format(tkey)) - else: - # generate a query for the key and add it - try: - q = QueryATNF(params=tkey, psrs=pulsarname, version=self._version, include_errs=True) - except IOError: - raise Exception('Problem querying ATNF catalogue') - - if q.num_pulsars != 1: - raise Exception('Problem getting parameter "{}"'.format(tkey)) - - param = q.get_dict()[ukey][0] # required output parameter - setattr(self, ukey, param) # set output parameter value - - # set parameter value if an error value was requested - if PSR_ALL[tkey]['err']: - if tkey != ukey: # asking for error, so set actual value - setattr(self, tkey, q.get_dict()[tkey][0]) # set parameter value - else: # asking for value, so set error - setattr(self, tkey+'_ERR', q.get_dict()[tkey+'_ERR'][0]) # set error value - - return param - - def get_ephemeris(self): - """ - Query the ATNF to get the ephemeris for the given pulsar - """ - - class QueryATNF(object): """ Class to generate a query of the ATNF catalogue @@ -102,7 +23,8 @@ class QueryATNF(object): def __init__(self, params=None, condition=None, psrtype=None, assoc=None, bincomp=None, exactmatch=False, sort_attr='jname', sort_order='asc', psrs=None, - include_errs=True, include_refs=False, version=None, adsref=False, **kwargs): + include_errs=True, include_refs=False, get_ephemeris=False, version=None, + adsref=False): """ Set up and perform the query of 
the ATNF catalogue @@ -117,6 +39,7 @@ def __init__(self, params=None, condition=None, psrtype=None, assoc=None, bincom :param psrs: a list of pulsar names to get the information for :param include_errs: boolean to set whether to include parameter errors :param include_refs: boolean to set whether to include parameter references + :param get_ephemeris: boolean to set whether to get pulsar ephemerides (only works if `psrs` have been specified) :param version: a string with the ATNF version to use (this will default to the current version if set as None) :param adsref: boolean to set whether the python 'ads' module can be used to get reference information """ @@ -125,7 +48,7 @@ def __init__(self, params=None, condition=None, psrtype=None, assoc=None, bincom self._include_errs = include_errs self._include_refs = include_refs self._atnf_version = version - self._atnf_version = self.get_version() # if no version is set this will return the current or default value + self._atnf_version = self.get_version # if no version is set this will return the current or default value self._adsref = adsref # check sort order is either 'asc' or 'desc' (or some synonyms) @@ -141,6 +64,7 @@ def __init__(self, params=None, condition=None, psrtype=None, assoc=None, bincom self._refs = None # set of pulsar references self._query_output = None + self._get_ephemeris = get_ephemeris # check parameters are allowed values if isinstance(params, list): @@ -156,15 +80,18 @@ def __init__(self, params=None, condition=None, psrtype=None, assoc=None, bincom if isinstance(params, basestring): self._query_params = [params.upper()] # make sure parameter is all upper case else: - raise Exception("'params' must be a list or string") + if self._psrs and self._get_ephemeris: # if getting ephemerides then param can be None + self._query_params = [] + else: + raise Exception("'params' must be a list or string") for p in list(self._query_params): if p not in PSR_ALL_PARS: warnings.warn("Parameter {} not 
recognised".format(p), UserWarning) self._query_params.remove(p) - if len(p) == 0: + if len(self._query_params) == 0 and (not self._psrs or not self._get_ephemeris): raise Exception("No parameters left in list") - + # set conditions self._conditions_query = self.parse_conditions(condition, psrtype=psrtype, assoc=assoc, bincomp=bincomp, exactmatch=exactmatch) @@ -178,7 +105,7 @@ def __init__(self, params=None, condition=None, psrtype=None, assoc=None, bincom # parse the query with BeautifulSoup into a dictionary self._query_output = self.parse_query() - def generate_query(self, version='', params=None, condition='', sortorder='asc', sortattr='JName', psrnames=None, getephemeris=False): + def generate_query(self, version='', params=None, condition='', sortorder='asc', sortattr='JName', psrnames=None, **kwargs): """ Generate a query URL and return the content of the request from that URL. If set the class attributes are used for generating the query, otherwise arguments can be given. @@ -189,8 +116,15 @@ def generate_query(self, version='', params=None, condition='', sortorder='asc', :param sortorder: the order for sorting the results :param sortattr: the attribute on which to perform the sorting :param psrnames: a list of pulsar names to get + :param get_ephemeris: boolean stating whether to get pulsar ephemerides rather than a table of parameter values (only works if pulsar names are given) """ + # get_ephemeris is the only keyword argument at the moment + for key, value in six.iteritems(kwargs): + if key == 'get_ephemeris': + if isinstance(value, bool): + self._get_ephemeris = value # overwrite the pre-set class _get_ephemeris value + query_dict = {} self._atnf_version = self._atnf_version if not version else version query_dict['version'] = self._atnf_version @@ -243,10 +177,13 @@ def generate_query(self, version='', params=None, condition='', sortorder='asc', query_dict['psrnames'] = qpulsars # get pulsar ephemeris rather than table (parsing of this is not 
implemented yet) - if getephemeris: - query_dict['getephemeris'] = 'Get+Ephemeris' - else: - query_dict['getephemeris'] = '' + query_dict['getephemeris'] = '' + if self._get_ephemeris: + if self._psrs is not None: + query_dict['getephemeris'] = 'Get+Ephemeris' + else: + warnings.warn('Cannot get ephemeris if no pulsar names are provided. No ephemerides will be returned.', UserWarning) + self._get_ephemeris = False # generate query URL self._query_url = QUERY_URL.format(**query_dict) @@ -278,82 +215,112 @@ def parse_query(self, requestcontent=''): pretags = psrsoup.find_all('pre') # get any
 html tags
 
         # check for any warnings generated by the request
+        self._bad_pulsars = [] # any requested pulsars that were not found
         for pt in pretags:
             if 'WARNING' in pt.text:
                 warnings.warn('Request generated warning: "{}"'.format(pt.text), UserWarning)
 
-        # actual table should be in the final <pre> tag
-        qoutput = pretags[-1].text
+                # check if warning was for a specific requested pulsar: given by warning string "WARNING: PSR XXXXXXX not in catalogue"
+                if 'PSR' in pt.text:
+                    pat = r'WARNING: PSR (?P<psr>\S+) not in catalogue'
+                    wvalues = re.search(pat, pt.text).groupdict()
 
-        # put the data in an ordered dictionary dictionary
+                    if 'psr' in wvalues:
+                        self._bad_pulsars.append(wvalues['psr'])
+                        # remove any pulsars that weren't found
+                        if wvalues['psr'] in self._psrs:
+                            del self._psrs[wvalues['psr']]
+
+        # actual table or ephemeris values should be in the final <pre> tag
+        qoutput = pretags[-1].text
         self._query_output = OrderedDict()
         self._npulsars = 0
-        if qoutput:
-            plist = qoutput.strip().split('\n') # split output string
 
-            self._npulsars = len(plist)
+        if not self._get_ephemeris: # not getting ephemeris values
+            # put the data in an ordered dictionary dictionary
+            if qoutput:
+                plist = qoutput.strip().split('\n') # split output string
 
-            for p in self._query_params:
-                if p in PSR_ALL_PARS:
-                    self._query_output[p] = np.zeros(self._npulsars, dtype=PSR_ALL[p]['format'])
+                if self._psrs:
+                    if len(self._psrs) != len(plist):
+                        raise Exception('Number of pulsars returned is not the same as the number requested')
 
-                    if PSR_ALL[p]['err'] and self._include_errs:
-                        self._query_output[p+'_ERR'] = np.zeros(self._npulsars, dtype='f8') # error can only be floats
+                self._npulsars = len(plist)
 
-                    if PSR_ALL[p]['ref'] and self._include_refs:
-                        self._query_output[p+'_REF'] = np.zeros(self._npulsars, dtype='S1024')
+                for p in self._query_params:
+                    if p in PSR_ALL_PARS:
+                        self._query_output[p] = np.zeros(self._npulsars, dtype=PSR_ALL[p]['format'])
 
-                        if self._adsref: # also add reference URL for NASA ADS
-                            self._query_output[p+'_REFURL'] = np.zeros(self._npulsars, dtype='S1024')
+                        if PSR_ALL[p]['err'] and self._include_errs:
+                            self._query_output[p+'_ERR'] = np.zeros(self._npulsars, dtype='f8') # error can only be floats
 
-            for idx, line in enumerate(plist):
-                # split the line on whitespace or \xa0 using re (if just using split it ignores \xa0,
-                # which may be present for, e.g., empty reference fields, and results in the wrong
-                # number of line entries, also ignore the first entry as it is always in index
-                pvals = [lv.strip() for lv in re.split(r'\s+| \xa0 | \D\xa0', line)][1:] # strip removes '\xa0' now
+                        if PSR_ALL[p]['ref'] and self._include_refs:
+                            self._query_output[p+'_REF'] = np.zeros(self._npulsars, dtype='S1024')
 
-                vidx = 0 # index of current value
-                for p in self._query_params:
-                    if PSR_ALL[p]['format'] == 'f8':
-                        if pvals[vidx] == '*':
-                            self._query_output[p][idx] = None # put NaN entry in numpy array
-                        else:
-                            self._query_output[p][idx] = float(pvals[vidx])
-                    elif PSR_ALL[p]['format'] == 'i4':
-                        if pvals[vidx] == '*':
-                            self._query_output[p][idx] = None
-                        else:
-                            self._query_output[p][idx] = int(pvals[vidx])
-                    else:
-                        self._query_output[p][idx] = pvals[vidx]
-                    vidx += 1
+                            if self._adsref: # also add reference URL for NASA ADS
+                                self._query_output[p+'_REFURL'] = np.zeros(self._npulsars, dtype='S1024')
+
+                for idx, line in enumerate(plist):
+                    # split the line on whitespace or \xa0 using re (if just using split it ignores \xa0,
+                    # which may be present for, e.g., empty reference fields, and results in the wrong
+                    # number of line entries, also ignore the first entry as it is always in index
+                    pvals = [lv.strip() for lv in re.split(r'\s+| \xa0 | \D\xa0', line)][1:] # strip removes '\xa0' now
 
-                    # get errors
-                    if PSR_ALL[p]['err']:
-                        if self._include_errs:
+                    vidx = 0 # index of current value
+                    for p in self._query_params:
+                        if PSR_ALL[p]['format'] == 'f8':
                             if pvals[vidx] == '*':
-                                self._query_output[p+'_ERR'][idx] = None
+                                self._query_output[p][idx] = None # put NaN entry in numpy array
                             else:
-                                self._query_output[p+'_ERR'][idx] = float(pvals[vidx])
-                        vidx += 1
-
-                    # get references
-                    if PSR_ALL[p]['ref']:
-                        if self._include_refs:
-                            reftag = pvals[vidx]
-
-                            if reftag in self._refs:
-                                thisref = self._refs[reftag]
-                                refstring = '{authorlist}, {year}, {title}, {journal}, {volume}'
-                                refstring2 = re.sub(r'\s+', ' ', refstring.format(**thisref)) # remove any superfluous whitespace
-                                self._query_output[p+'_REF'][idx] = ','.join([a for a in refstring2.split(',') if a.strip()]) # remove any superfluous empty ',' seperated values
-
-                                if self._adsref and 'ADS URL' in thisref:
-                                    self._query_output[p+'_REFURL'][idx] = thisref['ADS URL'] # remove any superfluous whitespace
+                                self._query_output[p][idx] = float(pvals[vidx])
+                        elif PSR_ALL[p]['format'] == 'i4':
+                            if pvals[vidx] == '*':
+                                self._query_output[p][idx] = None
                             else:
-                                warnings.warn('Reference tag "{}" not found so omitting reference'.format(reftag), UserWarning)
+                                self._query_output[p][idx] = int(pvals[vidx])
+                        else:
+                            self._query_output[p][idx] = pvals[vidx]
                         vidx += 1
 
+                        # get errors
+                        if PSR_ALL[p]['err']:
+                            if self._include_errs:
+                                if pvals[vidx] == '*':
+                                    self._query_output[p+'_ERR'][idx] = None
+                                else:
+                                    self._query_output[p+'_ERR'][idx] = float(pvals[vidx])
+                            vidx += 1
+
+                        # get references
+                        if PSR_ALL[p]['ref']:
+                            if self._include_refs:
+                                reftag = pvals[vidx]
+
+                                if reftag in self._refs:
+                                    thisref = self._refs[reftag]
+                                    refstring = '{authorlist}, {year}, {title}, {journal}, {volume}'
+                                    refstring2 = re.sub(r'\s+', ' ', refstring.format(**thisref)) # remove any superfluous whitespace
+                                    self._query_output[p+'_REF'][idx] = ','.join([a for a in refstring2.split(',') if a.strip()]) # remove any superfluous empty ',' seperated values
+
+                                    if self._adsref and 'ADS URL' in thisref:
+                                        self._query_output[p+'_REFURL'][idx] = thisref['ADS URL'] # remove any superfluous whitespace
+                                else:
+                                    warnings.warn('Reference tag "{}" not found so omitting reference'.format(reftag), UserWarning)
+                            vidx += 1
+        else: # getting ephemeris
+            # split ephemerides for each requested pulsar (they are seperated by '@-----'...)
+            if qoutput:
+                psrephs = re.split(r'@-+', qoutput)
+
+                if len(psrephs) != len(self._psrs):
+                    raise Exception('Number of pulsar ephemerides returned is not the same as the number requested')
+
+                self._npulsars = len(self._psrs)
+
+                # query output in this case is a dictionary of ephemerides
+                for psr, psreph in zip(self._psrs, psrephs):
+                    self._query_output[psr] = psreph
+
         return self._query_output
 
     def get_dict(self):
@@ -390,14 +357,54 @@ def table(self):
                     psrtable.columns[p+'_ERR'].unit = PSR_ALL[p]['units']
 
         # add catalogue version to metadata
-        psrtable.meta['version'] = self.get_version()
+        psrtable.meta['version'] = self.get_version
         psrtable.meta['ATNF Pulsar Catalogue'] = ATNF_BASE_URL
 
         return psrtable
 
+    def get_pulsars(self):
+        """
+        Return the queried pulsars as a Pulsars object, to which a Pulsar
+        object is added for each pulsar in the query output. If neither 'JNAME'
+        nor 'NAME' was in the original query then 'JNAME' is added and the query
+        is performed again, as a name is required to create a Pulsar object
+        """
+
+        from .pulsar import Pulsar, Pulsars
+
+        # check if JNAME or NAME was queried
+        if 'JNAME' not in self._query_params and 'NAME' not in self._query_params:
+            self._query_params.append('JNAME') # add JNAME parameter
+
+            # re-do query
+            self._query_content = self.generate_query()
+
+            # parse the query with BeautifulSoup into a dictionary
+            self._query_output = self.parse_query()
+            nameattr = 'JNAME'
+        elif 'JNAME' in self._query_params:
+            nameattr = 'JNAME'
+        else:
+            nameattr = 'NAME'
+
+        self._pulsars = Pulsars()
+
+        # add pulsars one by one
+        for pidx, psr in enumerate(self._query_output[nameattr]):
+            attrs = {}
+            for key in self._query_output:
+                attrs[key] = self._query_output[key][pidx]
+
+            P = Pulsar(attrs[nameattr], version=self.get_version, **attrs)
+            self._pulsars.add_pulsar(P)
+
+        return self._pulsars
+
+    @property
     def get_version(self):
         """
-        Return a string with the ATNF version number, or the default giving in ATNF_VERSION if not found
+        Return a string with the ATNF version number, or the default giving in
+        ATNF_VERSION if not found
         """
 
         if self._atnf_version is None:
diff --git a/psrqpy/utils.py b/psrqpy/utils.py
index 277b1953..b2258da3 100644
--- a/psrqpy/utils.py
+++ b/psrqpy/utils.py
@@ -2,7 +2,7 @@
 Various useful functions
 """
 
-from __future__ import division
+from __future__ import print_function, division
 
 import warnings
 import re

From 5d4e9e9826a2bc757cfd77b4d338ffc507106387 Mon Sep 17 00:00:00 2001
From: Matthew Pitkin 
Date: Wed, 22 Nov 2017 12:35:46 +0000
Subject: [PATCH 2/6] search.py: use table rather than dict when creating
 Pulsars object

---
 psrqpy/search.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/psrqpy/search.py b/psrqpy/search.py
index 6aadbd27..eb6e4336 100644
--- a/psrqpy/search.py
+++ b/psrqpy/search.py
@@ -390,10 +390,11 @@ def get_pulsars(self):
         self._pulsars = Pulsars()
 
         # add pulsars one by one
-        for pidx, psr in enumerate(self._query_output[nameattr]):
+        psrtable = self.table()
+        for row in psrtable:
             attrs = {}
-            for key in self._query_output:
-                attrs[key] = self._query_output[key][pidx]
+            for key in psrtable.colnames:
+                attrs[key] = row[key]
 
             P = Pulsar(attrs[nameattr], version=self.get_version, **attrs)
             self._pulsars.add_pulsar(P)

From 56c50ee9bec881378493338bee21a16c0228837a Mon Sep 17 00:00:00 2001
From: Matthew Pitkin 
Date: Wed, 22 Nov 2017 12:36:16 +0000
Subject: [PATCH 3/6] setup.py: get rid of most PyLint warnings

---
 setup.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/setup.py b/setup.py
index 1d1f76db..60a40317 100644
--- a/setup.py
+++ b/setup.py
@@ -13,24 +13,24 @@
 except ImportError:
     from distutils.core import setup
 
-major, minor1, minor2, release, serial =  sys.version_info
+MAJOR, MINOR1, MINOR2, RELEASE, SERIAL = sys.version_info
 
-readfile_kwargs = {"encoding": "utf-8"} if major >= 3 else {}
+READFILE_KWARGS = {"encoding": "utf-8"} if MAJOR >= 3 else {}
 
 def readfile(filename):
-    with open(filename, **readfile_kwargs) as fp:
-        contents = fp.read()
-    return contents
+    with open(filename, **READFILE_KWARGS) as fp:
+        filecontents = fp.read()
+    return filecontents
 
-version_regex = re.compile("__version__ = \"(.*?)\"")
-contents = readfile(os.path.join(
+VERSION_REGEX = re.compile("__version__ = \"(.*?)\"")
+CONTENTS = readfile(os.path.join(
     os.path.dirname(os.path.abspath(__file__)),
     "psrqpy", "__init__.py"))
 
-version = version_regex.findall(contents)[0]
+VERSION = VERSION_REGEX.findall(CONTENTS)[0]
 
 setup(name="psrqpy",
-      version=version,
+      version=VERSION,
       author="Matthew Pitkin",
       author_email="matthew.pitkin@glasgow.ac.uk",
       packages=["psrqpy"],

From dab5d77b14efed638f3f9ffffa2d3e53ea0a82e9 Mon Sep 17 00:00:00 2001
From: Matthew Pitkin 
Date: Wed, 22 Nov 2017 12:37:55 +0000
Subject: [PATCH 4/6] utils.py: get rid of some PyLint warnings

---
 psrqpy/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/psrqpy/utils.py b/psrqpy/utils.py
index b2258da3..6189c082 100644
--- a/psrqpy/utils.py
+++ b/psrqpy/utils.py
@@ -12,10 +12,10 @@
 
 from bs4 import BeautifulSoup
 
-from .config import *
+from .config import ATNF_BASE_URL, ATNF_VERSION, ADS_URL
 
 # problematic references that are hard to parse
-prob_refs = ['bwck08']
+PROB_REFS = ['bwck08']
 
 def get_version():
     """
@@ -75,7 +75,7 @@ def get_references(useads=False):
             j = j + 1
             reftag = tr.b.text # the reference string is contained in a <b> tag
 
-            if reftag in prob_refs:
+            if reftag in PROB_REFS:
                 continue
 
             refs[reftag] = {}

From f097caeaa733d77d802413e837839e27180a6591 Mon Sep 17 00:00:00 2001
From: Matthew Pitkin 
Date: Wed, 22 Nov 2017 12:38:25 +0000
Subject: [PATCH 5/6] pulsar.py: move Pulsar class into its own file - refs #2

---
 psrqpy/pulsar.py | 386 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 386 insertions(+)
 create mode 100644 psrqpy/pulsar.py

diff --git a/psrqpy/pulsar.py b/psrqpy/pulsar.py
new file mode 100644
index 00000000..c11cebf7
--- /dev/null
+++ b/psrqpy/pulsar.py
@@ -0,0 +1,386 @@
+"""
+classes defining pulsar objects
+"""
+
+from __future__ import print_function, division
+
+import warnings
+import six
+
+from .config import PSR_ALL_PARS, PSR_ALL
+from .utils import get_version
+
+class Pulsar(object):
+    """
+    An object to hold a single pulsar
+    """
+
+    def __init__(self, psrname, version=None, **kwargs):
+        """
+        Set object attributes. A pulsar name is required.
+
+        :param psrname: a string containing a pulsar name
+        :param version: a string with the ATNF version to use for queries
+            (if not given, the value returned by get_version() is used)
+        :param kwargs: any further keyword arguments (expected to be pulsar
+            parameter names), each of which is set as an attribute of the object
+        """
+
+        self._name = psrname
+        self._version = version if version else get_version() # keep a supplied version
+        self._ephemeris = None
+
+        for key, value in six.iteritems(kwargs):
+            setattr(self, key, value)
+
+    def __repr__(self):
+        """
+        Give the stored ephemeris if there is one, otherwise the pulsar name
+        """
+
+        # guard clause: without an ephemeris fall back to the name
+        if not self.have_ephemeris():
+            return self.name
+        return self.get_ephemeris()
+
+    def keys(self):
+        """Return a list of the instance attribute names that are pulsar parameters or their errors."""
+        errnames = [par+'_ERR' for par in PSR_ALL_PARS] # names of the error attributes
+        allowed = PSR_ALL_PARS+errnames
+        return [attr for attr in self.__dict__ if attr in allowed]
+
+    def items(self):
+        """Return a list of the values (only) of the attributes that are pulsar parameters or their errors."""
+        errnames = [par+'_ERR' for par in PSR_ALL_PARS] # names of the error attributes
+        allowed = PSR_ALL_PARS+errnames
+        return [val for attr, val in six.iteritems(self.__dict__) if attr in allowed]
+
+    @property
+    def name(self):
+        """
+        Return the pulsar name that was given when the object was created
+        """
+
+        return self._name
+
+    def __getitem__(self, key):
+        """
+        Return the value of the parameter given by key. If it is not already held as an
+        attribute then query the ATNF catalogue for it and store what is returned.
+        """
+
+        ukey = key.upper()
+        pulsarname = self.name
+
+        # avoid getattr here: it falls back to __getattr__, which calls this method (endless recursion)
+        try:
+            param = object.__getattribute__(self, key)
+        except AttributeError:
+            param = self.__dict__.get(ukey) # try uppercase version
+
+        if param is None: # test against None, as zero is a valid stored value
+            if ukey[-4:] == '_ERR': # an error parameter
+                tkey = ukey[:-4] # parameter name without error
+            else:
+                tkey = ukey
+
+            if tkey not in PSR_ALL_PARS:
+                raise KeyError('"{}" is not a recognised pulsar parameter'.format(tkey))
+            # generate a query for the key and add it
+            try:
+                from .search import QueryATNF
+                q = QueryATNF(params=tkey, psrs=pulsarname, version=self._version, include_errs=True)
+            except IOError:
+                raise Exception('Problem querying ATNF catalogue')
+
+            if q.num_pulsars != 1:
+                raise Exception('Problem getting parameter "{}"'.format(tkey))
+
+            param = q.get_dict()[ukey][0] # required output parameter
+            setattr(self, ukey, param)    # set output parameter value
+
+            # set parameter value if an error value was requested
+            if PSR_ALL[tkey]['err']:
+                if tkey != ukey: # asking for error, so set actual value
+                    setattr(self, tkey, q.get_dict()[tkey][0]) # set parameter value
+                else: # asking for value, so set error
+                    setattr(self, tkey+'_ERR', q.get_dict()[tkey+'_ERR'][0]) # set error value
+
+        return param
+
+    def __getattr__(self, key):
+        """
+        Called when normal lookup fails: return the upper case attribute if set, otherwise
+        query for it via __getitem__ if it is a pulsar parameter, else raise AttributeError
+        """
+
+        ukey = key.upper()
+
+        # swapped from using hasattr to try...except... (see https://hynek.me/articles/hasattr/)
+        try:
+            return self.__dict__[ukey]
+        except KeyError:
+            if ukey not in PSR_ALL_PARS:
+                # raise rather than implicitly returning None, so that hasattr() works
+                raise AttributeError(key)
+
+        try:
+            return self[ukey]
+        except KeyError:
+            raise AttributeError(key)
+
+    def __dir__(self):
+        """
+        Return the names given by keys(). This is what is used for ipython's autocomplete
+        (otherwise the custom __getattr__ caused problems!)
+        """
+
+        return self.keys()
+
+    def have_ephemeris(self):
+        """
+        Check whether an ephemeris has already been stored for this pulsar
+
+        :return: True if the stored ephemeris is set and non-empty, otherwise False
+        """
+
+        # truthiness test, so None and the empty string both count as no ephemeris
+        return bool(self._ephemeris)
+
+    def get_ephemeris(self):
+        """
+        Return the ephemeris for the pulsar, querying the ATNF catalogue for it
+        if one is not already stored
+        """
+
+        # nothing to do if an ephemeris has already been set
+        if self.have_ephemeris():
+            return self._ephemeris
+
+        psrname = self.name
+
+        try:
+            from .search import QueryATNF
+            query = QueryATNF(psrs=psrname, version=self._version, include_errs=True, get_ephemeris=True)
+        except IOError:
+            raise Exception('Problem querying ATNF catalogue')
+
+        # the ephemeris is looked up in the query output using the pulsar name
+        ephemeris = query.get_dict()[psrname]
+        self.set_ephemeris(ephemeris) # store it and set attributes from its values
+        return ephemeris
+
+    def set_ephemeris(self, ephem=None):
+        """
+        Set attributes from the returned ephemeris
+
+        :param ephem: the ephemeris string (defaults to the stored ephemeris)
+        """
+
+        if not self._ephemeris and ephem:
+            self._ephemeris = ephem # set ephemeris if it doesn't already exist
+
+        # six.string_types is used as 'basestring' does not exist in Python 3
+        assert isinstance(self._ephemeris, six.string_types), 'Ephemeris must be a string'
+
+        # parse the given ephemeris, or the stored one if none was passed in
+        ephemvals = [ev.split() for ev in (ephem or self._ephemeris).split('\n') if len(ev.split()) > 1]
+
+        for ev in ephemvals:
+            par = ev[0].upper()
+            # check __dict__ rather than hasattr, which would invoke __getattr__ and a query
+            if par in PSR_ALL_PARS and par not in self.__dict__:
+                if PSR_ALL[par]['format'][0] == 'S': # string type
+                    setattr(self, par, ev[1])
+                elif PSR_ALL[par]['format'][0] == 'i': # int type
+                    try:
+                        setattr(self, par, int(ev[1]))
+                    except ValueError:
+                        warnings.warn('Could not set attribute for parameter "{}"'.format(par), UserWarning)
+                else: # float type
+                    try:
+                        setattr(self, par, float(ev[1]))
+                    except ValueError:
+                        warnings.warn('Could not set attribute for parameter "{}"'.format(par), UserWarning)
+
+                # get errors if given
+                if len(ev) == 3 and PSR_ALL[par]['err']:
+                    try:
+                        setattr(self, par+'_ERR', float(ev[2]))
+                    except ValueError:
+                        pass # third entry is not a number, so leave the error unset
+
+    def __str__(self):
+        """
+        String form of the pulsar: the ephemeris (from get_ephemeris) when one is already
+        held, otherwise just the pulsar's name
+        """
+
+        if not self.have_ephemeris():
+            return self.name
+        return self.get_ephemeris()
+
+    def __eq__(self, other):
+        """
+        Define '==' rich comparison method.
+
+        :param other: the object to compare against
+        :return: True only if other is a Pulsar with the same name as this one
+        """
+
+        if isinstance(other, Pulsar):
+            return bool(self.name == other.name)
+        # anything that is not a Pulsar is never equal
+        return False
+
+    def __ne__(self, other):
+        """
+        Define '!=' rich comparison method; always the negation of '==', so comparing
+        against something that is not a Pulsar gives True rather than raising an error.
+
+        :param other: the object to compare against
+        :return: False only if other is a Pulsar with the same name as this one
+        """
+
+        # defer to __eq__ so that the two comparisons can never disagree
+        return not self.__eq__(other)
+
+    def __copy__(self):
+        """
+        Define how the object should be copied with copy: a new object of the same type is
+        built from this one's name, version and attributes (as given by keys() and items())
+        """
+
+        attrs = dict(zip(self.keys(), self.items()))
+        newpsr = type(self)(self.name, version=self._version, **attrs)
+        if self.have_ephemeris(): # set_ephemeris asserts that there is an ephemeris to set
+            newpsr.set_ephemeris(ephem=self._ephemeris)
+
+        return newpsr
+
+
+class Pulsars(object):
+    """
+    Class to contain multiple Pulsar objects, held in a dictionary keyed on the pulsar names.
+    Iterating over the object gives the pulsar names; indexing by a name gives the Pulsar.
+    """
+
+    def __init__(self):
+        self._num_pulsars = 0 # number of pulsars in the object
+        self._psrs = {}       # dictionary of Pulsar objects in the object, keyed to the name
+        self._got_ephemerides = False # set whether ephemerides have been got for all pulsars
+        self._version = None  # ATNF catalogue version (set in get_ephemerides)
+
+    def __iter__(self):
+        """
+        Iterator for the class (yields the pulsar names)
+        """
+        for psr in self._psrs:
+            yield psr
+
+    def __getitem__(self, key):
+        """
+        Define getitem to get a Pulsar object from the _psrs dictionary
+
+        :return: the Pulsar object keyed on key, or None if there is no such pulsar
+        """
+
+        return self._psrs.get(key)
+
+    def __len__(self):
+        """
+        Define len method as the number of pulsars in the object
+        """
+
+        return self._num_pulsars
+
+    def add_pulsar(self, psr):
+        """
+        Add a pulsar into the object. A pulsar is not added if one with the same name is
+        already present.
+
+        :param psr: a Pulsar object, or Pulsars object
+        """
+
+        assert isinstance(psr, Pulsar) or isinstance(psr, Pulsars), 'psr is not a Pulsar type'
+
+        # gather the (name, Pulsar) pairs to add, whichever type was passed
+        if isinstance(psr, Pulsar):
+            newpsrs = [(psr.name, psr)]
+        else:
+            newpsrs = [(psrname, psr[psrname]) for psrname in psr]
+
+        for psrname, newpsr in newpsrs:
+            if psrname not in self._psrs: # don't add duplicates
+                self._psrs[psrname] = newpsr
+                self._num_pulsars += 1
+
+                # a new pulsar without an ephemeris means we no longer have them all
+                if not newpsr.have_ephemeris():
+                    self._got_ephemerides = False
+
+    def remove_pulsar(self, psrname):
+        """
+        Remove a pulsar from the object. Only do one at a time.
+
+        :param psrname: a string with the name of a pulsar
+        """
+
+        assert isinstance(psrname, basestring), 'psrname is not a string'
+
+        if psrname in self._psrs:
+            del self._psrs[psrname]
+            self._num_pulsars -= 1 # keep len() in step with the dictionary
+
+    def pop(self, psrname):
+        """
+        Remove a pulsar from the object and return the removed pulsar.
+
+        :param psrname: a string with the name of a pulsar
+        :return: the removed Pulsar object, or None if no pulsar has that name
+        """
+        assert isinstance(psrname, basestring), 'psrname is not a string'
+
+        removed = self._psrs.pop(psrname, None)
+        if removed is not None:
+            self._num_pulsars -= 1 # keep len() in step with the dictionary
+        return removed
+
+    def have_ephemerides(self):
+        """
+        Check whether we have ephemerides for all pulsars
+        """
+
+        return bool(self._got_ephemerides)
+
+    def get_ephemerides(self, version=None):
+        """
+        Query the ATNF to get the ephemerides for all pulsars in the object
+
+        :param version: the ATNF catalogue version to query; if not given the version
+            returned by get_version() is used
+        :return: a list of the ephemerides of all the pulsars
+        """
+
+        if not self.have_ephemerides():
+            # use the requested catalogue version, only looking one up if none was given
+            self._version = version if version else get_version()
+
+            psrnames = list(self._psrs) # list of pulsar names
+
+            try:
+                from .search import QueryATNF
+                q = QueryATNF(psrs=psrnames, version=self._version, include_errs=True, get_ephemeris=True)
+            except IOError:
+                raise Exception('Problem querying ATNF catalogue')
+
+            ephems = q.get_dict()
+            for pulsarname in psrnames:
+                if not self._psrs[pulsarname].have_ephemeris():
+                    # set any parameters that can be set from the returned ephemeris
+                    self._psrs[pulsarname].set_ephemeris(ephems[pulsarname])
+
+            self._got_ephemerides = True
+
+        # return list of ephemerides
+        return [self._psrs[psr].get_ephemeris() for psr in self._psrs]

From a12b366a27fee6b57c089727d50e2eef28b14e81 Mon Sep 17 00:00:00 2001
From: Matthew Pitkin 
Date: Wed, 22 Nov 2017 13:06:49 +0000
Subject: [PATCH 6/6] Minor changes

---
 psrqpy/pulsar.py | 10 ++++++++++
 psrqpy/search.py | 48 ++++++++++++++++++++++++++----------------------
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/psrqpy/pulsar.py b/psrqpy/pulsar.py
index c11cebf7..4cee1ab3 100644
--- a/psrqpy/pulsar.py
+++ b/psrqpy/pulsar.py
@@ -384,3 +384,13 @@ def get_ephemerides(self, version=None):
 
         # return list of ephemerides
         return [self._psrs[psr].get_ephemeris() for psr in self._psrs]
+
+    def __str__(self):
+        """
+        String form of the object: the ephemerides of the pulsars joined by newlines if
+        have_ephemerides() is true, otherwise the newline-separated names of the pulsars
+        """
+
+        if not self.have_ephemerides():
+            return '\n'.join(psr.name for psr in self._psrs.values())
+        return '\n'.join(self.get_ephemerides())
diff --git a/psrqpy/search.py b/psrqpy/search.py
index eb6e4336..270d25c0 100644
--- a/psrqpy/search.py
+++ b/psrqpy/search.py
@@ -66,6 +66,8 @@ def __init__(self, params=None, condition=None, psrtype=None, assoc=None, bincom
         self._query_output = None
         self._get_ephemeris = get_ephemeris
 
+        self._pulsars = None # gets set to a Pulsars object by get_pulsars()
+
         # check parameters are allowed values
         if isinstance(params, list):
             if len(params) == 0:
@@ -235,6 +237,7 @@ def parse_query(self, requestcontent=''):
         qoutput = pretags[-1].text
         self._query_output = OrderedDict()
         self._npulsars = 0
+        self._pulsars = None # reset to None in case a previous query had already been performed
 
         if not self._get_ephemeris: # not getting ephemeris values
             # put the data in an ordered dictionary dictionary
@@ -370,34 +373,35 @@ def get_pulsars(self):
         for a Pulsar object
         """
 
-        from .pulsar import Pulsar, Pulsars
+        if not self._pulsars:
+            from .pulsar import Pulsar, Pulsars
 
-        # check if JNAME or NAME was queried
-        if 'JNAME' not in self._query_params and 'NAME' not in self._query_params:
-            self._query_params.append('JNAME') # add JNAME parameter
+            # check if JNAME or NAME was queried
+            if 'JNAME' not in self._query_params and 'NAME' not in self._query_params:
+                self._query_params.append('JNAME') # add JNAME parameter
 
-            # re-do query
-            self._query_content = self.generate_query()
+                # re-do query
+                self._query_content = self.generate_query()
 
-            # parse the query with BeautifulSoup into a dictionary
-            self._query_output = self.parse_query()
-            nameattr = 'JNAME'
-        elif 'JNAME' in self._query_params:
-            nameattr = 'JNAME'
-        else:
-            nameattr = 'NAME'
+                # parse the query with BeautifulSoup into a dictionary
+                self._query_output = self.parse_query()
+                nameattr = 'JNAME'
+            elif 'JNAME' in self._query_params:
+                nameattr = 'JNAME'
+            else:
+                nameattr = 'NAME'
 
-        self._pulsars = Pulsars()
+            self._pulsars = Pulsars()
 
-        # add pulsars one by one
-        psrtable = self.table()
-        for row in psrtable:
-            attrs = {}
-            for key in psrtable.colnames:
-                attrs[key] = row[key]
+            # add pulsars one by one
+            psrtable = self.table()
+            for row in psrtable:
+                attrs = {}
+                for key in psrtable.colnames:
+                    attrs[key] = row[key]
 
-            P = Pulsar(attrs[nameattr], version=self.get_version, **attrs)
-            self._pulsars.add_pulsar(P)
+                P = Pulsar(attrs[nameattr], version=self.get_version, **attrs)
+                self._pulsars.add_pulsar(P)
 
         return self._pulsars