diff --git a/psrqpy/__init__.py b/psrqpy/__init__.py index 0d4d6e1a..cb9a828c 100644 --- a/psrqpy/__init__.py +++ b/psrqpy/__init__.py @@ -2,7 +2,8 @@ """ A Python tool for interacting with the ATNF pulsar catalogue """ -__version__ = "0.1.2" +__version__ = "0.2.1" -from .search import QueryATNF, Pulsar -from .utils import * \ No newline at end of file +from .search import QueryATNF +from .pulsar import Pulsar, Pulsars +from .utils import * diff --git a/psrqpy/config.py b/psrqpy/config.py index 077f2d76..49caa884 100644 --- a/psrqpy/config.py +++ b/psrqpy/config.py @@ -8,13 +8,13 @@ ATNF_BASE_URL = r'http://www.atnf.csiro.au/people/pulsar/psrcat/' ATNF_URL = ATNF_BASE_URL + r'proc_form.php?version={version}' -PARAMS_QUERY = '{params}' -USERDEFINED_QUERY = '&startUserDefined=true&c1_val=&c2_val=&c3_val=&c4_val=' -CONDITION_QUERY = '&condition={condition}' -PSRNAMES_QUERY = '&pulsar_names={psrnames}' -SORT_QUERY = '&sort_attr={sortattr}&sort_order={sortorder}' -EPHEMERIS_QUERY = '&submit_ephemeris={getephemeris}' -QUERY_FLUFF = '&ephemeris=long&coords_unit=raj%2Fdecj&radius=&coords_1=&coords_2=&style=Long+with+errors&no_value=*&nohead=nohead&state=query&table_bottom.x=30&table_bottom.y=22' +PARAMS_QUERY = r'{params}' +USERDEFINED_QUERY = r'&startUserDefined=true&c1_val=&c2_val=&c3_val=&c4_val=' +CONDITION_QUERY = r'&condition={condition}' +PSRNAMES_QUERY = r'&pulsar_names={psrnames}' +SORT_QUERY = r'&sort_attr={sortattr}&sort_order={sortorder}' +EPHEMERIS_QUERY = r'&submit_ephemeris={getephemeris}' +QUERY_FLUFF = r'&ephemeris=long&coords_unit=raj%2Fdecj&radius=&coords_1=&coords_2=&style=Long+with+errors&no_value=*&nohead=nohead&state=query&table_bottom.x=30&table_bottom.y=22' QUERY_URL = ATNF_URL + PARAMS_QUERY + USERDEFINED_QUERY + SORT_QUERY + CONDITION_QUERY + PSRNAMES_QUERY + EPHEMERIS_QUERY + QUERY_FLUFF @@ -33,7 +33,7 @@ 'PMRA': {'ref': True, 'err': True, 'units': 'mas/yr', 'format': 'f8'}, # Proper motion in the right ascension direction (mas/yr) 'PMDEC': {'ref': True, 'err': True, 'units': 'mas/yr', 'format': 'f8'}, # Proper motion in declination (mas/yr) 'PX': {'ref': True, 'err': True, 'units': 'mas', 'format': 'f8'}, # Annual parallax (mas) - 'POSEPOCH': {'ref': True, 'err': False, 'units': 'd', 'format': 'f8'}, # Epoch of position, defaults to PEpoch (MJD) + 'POSEPOCH': {'ref': True, 'err': False, 'units': 'd', 'format': 'f8'}, # Epoch of position, defaults to PEpoch (MJD) 'ELONG': {'ref': True, 'err': True, 'units': 'deg', 'format': 'f8'}, # Ecliptic longitude (degrees) 'ELAT': {'ref': True, 'err': True, 'units': 'deg', 'format': 'f8'}, # Ecliptic latitude (degrees) 'PMELONG': {'ref': True, 'err': True, 'units': 'mas/yr', 'format': 'f8'}, # Proper motion in the ecliptic longitude direction (mas/yr) diff --git a/psrqpy/pulsar.py b/psrqpy/pulsar.py new file mode 100644 index 00000000..4cee1ab3 --- /dev/null +++ b/psrqpy/pulsar.py @@ -0,0 +1,396 @@ +""" +classes defining pulsar objects +""" + +from __future__ import print_function, division + +import warnings +import six + +from .config import PSR_ALL_PARS, PSR_ALL +from .utils import get_version + +class Pulsar(object): + """ + An object to hold a single pulsar + """ + + def __init__(self, psrname, version=None, **kwargs): + """ + Set object attributes. A pulsar name is required. + + :param psrname: a string containing a pulsar name + :param version: a string with the ATNF version to use for queries + + Additional keyword arguments are any of the valid queriable pulsar + parameters. + """ + + self._name = psrname + self._version = version if not version else get_version() + self._ephemeris = None + + for key, value in six.iteritems(kwargs): + setattr(self, key, value) + + def __repr__(self): + """ + Define the method return by repr + """ + + if self.have_ephemeris(): + return self.get_ephemeris() + else: + return self.name + + def keys(self): + """ + Return a list of the class attribute names for allowed pulsar parameters + """ + return [key for key in self.__dict__ if key in PSR_ALL_PARS+[par+'_ERR' for par in PSR_ALL_PARS]] + + def items(self): + """ + Return a list of the class attribute values + """ + return [value for key, value in six.iteritems(self.__dict__) if key in PSR_ALL_PARS+[par+'_ERR' for par in PSR_ALL_PARS]] + + @property + def name(self): + """ + Return the pulsar name + """ + + return self._name + + def __getitem__(self, key): + """ + If the class has a attribute given by the key then return it, otherwise generate a + query for that key to set it + """ + + ukey = key.upper() + pulsarname = self.name + + param = getattr(self, key, None) # try to get value, and default to None if not present + + if not param: + param = getattr(self, ukey, None) # try uppercase version + + if not param: + if ukey[-4:] == '_ERR': # an error parameter + tkey = ukey[:-4] # parameter name without error + else: + tkey = ukey + + if tkey not in PSR_ALL_PARS: + raise KeyError('"{}" is not a recognised pulsar parameter'.format(tkey)) + else: + # generate a query for the key and add it + try: + from .search import QueryATNF + q = QueryATNF(params=tkey, psrs=pulsarname, version=self._version, include_errs=True) + except IOError: + raise Exception('Problem querying ATNF catalogue') + + if q.num_pulsars != 1: + raise Exception('Problem getting parameter "{}"'.format(tkey)) + + param = q.get_dict()[ukey][0] # required output parameter + setattr(self, ukey, param) # set output parameter value + + # set parameter value if an error value was requested + if PSR_ALL[tkey]['err']: + if tkey != ukey: # asking for error, so set actual value + setattr(self, tkey, q.get_dict()[tkey][0]) # set parameter value + else: # asking for value, so set error + setattr(self, tkey+'_ERR', q.get_dict()[tkey+'_ERR'][0]) # set error value + + return param + + def __getattr__(self, key): + """ + If the class has a attribute given by the key then return it, otherwise generate a + query for that key to set it (use the already defined __getitem__) + """ + + ukey = key.upper() + + # swapped from using hasattr to try...except... (see https://hynek.me/articles/hasattr/) + try: + return self.__dict__[key] + except KeyError: + try: + return self.__dict__[ukey] + except KeyError: + try: + if ukey in PSR_ALL_PARS: + return self[ukey] + except KeyError: + raise AttributeError(key) + + def __dir__(self): + """ + Set this to what ipython is returned for ipython's autocomplete (otherwise the custom + __getattr__ caused problems!) + """ + + return self.keys() + + def have_ephemeris(self): + """ + Check whether we already have an ephemeris + """ + + if self._ephemeris: + return True + else: + return False + + def get_ephemeris(self): + """ + Query the ATNF to get the ephemeris for the given pulsar + """ + + ephem = self._ephemeris + + if not self.have_ephemeris(): + pulsarname = self.name + + try: + from .search import QueryATNF + q = QueryATNF(psrs=pulsarname, version=self._version, include_errs=True, get_ephemeris=True) + except IOError: + raise Exception('Problem querying ATNF catalogue') + + # set any parameters that can be set from the returned ephemeris + ephem = q.get_dict()[pulsarname] + + self.set_ephemeris(ephem) + + return ephem + + def set_ephemeris(self, ephem=None): + """ + Set attributes from the returned ephemeris + + :param ephem: the ephemeris string + """ + + if not self._ephemeris and ephem: + self._ephemeris = ephem # set ephemeris if it doesn't already exist + + assert isinstance(self._ephemeris, basestring), 'Ephemeris must be a string' + + # get ephemeris values + ephemvals = [ev.split() for ev in ephem.split('\n') if len(ev.split()) > 1] + + print(ephemvals) + + for ev in ephemvals: + if ev[0].upper() in PSR_ALL_PARS and not hasattr(self, ev[0].upper()): + if PSR_ALL[ev[0].upper()]['format'][0] == 'S': # string type + setattr(self, ev[0].upper(), ev[1]) + elif PSR_ALL[ev[0].upper()]['format'][0] == 'i': # int type + try: + setattr(self, ev[0].upper(), int(ev[1])) + except ValueError: + warnings.warn('Could not set attribute for parameter "{}"'.format(ev[0].upper()), UserWarning) + else: # float type + try: + setattr(self, ev[0].upper(), float(ev[1])) + except ValueError: + warnings.warn('Could not set attribute for parameter "{}"'.format(ev[0].upper()), UserWarning) + + # get errors if given + if len(ev) == 3: + if PSR_ALL[ev[0].upper()]['err']: + try: + setattr(self, ev[0].upper()+'_ERR', float(ev[2])) + except ValueError: + pass + + def __str__(self): + """ + Define the string method as a call to get_ephemeris and output the ephemeris + """ + + if self.have_ephemeris(): + return self.get_ephemeris() + else: + return self.name + + def __eq__(self, other): + """ + Define '==' rich comparison methods. True if pulsars have the same name. + """ + + if not isinstance(other, Pulsar): + return False + else: + if self.name == other.name: + return True + else: + return False + + def __ne__(self, other): + """ + Define '!=' rich comparison methods. False if pulsars have the same name. + """ + + assert isinstance(other, Pulsar), "You are not comparing two Pulsar types!" + + if self.name == other.name: + return False + else: + return True + + def __copy__(self): + """ + Define how the object should be copied with copy + """ + + attrs = {} + for key, value in zip(self.keys(), self.items()): + attrs[key] = value + newpsr = type(self)(self.name, version=self._version, **attrs) + newpsr.set_ephemeris(ephem=self._ephemeris) + + return newpsr + + +class Pulsars(object): + """ + Class to contain multiple Pulsar objects + """ + + def __init__(self): + self._num_pulsars = 0 # number of pulsars in the object + self._psrs = {} # dictionary of Pulsar objects in the object, keyed to the name + self._got_ephemerides = False # set whether ephemerides have been got for all pulsars + self._version = None + + def __iter__(self): + """ + Iterator for the class + """ + for psr in self._psrs: + yield psr + + def __getitem__(self, key): + """ + Define getitem to get a Pulsar object from the _psrs dictionary + """ + + if key in self._psrs.keys(): + return self._psrs[key] + else: + return None + + def __len__(self): + """ + Define len method as the number of pulsars in the object + """ + + return self._num_pulsars + + def add_pulsar(self, psr): + """ + Add a pulsar into the object. + + :param psr: a Pulsar object, or Pulsars object + """ + + assert isinstance(psr, Pulsar) or isinstance(psr, Pulsars), 'psr is not a Pulsar type' + + if isinstance(psr, Pulsar): + if psr.name not in self._psrs: + self._num_pulsars += 1 # add one pulsar + self._psrs[psr.name] = psr + + # check if the added pulsar already has an ephemeris + if not psr.have_ephemeris(): + self._got_ephemerides = False + else: + # check for duplicates + for psrname in psrs: + if psrname not in self._psrs.keys(): # don't add duplicates + self._psrs[psrname] = psrs[psrname] + self._num_pulsars += 1 + + # check whether any pulsars already have ephemerides + if not psrs[psrname].have_ephemeris() and self._got_ephemerides: + self._got_ephemerides = False + + def remove_pulsar(self, psrname): + """ + Remove a pulsar from the object. Only do one at a time. + + :param psrname: a string with the name of a pulsar + """ + + assert isinstance(psrname, basestring), 'psrname is not a string' + + if psrname in self._psrs: + del self._psrs[psrname] + + def pop(self, psrname): + """ + Remove a pulsar from the object and return the removed pulsar. + + :param psrname: a string with the name of a pulsar + """ + assert isinstance(psrname, basestring), 'psrname is not a string' + + if psrname in self._psrs: + return self._psrs.pop(psrname) + else: + return None + + def have_ephemerides(self): + """ + Check whether we have ephemerides for all pulsars + """ + + if self._got_ephemerides: + return True + else: + return False + + def get_ephemerides(self, version=None): + """ + Query the ATNF to get the ephemerides for all pulsars in the object + """ + + if not self.have_ephemerides(): + self._version = version if not version else get_version() # get version of the ATNF catalogue to use + + psrnames = self._psrs.keys() # list of pulsar names + + try: + from .search import QueryATNF + q = QueryATNF(psrs=psrnames, version=self._version, include_errs=True, get_ephemeris=True) + except IOError: + raise Exception('Problem querying ATNF catalogue') + + for pulsarname in psrnames: + if not self._psrs[pulsarname].have_ephemeris(): + # set any parameters that can be set from the returned ephemeris + ephem = q.get_dict()[pulsarname] + + self._psrs[pulsarname].set_ephemeris(ephem) + + self._got_ephemerides = True + + # return list of ephemerides + return [self._psrs[psr].get_ephemeris() for psr in self._psrs] + + def __str__(self): + """ + Define string method + """ + + if self.have_ephemerides(): + return '\n'.join(self.get_ephemerides()) + else: + return '\n'.join([self._psrs[psr].name for psr in self._psrs]) diff --git a/psrqpy/search.py b/psrqpy/search.py index 0cf9c250..270d25c0 100644 --- a/psrqpy/search.py +++ b/psrqpy/search.py @@ -2,7 +2,7 @@ Search query """ -from __future__ import print_function +from __future__ import print_function, division import warnings from collections import OrderedDict @@ -16,85 +16,6 @@ from .config import * from .utils import * - -class Pulsar(object): - """ - An object to hold a single pulsar - """ - - def __init__(self, psrname, version=None, **kwargs): - """ - Set object attributes from kwargs - """ - - self._name = psrname - self._raw = kwargs - self._version = version if not version else get_version() - - for key, value in six.iteritems(kwargs): - setattr(self, key, value) - - def keys(self): - return self._raw.keys() - - def items(self): - return self._raw.items() - - @property - def name(self): - """ - Return the pulsar name - """ - - return self._name - - def __getitem__(self, key): - """ - If the class has a attribute given by the key then return it, otherwise generate a - query for that key to set it - """ - - ukey = key.upper() - pulsarname = self.name - - if hasattr(self, ukey): - param = getattr(self, ukey) - else: - if ukey[-4:] == '_ERR': # an error parameter - tkey = ukey[:-4] # parameter name without error - else: - tkey = ukey - - if tkey not in PSR_ALL_PARS: - raise Exception('"{}" is not a recognised pulsar parameter'.format(tkey)) - else: - # generate a query for the key and add it - try: - q = QueryATNF(params=tkey, psrs=pulsarname, version=self._version, include_errs=True) - except IOError: - raise Exception('Problem querying ATNF catalogue') - - if q.num_pulsars != 1: - raise Exception('Problem getting parameter "{}"'.format(tkey)) - - param = q.get_dict()[ukey][0] # required output parameter - setattr(self, ukey, param) # set output parameter value - - # set parameter value if an error value was requested - if PSR_ALL[tkey]['err']: - if tkey != ukey: # asking for error, so set actual value - setattr(self, tkey, q.get_dict()[tkey][0]) # set parameter value - else: # asking for value, so set error - setattr(self, tkey+'_ERR', q.get_dict()[tkey+'_ERR'][0]) # set error value - - return param - - def get_ephemeris(self): - """ - Query the ATNF to get the ephemeris for the given pulsar - """ - - class QueryATNF(object): """ Class to generate a query of the ATNF catalogue @@ -102,7 +23,8 @@ class QueryATNF(object): def __init__(self, params=None, condition=None, psrtype=None, assoc=None, bincomp=None, exactmatch=False, sort_attr='jname', sort_order='asc', psrs=None, - include_errs=True, include_refs=False, version=None, adsref=False, **kwargs): + include_errs=True, include_refs=False, get_ephemeris=False, version=None, + adsref=False): """ Set up and perform the query of the ATNF catalogue @@ -117,6 +39,7 @@ def __init__(self, params=None, condition=None, psrtype=None, assoc=None, bincom :param psrs: a list of pulsar names to get the information for :param include_errs: boolean to set whether to include parameter errors :param include_refs: boolean to set whether to include parameter references + :param get_ephemeris: boolean to set whether to get pulsar ephemerides (only works if `psrs` have been specified) :param version: a string with the ATNF version to use (this will default to the current version if set as None) :param adsref: boolean to set whether the python 'ads' module can be used to get reference information """ @@ -125,7 +48,7 @@ def __init__(self, params=None, condition=None, psrtype=None, assoc=None, bincom self._include_errs = include_errs self._include_refs = include_refs self._atnf_version = version - self._atnf_version = self.get_version() # if no version is set this will return the current or default value + self._atnf_version = self.get_version # if no version is set this will return the current or default value self._adsref = adsref # check sort order is either 'asc' or 'desc' (or some synonyms) @@ -141,6 +64,9 @@ def __init__(self, params=None, condition=None, psrtype=None, assoc=None, bincom self._refs = None # set of pulsar references self._query_output = None + self._get_ephemeris = get_ephemeris + + self._pulsars = None # gets set to a Pulsars object by get_pulsars() # check parameters are allowed values if isinstance(params, list): @@ -156,15 +82,18 @@ def __init__(self, params=None, condition=None, psrtype=None, assoc=None, bincom if isinstance(params, basestring): self._query_params = [params.upper()] # make sure parameter is all upper case else: - raise Exception("'params' must be a list or string") + if self._psrs and self._get_ephemeris: # if getting ephemerides then param can be None + self._query_params = [] + else: + raise Exception("'params' must be a list or string") for p in list(self._query_params): if p not in PSR_ALL_PARS: warnings.warn("Parameter {} not recognised".format(p), UserWarning) self._query_params.remove(p) - if len(p) == 0: + if len(self._query_params) == 0 and (not self._psrs or not self._get_ephemeris): raise Exception("No parameters left in list") - + # set conditions self._conditions_query = self.parse_conditions(condition, psrtype=psrtype, assoc=assoc, bincomp=bincomp, exactmatch=exactmatch) @@ -178,7 +107,7 @@ def __init__(self, params=None, condition=None, psrtype=None, assoc=None, bincom # parse the query with BeautifulSoup into a dictionary self._query_output = self.parse_query() - def generate_query(self, version='', params=None, condition='', sortorder='asc', sortattr='JName', psrnames=None, getephemeris=False): + def generate_query(self, version='', params=None, condition='', sortorder='asc', sortattr='JName', psrnames=None, **kwargs): """ Generate a query URL and return the content of the request from that URL. If set the class attributes are used for generating the query, otherwise arguments can be given. @@ -189,8 +118,15 @@ def generate_query(self, version='', params=None, condition='', sortorder='asc', :param sortorder: the order for sorting the results :param sortattr: the attribute on which to perform the sorting :param psrnames: a list of pulsar names to get + :param get_ephemeris: boolean stating whether to get pulsar ephemerides rather than a table of parameter values (only works if pulsar names are given) """ + # get_ephemeris is the only keyword argument at the moment + for key, value in six.iteritems(kwargs): + if key == 'get_ephemeris': + if isinstance(value, bool): + self._get_ephemeris = value # overwrite the pre-set class _get_ephemeris value + query_dict = {} self._atnf_version = self._atnf_version if not version else version query_dict['version'] = self._atnf_version @@ -243,10 +179,13 @@ def generate_query(self, version='', params=None, condition='', sortorder='asc', query_dict['psrnames'] = qpulsars # get pulsar ephemeris rather than table (parsing of this is not implemented yet) - if getephemeris: - query_dict['getephemeris'] = 'Get+Ephemeris' - else: - query_dict['getephemeris'] = '' + query_dict['getephemeris'] = '' + if self._get_ephemeris: + if self._psrs is not None: + query_dict['getephemeris'] = 'Get+Ephemeris' + else: + warnings.warn('Cannot get ephemeris if no pulsar names are provided. No ephemerides will be returned.', UserWarning) + self._get_ephemeris = False # generate query URL self._query_url = QUERY_URL.format(**query_dict) @@ -278,82 +217,113 @@ def parse_query(self, requestcontent=''): pretags = psrsoup.find_all('pre') # get any
html tags # check for any warnings generated by the request + self._bad_pulsars = [] # any requested pulsars that were not found for pt in pretags: if 'WARNING' in pt.text: warnings.warn('Request generated warning: "{}"'.format(pt.text), UserWarning) - # actual table should be in the finaltag - qoutput = pretags[-1].text + # check if warning was for a specific requested pulsar: given by warning string "WARNING: PSR XXXXXXX not in catalogue" + if 'PSR' in pt.text: + pat = r'WARNING: PSR (?P\S+) not in catalogue' + wvalues = re.search(pat, pt.text).groupdict() - # put the data in an ordered dictionary dictionary + if 'psr' in wvalues: + self._bad_pulsars.append(wvalues['psr']) + # remove any pulsars that weren't found + if wvalues['psr'] in self._psrs: + del self._psrs[wvalues['psr']] + + # actual table or ephemeris values should be in the final tag + qoutput = pretags[-1].text self._query_output = OrderedDict() self._npulsars = 0 - if qoutput: - plist = qoutput.strip().split('\n') # split output string + self._pulsars = None # reset to None in case a previous query had already been performed - self._npulsars = len(plist) + if not self._get_ephemeris: # not getting ephemeris values + # put the data in an ordered dictionary dictionary + if qoutput: + plist = qoutput.strip().split('\n') # split output string - for p in self._query_params: - if p in PSR_ALL_PARS: - self._query_output[p] = np.zeros(self._npulsars, dtype=PSR_ALL[p]['format']) + if self._psrs: + if len(self._psrs) != len(plist): + raise Exception('Number of pulsars returned is not the same as the number requested') - if PSR_ALL[p]['err'] and self._include_errs: - self._query_output[p+'_ERR'] = np.zeros(self._npulsars, dtype='f8') # error can only be floats + self._npulsars = len(plist) - if PSR_ALL[p]['ref'] and self._include_refs: - self._query_output[p+'_REF'] = np.zeros(self._npulsars, dtype='S1024') + for p in self._query_params: + if p in PSR_ALL_PARS: + self._query_output[p] = np.zeros(self._npulsars, dtype=PSR_ALL[p]['format']) - if self._adsref: # also add reference URL for NASA ADS - self._query_output[p+'_REFURL'] = np.zeros(self._npulsars, dtype='S1024') + if PSR_ALL[p]['err'] and self._include_errs: + self._query_output[p+'_ERR'] = np.zeros(self._npulsars, dtype='f8') # error can only be floats - for idx, line in enumerate(plist): - # split the line on whitespace or \xa0 using re (if just using split it ignores \xa0, - # which may be present for, e.g., empty reference fields, and results in the wrong - # number of line entries, also ignore the first entry as it is always in index - pvals = [lv.strip() for lv in re.split(r'\s+| \xa0 | \D\xa0', line)][1:] # strip removes '\xa0' now + if PSR_ALL[p]['ref'] and self._include_refs: + self._query_output[p+'_REF'] = np.zeros(self._npulsars, dtype='S1024') - vidx = 0 # index of current value - for p in self._query_params: - if PSR_ALL[p]['format'] == 'f8': - if pvals[vidx] == '*': - self._query_output[p][idx] = None # put NaN entry in numpy array - else: - self._query_output[p][idx] = float(pvals[vidx]) - elif PSR_ALL[p]['format'] == 'i4': - if pvals[vidx] == '*': - self._query_output[p][idx] = None - else: - self._query_output[p][idx] = int(pvals[vidx]) - else: - self._query_output[p][idx] = pvals[vidx] - vidx += 1 + if self._adsref: # also add reference URL for NASA ADS + self._query_output[p+'_REFURL'] = np.zeros(self._npulsars, dtype='S1024') + + for idx, line in enumerate(plist): + # split the line on whitespace or \xa0 using re (if just using split it ignores \xa0, + # which may be present for, e.g., empty reference fields, and results in the wrong + # number of line entries, also ignore the first entry as it is always in index + pvals = [lv.strip() for lv in re.split(r'\s+| \xa0 | \D\xa0', line)][1:] # strip removes '\xa0' now - # get errors - if PSR_ALL[p]['err']: - if self._include_errs: + vidx = 0 # index of current value + for p in self._query_params: + if PSR_ALL[p]['format'] == 'f8': if pvals[vidx] == '*': - self._query_output[p+'_ERR'][idx] = None + self._query_output[p][idx] = None # put NaN entry in numpy array else: - self._query_output[p+'_ERR'][idx] = float(pvals[vidx]) - vidx += 1 - - # get references - if PSR_ALL[p]['ref']: - if self._include_refs: - reftag = pvals[vidx] - - if reftag in self._refs: - thisref = self._refs[reftag] - refstring = '{authorlist}, {year}, {title}, {journal}, {volume}' - refstring2 = re.sub(r'\s+', ' ', refstring.format(**thisref)) # remove any superfluous whitespace - self._query_output[p+'_REF'][idx] = ','.join([a for a in refstring2.split(',') if a.strip()]) # remove any superfluous empty ',' seperated values - - if self._adsref and 'ADS URL' in thisref: - self._query_output[p+'_REFURL'][idx] = thisref['ADS URL'] # remove any superfluous whitespace + self._query_output[p][idx] = float(pvals[vidx]) + elif PSR_ALL[p]['format'] == 'i4': + if pvals[vidx] == '*': + self._query_output[p][idx] = None else: - warnings.warn('Reference tag "{}" not found so omitting reference'.format(reftag), UserWarning) + self._query_output[p][idx] = int(pvals[vidx]) + else: + self._query_output[p][idx] = pvals[vidx] vidx += 1 + # get errors + if PSR_ALL[p]['err']: + if self._include_errs: + if pvals[vidx] == '*': + self._query_output[p+'_ERR'][idx] = None + else: + self._query_output[p+'_ERR'][idx] = float(pvals[vidx]) + vidx += 1 + + # get references + if PSR_ALL[p]['ref']: + if self._include_refs: + reftag = pvals[vidx] + + if reftag in self._refs: + thisref = self._refs[reftag] + refstring = '{authorlist}, {year}, {title}, {journal}, {volume}' + refstring2 = re.sub(r'\s+', ' ', refstring.format(**thisref)) # remove any superfluous whitespace + self._query_output[p+'_REF'][idx] = ','.join([a for a in refstring2.split(',') if a.strip()]) # remove any superfluous empty ',' seperated values + + if self._adsref and 'ADS URL' in thisref: + self._query_output[p+'_REFURL'][idx] = thisref['ADS URL'] # remove any superfluous whitespace + else: + warnings.warn('Reference tag "{}" not found so omitting reference'.format(reftag), UserWarning) + vidx += 1 + else: # getting ephemeris + # split ephemerides for each requested pulsar (they are seperated by '@-----'...) + if qoutput: + psrephs = re.split(r'@-+', qoutput) + + if len(psrephs) != len(self._psrs): + raise Exception('Number of pulsar ephemerides returned is not the same as the number requested') + + self._npulsars = len(self._psrs) + + # query output in this case is a dictionary of ephemerides + for psr, psreph in zip(self._psrs, psrephs): + self._query_output[psr] = psreph + return self._query_output def get_dict(self): @@ -390,14 +360,56 @@ def table(self): psrtable.columns[p+'_ERR'].unit = PSR_ALL[p]['units'] # add catalogue version to metadata - psrtable.meta['version'] = self.get_version() + psrtable.meta['version'] = self.get_version psrtable.meta['ATNF Pulsar Catalogue'] = ATNF_BASE_URL return psrtable + def get_pulsars(self): + """ + Return the queried pulsars as a Pulsars object, which is a dictionary + of Pulsar objects. If 'JNAME' or 'NAME' was not in the original query, + it will be performed again, so that a name is present, which is required + for a Pulsar object + """ + + if not self._pulsars: + from .pulsar import Pulsar, Pulsars + + # check if JNAME or NAME was queried + if 'JNAME' not in self._query_params and 'NAME' not in self._query_params: + self._query_params.append('JNAME') # add JNAME parameter + + # re-do query + self._query_content = self.generate_query() + + # parse the query with BeautifulSoup into a dictionary + self._query_output = self.parse_query() + nameattr = 'JNAME' + elif 'JNAME' in self._query_params: + nameattr = 'JNAME' + else: + nameattr = 'NAME' + + self._pulsars = Pulsars() + + # add pulsars one by one + psrtable = self.table() + for row in psrtable: + attrs = {} + for key in psrtable.colnames: + attrs[key] = row[key] + + P = Pulsar(attrs[nameattr], version=self.get_version, **attrs) + self._pulsars.add_pulsar(P) + + return self._pulsars + + @property def get_version(self): """ - Return a string with the ATNF version number, or the default giving in ATNF_VERSION if not found + Return a string with the ATNF version number, or the default giving in + ATNF_VERSION if not found """ if self._atnf_version is None: diff --git a/psrqpy/utils.py b/psrqpy/utils.py index 277b1953..6189c082 100644 --- a/psrqpy/utils.py +++ b/psrqpy/utils.py @@ -2,7 +2,7 @@ Various useful functions """ -from __future__ import division +from __future__ import print_function, division import warnings import re @@ -12,10 +12,10 @@ from bs4 import BeautifulSoup -from .config import * +from .config import ATNF_BASE_URL, ATNF_VERSION, ADS_URL # problematic references that are hard to parse -prob_refs = ['bwck08'] +PROB_REFS = ['bwck08'] def get_version(): """ @@ -75,7 +75,7 @@ def get_references(useads=False): j = j + 1 reftag = tr.b.text # the reference string is contained in a tag - if reftag in prob_refs: + if reftag in PROB_REFS: continue refs[reftag] = {} diff --git a/setup.py b/setup.py index 1d1f76db..60a40317 100644 --- a/setup.py +++ b/setup.py @@ -13,24 +13,24 @@ except ImportError: from distutils.core import setup -major, minor1, minor2, release, serial = sys.version_info +MAJOR, MINOR1, MINOR2, RELEASE, SERIAL = sys.version_info -readfile_kwargs = {"encoding": "utf-8"} if major >= 3 else {} +READFILE_KWARGS = {"encoding": "utf-8"} if MAJOR >= 3 else {} def readfile(filename): - with open(filename, **readfile_kwargs) as fp: - contents = fp.read() - return contents + with open(filename, **READFILE_KWARGS) as fp: + filecontents = fp.read() + return filecontents -version_regex = re.compile("__version__ = \"(.*?)\"") -contents = readfile(os.path.join( +VERSION_REGEX = re.compile("__version__ = \"(.*?)\"") +CONTENTS = readfile(os.path.join( os.path.dirname(os.path.abspath(__file__)), "psrqpy", "__init__.py")) -version = version_regex.findall(contents)[0] +VERSION = VERSION_REGEX.findall(CONTENTS)[0] setup(name="psrqpy", - version=version, + version=VERSION, author="Matthew Pitkin", author_email="matthew.pitkin@glasgow.ac.uk", packages=["psrqpy"],