import os
import sys
import logging
from urllib.parse import urlparse

import numpy
import requests
from bs4 import BeautifulSoup
from matplotlib import pyplot
from django.utils.text import slugify

# Suppress the keyring backend log chatter that the astroquery import triggers
logging.getLogger("keyring.backend").setLevel(logging.WARNING)
from astroquery.vizier import Vizier
Vizier.ROW_LIMIT = -1  # -1 lifts Vizier's default 50-row cap on query results

BASEDIR = os.path.dirname(os.path.abspath(__file__)) + "/MW_GCS_Hilker2019/"
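

# Hedged convenience sketch (an editorial addition, not part of the original
# pipeline): the parsers below assume that the Baumgardt & Hilker ASCII tables
# already sit in BASEDIR. The filename list mirrors the fname defaults of the
# parsers below; base_url matches the source URLs quoted in their comments.
def download_hilker_2019_tables(logger,
        base_url="https://people.smp.uq.edu.au/HolgerBaumgardt/globular/"):
    if not os.path.exists(BASEDIR):
        os.makedirs(BASEDIR)
    for fn in ("orbits_table.txt", "combined_table.txt", "rv.dat"):
        url = "{0}{1}".format(base_url, fn)
        r = requests.get(url)
        if r.status_code != 200:
            logger.error("ERROR: could not retrieve {0}".format(url))
            continue
        with open("{0}{1}".format(BASEDIR, fn), "wb") as f:
            f.write(r.content)
        logger.info("Saved {0} to {1}".format(url, BASEDIR))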


def parse_hilker_2019_orbits(logger,
        fname="{0}orbits_table.txt".format(BASEDIR), debug=False):
    # https://people.smp.uq.edu.au/HolgerBaumgardt/globular/orbits_table.txt
    if not os.path.isfile(fname):
        logger.error("ERROR: file not found: {0}".format(fname))
        return

    if debug:
        logger.debug("\nParsing Hilker+ (2019) orbits table")

    names = [
        "Cluster", "RA", "DEC", "l", "b",
        "Rsun", "ERsun", "R_GC", "<RV>", "ERV",
        "mualpha", "mualpha_err", "mu_delta", "mu_delta_err", "rhopmrade",
        "X", "DX", "Y", "DY", "Z",
        "DZ", "U", "DU", "V", "DV",
        "W", "DW", "RPERI", "RPERI_err", "RAP",
        "RAP_err",
    ]
    dtype = [
        "U16", "float", "float", "float", "float",
        "float", "float", "float", "float", "float",
        "float", "float", "float", "float", "float",
        "float", "float", "float", "float", "float",
        "float", "float", "float", "float", "float",
        "float", "float", "float", "float", "float",
        "float",
    ]
    delimiter = [  # fixed column widths (in characters)
        16, 10, 11, 8, 8,
        8, 8, 8, 8, 8,
        8, 7, 8, 8, 8,
        5, 8, 8, 8, 6,
        8, 8, 10, 8, 8,
        10, 8, 8, 8, 8,
        8,
    ]

    if debug:
        logger.debug("\nnames: {}\ndtype: {}\ndelimiter: {}\n".format(
            len(names), len(dtype), len(delimiter)))
        logger.debug("-"*45)
        logger.debug("{0:<15s}{1:<15s}{2:<15s}".format("name", "dtype", "delimiter"))
        logger.debug("-"*45)
        for i in range(len(names)):
            logger.debug("{0:<15s}{1:<15s}{2:<15d}".format(names[i], dtype[i], delimiter[i]))
        logger.debug("-"*45 + "\n")

    # A sequence of ints as delimiter makes genfromtxt read fixed-width fields
    data = numpy.genfromtxt(fname, skip_header=2, delimiter=delimiter,
        dtype=dtype, names=names, autostrip=True)

    if debug:
        logger.debug("\nHere is the first entry:")
        for n in data.dtype.names:
            logger.debug("{0:<20s}{1}".format(n, data[0][n]))
        logger.debug("\nHere are the first five rows:")
        for i in range(5):
            logger.debug(data[i])
        logger.debug("\nHere are the columns Cluster, mualpha, " +
            "mualpha_err, RPERI, RPERI_err of the first five rows")
        logger.debug(data["Cluster"][0:5])
        logger.debug(data["mualpha"][0:5])
        logger.debug(data["mualpha_err"][0:5])
        logger.debug(data["RPERI"][0:5])
        logger.debug(data["RPERI_err"][0:5])

    return data
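

# Hedged sanity-check sketch (an editorial addition): the parsers in this
# module each carry parallel names/dtype/delimiter lists, and genfromtxt
# silently misreads columns if they drift out of sync. A helper like this,
# called on the three lists before genfromtxt, could catch edits that break
# the alignment.
def check_fixed_width_spec(logger, names, dtype, delimiter):
    assert len(names) == len(dtype) == len(delimiter), (
        "misaligned spec: {0} names, {1} dtypes, {2} widths".format(
            len(names), len(dtype), len(delimiter)))
    logger.debug("fixed-width spec OK: {0} columns, {1} characters per row".format(
        len(names), sum(delimiter)))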


def parse_hilker_2019_combined(logger,
        fname="{0}combined_table.txt".format(BASEDIR), debug=False):
    # https://people.smp.uq.edu.au/HolgerBaumgardt/globular/combined_table.txt
    if not os.path.isfile(fname):
        logger.error("ERROR: file not found: {0}".format(fname))
        return

    if debug:
        logger.debug("\nParsing Hilker+ (2019) combined table")

    names = [
        "Cluster", "RA", "DEC", "R_Sun", "R_GC",
        "Mass", "DM", "V", "V_err", "M/L_V", "M/L_V_err", "rc",
        "rh,l", "rh,m", "rt", "rho_c", "rho_h,m",
        "sig_c", "sig_h,m", "lg(Trh)", "MF", "F_REM",
        "sig0", "vesc", "etac", "etah",
    ]
    dtype = [
        "U16", "float", "float", "float", "float",
        "float", "float", "float", "float", "float",
        "float", "float", "float", "float", "float",
        "float", "float", "float", "float", "float",
        "float", "float", "float", "float", "float",
        "float",
    ]
    delimiter = [  # fixed column widths (in characters)
        14, 10, 11, 8, 9,
        12, 12, 6, 6, 7,
        8, 7, 8, 9, 10,
        7, 8, 8, 10, 6,
        8, 8, 6, 8, 6,
        7,
    ]

    if debug and False:  # deliberately disabled; flip to `if debug:` for the column dump
        logger.debug("\nnames: {}\ndtype: {}\ndelimiter: {}\n".format(
            len(names), len(dtype), len(delimiter)))
        logger.debug("-"*45)
        logger.debug("{0:<15s}{1:<15s}{2:<15s}".format("name", "dtype", "delimiter"))
        logger.debug("-"*45)
        for i in range(len(names)):
            logger.debug("{0:<15s}{1:<15s}{2:<15d}".format(names[i], dtype[i], delimiter[i]))
        logger.debug("-"*45 + "\n")

    data = numpy.genfromtxt(fname, skip_header=2, delimiter=delimiter,
        dtype=dtype, names=names, autostrip=True)

    if debug:
        logger.debug("\nHere is the first entry:")
        for n in data.dtype.names:
            logger.debug("{0:<20s}{1}".format(n, data[0][n]))
        logger.debug("\ndelimiter.cumsum()\n{0}\n".format(numpy.array(delimiter).cumsum()))
        logger.debug("\nHere are the first five rows:")
        for i in range(5):
            logger.debug(data[i])
        logger.debug("\nHere is the column Cluster of the first five rows")
        logger.debug(data["Cluster"][0:5])

    return data


def parse_hilker_2019_radial_velocities(logger,
        fname="{0}rv.dat".format(BASEDIR), debug=False):
    # https://people.smp.uq.edu.au/HolgerBaumgardt/globular/rv.dat
    # The following table contains the velocity dispersion profiles of 139
    # Galactic globular clusters. The table is based on the following papers:
    #   - Watkins et al. (2015), ApJ 803, 29
    #   - Baumgardt (2017), MNRAS 464, 2174
    #   - Kamann et al. (2018), MNRAS 473, 5591
    #   - Baumgardt & Hilker (2018), MNRAS 478, 1520
    #   - Baumgardt, Hilker, Sollima & Bellini (2019), MNRAS 482, 5138
    if not os.path.isfile(fname):
        logger.error("ERROR: file not found: {0}".format(fname))
        return

    if debug:
        logger.debug("\nParsing Hilker+ (2019) velocity dispersion profiles")

    # https://people.smp.uq.edu.au/HolgerBaumgardt/globular/veldis.html
    # does have a column NStar, but that column is not available for
    # https://people.smp.uq.edu.au/HolgerBaumgardt/globular/rv.dat
    names = [
        "Cluster", "radius", "velocity_dispersion",
        "velocity_dispersion_err_up", "velocity_dispersion_err_down",
        "type",
    ]
    dtype = [
        "U16", "float", "float", "float", "float", "U16",
    ]
    delimiter = [  # fixed column widths (in characters)
        13, 8, 6, 6, 6, 6,
    ]

    if debug and False:  # deliberately disabled; flip to `if debug:` for the column dump
        logger.debug("\nnames: {}\ndtype: {}\ndelimiter: {}\n".format(
            len(names), len(dtype), len(delimiter)))
        logger.debug("-"*45)
        logger.debug("{0:<15s}{1:<15s}{2:<15s}".format("name", "dtype", "delimiter"))
        logger.debug("-"*45)
        for i in range(len(names)):
            logger.debug("{0:<15s}{1:<15s}{2:<15d}".format(names[i], dtype[i], delimiter[i]))
        logger.debug("-"*45 + "\n")

    data = numpy.genfromtxt(fname, skip_header=0, delimiter=delimiter,
        dtype=dtype, names=names, autostrip=True)

    if debug:
        logger.debug("\nHere is the first entry:")
        for n in data.dtype.names:
            logger.debug("{0:<40s}{1}".format(n, data[0][n]))
        logger.debug("\ndelimiter.cumsum()\n{0}\n".format(numpy.array(delimiter).cumsum()))
        logger.debug("\nHere are the first five rows:")
        for i in range(5):
            logger.debug(data[i])
        logger.debug("\nHere is the column Cluster of the first five rows")
        logger.debug(data["Cluster"][0:5])

    return data
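

# Hedged plotting sketch (an editorial addition, and the only user of the
# pyplot import above): draw the velocity dispersion profile of one cluster
# from the rv.dat table parsed above. The default cluster name is an assumed
# example and must match the naming used in the Cluster column; the sketch
# also assumes the downward errors are stored as positive offsets.
def plot_velocity_dispersion_profile(data, cluster_name="NGC 104"):
    igc, = numpy.where(data["Cluster"] == cluster_name)
    fig, ax = pyplot.subplots()
    ax.errorbar(data["radius"][igc], data["velocity_dispersion"][igc],
        yerr=[data["velocity_dispersion_err_down"][igc],
              data["velocity_dispersion_err_up"][igc]],
        ls="none", marker="o")
    ax.set_xscale("log")
    # axis units follow the rv.dat source table, which this module does not document
    ax.set_xlabel("radius")
    ax.set_ylabel("velocity dispersion")
    ax.set_title(cluster_name)
    return fig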


def parse_baumgardt_2019_mnras_482_5138_table1():
    return Vizier.get_catalogs("J/MNRAS/482/5138/table1")[0]


def parse_baumgardt_2019_mnras_482_5138_table4():
    return Vizier.get_catalogs("J/MNRAS/482/5138/table4")[0]
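

# Hedged usage note (editorial): Vizier.get_catalogs returns an astroquery
# TableList; indexing [0] selects the astropy Table behind the requested
# catalogue ID. With Vizier.ROW_LIMIT = -1 (set at import time above) the
# full table is returned instead of astroquery's default first 50 rows, e.g.
#   table1 = parse_baumgardt_2019_mnras_482_5138_table1()
#   print(len(table1), table1.colnames)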


def scrape_individual_fits_from_baumgardt_website(logger,
        outdir="{0}../../staticfiles/img/aux/hbsb2019/".format(BASEDIR),
        force_get_img=False):
    """ Retrieve the GAIA selection, Orbit over the last 2 Gyr (xy and Rz
    plane), HST photometry, Mass function, and N-body fit gif/pdf images from
    the website of Holger Baumgardt. This function returns a dict with the GC
    name as keys, where each value is a (nested) dict holding the url of the
    GC page plus, per figure name, the img src url and the path where the
    image is stored locally.
    The data ingestion script can later insert Auxiliary instances with
    Reference Baumgardt & Hilker (2018) because that seems to be the reference
    for the N-body fits to the data. The AstroObject can be retrieved using
    the GC name, path = the local image, and url will be the img src. """

    base_url = "https://people.smp.uq.edu.au/HolgerBaumgardt/globular/fits/"
    clusterlist = "{0}clusterlist.html".format(base_url)
    r = requests.get(clusterlist)
    if r.status_code != 200:
        logger.error("ERROR: could not retrieve {0}".format(clusterlist))
        return
    soup = BeautifulSoup(r.content, "lxml")
    gcs = [(a.text, "{0}{1}".format(base_url, a["href"])) for a in soup.find_all("a")]
    Ngcs = len(gcs)
    logger.info("Found {0} globular clusters\n".format(Ngcs))

    # Get nodata.gif in case we hit a 404 at the individual GC pages later on
    nodata = "{0}nodata.gif".format(outdir)
    if not os.path.isfile(nodata):
        logger.info("GET nodata.gif")
        nodata_url = "{0}phot/nodata.gif".format(base_url)
        r = requests.get(nodata_url, stream=True)
        if r.status_code != 200:
            logger.error("  ERROR: could not retrieve {0}".format(nodata_url))
            sys.exit(1)
        with open(nodata, "wb") as f:
            for chunk in r:  # iterates over the response in chunks of 128 bytes
                f.write(chunk)
        logger.info("Success GET nodata.gif\n")
    else:
        logger.info("File exists: {0}\n".format(nodata))

    figures = [
        "GAIA_selection", "Orbit_last_2Gyr_xy", "Orbit_last_2Gyr_Rz",
        "HST_photometry", "Mass_function", "Nbody_fit"
    ]
    data = dict()
    for i, gc in enumerate(gcs):
        if i > 5:  # NB: debug guard, only the first six clusters are scraped
            break
        gc_name, gc_url = gc
        data[gc_name] = dict()
        data[gc_name]["url"] = gc_url
        logger.info("\nGET {0}/{1}: {2} @ {3}".format(i+1, Ngcs, gc_name, gc_url))
        r = requests.get(gc_url)
        if r.status_code != 200:
            logger.error("  ERROR: could not retrieve {0}".format(gc_url))
            void = input("Press any key to continue")
            continue
        soup = BeautifulSoup(r.content, "lxml")
        for img, fig_name in zip(soup.find_all("img"), figures):
            img_src = "{0}{1}".format(base_url, img["src"])
            logger.info("  {0} --> {1}".format(fig_name, img_src))
            path = urlparse(img_src).path
            ext = os.path.splitext(path)[1]
            fname = "{0}{1}_{2}{3}".format(outdir, slugify(gc_name), fig_name, ext)
            data[gc_name][fig_name] = {"fname": fname, "img_src": img_src}
            if os.path.isfile(fname) and not force_get_img:
                logger.info("  already have {0}".format(fname))
                continue
            logger.info("  saving as {0}".format(fname))
            r = requests.get(img_src, stream=True)
            if r.status_code != 200:
                logger.warning("  WARNING: could not retrieve {0}. Set to nodata.gif".format(img_src))
                os.system("cp {0} {1}".format(nodata, fname))
                continue  # do not overwrite the nodata copy with the failed response
            with open(fname, "wb") as f:
                for chunk in r:  # iterates over the response in chunks of 128 bytes
                    f.write(chunk)

    return data
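

# Hedged usage sketch (editorial): walk the nested dict that the scraper
# returns. The structure mirrors the docstring above; "url" is the GC page,
# every other key is one of the figure names with its local fname / img_src.
#   gc_fits = scrape_individual_fits_from_baumgardt_website(logger)
#   for gc_name, figs in gc_fits.items():
#       print(gc_name, figs["url"])
#       for fig_name, info in figs.items():
#           if fig_name == "url": continue
#           print("  {0}: {1}".format(fig_name, info["fname"]))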


def parse_individual_rvs_of_stars_in_field_of_clusters(logger, debug=False,
        fname="{0}appendix_combined_table.txt".format(BASEDIR)):
    """ Data retrieved 20191017 from
    https://people.smp.uq.edu.au/HolgerBaumgardt/globular/appendix/appendix.html,
    link at the bottom: 'Click here for an ASCII file with the combined radial
    velocity data of all clusters.'
    --> https://people.smp.uq.edu.au/HolgerBaumgardt/globular/appendix/combined_table.txt

    'The following table contains the individual stellar radial velocities that
    we derived from ESO proposals prior to 2014. The data files also contain the
    Gaia DR2, APOGEE DR14, Keck/DEIMOS, Keck/HIRES and Keck/NIRSPEC radial
    velocities which are not included in Appendix D of the MNRAS paper. At the
    moment the data files contain about 1/3 of all radial velocities. The
    inclusion of the remaining data is underway... '

    This function parses the combined_table.txt file. """

    if not os.path.isfile(fname):
        logger.error("ERROR: file not found: {0}".format(fname))
        return

    if debug:
        logger.debug("\nParsing Hilker+ (2019) individual radial velocity data")

    names = [
        "Cluster", "2MASS_ID", "RA", "DEC", "RV", "E_RV",
        "DCEN", "J_mag", "E_J_mag", "K_mag", "E_K_mag", "P_Mem", "NRV", "P_Single",
    ]
    dtype = [
        "U16", "U18", "float", "float", "float", "float",
        "float", "float", "float", "float", "float", "float", "int", "float",
    ]
    delimiter = [  # fixed column widths (in characters)
        9, 18, 13, 13, 10, 10,
        9, 6, 6, 8, 6, 10, 3, 9,
    ]

    if debug and False:  # deliberately disabled; flip to `if debug:` for the column dump
        logger.debug("\nnames: {}\ndtype: {}\ndelimiter: {}\n".format(
            len(names), len(dtype), len(delimiter)))
        logger.debug("-"*45)
        logger.debug("{0:<15s}{1:<15s}{2:<15s}".format("name", "dtype", "delimiter"))
        logger.debug("-"*45)
        for i in range(len(names)):
            logger.debug("{0:<15s}{1:<15s}{2:<15d}".format(names[i], dtype[i], delimiter[i]))
        logger.debug("-"*45 + "\n")

    data = numpy.genfromtxt(fname, skip_header=2, delimiter=delimiter,
        dtype=dtype, names=names, autostrip=True)

    if debug:
        logger.debug("\nHere is the first entry:")
        for n in data.dtype.names:
            logger.debug("{0:<40s}{1}".format(n, data[0][n]))
        logger.debug("\nHere is the entry at index 129:")
        for n in data.dtype.names:
            logger.debug("{0:<40s}{1}".format(n, data[129][n]))
        logger.debug("\ndelimiter.cumsum()\n{0}\n".format(numpy.array(delimiter).cumsum()))
        logger.debug("\nHere are the first 50 rows:")
        for i in range(50):
            logger.debug(data[i])

    return data
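

# Hedged analysis sketch (editorial): the P_Mem column of the table above is
# a membership probability, so likely members of one cluster can be selected.
# The 0.9 threshold and the "NGC 104" name are assumed example values only,
# and the name must match the Cluster column of the table.
#   rvs = parse_individual_rvs_of_stars_in_field_of_clusters(logger)
#   members, = numpy.where((rvs["Cluster"] == "NGC 104") & (rvs["P_Mem"] > 0.9))
#   print("{0} likely members".format(len(members)))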


if __name__ == "__main__":
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format="%(message)s")
    logger = logging.getLogger(__name__)
    logger.info("Running {0}".format(__file__))

    individual_rvs = parse_individual_rvs_of_stars_in_field_of_clusters(logger, debug=True)

    hilker_orbits = parse_hilker_2019_orbits(logger, debug=True)
    hilker_combined = parse_hilker_2019_combined(logger, debug=True)

    # It seems Ter 2 has three NaN values, so here we check which ones and why.
    ter2, = numpy.where(hilker_combined["Cluster"] == "Ter 2")
    for n in hilker_combined.dtype.names:
        logger.debug("{0:<20s}{1}".format(n, hilker_combined[ter2][0][n]))

    hilker_radial_velocities = parse_hilker_2019_radial_velocities(logger, debug=True)

    gc_fits = scrape_individual_fits_from_baumgardt_website(logger)

    h19_table1 = parse_baumgardt_2019_mnras_482_5138_table1()
    h19_table4 = parse_baumgardt_2019_mnras_482_5138_table4()