forked from vaastav/Fantasy-Premier-League
-
Notifications
You must be signed in to change notification settings - Fork 0
/
global_scraper.py
71 lines (65 loc) · 2.38 KB
/
global_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import requests
import json
from utility import uprint
from parsers import *
from cleaners import *
from collector import collect_gw, merge_gw
import time
def get_data():
    """Fetch the FPL summary data from the hard-coded bootstrap-static url.

    Returns:
        The response body decoded from JSON.

    Raises:
        Exception: If the server replies with a status code other than 200.
    """
    url = "https://fantasy.premierleague.com/drf/bootstrap-static"
    reply = requests.get(url)
    status = reply.status_code
    if status == 200:
        return json.loads(reply.text)
    raise Exception("Response was code " + str(status))
def get_individual_player_data(player_id):
    """Retrieve the player-specific detailed data.

    The request is retried every 5 seconds, with no upper bound on the
    number of attempts, for as long as ``requests`` reports a failure
    (connection error, timeout, ...).

    Args:
        player_id (int): ID of the player whose data is to be retrieved

    Returns:
        The element-summary response body decoded from JSON.

    Raises:
        Exception: If the server replies with a status code other than 200.
    """
    base_url = "https://fantasy.premierleague.com/drf/element-summary/"
    full_url = base_url + str(player_id)
    response = None
    while response is None:
        try:
            response = requests.get(full_url)
        except requests.exceptions.RequestException:
            # Retry only on failures raised by requests itself. A bare
            # `except:` here would also trap KeyboardInterrupt/SystemExit,
            # making the loop impossible to stop with Ctrl-C, and would
            # retry forever on programming errors.
            time.sleep(5)
    if response.status_code != 200:
        raise Exception("Response was code " + str(response.status_code))
    data = json.loads(response.text)
    return data
def parse_data():
    """Parse and store all the data.

    Fetches the summary data, then hands it to the parsing, cleaning and
    id-extraction helpers, requests the detailed data of every player, and
    finally collects and merges the gameweek scores. Every path passed to
    the helpers is built under ``data/2018-19/``.
    """
    print("Getting data")
    summary = get_data()
    season = '2018-19'
    season_dir = 'data/' + season + '/'
    raw_players_csv = season_dir + 'players_raw.csv'

    print("Parsing summary data")
    parse_players(summary["elements"], season_dir)
    gw_num = summary["current-event"]

    print("Cleaning summary data")
    clean_players(raw_players_csv, season_dir)

    print("Extracting player ids")
    id_players(raw_players_csv, season_dir)
    player_ids = get_player_ids(season_dir)

    # TODO: parse other stats that may be useful
    num_players = len(summary["elements"])
    players_dir = season_dir + 'players/'
    gws_dir = season_dir + 'gws/'

    print("Extracting player specific data")
    # NOTE(review): this loop assumes player ids run contiguously from 1 to
    # num_players and that player_ids can be indexed by them - confirm.
    for player_id in range(1, num_players + 1):
        details = get_individual_player_data(player_id)
        past_seasons = details["history_past"]
        player_entry = player_ids[player_id]
        parse_player_history(past_seasons, players_dir, player_entry, player_id)
        parse_player_gw_history(details["history"], players_dir, player_entry, player_id)

    print("Collecting gw scores")
    collect_gw(gw_num, players_dir, gws_dir)

    print("Merging gw scores")
    merge_gw(gw_num, gws_dir)
def main():
    """Script entry point: run the whole parse-and-store pipeline."""
    parse_data()


# Only start the scrape when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()