-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.py
executable file
·66 lines (60 loc) · 2.35 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python3
from lxml import html
from helpers import stripList,stripFromList,removeDuplicates,retrieveMatchUrls
import retrieveMatchInfo
import requests
import sys
import logging
import sqlite3
# Root URL of the HLTV site; page paths such as "/matches/" are appended to it.
baseURL = "http://www.hltv.org"
# Path of the SQLite database file that the scraped matches are written to.
db = "matches.db"
def insertMatchData(teamOne,teamTwo,matchUrl,matchDate,db):
    """Insert matches into the ``matches`` table of a SQLite database.

    The four sequences are parallel: element ``i`` of each one describes the
    same match. One row is inserted per entry in ``teamOne``/``teamTwo``.
    Rows rejected with ``sqlite3.IntegrityError`` are logged and skipped;
    every other row is committed in a single transaction.

    Args:
        teamOne: First team of each match.
        teamTwo: Second team of each match.
        matchUrl: URL of each match.
        matchDate: Date value of each match, stored as given.
        db: Path of the SQLite database file.

    Raises:
        ValueError: If ``teamOne`` and ``teamTwo`` differ in length, or if
            ``matchUrl`` or ``matchDate`` has fewer entries than there are
            matches.
    """
    # Validate before touching the database so bad input cannot leave a
    # half-filled transaction or an open connection behind.
    if len(teamOne) != len(teamTwo):
        raise ValueError("Amount of teams in matches aren't equal")
    if len(matchUrl) < len(teamOne) or len(matchDate) < len(teamOne):
        raise ValueError("Fewer match urls or match dates than matches")

    conn = sqlite3.connect(db)
    try:
        curs = conn.cursor()
        # zip stops at the shortest input, i.e. at the number of matches.
        for row in zip(teamOne, teamTwo, matchUrl, matchDate):
            try:
                curs.execute("INSERT INTO matches ('Team1','Team2','MatchUrl','MatchDate') VALUES (?,?,?,?)",
                             row)
            except sqlite3.IntegrityError:
                # Presumably raised by a uniqueness constraint on MatchUrl;
                # the table schema is not visible here.
                logging.info("Duplicate match url %s", row[2])
                logging.info("Skipping match")
        conn.commit()
    finally:
        # Always release the connection, even when an insert fails.
        conn.close()
def main():
    """Scrape the HLTV matches page and store the new matches in the database.

    Downloads ``baseURL + "/matches/"``, extracts both team names and the
    match link of every listed match, drops the matches whose URL is already
    stored, looks up the game time of each remaining match and inserts the
    result with ``insertMatchData``.

    Exits the process with status 1 when the request fails or the response
    status is not 200.
    """
    logging.basicConfig(level=logging.DEBUG)

    # Get raw match page from hltv. Without a timeout requests may block
    # forever on an unresponsive server.
    try:
        page = requests.get(baseURL + "/matches/", timeout=30)
    except requests.RequestException as err:
        logging.error("Request to hltv failed: %s", err)
        sys.exit(1)
    logging.info("Code %s from hltv request", page.status_code)
    # Check status code - 200 is good
    if page.status_code != 200:
        logging.error("Unexpected status code, aborting")
        sys.exit(1)

    tree = html.fromstring(page.content)
    # Get the data from the page
    teamOne = stripList(tree.xpath("//div[@class='matchTeam1Cell']/a/text()"))
    teamTwo = stripList(tree.xpath("//div[@class='matchTeam2Cell']/a/text()"))
    # Get match urls and prepend the baseURL for use in removeDuplicates()
    relativeUrls = stripList(tree.xpath("//div[@class='matchActionCell']/a/@href"))
    matchUrl = [baseURL + href for href in relativeUrls]

    # Check for duplicates and remove any duplicates.
    # This is done based on the URL of the match.
    deduplicated = removeDuplicates(matchUrl, retrieveMatchUrls(db), [teamOne, teamTwo])
    matchUrl = deduplicated[0]
    teamOne = deduplicated[1][0]
    teamTwo = deduplicated[1][1]

    # Use each remaining match URL to retrieve that match's game time.
    matchesDates = [retrieveMatchInfo.getGameTime(url) for url in matchUrl]
    insertMatchData(teamOne, teamTwo, matchUrl, matchesDates, db)


# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()