-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathyt.py
92 lines (74 loc) · 2.46 KB
/
yt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
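'''
Quick-and-dirty YouTube stats scraper: searches for a song's music video
and parses view, like and dislike counts out of the watch-page HTML.
'''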
from bs4 import BeautifulSoup
import requests
import re
# HTTP headers passed to requests.get() by downloadHTML.
HEADERS = {'User-Agent': 'yt.py'}
def downloadHTML(url, timeout=25):
    """Download the page at ``url`` and return its body as text.

    The request is sent with the module-level ``HEADERS`` and the
    response body is decoded as UTF-8.

    Args:
        url: Absolute ``http://`` or ``https://`` URL to fetch.
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        The response text when the server answers with status 200.
        An empty string on failure: a non-200 status, or a request-level
        error such as a timeout or a refused connection.

    Raises:
        ValueError: If ``url`` does not start with ``http://`` or
            ``https://``.
    """
    # Explicit check rather than ``assert`` so the validation is not
    # stripped when Python runs with -O.
    if not url.startswith(('http://', 'https://')):
        raise ValueError(
            'url must start with http:// or https://, got {!r}'.format(url)
        )

    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
    except requests.RequestException:
        # Honour the documented contract: failures yield an empty string.
        return ''

    if response.status_code != 200:
        return ''
    response.encoding = 'utf-8'
    return response.text
def searchmv(artist, title, n=5):
    '''
    Search YouTube for a song's official music video.

    The search text is "<artist> <title> official music video", reduced
    to alphanumeric characters and spaces; the remaining words are joined
    with "+" to form the search_query parameter.

    artist, title -- values identifying the song.
    n -- maximum number of links to return.

    Returns a list of at most n relative watch links (each starting with
    "watch"), in the order they appear on the results page. Results
    without a tile link, or whose link is not a watch link, are skipped.
    The list is empty when nothing matching is found in the downloaded
    page.
    '''
    query = '{} {} official music video'.format(artist, title)
    # Keep only letters, digits and spaces so the text is safe to embed
    # in the URL.
    query = ''.join(c for c in query if c.isalnum() or c == ' ')
    # split()/join collapses the runs of spaces left behind by removed
    # punctuation and turns the separators into '+'.
    query = '+'.join(query.split())

    url = 'http://www.youtube.com/results?search_query={}'.format(query)
    soup = BeautifulSoup(downloadHTML(url), 'html.parser')

    links = []
    for result in soup.find_all('div', {'class': 'yt-lockup-content'}):
        # Stop once n watch links are collected; non-video results do not
        # count towards n.
        if len(links) >= n:
            break
        anchor = result.find('a', {'class': 'yt-uix-tile-link'}, href=True)
        if anchor is None:
            continue
        link = anchor['href'].lstrip('/')
        if link.startswith('watch'):
            links.append(link)
    return links
import io
def to_int(text):
    """Convert a number written with comma separators (e.g. '1,234') to int."""
    digits = ''.join(text.split(','))
    return int(digits)
def scrapeVid(link):
    """Scrape view, like and dislike counts from a YouTube watch page.

    Args:
        link: Either a full URL starting with ``http`` or a relative
            link starting with ``watch`` (e.g. ``watch?v=...``), which
            is resolved against ``http://youtube.com/``.

    Returns:
        A dict with the keys ``'youtube_link'`` (the URL that was
        fetched), ``'views'`` (int), ``'likes'`` and ``'dislikes'``
        (int, or ``None`` when the matching button was not found).

    Raises:
        ValueError: If ``link`` has neither accepted form, or the
            downloaded page contains no view-count element (for example
            because the download failed and returned an empty page).
    """
    if link.startswith('http'):
        url = link
    elif link.startswith('watch'):
        url = 'http://youtube.com/{}'.format(link)
    else:
        raise ValueError('invalid link provided')

    soup = BeautifulSoup(downloadHTML(url), 'html.parser')

    # attrs is a dict mapping attribute name to value.
    view_node = soup.find('div', {'class': 'watch-view-count'})
    if view_node is None:
        raise ValueError('no view count found at {}'.format(url))
    # The first space-separated token of the element text is the number.
    views = to_int(view_node.contents[0].strip().split(' ')[0])

    likes = None
    dislikes = None
    stats = soup.find('span', {'class': 'like-button-renderer'})
    buttons = stats.find_all('button') if stats is not None else []
    for button in buttons:
        markup = str(button)
        # NOTE(review): these labels look like the Filipino-locale button
        # text; presumably the match depends on the locale YouTube
        # serves - confirm.
        if "Gusto ko ito" in markup:
            likes = to_int(button.find('span').contents[0].strip())
        if "Hindi ko ito gusto" in markup:
            dislikes = to_int(button.find('span').contents[0].strip())

    # Upload year is not extracted; the original kept this disabled:
    # year_uploaded = int(soup.find('strong', {'class': 'watch-time-text'}).contents[0].strip().split(' ')[-1])
    return {
        'youtube_link': url,
        'views': views,
        'likes': likes,
        'dislikes': dislikes,
    }
# with io.open('video.html', 'w', encoding='utf-8') as f:
# f.write(soup.encode('utf-8').decode())
def get_stats(artist, title):
    """Find a song's music video and return its scraped statistics.

    Calls ``searchmv(artist, title, 1)`` and passes the first returned
    link to ``scrapeVid``.

    Args:
        artist: Artist name used in the search.
        title: Song title used in the search.

    Returns:
        Whatever ``scrapeVid`` returns for the first search result.

    Raises:
        IndexError: If the search produced no link.
    """
    links = searchmv(artist, title, 1)
    if not links:
        # Same exception type as indexing an empty list, but with a
        # message that says what was being looked up.
        raise IndexError(
            'no video found for {!r} - {!r}'.format(artist, title)
        )
    return scrapeVid(links[0])