Skip to content

Commit

Permalink
fixed problems in recommendationengine class, updated README
Browse files Browse the repository at this point in the history
  • Loading branch information
trislee committed Aug 9, 2022
1 parent b18e559 commit cb30169
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 43 deletions.
6 changes: 1 addition & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,4 @@ Scraper for alt-tech video sharing platform [Odysee](https://odysee.com/).

### TODO
- Implement CLI
- Profile run-time, look into implementing async requests
- Add error handling/backoff waiting to requests
- Implement basic test suite
- Formalize network graph generation into class/module
- Work on reverse-engineering auth_token instead of having it hard-coded
- Profile run-time, look into implementing async requests
2 changes: 1 addition & 1 deletion examples/generate_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

engine = polyphemus.base.RecommendationEngine(channel_list= [CHANNEL_NAME])

weighted_edge_list, claim_id_to_video = engine.generate(iterations = 1)
weighted_edge_list, channels, claim_id_to_video = engine.generate(iterations = ITERATIONS)

G = nx.DiGraph()
G.add_weighted_edges_from(weighted_edge_list)
Expand Down
8 changes: 8 additions & 0 deletions polyphemus/_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: UTF-8 -*-

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

from . import api
from . import base

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
2 changes: 1 addition & 1 deletion polyphemus/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def make_request(request: Callable, kwargs: dict) -> requests.Response:
retry_reasons = []

# TODO this looks a bit gross, try to refactor
while n_retries < 5:
while n_retries < 10:
time.sleep(2 ** n_retries - 1)
try:
response = request(**kwargs)
Expand Down
30 changes: 20 additions & 10 deletions polyphemus/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def process_raw_video_info(raw_video_info: dict, auth_token: str = None, additio
channel_id = channel_id,
channel_name = channel_name,
claim_id = raw_video_info['claim_id'],
created = datetime.fromtimestamp(int(created)),
created = datetime.fromtimestamp(max(int(created), 0)),
text = raw_video_info['value'].get('description'),
languages = raw_video_info['value'].get('languages'),
tags = raw_video_info['value'].get('tags',[]),
Expand Down Expand Up @@ -269,14 +269,15 @@ def __init__(self, channel_list):
#-------------------------------------------------------------------------#

def generate(self, iterations = 1):

for channel_name in self.channel_list:
print(channel_name)
scraper = OdyseeChannelScraper(channel_name = channel_name, auth_token = self.auth_token)

self.new_videos.extend(list(scraper.get_all_videos(additional_fields = False)))

self.claim_id_to_video = dict(zip([v.claim_id for v in self.new_videos], self.new_videos))

if not self.new_videos:
for channel_name in self.channel_list:
print(channel_name)
scraper = OdyseeChannelScraper(channel_name = channel_name, auth_token = self.auth_token)

self.new_videos.extend(list(scraper.get_all_videos(additional_fields = False)))

self.claim_id_to_video.update(dict(zip([v.claim_id for v in self.new_videos], self.new_videos)))

for iteration in range(int(iterations)):

Expand Down Expand Up @@ -311,6 +312,15 @@ def generate(self, iterations = 1):
c = Counter(channel_edge_list)
self.weighted_edge_list = [(source, target, weight) for (source, target), weight in c.most_common()]

return self.weighted_edge_list, self.claim_id_to_video
usernames = set([channel.strip('@') for edge in self.weighted_edge_list for channel in edge[:2]])

self.channels = {}
for username in usernames:
try:
self.channels['@' + username] = OdyseeChannelScraper(channel_name = username, auth_token=self.auth_token).get_entity().__dict__
except KeyError:
pass

return self.weighted_edge_list, self.channels, self.claim_id_to_video

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
55 changes: 29 additions & 26 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,36 +10,39 @@
def readme( ):

with open( os.path.abspath(
os.path.join(
os.path.dirname( __file__ ),
'README.md' ) ) ) as f:
os.path.join(
os.path.dirname( __file__ ),
'README.md' ) ) ) as f:

return f.read( )
return f.read( )

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

setup(
name = 'polyphemus',
version = '0.1',
description = 'Scraping Odysee video data',
long_description = readme( ),
author = 'Bellingcat',
packages = [
'polyphemus' ],
install_requires = [
'requests >= 2.27.0',
'beautifulsoup4 >= 4.10.0',
'pandas >= 1.4.0'],
extras_require = {
'docs': [
'sphinx >= 3.3.1',
'sphinx_rtd_theme >= 0.5',],
'tests': [
'pytest >= 6.1.2',
'pytest-cov >= 2.10.1',
'pytest-html >= 3.0.0',
'pytest-metadata >= 1.10.0']},
include_package_data = True,
zip_safe = False )
name = 'polyphemus',
version = '0.1',
description = 'Scraping Odysee video data',
long_description = readme(),
author = 'Bellingcat',
packages = [
'polyphemus'],
install_requires = [
'requests >= 2.27.0',
'beautifulsoup4 >= 4.10.0',
'pandas >= 1.4.0'],
extras_require = {
'docs': [
'sphinx >= 3.3.1',
'sphinx_rtd_theme >= 0.5',],
'tests': [
'pytest >= 6.1.2',
'pytest-cov >= 2.10.1',
'pytest-html >= 3.0.0',
'pytest-metadata >= 1.10.0']},
include_package_data = True,
zip_safe = False,
entry_points = {
'console_scripts': [
'polyphemus = polyphemus._cli:main']})

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

0 comments on commit cb30169

Please sign in to comment.