Skip to content

Commit

Permalink
get github repos
Browse files Browse the repository at this point in the history
  • Loading branch information
hpiwowar committed Jan 24, 2017
1 parent 6bfd4b3 commit e07dd12
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 13 deletions.
14 changes: 14 additions & 0 deletions models/github_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from util import elapsed
from util import safe_commit
from time import time
import datetime
import ast
import subprocess
import re
Expand Down Expand Up @@ -48,6 +49,9 @@ class GithubRepo(db.Model):
lib_matches_raw = deferred(db.Column(JSONB))
lib_matches_final = deferred(db.Column(JSONB))

created = db.Column(db.DateTime)
updated = db.Column(db.DateTime)

# Old columns, since removed from the current database; they survive only in database backups.
# requirements = db.Column(JSONB)
# reqs_file = deferred(db.Column(db.Text))
Expand All @@ -62,6 +66,16 @@ class GithubRepo(db.Model):
# setup_py_no_forks = deferred(db.Column(db.Text))


def __init__(self, login=None, repo_name=None, language=None):
    """Create a repo row keyed by its owner login and repo name.

    login: GitHub owner login.
    repo_name: repository name.
    language: language label stored on the row.

    The primary key ``id`` is built as u'<login>:<repo_name>'.  ``created``
    and ``updated`` are both stamped with the same current UTC time.
    """
    self.login = login
    self.repo_name = repo_name
    self.language = language
    self.id = u'{}:{}'.format(login, repo_name)

    # Take a single timestamp so a freshly built row has created == updated;
    # two separate utcnow() calls would differ by a few microseconds.
    now = datetime.datetime.utcnow()
    self.created = now
    self.updated = now

    super(GithubRepo, self).__init__()


def __repr__(self):
    """Return a debugging label: <GithubRepo language login/repo_name>."""
    template = u'<GithubRepo {language} {login}/{repo_name}>'
    fields = dict(
        language=self.language,
        login=self.login,
        repo_name=self.repo_name,
    )
    return template.format(**fields)
Expand Down
54 changes: 45 additions & 9 deletions refresh.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import requests
from lxml import html
import argparse
import datetime

from app import db
from models.cran_package import CranPackage
from models.pypi_package import PypiPackage
from models.github_repo import GithubRepo
from models.github_api import make_ratelimited_call
import update
from util import safe_commit

Expand All @@ -29,15 +31,35 @@ def add_all_new_packages(package_class):


def add_all_new_github_repos(language):
all_current_github_repo_rows = db.session.query(GithubRepo.id).filter(GithubRepo.language==language).all()
all_current_github_repo_ids = [row[0] for row in all_current_github_repo_rows]

end_date = datetime.datetime(2015, 11, 01)
start_date = datetime.datetime.utcnow()
date = start_date
while date <= end_date:
next_date = date + timdelta(days=1)
url_template = "https://api.github.com/search/repositories?q=created:%22{date}%20..%20{next_date}%22%20language:{language}&per_page=1000&sort=forks&order=desc"
while date >= end_date:
prev_date = date - datetime.timedelta(days=1)
# The sort field. One of stars, forks, or updated.
# max of 100 returned
# authenticated rate limit: 30/min
url_template = "https://api.github.com/search/repositories?q=created:%22{prev_date}%20..%20{date}%22%20language:{language}&per_page=1000&sort=stars&order=desc"
url = url_template.format(
language=language, date=date, next_date=next_date)
r = requests.get(url)
data = r.json()
date = next_date
language=language, date=date.isoformat()[0:10], prev_date=prev_date.isoformat()[0:10])
print url
data = make_ratelimited_call(url)
print date.isoformat()[0:10], data["total_count"], data["incomplete_results"]
date = prev_date
for repo_dict in data["items"]:
new_repo = GithubRepo(login=repo_dict["owner"]["login"], repo_name=repo_dict["name"], language=language)
new_repo.api_raw = repo_dict
print "new_repo:", new_repo
if new_repo.id not in all_current_github_repo_ids:
print "added new repo from {}: {}\n".format(date.isoformat()[0:10], new_repo.id)
db.session.add(new_repo)
all_current_github_repo_ids.append(new_repo.id)
safe_commit(db)




def recalculate_everything(parsed_args):
Expand All @@ -51,15 +73,29 @@ def recalculate_everything(parsed_args):
update.run_update(parsed_args)


def refresh(parsed_args):
if parsed_args.language=="r":
package_class = CranPackage
else:
package_class = PypiPackage

parsed_args.fn = u"{}.refresh".format(package_class.__name__)
print "parsed_args.fn", parsed_args.fn
update.run_update(parsed_args)


if __name__ == '__main__':

parser = argparse.ArgumentParser(description="Run stuff.")
parser.add_argument('language', help="r or python")
parsed_args = update.parse_update_optional_args(parser)


add_all_new_github_repos(parsed_args.language)

# add_all_new_packages(PypiPackage)
# add_all_new_packages(CranPackage)
add_all_new_packages(PypiPackage)


# start_date = ""
# end_date = ""
Expand Down
2 changes: 1 addition & 1 deletion test/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from sqlalchemy.exc import OperationalError
from sqlalchemy.sql import text
from sqlalchemy.sql import text

import redis
import os
Expand Down
7 changes: 4 additions & 3 deletions util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import time
import bisect
import sqlalchemy

from app import db

Expand Down Expand Up @@ -135,8 +136,8 @@ def safe_commit(db):
except sqlalchemy.exc.DataError:
db.session.rollback()
print u"sqlalchemy.exc.DataError on commit. rolling back."
except Exception:
db.session.rollback()
except Exception as e:
print "error", e
print u"generic exception in commit. rolling back."
logging.exception("commit error")
db.session.rollback()
return False

0 comments on commit e07dd12

Please sign in to comment.