diff --git a/models/contribution.py b/models/contribution.py index d26b649f..c4f8730e 100644 --- a/models/contribution.py +++ b/models/contribution.py @@ -15,9 +15,6 @@ class Contribution(db.Model): person_id = db.Column(db.Integer, db.ForeignKey("person.id")) package_id = db.Column(db.Text, db.ForeignKey("package.id")) - #person = db.relationship("Person", backref="contributions") - #package = db.relationship("Package", backref="contributions") - role = db.Column(db.Text) quantity = db.Column(db.Integer) percent = db.Column(db.Float) diff --git a/models/cran_package.py b/models/cran_package.py index f2cf5d35..8ed3a0de 100644 --- a/models/cran_package.py +++ b/models/cran_package.py @@ -50,17 +50,6 @@ def save_host_contributors(self): self._save_contribution(person, "author") - - def _remove_all_authors_cruft(self, all_authors): - return all_authors - - def _extract_author_strings(self, all_authors): - return [] - - def _name_and_email_from_author_str(self, author_str): - return [None, None] - - def set_github_repo_ids(self): q = db.session.query(GithubRepo.login, GithubRepo.repo_name) q = q.filter(GithubRepo.language == 'r') diff --git a/models/person.py b/models/person.py index d4f10619..62752018 100644 --- a/models/person.py +++ b/models/person.py @@ -226,6 +226,9 @@ def get_or_make_person(**kwargs): else: print u"minting a new person using {}".format(kwargs) new_person = Person(**kwargs) + + # do these things now so that can use them to detect dedups later this run + new_person.set_github_about_() # also sets name so has to go first new_person.set_parsed_name() db.session.add(new_person) diff --git a/models/profile.py b/models/profile.py deleted file mode 100644 index 1021d5c7..00000000 --- a/models/profile.py +++ /dev/null @@ -1,110 +0,0 @@ -from app import db -from models.repo import create_repo -from models.repo import Repo -from providers import github -from util import dict_from_dir - -from sqlalchemy.dialects.postgresql import JSON -import datetime -import logging - -logger = logging.getLogger("profile") - - -def create_profile(username): - profile_data = github.get_profile_data(username) - profile = Profile(username=username, github_data=profile_data) - - repo_data = github.get_all_repo_data(username) - for repo_dict in repo_data: - repo = create_repo(username, repo_dict["name"], repo_dict) - profile.repos.append(repo) - db.session.merge(profile) - db.session.commit() - return profile - - -class Profile(db.Model): - username = db.Column(db.Text, primary_key=True) - created = db.Column(db.DateTime()) - github_data = db.Column(JSON) - - repos = db.relationship( - 'Repo', - lazy='subquery', - cascade='all, delete-orphan' - - # @heather - # i removed this line: - # backref=db.backref("repo", lazy="subquery") - # because it seemed to be making circular references on the repo obj - ) - - def __init__(self, **kwargs): - super(Profile, self).__init__(**kwargs) - self.created = datetime.datetime.utcnow().isoformat() - - def __repr__(self): - return u''.format( - username=self.username) - - def _get_from_github_data(self, my_property): - try: - return self.github_data[my_property] - except KeyError: - return None - - @property - def avatar_url(self): - return self._get_from_github_data("avatar_url") - - @property - def name(self): - return self._get_from_github_data("name") - - @property - def bio(self): - return self._get_from_github_data("bio") - - @property - def blog(self): - return self._get_from_github_data("blog") - - @property - def company(self): - return self._get_from_github_data("company") - - @property - def created_at(self): - return self._get_from_github_data("created_at") - - @property - def email(self): - return self._get_from_github_data("email") - - @property - def followers(self): - return self._get_from_github_data("followers") - - @property - def following(self): - return self._get_from_github_data("following") - - @property - def html_url(self): - return self._get_from_github_data("html_url") - - @property - def location(self): - return self._get_from_github_data("location") - - @property - def received_events_url(self): - return self._get_from_github_data("received_events_url") - - @property - def updated_at(self): - return self._get_from_github_data("updated_at") - - def to_dict(self): - return dict_from_dir(self, keys_to_ignore=["github_data"]) diff --git a/test/test_cran_package.py b/test/test_cran_package.py new file mode 100644 index 00000000..784ec48b --- /dev/null +++ b/test/test_cran_package.py @@ -0,0 +1,23 @@ +from models import cran_package +from test.utils import http + +import unittest +from nose.tools import assert_equals +from nose.tools import assert_not_equals +from nose.tools import assert_true +from nose.tools import assert_items_equal + + +class TestCranPackage(unittest.TestCase): + pass + # @http + # def test_get_tags(self): + # response = cran_package.get_tags("knitr") + # expected = ['ReproducibleResearch'] + # assert_equals(response, expected) + + # response = cran_package.get_tags("MASS") + # expected = ['Distributions', 'Econometrics', 'Environmetrics', 'Multivariate', 'NumericalMathematics', 'Pharmacokinetics', 'Psychometrics', 'Robust', 'SocialSciences'] + # assert_equals(response, expected) + +