Skip to content

Commit

Permalink
Merge pull request #43 from Slashdacoda/truthorfiction_2021_NormalizeCredibility
Browse files Browse the repository at this point in the history

merge to master to update
  • Loading branch information
Slashdacoda authored Jul 28, 2021
2 parents 1d64d34 + 4f44164 commit 2d3d04e
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 79 deletions.
23 changes: 18 additions & 5 deletions claim_extractor/extractors/truthorfiction.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,22 @@ def extract_claim_and_review(self, parsed_claim_review_page: BeautifulSoup, url:
article = parsed_claim_review_page.find("article")

# date

date_ = parsed_claim_review_page.find('meta', {"property": "article:published_time"})['content']
if date_:
date_str = date_.split("T")[0]
claim.set_date(date_str)

# author
author_ = parsed_claim_review_page.find('meta', {"name": "author"})['content']
if author_:
author_str = author_.split("T")[0]
claim.set_author(author_str)

## auth link
author_url = parsed_claim_review_page.find('a', {"class": "url fn n"})['href']
if author_url:
claim.author_url = author_url

# body
content = [tag for tag in article.contents if not isinstance(tag, NavigableString)]
body = content[-1] # type: Tag
Expand Down Expand Up @@ -99,12 +109,15 @@ def extract_claim_and_review(self, parsed_claim_review_page: BeautifulSoup, url:
else:
claim.set_rating(rating_text)
claim.set_claim(claim_text)


# tags
tags = []
if parsed_claim_review_page.select('footer > span.tags-links > a'):
for link in parsed_claim_review_page.select('footer > span.tags-links > a'):
if hasattr(link, 'href'):
#tag_link = link['href']
tags.append(link.text)

for tag in parsed_claim_review_page.findAll("meta", {"property", "article:tags"}):
tag_str = tag['content']
tags.append(tag_str)
claim.set_tags(", ".join(tags))

return [claim]
Loading

0 comments on commit 2d3d04e

Please sign in to comment.