Skip to content

Commit

Permalink
Merge pull request #37 from Slashdacoda/africacheck_2021_NormalizeCredibility
Browse files Browse the repository at this point in the history

merge to master to update
  • Loading branch information
Slashdacoda authored Jul 28, 2021
2 parents 4bf5b7c + bf7bd8d commit acf1301
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 106 deletions.
55 changes: 3 additions & 52 deletions claim_extractor/extractors/africacheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,7 @@ class AfricacheckFactCheckingSiteExtractor(FactCheckingSiteExtractor):
def __init__(self, configuration: Configuration):
    """Initialize the Africa Check extractor.

    Delegates all setup to the FactCheckingSiteExtractor base class;
    no site-specific state is added here.
    """
    super().__init__(configuration)

def retrieve_listing_page_urls(self) -> List[str]:
    """Return the listing-page URLs used as crawl entry points.

    Africa Check's search endpoint, sorted by creation date (newest
    first), is the single entry point; pagination starts at page=0.
    """
    # Previous entry point, kept for reference:
    # return ["https://africacheck.org/latest-reports/page/1/"]
    return ["https://africacheck.org/search?rt_bef_combine=created_DESC&sort_by=created&sort_order=DESC&page=0"]


Expand Down Expand Up @@ -251,7 +250,6 @@ def extract_claim_and_review(self, parsed_claim_review_page: BeautifulSoup, url:
except KeyError:
print("KeyError: Skip")
else:

# alternative rating (If there is no article--aside box with verdict)
global_truth_rating = ""
if parsed_claim_review_page.find("div", {"class": "verdict-stamp"}):
Expand All @@ -264,9 +262,6 @@ def extract_claim_and_review(self, parsed_claim_review_page: BeautifulSoup, url:
global_truth_rating = parsed_claim_review_page.find("div", {"class": "indicator"}).find(
'span').get_text()

#if (url == "https://africacheck.org/fact-checks/fbchecks/beware-details-viral-message-about-discovery-health-and-oxygen-tanks-covid-19"):
# print("ws")

# If still no rating value, try to extract from picture name
if (global_truth_rating == ""):
filename =""
Expand All @@ -288,8 +283,6 @@ def extract_claim_and_review(self, parsed_claim_review_page: BeautifulSoup, url:
global_truth_rating = filename_split[0]
else:
global_truth_rating = filename_split[len(filename_split)-1]

#global_truth_rating = filename_split[0]

claim.set_rating(str(re.sub('[^A-Za-z0-9 -]+', '', global_truth_rating)).lower().strip().replace("pfalse","false").replace("-","").capitalize())

Expand All @@ -298,58 +291,16 @@ def extract_claim_and_review(self, parsed_claim_review_page: BeautifulSoup, url:
print ("\n Rating:" + claim.rating)
claim.rating = ""

###########

# # There are several claims checked within the page. Common date, author, tags ,etc.
# if inline_ratings and len(inline_ratings) > 0:
# entry_contents = entry_section.contents # type : List[Tag]
# current_index = 0

# # First we extract the bit of text common to everything until we meed a sub-section
# body_text, links, current_index = get_text_and_links_until_next_header(entry_contents, current_index)
# claim.set_body(body_text)
# claim.set_refered_links(links)

# while current_index < len(entry_contents):
# current_index = forward_until_inline_rating(entry_contents, current_index)
# inline_rating_div = entry_contents[current_index]
# if isinstance(inline_rating_div, NavigableString):
# break
# claim_text = inline_rating_div.find("p", {"class": "claim-content"}).text
# inline_rating = inline_rating_div.find("div", {"class", "indicator"}).find("span").text
# previous_current_index = current_index
# inline_body_text, inline_links, current_index = get_text_and_links_until_next_header(entry_contents,
# current_index)
# if previous_current_index == current_index:
# current_index += 1
# inline_claim = Claim()
# inline_claim.set_source("africacheck")
# inline_claim.set_claim(claim_text)
# inline_claim.set_rating(inline_rating)
# inline_claim.set_refered_links(",".join(inline_links))
# inline_claim.set_body(inline_body_text)
# inline_claim.set_tags(", ".join(tags))
# inline_claim.set_date(global_date_str)
# inline_claim.set_url(url)
# if author:
# inline_claim.set_author(author.get_text())
# inline_claim.set_title(global_title_text)

# local_claims.append(inline_claim)


# body
# body
body = parsed_claim_review_page.find("div", {"class": "article--main"})
claim.set_body(body.get_text())

# related links
related_links = []
for link in body.findAll('a', href=True):
related_links.append(link['href'])
claim.related_links = related_links
claim.set_refered_links(related_links)

# local_claims.append(claim)
# return local_claims
if claim.rating:
return [claim]
else:
Expand Down
Loading

0 comments on commit acf1301

Please sign in to comment.