Skip to content

Commit

Permalink
Solo ad
Browse files Browse the repository at this point in the history
  • Loading branch information
peterk committed Sep 5, 2018
1 parent 7639f05 commit 0a60f06
Showing 1 changed file with 27 additions and 4 deletions.
31 changes: 27 additions & 4 deletions web/app/fetch_ads_by_likelihood.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,14 @@ def save_file(url, targetdir):



def html2text(rawhtml):
    """Return the plain text of an HTML fragment with the ad marker removed.

    Args:
        rawhtml: HTML markup to flatten (str or bytes).

    Returns:
        The text content of the parsed markup with every occurrence of the
        literal "SpSonSsrSadS" deleted. ``None``, empty or whitespace-only
        input yields an empty string.
    """
    # Assuming `html` is lxml.html: fromstring() raises on an empty or blank
    # document instead of returning a tree, so short-circuit those inputs
    # rather than letting one ad without markup abort the caller.
    if rawhtml is None or not rawhtml.strip():
        return ""

    tree = html.fromstring(rawhtml)
    text = tree.text_content()
    # NOTE(review): presumably this is the obfuscated "Sponsrad" label that
    # is embedded in the ad markup -- confirm against real ad HTML.
    return text.replace("SpSonSsrSadS", "")



def write_ad(jad):
target = os.path.join(archive_dir, jad["id"])

Expand All @@ -114,8 +122,9 @@ def write_ad(jad):
json.dump(jad, outfile)

# write media
for item in jad["images"]:
save_file(item, target)
if len(jad["images"]) > 0:
for item in set(jad["images"]):
save_file(item, target)

if "thumbnail" in jad.keys():
save_file(jad["thumbnail"], target)
Expand Down Expand Up @@ -150,6 +159,10 @@ def write_ad(jad):
ad.raw=jad
ad.created_at = jad["created_at"]
ad.updated_at=jad["updated_at"]
ad.plaintext = html2text(jad["html"])
if jad["targeting"]:
if len(jad["targeting"]) > 0:
ad.plaintarget = html2text(jad["targeting"])

session.add(ad)
else:
Expand All @@ -175,6 +188,7 @@ def print_ad(ad):
parser.add_argument("--new", help="Exclude existing ads while listing", action="store_true")
parser.add_argument("--min", help="Min probapility", type=int)
parser.add_argument("--max", help="Max probapility", type=int)
parser.add_argument("--only", help="Only ad matching id", type=int)
parser.parse_args()
args = parser.parse_args()

Expand All @@ -188,6 +202,8 @@ def print_ad(ad):

pages = math.ceil((jdata["total"]) / 20)
print("Pages: %s" % pages)
if args.only:
print("Only fetching %s" % args.only)

for page in range(0, pages):
r = requests.get(f"https://projects.propublica.org/fbpac-api/ads?poliprob={args.min}&maxpoliprob={args.max}&page={page}&lang=sv-SE", cookies=cookies)
Expand All @@ -208,8 +224,15 @@ def print_ad(ad):
print_ad(ad)

else:
write_ad(ad)
session.commit()
if args.only:
if str(ad["id"]) == str(args.only):
write_ad(ad)
session.commit()
print("Solo ad %s saved." % ad["id"])
break
else:
write_ad(ad)
session.commit()

print("Skipped %s existing ads" % skip_count)

0 comments on commit 0a60f06

Please sign in to comment.