Skip to content

Commit

Permalink
Add logic to store neg news for 5 names (#160)
Browse files Browse the repository at this point in the history
Signed-off-by: Sean Sundberg <[email protected]>
  • Loading branch information
seansund authored Nov 14, 2023
1 parent dd98eca commit 8398d89
Show file tree
Hide file tree
Showing 8 changed files with 203 additions and 351 deletions.
5 changes: 5 additions & 0 deletions nest-cli.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
"include": "../config/*.*",
"outDir": "dist/config",
"watchAssets": true
},
{
"include": "**/*.json",
"outDir": "dist/src",
"watchAssets": true
}
]
}
Expand Down
326 changes: 0 additions & 326 deletions src/services/negative-news/negative-news.impl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,332 +45,6 @@ const topicRiskScoreConfig = {
"illegal activities": 4
};

/*
params_classify = GenerateParams(decoding_method="greedy")
#params = GenerateParams(
#decoding_method="sample",
#max_new_tokens=10,
#min_new_tokens=1,
#stream=False,
#temperature=0.7,
#top_k=50,
#top_p=1,
#)
#genai_model = Model(model="google/flan-ul2", params=params, credentials=creds)
langchain_model_classify = LangChainInterface(model="google/flan-ul2", params=params_classify, credentials=creds)
params_summary = GenerateParams(decoding_method="greedy", repetition_penalty=2, min_new_tokens=80, max_new_tokens=200)
langchain_model_summary = LangChainInterface(model="google/flan-ul2", params=params_summary, credentials=creds)
*/

/*
data = search_func(query, num_results,api_key)
valid_url_details, bad_url_details = validate_urls(data)
report_bad_urls(bad_url_details)
scraped_news = scrape_func(valid_url_details, char_size)
neg_news, pos_news = check_neg_news(scraped_news,langchain_model_classify)
report_pos_news(pos_news,langchain_model_summary)
tp,fp = apply_filters(neg_news,langchain_model_classify,subject_name)
report_fp(fp,langchain_model_summary)
report_tp(tp,langchain_model_summary)
final_conclusion(tp,fp, pos_news, subject_name, num_results)
st.success("Done!")
*/

/*
def search_func(query, num_results, api_key):
    """Search Google News via the Scrape-it Cloud API for `query`.

    Persists the raw result list to data.json and returns the re-read copy,
    so later pipeline stages can work from the same on-disk snapshot.
    Returns an empty list on any API/parse failure — the original fell
    through and returned None, which crashed callers doing len()/indexing.
    """
    client = ScrapeitCloudClient(api_key)
    try:
        params = {
            "q": query,
            "gl": "us",          # geo: US results
            "hl": "en",          # language: English
            "num": num_results,  # how many news items to fetch
            "tbm": "nws",        # restrict search to the Google News vertical
        }
        response = client.scrape(params)
        data = response.json()['newsResults']
        # Round-trip through disk so downstream stages read the same file.
        write_list("data.json", data)
        return read_list("data.json")
    except Exception as e:
        print(f"Error occurred: {e}")
        return []  # fix: explicit empty result instead of implicit None
def validate_urls(data):
    """Split search results into reachable and unreachable URLs.

    Each entry of `data` is a dict with 'title', 'link', 'snippet', 'date'.
    Returns (valid_url_details, bad_url_details); bad rows carry an extra
    reason column: 'TimeOut' or 'OtherError'.

    NOTE(review): only connectivity is probed — an HTTP 4xx/5xx response
    still counts as valid (no raise_for_status call); confirm intended.
    """
    valid_url_details = []
    bad_url_details = []
    for item in data:
        record = [item['title'], item['link'], item['snippet'], item['date']]
        try:
            # (connect, read) timeouts in seconds; response body is unused.
            requests.get(record[1], timeout=(10, 10))
        except requests.exceptions.Timeout:
            bad_url_details.append(record + ['TimeOut'])
        except requests.exceptions.RequestException:
            bad_url_details.append(record + ['OtherError'])
        else:
            valid_url_details.append(record)
    return valid_url_details, bad_url_details
def report_bad_urls(bad_url_details):
    # Persist the unreachable-URL records so later stages (scrape_func and
    # final_conclusion) can read them back from bad_url.json.
    write_list("bad_url.json", bad_url_details)
def scrape_func(valid_url_details, char_size):
    """Scrape page content for each reachable URL.

    `valid_url_details` rows are [title, URL, snippet, publish_date]; each
    successfully scraped row gains the first `char_size` characters of page
    text. URLs that answer with the JS/ad-blocker interstitial are appended
    to the bad-URL list instead. Persists both lists and returns the
    scraped rows.
    """
    scraped_news = []
    r_bad_url = read_list("bad_url.json")
    for title, URL, snippet, publish_date in valid_url_details:
        loader = UnstructuredURLLoader(urls=[URL])
        page_text = loader.load()[0].page_content
        if page_text == "Please enable JS and disable any ad blocker":
            # Site blocks scraping — record it alongside the other bad URLs.
            r_bad_url.append([title, URL, snippet, publish_date, "Blocking WebSites"])
        else:
            # Truncate so downstream LLM prompts stay within context size.
            scraped_news.append([title, URL, snippet, publish_date, page_text[0:char_size]])
    write_list("scraped_news.json", scraped_news)
    write_list("bad_url.json", r_bad_url)
    return scraped_news
def check_neg_news(scraped_news, langchain_model):
    """Classify scraped articles as negative or non-negative news.

    Returns (neg_news, pos_news). Each negative row is extended with a list
    of per-topic classifier outputs: the topic name when the model answered
    'yes', otherwise the model's raw reply.

    NOTE(review): an article whose first answer is neither exactly 'yes'
    nor 'no' is silently dropped from both lists — confirm intended.
    """
    neg_news = []
    pos_news = []
    r_topic_config = read_list("topic_risk_score_config.json")
    topic_ll = list(r_topic_config.keys())
    topic_prompt = ", ".join(topic_ll)
    for news in scraped_news:
        context = news[4]  # scraped page text
        # "negetive" typo kept verbatim: it is part of the tuned prompt text.
        neg_news_instr = f"From the context provided identify if there is any negetive news or news related to {topic_prompt} etc present or not. Provide a truthful answer in yes or no"
        seed_pattern = PromptPattern.from_str(neg_news_instr + " : {{context}}")
        template = seed_pattern.langchain.as_template()
        response = langchain_model(template.format(context=context))
        if response == 'yes':
            news_topic = []
            for indv_topic_prompt in topic_ll:
                topic_instr1 = f"From the context provided about news item can you suggest this news related to {indv_topic_prompt} or not. Provide a truthful answer in yes or no"
                seed_pattern = PromptPattern.from_str(topic_instr1 + " : {{context}}")
                template = seed_pattern.langchain.as_template()
                answer = langchain_model(template.format(context=context))
                # Record the topic name on 'yes', otherwise the raw answer
                # (so 'no'/'not sure' replies are kept, as before).
                news_topic.append(indv_topic_prompt if answer == 'yes' else answer)
            news.append(news_topic)
            neg_news.append(news)
        elif response == 'no':
            pos_news.append(news)
    return neg_news, pos_news
def report_pos_news(pos_news, langchain_model):
    """Summarize each non-negative article and persist [URL, date, summary]
    rows to pos_news_results.json."""
    seed_pattern = PromptPattern.from_str("Summarize the text in 2 or 3 sentences : {{text}}")
    template = seed_pattern.langchain.as_template()
    results = []
    for item in pos_news:
        # item layout: [title, URL, snippet, publish_date, scraped_text]
        summary = langchain_model(template.format(text=item[4])).rstrip(".")
        results.append([item[1], item[3], summary])
    write_list("pos_news_results.json", results)
def apply_filters(neg_news, langchain_model, subject_name):
    """Split negative articles into true positives (about the subject) and
    false positives.

    With an empty filter.json only the name-mention check is asked of the
    model; otherwise location, date-of-birth ('%b %Y' in filter.json[1]) and
    approximate-age checks run as well. Every row is extended with the four
    raw answers before being bucketed. Returns (tp, fp).

    Fixes vs. original: removes the dead `vmatch`/`vmmatch` (typo) variables
    and no-op self-assignments, and hoists the loop-invariant DOB/age
    parsing and prompt construction out of the per-article loop.
    """
    def _ask(instr, text):
        # Run one yes/no instruction against the classifier model.
        seed_pattern = PromptPattern.from_str(instr + " : {{text}}")
        template = seed_pattern.langchain.as_template()
        return langchain_model(template.format(text=text))

    tp = []
    fp = []
    r_filter = read_list("filter.json")
    name_instr = f"From the news text provided identify if the person {subject_name} is mentioned anywhere in the text. Provide a truthful answer in yes or no. If not sure then say not sure"
    if r_filter:
        location = r_filter[0]
        dob_date = datetime.strptime(r_filter[1], '%b %Y')
        age = date.today() - dob_date.date()
        # Approximate age in years from the timedelta (365.2425-day year).
        age_yrs = round((age.days + age.seconds / 86400) / 365.2425)
        loc_instr = f"From the news text provided identify if there is any mention of {location} anywhere in the text. Provide a truthful answer in yes or no. If not sure then say not sure"
        dob_instr = f"From the news text provided identify if there is any mention of {dob_date} anywhere in the text. Provide a truthful answer in yes or no. If not sure then say not sure"
        age_instr = f"From the news text provided identify if the age of {subject_name} is nearly around {age_yrs} years or so. Provide a truthful answer in yes or no. If not sure then say not sure"
    for news in neg_news:
        text = news[4]  # scraped page text
        response1 = _ask(name_instr, text)
        if r_filter:
            response2 = _ask(loc_instr, text)
            response3 = _ask(dob_instr, text)
            response4 = _ask(age_instr, text)
            matched = (response1 == "yes" and response2 == "yes"
                       and (response3 == "yes" or response4 == "yes"))
        else:
            # Without filter data only the name check is real; the rest are
            # 'yes' placeholders so the report columns stay aligned.
            response2 = response3 = response4 = 'yes'
            matched = response1 == "yes"
        news.extend([response1, response2, response3, response4])
        (tp if matched else fp).append(news)
    return tp, fp
def report_fp(fp, langchain_model):
    """Summarize each false-positive row and persist it with its topic list
    and the four filter answers to fp_results.json."""
    seed_pattern = PromptPattern.from_str("Summarize the text in 2 or 3 sentences : {{text}}")
    template = seed_pattern.langchain.as_template()
    fp_results = []
    for row in fp:
        summary = langchain_model(template.format(text=row[4])).rstrip(".")
        # [URL, date, summary, topics, response1, response2, response3, response4]
        fp_results.append([row[1], row[3], summary,
                           row[5], row[6], row[7], row[8], row[9]])
    write_list("fp_results.json", fp_results)
def report_tp(tp, langchain_model):
    """Summarize each true-positive row and persist it with its topic list
    and the four filter answers to tp_results.json."""
    seed_pattern = PromptPattern.from_str("Summarize the text in 2 or 3 sentences : {{text}}")
    template = seed_pattern.langchain.as_template()
    tp_results = []
    for row in tp:
        summary = langchain_model(template.format(text=row[4])).rstrip(".")
        # [URL, date, summary, topics, response1, response2, response3, response4]
        tp_results.append([row[1], row[3], summary,
                           row[5], row[6], row[7], row[8], row[9]])
    write_list("tp_results.json", tp_results)
def final_conclusion(tp, fp, pos_news, subject_name, num_results):
    """Build the human-readable screening conclusion and persist it.

    Writes a list of sentences (overall counts, topics found for negative
    and unrelated news, and a final verdict) to neg_news_conclusion.json.
    Output strings match the original byte-for-byte; the dead, never-used
    `conclusion_text_topic` variable has been removed.
    """
    ctp, cfp, cpos = len(tp), len(fp), len(pos_news)
    cbadurl = len(read_list("bad_url.json"))
    neg_news_conclusion = [
        f"Total News Screened: {num_results} Neg-News-{ctp} Un-related News-{cfp} Non-Neg News-{cpos} Bad-Url-{cbadurl} "
    ]
    # Topic lists live at row index 5 (appended by check_neg_news).
    # NOTE(review): set() makes topic ordering nondeterministic run-to-run,
    # as in the original — confirm whether stable ordering is wanted.
    tp_topics = {t for row in tp for t in row[5]}
    fp_topics = {t for row in fp for t in row[5]}
    if tp_topics:
        neg_news_conclusion.append(
            f"Screening process has found {ctp} Negative news. Topics identified are - {', '.join(tp_topics)}. ")
    else:
        neg_news_conclusion.append("")
    if fp_topics:
        neg_news_conclusion.append(
            f"Screening process has found {cfp} unrelated -ve news. Topics identified are - {', '.join(fp_topics)}.")
    else:
        neg_news_conclusion.append("")
    if tp:
        verdict = f"The screening process has found that there are Negative News present about {subject_name}. Initiate L2 level Screening."
    elif fp:
        verdict = f"Even if the screening process has found that there are Negative News present but those seems not related to {subject_name}. Further Manual Screening is recommended."
    else:
        verdict = f"There are No Negative News found about {subject_name}."
    neg_news_conclusion.append(verdict)
    write_list("neg_news_conclusion.json", neg_news_conclusion)
*/

interface ScrapeitResponse {
searchInformation: {
totalResults: string;
Expand Down
Loading

0 comments on commit 8398d89

Please sign in to comment.