Skip to content

Commit

Permalink
Moves test folder outside of src so coverage does not include testin…
Browse files Browse the repository at this point in the history
…g for testing
  • Loading branch information
Kelli Scheuble committed Jun 17, 2020
1 parent a871d45 commit d088716
Show file tree
Hide file tree
Showing 11 changed files with 195 additions and 167 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pytest-cov = "*"
fastapi = "*"
uvicorn = "*"
gunicorn = "*"
psycopg2 = "*"

[requires]
python_version = "3.7"
39 changes: 29 additions & 10 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

166 changes: 83 additions & 83 deletions src/app/extract.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
import logging
import os
import requests
import pandas as pd
from flatten_dict import flatten
import psycopg2
from datetime import date

import os

logging.basicConfig(level=logging.INFO, format="%(asctime)s:%(name)s:%(message)s")

app_id = os.environ["APP_ID"]
api_key = os.environ["API_KEY"]

DB_NAME = os.environ["DB_NAME"]
DB_USER = os.environ["DB_USER"]
DB_PASSWORD = os.environ["DB_PASSWORD"]
DB_HOST = os.environ["DB_HOST"]
# credentials for monster postgres
# DB_NAME = os.environ["DB_NAME"]
# DB_USER = os.environ["DB_USER"]
# DB_PASSWORD = os.environ["DB_PASSWORD"]
# DB_HOST = os.environ["DB_HOST"]

# This is where we can define the titles that we want to search for
main_titles = [
Expand Down Expand Up @@ -61,10 +61,10 @@ def adzuna():
appended_results = list()

for title in main_titles:
logging.info("="*20)

logging.info("=" * 20)
logging.info(f"Adzuna request for {title}:")

request = requests.get(
"https://api.adzuna.com/v1/api/jobs/us/search/1",
params={
Expand Down Expand Up @@ -194,9 +194,7 @@ def jobsearcher():

offset += 100
result = request.json()

logging.info(f"{result}")


x = len(result["data"])
# flatten nested objects
flattened_results = [
Expand Down Expand Up @@ -256,74 +254,76 @@ def jobsearcher():
return df



def monster_scraper():
    """Fetch job listings posted today from the Monster postgres database.

    Connects using the module-level DB_* credentials (read from the
    environment), joins the listing tables into flat rows, and returns a
    pandas DataFrame shaped like the other scrapers' output
    (id, publication_date, title, title_keyword, city, state, post_url,
    description, company).  Ids are prefixed with "MS" to namespace them
    per source.  Returns None when the connection or query fails.
    """
    connection = None
    cursor = None
    try:
        # fix: was `today = print(date.today())`, which assigns None
        today = date.today()

        # fix: pass the credential *values*; the original connected with the
        # literal strings 'DB_NAME', 'DB_USER', 'DB_PASSWORD', 'DB_HOST'
        connection = psycopg2.connect(
            dbname=DB_NAME,
            user=DB_USER,
            password=DB_PASSWORD,
            host=DB_HOST,
        )
        cursor = connection.cursor()
        print(connection.get_dsn_parameters(), "\n")

        cursor.execute("SELECT version();")
        record = cursor.fetchone()
        print("you are connected to - ", record, "\n")

        # Parameterized query instead of str.format — avoids any SQL
        # injection surface and quoting issues with the date value.
        query = """
            SELECT
                job_listings.id,
                "post_date_utc",
                "title",
                "city",
                "state_province",
                "external_url",
                job_descriptions.description,
                companies.name
            FROM
                job_listings
            FULL OUTER JOIN job_locations ON job_listings.id = job_locations.job_id
            FULL OUTER JOIN locations ON job_locations.location_id = locations.id
            FULL OUTER JOIN job_links ON job_listings.id = job_links.job_id
            FULL OUTER JOIN job_descriptions ON job_listings.id = job_descriptions.job_id
            FULL OUTER JOIN job_companies ON job_listings.id = job_companies.job_id
            FULL OUTER JOIN companies ON job_companies.company_id = companies.id
            WHERE
                "post_date_utc" > %s
            ORDER BY
                "post_date_utc" ASC
        """
        cursor.execute(query, (str(today),))
        result = cursor.fetchall()
        print("RESULT:", len(result))

        # fix: build ONE dict per row — the original's inner `for x in row:`
        # loop appended the same dict once per column (8 duplicates per row)
        job_list = [
            {
                "id": row[0],
                "publication_date": row[1],
                "title": row[2],
                "title_keyword": row[2],
                "city": row[3],
                "state": row[4],
                "post_url": row[5],
                "description": row[6],
                "company": row[7],
            }
            for row in result
        ]

        df = pd.DataFrame.from_dict(job_list)
        # Prefix ids so they cannot collide with the other job sources.
        df["id"] = df["id"].apply(lambda x: "MS" + str(x))

        return df

    except (Exception, psycopg2.Error) as error:
        print("Error while connecting to PostgreSQL", error)
    finally:
        # fix: guard both handles — the original referenced `connection` in
        # `finally` even when psycopg2.connect itself raised (NameError)
        if cursor is not None:
            cursor.close()
        if connection is not None:
            connection.close()
            print("PostgreSQL connection is closed")
# def monster_scraper():
# try:
# today = print(date.today())
# connection = psycopg2.connect(
# dbname="DB_NAME", user="DB_USER", password="DB_PASSWORD", host="DB_HOST"
# )
# print("CONNECTION:", connection)
# cursor = connection.cursor()
# print(connection.get_dsn_parameters(), "\n")

# cursor.execute("SELECT version();")
# record = cursor.fetchone()
# print("you are connected to - ", record, "\n")

# query = """
# SELECT
# job_listings.id,
# "post_date_utc",
# "title",
# "city",
# "state_province",
# "external_url",
# job_descriptions.description,
# companies.name
# FROM
# job_listings
# FULL OUTER JOIN job_locations ON job_listings.id = job_locations.job_id
# FULL OUTER JOIN locations ON job_locations.location_id = locations.id
# FULL OUTER JOIN job_links ON job_listings.id = job_links.job_id
# FULL OUTER JOIN job_descriptions ON job_listings.id = job_descriptions.job_id
# FULL OUTER JOIN job_companies ON job_listings.id = job_companies.job_id
# FULL OUTER JOIN companies ON job_companies.company_id = companies.id
# WHERE
# "post_date_utc" > '{today}'
# ORDER BY
# "post_date_utc" ASC
# """.format(
# today=str(date.today())
# )

# cursor.execute(query)
# result = cursor.fetchall()
# print("RESULT:", len(result))

# job_list = []
# for row in result:
# list(row)
# for x in row:
# x = {
# "id": row[0],
# "publication_date": row[1],
# "title": row[2],
# "title_keyword": row[2],
# "city": row[3],
# "state": row[4],
# "post_url": row[5],
# "description": row[6],
# "company": row[7],
# }
# job_list.append(x)

# df = pd.DataFrame.from_dict(job_list)
# df["id"] = df["id"].apply(lambda x: "MS" + str(x))

# return df

# except (Exception, psycopg2.Error) as error:
# print("Error while connecting to PostgreSQL", error)
# finally:
# if connection:
# cursor.close()
# connection.close()
# print("PostgreSQL connection is closed")
2 changes: 1 addition & 1 deletion src/app/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,4 @@ def gendata(df):
def query(df):
    """Upload the transformed jobs DataFrame to elasticsearch.

    Currently a stub for development: it prints the frame instead of
    bulk-indexing it.  Re-enable the commented bulk() call to restore
    the real upload.
    """
    print(df)
    # print(bulk(es, gendata(df)))
23 changes: 13 additions & 10 deletions src/app/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from .extract import adzuna, jobsearcher, monster_scraper
from .extract import adzuna, jobsearcher #, monster_scraper
from .transform import transform_df
from .load import query

Expand All @@ -16,31 +16,34 @@
allow_headers=["*"],
)


@app.get("/")
async def root():
    """Landing page confirming the Kondoboard cron service is deployed.

    Returns a small HTML page that links to the auto-generated /docs.
    """
    # NOTE: this region of the diff contained both the old and new sides
    # interleaved; this is the coherent (post-commit) version.
    return HTMLResponse(
        """
    <h1>Kondoboard Cron Deployed</h1>
    <p>Go to <a href="/docs">/docs</a> for documentation.</p>
    """
    )


@app.get("/start")
def start_upload():  # async
    """Start the cron task to upload new jobs to the elasticsearch database.

    Pulls fresh listings from each active source (Adzuna, JobSearcher),
    normalizes them with transform_df, and pushes each batch via query().
    The Monster scrape is disabled alongside its commented-out import.
    """
    df_adzuna = adzuna()
    df_jobsearcher = jobsearcher()
    # df_monster = monster_scraper()  # disabled: Monster source turned off

    transformed_adzuna = transform_df(df_adzuna)
    transformed_jobsearcher = transform_df(df_jobsearcher)
    # transformed_monster = transform_df(df_monster)

    query(transformed_adzuna)
    query(transformed_jobsearcher)
    # query(transformed_monster)
    return "Cron job complete"
Loading

0 comments on commit d088716

Please sign in to comment.