Skip to content

Commit

Permalink
Moves test folder outside of src so coverage does not include testin…
Browse files Browse the repository at this point in the history
…g for testing
  • Loading branch information
Kelli Scheuble committed Jun 17, 2020
1 parent a871d45 commit d088716
Show file tree
Hide file tree
Showing 11 changed files with 195 additions and 167 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pytest-cov = "*"
fastapi = "*"
uvicorn = "*"
gunicorn = "*"
psycopg2 = "*"

[requires]
python_version = "3.7"
39 changes: 29 additions & 10 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

166 changes: 83 additions & 83 deletions src/app/extract.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
import logging
import os
import requests
import pandas as pd
from flatten_dict import flatten
import psycopg2
from datetime import date

import os

logging.basicConfig(level=logging.INFO, format="%(asctime)s:%(name)s:%(message)s")

app_id = os.environ["APP_ID"]
api_key = os.environ["API_KEY"]

DB_NAME = os.environ["DB_NAME"]
DB_USER = os.environ["DB_USER"]
DB_PASSWORD = os.environ["DB_PASSWORD"]
DB_HOST = os.environ["DB_HOST"]
# credentials for monster postgres
# DB_NAME = os.environ["DB_NAME"]
# DB_USER = os.environ["DB_USER"]
# DB_PASSWORD = os.environ["DB_PASSWORD"]
# DB_HOST = os.environ["DB_HOST"]

# This is where we can define the titles that we want to search for
main_titles = [
Expand Down Expand Up @@ -61,10 +61,10 @@ def adzuna():
appended_results = list()

for title in main_titles:
logging.info("="*20)

logging.info("=" * 20)
logging.info(f"Adzuna request for {title}:")

request = requests.get(
"https://api.adzuna.com/v1/api/jobs/us/search/1",
params={
Expand Down Expand Up @@ -194,9 +194,7 @@ def jobsearcher():

offset += 100
result = request.json()

logging.info(f"{result}")


x = len(result["data"])
# flatten nested objects
flattened_results = [
Expand Down Expand Up @@ -256,74 +254,76 @@ def jobsearcher():
return df



def monster_scraper():
    """Fetch job listings posted today from the Monster postgres database.

    Connects using the module-level DB_* credentials (read from the
    environment), joins the listing tables into flat rows, and returns a
    pandas DataFrame shaped like the other scrapers' output
    (id, publication_date, title, title_keyword, city, state, post_url,
    description, company).  Ids are prefixed with "MS" to namespace them
    per source.  Returns None when the connection or query fails.
    """
    connection = None
    cursor = None
    try:
        # fix: was `today = print(date.today())`, which assigns None
        today = date.today()

        # fix: pass the credential *values*; the original connected with the
        # literal strings 'DB_NAME', 'DB_USER', 'DB_PASSWORD', 'DB_HOST'
        connection = psycopg2.connect(
            dbname=DB_NAME,
            user=DB_USER,
            password=DB_PASSWORD,
            host=DB_HOST,
        )
        cursor = connection.cursor()
        print(connection.get_dsn_parameters(), "\n")

        cursor.execute("SELECT version();")
        record = cursor.fetchone()
        print("you are connected to - ", record, "\n")

        # Parameterized query instead of str.format — avoids any SQL
        # injection surface and quoting issues with the date value.
        query = """
            SELECT
                job_listings.id,
                "post_date_utc",
                "title",
                "city",
                "state_province",
                "external_url",
                job_descriptions.description,
                companies.name
            FROM
                job_listings
            FULL OUTER JOIN job_locations ON job_listings.id = job_locations.job_id
            FULL OUTER JOIN locations ON job_locations.location_id = locations.id
            FULL OUTER JOIN job_links ON job_listings.id = job_links.job_id
            FULL OUTER JOIN job_descriptions ON job_listings.id = job_descriptions.job_id
            FULL OUTER JOIN job_companies ON job_listings.id = job_companies.job_id
            FULL OUTER JOIN companies ON job_companies.company_id = companies.id
            WHERE
                "post_date_utc" > %s
            ORDER BY
                "post_date_utc" ASC
        """
        cursor.execute(query, (str(today),))
        result = cursor.fetchall()
        print("RESULT:", len(result))

        # fix: build ONE dict per row — the original's inner `for x in row:`
        # loop appended the same dict once per column (8 duplicates per row)
        job_list = [
            {
                "id": row[0],
                "publication_date": row[1],
                "title": row[2],
                "title_keyword": row[2],
                "city": row[3],
                "state": row[4],
                "post_url": row[5],
                "description": row[6],
                "company": row[7],
            }
            for row in result
        ]

        df = pd.DataFrame.from_dict(job_list)
        # Prefix ids so they cannot collide with the other job sources.
        df["id"] = df["id"].apply(lambda x: "MS" + str(x))

        return df

    except (Exception, psycopg2.Error) as error:
        print("Error while connecting to PostgreSQL", error)
    finally:
        # fix: guard both handles — the original referenced `connection` in
        # `finally` even when psycopg2.connect itself raised (NameError)
        if cursor is not None:
            cursor.close()
        if connection is not None:
            connection.close()
            print("PostgreSQL connection is closed")
# def monster_scraper():
# try:
# today = print(date.today())
# connection = psycopg2.connect(
# dbname="DB_NAME", user="DB_USER", password="DB_PASSWORD", host="DB_HOST"
# )
# print("CONNECTION:", connection)
# cursor = connection.cursor()
# print(connection.get_dsn_parameters(), "\n")

# cursor.execute("SELECT version();")
# record = cursor.fetchone()
# print("you are connected to - ", record, "\n")

# query = """
# SELECT
# job_listings.id,
# "post_date_utc",
# "title",
# "city",
# "state_province",
# "external_url",
# job_descriptions.description,
# companies.name
# FROM
# job_listings
# FULL OUTER JOIN job_locations ON job_listings.id = job_locations.job_id
# FULL OUTER JOIN locations ON job_locations.location_id = locations.id
# FULL OUTER JOIN job_links ON job_listings.id = job_links.job_id
# FULL OUTER JOIN job_descriptions ON job_listings.id = job_descriptions.job_id
# FULL OUTER JOIN job_companies ON job_listings.id = job_companies.job_id
# FULL OUTER JOIN companies ON job_companies.company_id = companies.id
# WHERE
# "post_date_utc" > '{today}'
# ORDER BY
# "post_date_utc" ASC
# """.format(
# today=str(date.today())
# )

# cursor.execute(query)
# result = cursor.fetchall()
# print("RESULT:", len(result))

# job_list = []
# for row in result:
# list(row)
# for x in row:
# x = {
# "id": row[0],
# "publication_date": row[1],
# "title": row[2],
# "title_keyword": row[2],
# "city": row[3],
# "state": row[4],
# "post_url": row[5],
# "description": row[6],
# "company": row[7],
# }
# job_list.append(x)

# df = pd.DataFrame.from_dict(job_list)
# df["id"] = df["id"].apply(lambda x: "MS" + str(x))

# return df

# except (Exception, psycopg2.Error) as error:
# print("Error while connecting to PostgreSQL", error)
# finally:
# if connection:
# cursor.close()
# connection.close()
# print("PostgreSQL connection is closed")
2 changes: 1 addition & 1 deletion src/app/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,4 @@ def gendata(df):
def query(df):
    """Upload the transformed jobs DataFrame to elasticsearch.

    Currently a stub for development: it prints the frame instead of
    bulk-indexing it.  Re-enable the commented bulk() call to restore
    the real upload.
    """
    print(df)
    # print(bulk(es, gendata(df)))
23 changes: 13 additions & 10 deletions src/app/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from .extract import adzuna, jobsearcher, monster_scraper
from .extract import adzuna, jobsearcher #, monster_scraper
from .transform import transform_df
from .load import query

Expand All @@ -16,31 +16,34 @@
allow_headers=["*"],
)


@app.get("/")
async def root():
    """Landing page confirming the Kondoboard cron service is deployed.

    Returns a small HTML page that links to the auto-generated /docs.
    """
    # NOTE: this region of the diff contained both the old and new sides
    # interleaved; this is the coherent (post-commit) version.
    return HTMLResponse(
        """
    <h1>Kondoboard Cron Deployed</h1>
    <p>Go to <a href="/docs">/docs</a> for documentation.</p>
    """
    )


@app.get("/start")
def start_upload():  # async
    """Start the cron task to upload new jobs to the elasticsearch database.

    Pulls fresh listings from each active source (Adzuna, JobSearcher),
    normalizes them with transform_df, and pushes each batch via query().
    The Monster scrape is disabled alongside its commented-out import.
    """
    df_adzuna = adzuna()
    df_jobsearcher = jobsearcher()
    # df_monster = monster_scraper()  # disabled: Monster source turned off

    transformed_adzuna = transform_df(df_adzuna)
    transformed_jobsearcher = transform_df(df_jobsearcher)
    # transformed_monster = transform_df(df_monster)

    query(transformed_adzuna)
    query(transformed_jobsearcher)
    # query(transformed_monster)
    return "Cron job complete"
Loading

0 comments on commit d088716

Please sign in to comment.