Skip to content

Commit

Permalink
Merge pull request BloomTech-Labs#11 from Lambda-School-Labs/mapping_fix
Browse files Browse the repository at this point in the history
Fixes the nested `doc` object when loading documents into the bulk API.
  • Loading branch information
kellischeuble authored Jun 9, 2020
2 parents e99a19a + e1cdd0e commit 50a5ce6
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 13 deletions.
23 changes: 11 additions & 12 deletions etl/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,19 @@ def gendata(df):
"_op_type": "index",
"_index": "jobs",
"_id": row["id"],
"doc": {
"post_url": row["post_url"],
"title": row["title"],
"title_keyword": row["title_keyword"],
"tags": row["tags"],
"company": row["company"],
"description": row["description"],
"publication_date": row["publication_date"],
"location_city": row["city"],
"location_state": row["state"],
"location_point": f"{row['latitude']},{row['longitude']}",
},
"post_url": row["post_url"],
"title": row["title"],
"title_keyword": row["title_keyword"],
"tags": row["tags"],
"company": row["company"],
"description": row["description"],
"publication_date": row["publication_date"],
"location_city": row["city"],
"location_state": row["state"],
"location_point": f"{row['latitude']},{row['longitude']}",
}


def query(df):
    """Send the actions generated from *df* to ``bulk`` and print its result.

    The actions come from ``gendata(df)`` and are handed to ``bulk`` together
    with the ``es`` object defined elsewhere in this module.

    NOTE(review): ``bulk`` is presumably ``elasticsearch.helpers.bulk`` and
    ``es`` an Elasticsearch client -- confirm against the module imports.
    """
    actions = gendata(df)
    outcome = bulk(es, actions)
    print(outcome)
3 changes: 3 additions & 0 deletions etl/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,7 @@ def transform_df(df):
for col in keyword_cols:
df[col] = df[col].apply(keyword)

for col in ["latitude", "longitude"]:
df[col] = df[col].fillna(0)

return df
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
pandas
arrow
beautifulsoup4
elasticsearch[async]
boto3
elasticsearch
requests
requests-aws4auth
flatten-dict

# test requirements
Expand Down

0 comments on commit 50a5ce6

Please sign in to comment.