Skip to content

Commit

Permalink
Add Wikipedia Configuration Example
Browse files: browse the repository at this point in the history
  • Loading branch information
vasgat committed Jun 27, 2017
1 parent 0fe48ac commit 7be267c
Showing 1 changed file with 51 additions and 0 deletions.
51 changes: 51 additions & 0 deletions configuration_files/wikipedia_table.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
  "url": {
    "base_url": "https://en.wikipedia.org",
    "relative_url": "/wiki/List_of_largest_companies_by_revenue"
  },
  "table_selector": ".wikitable > tbody > tr",
  "source_name": "wikipedia",
  "company_info": [
    {
      "label": "company_name",
      "value": {
        "selector": "td:nth-child(2)",
        "type": "text"
      }
    },
    {
      "label": "industry",
      "value": {
        "selector": "td:nth-child(3)",
        "type": "text"
      }
    },
    {
      "label": "country",
      "value": {
        "selector": "td:nth-child(7) > a",
        "type": "text"
      }
    }
  ],
  "metrics": [
    {
      "label": "Revenue",
      "value": {
        "selector": "td:nth-child(4)",
        "type": "numerical",
        "replace": {
          "regex": ["^"],
          "with": ["Millions of "]
        }
      }
    }
  ],
  "store": {
    "format": "COMPANY_METRIC",
    "database": "WR_test_db",
    "companies_collection": "companies",
    "metrics_collection": "metrics"
  },
  "dynamic_page": false
}

0 comments on commit 7be267c

Please sign in to comment.