# DianPing.py
from bs4 import BeautifulSoup
import HttpClient
import re
import AzureStorage
# Credentials handed to AzureStorage.Init below (presumably the storage
# account name and its access key — confirm against the AzureStorage module).
name="dianping"
# NOTE(review): this looks like a real access key committed to source control.
# Confirm, rotate it, and load it from configuration or the environment.
key="/J3rUIw92KxHbgkyrlSg1ii4dH954IjEPSiK6bK0QtLKuuodMADinBDWrxOaMi5OiD4jRwQIiyjS6DYUAYOyPg=="
# These two calls run at import time, before any function in this module is used.
AzureStorage.Init(name, key)
# Presumably creates the "topshops" table when it is missing — TODO confirm.
AzureStorage.EnsureTable("topshops")
def GetTopShops(city):
    """Fetch the ranked shop listing for a city, page by page.

    Requests pages 1 through 50 of the dpindex ranking URL for ``city``,
    parses every response with ``ParseShopInfo`` and tags each resulting
    shop dict with the city it was fetched for.

    Args:
        city: Integer city id; it is formatted into the URL with ``%d``.

    Returns:
        A list of the shop dicts produced by ``ParseShopInfo``, each with an
        added ``"city"`` key holding ``city``.
    """
    # The separator in front of "region" must be a plain "&". An
    # HTML-entity-decoded "®" in its place corrupts the query string.
    topUrlTemplete = (
        "http://dpindex.dianping.com/dpindex"
        "?category=10&region=&type=rank&city=%d&p=%d"
    )
    ret = []
    for pIndex in range(1, 51):
        ret += ParseShopInfo(HttpClient.Get(topUrlTemplete % (city, pIndex)))
    # Tag the shops once, after every page has been collected.
    for shop in ret:
        shop["city"] = city
    return ret
def ParseShopInfo(text):
    """Extract shop records from the HTML of one ranking page.

    Args:
        text: HTML markup of a ranking page.

    Returns:
        A list with one dict per ``<li>`` whose class matches ``rank-item``.
        Each dict has the keys ``"id"`` (the first run of digits in the
        link's href, as an int), ``"href"``, ``"name"`` (text of the
        ``div`` with class ``field-name``) and ``"rank"`` (int parsed from
        the ``span`` with class ``ranknum``).

    Raises:
        TypeError, AttributeError or ValueError: when a matching ``<li>``
            lacks the expected link, digits, name or numeric rank.
    """
    shops = []
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(text, "html.parser")
    for item in soup.findAll("li", {"class": re.compile("rank-item")}):
        href = item.find("a")["href"]
        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        shop_id = int(re.search(r"\d+", href).group())
        shop_name = item.find("div", {"class": "field-name"}).text
        rank = int(item.find("span", {"class": "ranknum"}).text)
        shops.append(
            {"id": shop_id, "href": href, "name": shop_name, "rank": rank}
        )
    return shops
def UpdateShop(shop):
    """Pass one shop record to ``AzureStorage.Update``.

    The first argument is ``shop["city"]`` and the second is the string
    ``"<city>_<rank>"`` built from the record's ``city`` and ``rank``
    (both formatted with ``%d``); the record itself is the third argument.

    Args:
        shop: Dict containing at least the ``"city"`` and ``"rank"`` keys.
    """
    city_key = shop["city"]
    row_key = "%d_%d" % (city_key, shop["rank"])
    AzureStorage.Update(city_key, row_key, shop)
def ListShops(city):
    """Yield the stored shop records for one city.

    Calls ``AzureStorage.Query`` with the filter ``PartitionKey eq '<city>'``
    (``city`` is formatted with ``%d``) and, for every entity it returns,
    yields a plain dict holding that entity's ``id``, ``href``, ``name``,
    ``rank`` and ``city`` values.

    Args:
        city: Integer city id used in the query filter.
    """
    wanted_fields = ("id", "href", "name", "rank", "city")
    query_filter = "PartitionKey eq '%d'" % city
    for record in AzureStorage.Query(query_filter):
        yield {field: record[field] for field in wanted_fields}