crawler_baidugupiao.py
# coding:utf-8
# Import required modules
import sys
import time
import requests
from bs4 import BeautifulSoup
import MySQLdb
reload(sys)
sys.setdefaultencoding('utf-8')
today = time.strftime("%Y-%m-%d")
conn = MySQLdb.connect(host='localhost', user='root', passwd='123456', db='crawler', port=3306, use_unicode=True, charset="utf8")
cur = conn.cursor()
def get_stock_price(stock_id):
    # Request the stock page and read its body as UTF-8 text
    response = requests.get("https://gupiao.baidu.com/stock/" + stock_id + ".html?from=aladingpc")
    response.encoding = 'utf-8'
    wbdata = response.text
    # Parse the fetched HTML
    soup = BeautifulSoup(wbdata, 'lxml')
    # Locate the indicator names and values with CSS selectors; each select() call returns a list
    news_titles = soup.select(".line1 dl dt")
    news_value = soup.select(".line1 dl dd")
    for i in range(len(news_titles)):
        # print news_titles[i].get_text(), news_value[i].get_text()
        cur.execute("insert into crawler.stock_pd_price(stock_no,stock_index,index_value,date) values(%s,%s,%s,%s)",
                    (stock_id, news_titles[i].get_text(), news_value[i].get_text(), today))
    conn.commit()
if __name__ == '__main__':
    # Fetch every stock id from the stock_list table and crawl each one
    count = cur.execute('select stock_id from crawler.stock_list')
    print count
    result = cur.fetchmany(count)
    for i in range(count):
        try:
            print 'running ', i, result[i]
            get_stock_price(result[i][0])
        except Exception:
            # Log the failing stock id and continue with the rest
            print 'error ', i, result[i]
            continue
    cur.close()
    conn.commit()
    conn.close()