forked from hankangwen/MyCSDNData
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcsdn_crawler.py
30 lines (24 loc) · 936 Bytes
/
csdn_crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import datetime
import os

import bs4
import requests
def get_data():
    """Scrape the CSDN profile statistics and append them to the data log.

    Downloads the hard-coded profile page, pairs every element matching
    ``.user-profile-statistics-name`` with the element matching
    ``.user-profile-statistics-num`` at the same position, and appends one
    record (a newline, the current local date as ``year_month_day``, a tab
    and a space, then the list of ``"name: number"`` strings) to
    ``./csdndata/csdn_data.txt``.

    Returns:
        None. The only result is the record appended to the data file.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: if the output directory or file cannot be created/written.
    """
    url = 'https://blog.csdn.net/qq_41999731'
    # NOTE(review): 'KHTHL' looks like a typo for 'KHTML'; the header value is
    # left unchanged so the request sent to the server stays the same.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36(KHTHL, like Gecko) Chrome/45.0.2454.101 Safari/537.36',
    }
    output_path = "./csdndata/csdn_data.txt"

    # Without a timeout requests waits forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page and logging
    # an empty record.
    response.raise_for_status()

    soup = bs4.BeautifulSoup(response.text, 'html.parser')
    names = soup.select('div .user-profile-statistics-name')
    numbers = soup.select('div .user-profile-statistics-num')
    # zip stops at the shorter sequence, so a page with fewer name elements
    # than number elements no longer raises IndexError.
    result = [
        "{}: {}".format(name.text, number.text)
        for name, number in zip(names, numbers)
    ]

    today = datetime.datetime.now()
    output = "\n{}_{}_{}\t {}".format(today.year, today.month, today.day, result)

    # Create the target directory on first run rather than crashing.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    # Explicit UTF-8 keeps the scraped (non-ASCII) labels readable regardless
    # of the platform's default locale encoding.
    with open(output_path, mode="a", encoding="utf-8") as f:
        f.write(output)
# Script entry point: perform a single scrape when the file is run directly.
if __name__ == "__main__":
    get_data()