WebscrappingPyconCoSpeakers.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Extract pycon.co speakers information
using web scraping
https://www.pycon.co/speakers/
Created on Tue Mar 27 07:26:42 2018
@author: Jose R. Zapata
@contact: https://joserzapata.github.io/
"""
import requests
from bs4 import BeautifulSoup

# %% Download webpage
URL = 'https://2018.pycon.co'
page = requests.get(URL + '/speakers/')
contents = page.content

# %% Read the web info and write a Markdown table with one row per speaker
Info = BeautifulSoup(contents, 'html.parser')
speakers = Info.find_all("div", "speaker")

with open("Pycon2018SpeakersInfo.md", 'w') as file2write:
    file2write.write("# Pycon 2018 Colombia Speakers Info" + '\n\n')
    file2write.write("List of speakers and workshops in pycon 2018" + '\n\n')
    file2write.write("| Name | Github | Presentation |" + '\n')
    file2write.write("| --- | --- | --- |" + '\n')
    for data in speakers:
        name = data.find("div", "speaker-name").getText()
        # GitHub profile, taken from the card's social links that point to www.github
        links = data.find_all("a", "social-hover", href=True)
        github_link = []
        if len(links) != 0:
            github_link = [x['href'] for x in links if 'www.github' in x['href']]
        # Follow the speaker's personal page to get the talk or workshop name
        per = data.find("a", "", href=True)
        personal_info = requests.get(URL + per['href'])
        content_personal = personal_info.content
        per_info = BeautifulSoup(content_personal, 'html.parser')
        talk = per_info.select('ul > li > a')[-1].getText()
        talk = talk.replace('(Spanish)', '').replace('(English)', '').replace('Español', '')
        if len(github_link) >= 1:
            # github_link[0][23:] strips the 'https://www.github.com/' prefix (23 chars)
            file2write.write(f'{name} | [{github_link[0][23:]}]({github_link[0]}) | {talk}' + '\n')
        else:
            file2write.write(f'{name} | | {talk}' + '\n')

print("Have a Nice day :)")