forked from MLH-Fellowship/Quinn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbrands_scr.py
38 lines (30 loc) · 898 Bytes
/
brands_scr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import urllib.request
import re
# Accumulators filled further down in the script: the raw anchor texts and
# their cleaned-up counterparts.
brands = []
brands_list = []

# NOTE(review): the driver path is passed positionally, as the script was
# originally written; newer Selenium releases may expect a Service object
# instead -- confirm against the installed Selenium version.
driver = webdriver.Chrome(ChromeDriverManager().install())
url = "https://www.sephora.com/brands-list"
try:
    # The HTML is taken from driver.page_source below, not from this
    # return value; `page` is kept only so the module-level name survives.
    page = driver.get(url)
    content = driver.page_source
finally:
    # Always shut the browser down, even when loading the page fails, so
    # no Chrome/chromedriver process is left running after the script.
    driver.quit()

soup = BeautifulSoup(content, 'html.parser')

# The brand links live inside a <main> element identified by a CSS class.
main = soup.find('main', attrs={'class': 'css-1vs8v5v'})
if main is None:
    # Fail with an explicit message instead of an AttributeError on None.
    raise RuntimeError(
        "Could not find <main class='css-1vs8v5v'> in the page fetched from "
        + url
    )
results = main.find_all('a', attrs={'class': 'css-ekc7zl'})
# Pull the text out of every brand anchor found on the page.
brands.extend(anchor.getText() for anchor in results)

# Keep only ASCII letters and spaces in each name; every other character
# (digits, punctuation, accented letters, line breaks) is dropped.
brands_list.extend(re.sub(r'[^a-zA-Z ]', '', name) for name in brands)
# Write a one-column listing: a 'Brands' header line followed by one cleaned
# brand name per line. The with-block guarantees the file is closed even if
# a write fails part-way through.
with open('brands.txt', 'w', encoding='utf-8') as my_file:
    my_file.write('Brands')
    my_file.write('\n')
    my_file.writelines(ele + '\n' for ele in brands_list)

if __name__ == '__main__':
    # When run as a script, also echo the cleaned names to stdout.
    print(brands_list)