-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprofile-stats.py
36 lines (30 loc) · 1.03 KB
/
profile-stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os, sys, re, time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import WebDriverException

# Script: load the page given on the command line in headless Chrome, click
# the "more-link" element repeatedly until it can no longer be found or
# clicked, then print the resulting page source, prettified, to stdout.
#
# Usage: <script> WIKI_URL
# Exits with status 1 when no URL argument is supplied.
if len(sys.argv) < 2:
    print(f"Usage: {sys.argv[0]} WIKI_URL")
    sys.exit(1)

options = Options()
# Pass the flag directly rather than assigning ``options.headless``: the
# property is not available in every Selenium release, the argument is.
options.add_argument("--headless")
# NOTE(review): the driver path is passed positionally, which only older
# Selenium releases accept; newer ones expect a Service object. Confirm
# against the Selenium version this script is pinned to.
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
try:
    driver.set_page_load_timeout(30)
    driver.get(sys.argv[1])

    # Expand the page: each click on "more-link" may reveal further content
    # (and possibly another link), so repeat until the lookup or click fails.
    while True:
        try:
            driver.find_element(By.CLASS_NAME, "more-link").click()
            # Give the page a moment to render the newly loaded content.
            time.sleep(1)
            driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
        except WebDriverException:
            # Any driver-side failure (no such element, not clickable,
            # stale, ...) is treated as "nothing left to expand".
            break

    soup = BeautifulSoup(driver.page_source, "lxml")
    print(soup.prettify())
finally:
    # Always shut the browser down, even if loading or parsing failed.
    driver.quit()