# chromespider.py
import os,time
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
import argparse
from argparse import ArgumentDefaultsHelpFormatter
# Chrome binary launched by the headless driver (macOS application path).
CHROME_BINARY = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'


def _parse_args(argv=None):
    """Parse the command line and return the argparse namespace.

    argv: list of argument strings; ``None`` makes argparse read
        ``sys.argv[1:]``.
    Raises ``SystemExit`` (standard argparse behaviour) when the required
    ``-u`` option is missing or malformed.
    """
    parser = argparse.ArgumentParser(
        prog="Spider",
        description='parse arg',
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('-u', dest='url', action='store', required=True,
                        help='the file url')
    return parser.parse_args(argv)


def _make_driver():
    """Start and return a headless Chrome WebDriver."""
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.binary_location = CHROME_BINARY
    chrome_options.add_argument('--disable-gpu')
    # NOTE(review): newer Selenium releases deprecate the `chrome_options`
    # keyword in favour of `options` -- confirm the installed version
    # before switching.
    return webdriver.Chrome(chrome_options=chrome_options)


def _scrape_stream_url(chrome, url):
    """Load *url*, click through the download buttons and return the result.

    Prints the page title, clicks the element with id ``btnDl``, waits five
    seconds, clicks the element with id ``downloadTimer`` and returns the
    list of text nodes found in ``<span id="streamurl">``.
    """
    chrome.get(url)
    print(chrome.title)
    chrome.find_element_by_id("btnDl").click()
    # Presumably gives the page time to enable the timer element -- TODO confirm.
    time.sleep(5)
    chrome.find_element_by_id("downloadTimer").click()
    tree = etree.HTML(chrome.page_source)
    return tree.xpath("//span[@id='streamurl']/text()")


def main(argv=None):
    """Command-line entry point: print the stream URL(s) for the ``-u`` page.

    The arguments are parsed *before* the browser is started, so a missing
    or invalid ``-u`` exits without leaving a headless Chrome process
    behind.  Any error raised while scraping is printed rather than
    propagated, and the browser is always shut down afterwards.
    """
    args = _parse_args(argv)
    print(args)
    chrome = _make_driver()
    try:
        chrome.set_page_load_timeout(200)
        print(_scrape_stream_url(chrome, args.url))
    except Exception as e:
        # Best-effort script: report the failure and fall through to cleanup.
        print(e)
    finally:
        # Runs on success, on error and on Ctrl-C, so the driver never leaks.
        chrome.quit()


if __name__ == '__main__':
    main()