-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsmsbrowser.py
37 lines (34 loc) · 1.19 KB
/
smsbrowser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Strips images, Java applets, and JavaScript from the page, leaving just the HTML,
# then changes all links in the HTML to deep links so our app can make SMS requests.
from urllib.request import urlopen
from bs4 import BeautifulSoup as soup
import sys
def getPage(url):
    """Fetch *url* and return the raw, undecoded response body.

    Args:
        url: Any URL accepted by ``urllib.request.urlopen``.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        Whatever ``urlopen`` or ``read`` raises (for example
        ``urllib.error.URLError`` when the URL cannot be opened).
    """
    # The context manager closes the response even when read() raises; the
    # explicit close() call it replaces was skipped on that path.
    with urlopen(url) as response:
        return response.read()
def ballsDeepLinking(soup):
    """Rewrite every hyperlink in *soup* as an smsinternet.tech deep link.

    Each ``<a>`` element that carries an ``href`` is replaced, in place, by
    the markup text
    ``<a href="http://smsinternet.tech/smsbrowse?HREF">TITLE</a>``.
    TITLE is the anchor's text if it has a single text child, otherwise its
    ``title`` attribute, otherwise ``"NA"``.

    Args:
        soup: Parsed document tree (a BeautifulSoup object). It is modified
            in place.

    Returns:
        The same ``soup`` object that was passed in.
    """
    # href=True skips anchors that have no href (e.g. <a name="top">), which
    # previously aborted the whole rewrite with a KeyError.
    for link in soup.find_all('a', href=True):
        location = link['href']
        # .string does not raise: it is None when the anchor is empty or has
        # more than one child node. Test for None explicitly so the title
        # attribute / "NA" fallbacks are actually reachable.
        title = link.string
        if title is None:
            title = link.get('title', "NA")
        # The replacement is inserted as a text node, not a parsed tag, so it
        # only comes out as real markup when the tree is serialised without
        # entity escaping (formatter=None).
        # NOTE(review): location is inserted verbatim, without URL-encoding;
        # confirm how the smsbrowse endpoint parses its query string.
        link.replace_with(
            '<a href="http://smsinternet.tech/smsbrowse?{}">{}</a>'.format(location, title)
        )
    return soup
#main
if __name__ == "__main__":
    # Usage: smsbrowser.py URL
    # Fail with a readable message instead of a bare IndexError when the
    # URL argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {} URL".format(sys.argv[0]))

    rawHtml = getPage(sys.argv[1])
    parsedPage = soup(rawHtml, "html.parser")  # parse html
    parsedPage = ballsDeepLinking(parsedPage)  # turn regular links into deep links

    # formatter=None turns off entity escaping, so the anchor markup that
    # ballsDeepLinking inserts as plain text is written out verbatim.
    renderedHtml = parsedPage.prettify(formatter=None)

    # prettify() already returns str. Write it as UTF-8 explicitly: the
    # locale's default encoding may be unable to represent characters that
    # appear in the page and would raise UnicodeEncodeError.
    with open("./public/currentpage.html", "w", encoding="utf-8") as f:
        f.write(renderedHtml)  # write html to file

    # Presumably a status token read by the calling process; emitted without
    # a trailing newline.
    print("browser", end="")