pastebinscrape.py
#!/usr/bin/env python
import time

import requests
from googlesearch import search
from lxml import html


class Pastebinscrape():
    """Right now this just Google-dorks a supplied arg against site:pastebin.com.
    The scraping API (http://pastebin.com/api_scraping_faq) still needs to be
    implemented; that would necessitate a continuously running program, however,
    not one-off usage of autosint. The scraping URL is
    http://pastebin.com/api_scraping.php"""

    def run(self, args, lookup, reportDir, apiKeyDir):
        #set a UA
        userAgent = {'User-agent': 'Mozilla/5.0'}
        #defaults and init
        paste_scrape_results = []
        dorks = args.dorks
        #iterate the lookup list
        for i, l in enumerate(lookup):
            for d in dorks:
                #urls found for this lookup/dork pair
                scrape_url = []
                #init textfiles
                scrapedFile = open(reportDir + l + '/' + l + '_pastebin_content.txt', 'w')
                pasteUrlFile = open(reportDir + l + '/' + l + '_pastebin_urls.txt', 'w')
                #show user what is being searched
                print('[+] Searching Pastebin via Google for public pastes containing {}'.format(l))
                #print('[i] May require a Pastebin Pro account for IP whitelisting')
                #run google query code
                try:
                    #iterate url results from a search of the dork arg and supplied lookup value against pastebin; return the top 20 hits
                    for url in search(str(d) + ' ' + str(l) + ' site:pastebin.com', stop=20):
                        #delay 2s to be polite
                        time.sleep(2)
                        #append results together
                        scrape_url.append(url)
                        if args.verbose:
                            print('[+] Paste containing "{}" and "{}" found at: {}'.format(d, l, url))
                except Exception as e:
                    print('[-] Error dorking pastebin URLs: {}, skipping...'.format(e))
                    paste_scrape_results.append('Error scraping Pastebin')
                    continue
                #ok, urls matching the dork were found. what's in each paste? this could certainly be improved
                for u in scrape_url:
                    #http://docs.python-guide.org/en/latest/scenarios/scrape/
                    try:
                        page = requests.get(u, headers=userAgent)
                        pasteUrlFile.writelines(u + '\n')
                        paste_scrape_results.append(u + '\n')
                    except Exception as e:
                        print('[-] Error opening {}: {}'.format(u, e))
                        paste_scrape_results.append('Error opening {}'.format(u))
                        continue
                    #build html tree
                    tree = html.fromstring(page.content)
                    #if verbose, spit out the url, search term and domain searched
                    if args.verbose:
                        print('[+] Looking for instances of {} and {} in {}'.format(d, l, u))
                    #grab raw paste data from the textarea
                    rawPasteData = tree.xpath('//textarea[@class="paste_code"]/text()')
                    #search lines for the lookup value and keyword
                    for line in rawPasteData:
                        #if the lookup value (domain) is in that line
                        if str(l) in line:
                            #and the argument search term is also in the line
                            if d in line:
                                scrapedFile.write(line)
                scrapedFile.close()
                pasteUrlFile.close()
        return paste_scrape_results
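

# --- Sketch: the scraping-API approach the class docstring mentions (not wired into run()) ---
# The docstring notes that http://pastebin.com/api_scraping_faq describes a scraping
# endpoint (http://pastebin.com/api_scraping.php) that could replace the Google dorking
# above, at the cost of a Pro account with a whitelisted IP and a continuously running
# poller. The helper below is only a minimal sketch of that approach; the item endpoint
# (api_scrape_item.php) and the JSON field names ('key', 'full_url') are assumptions
# based on the FAQ and should be verified against the live API before use.
def scrape_api_sketch(keywords, limit=50, delay=2):
    """Poll the Pastebin scraping API once and return (url, raw_content) tuples for
    pastes whose raw content contains any of the supplied keywords. Assumes the
    calling IP has been whitelisted via a Pastebin Pro account."""
    userAgent = {'User-agent': 'Mozilla/5.0'}
    hits = []
    #fetch the most recent public pastes as a JSON list
    recent = requests.get('http://pastebin.com/api_scraping.php',
                          params={'limit': limit}, headers=userAgent).json()
    for paste in recent:
        #be polite between raw-content fetches
        time.sleep(delay)
        #pull the raw paste body via the item endpoint using the paste key
        raw = requests.get('http://pastebin.com/api_scrape_item.php',
                           params={'i': paste['key']}, headers=userAgent).text
        if any(k in raw for k in keywords):
            hits.append((paste.get('full_url', paste['key']), raw))
    return hits

#example (hypothetical): hits = scrape_api_sketch(['example.com', 'password'], limit=100)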