Commit c9c5324

giving all modules a once over

bharshbarger committed Sep 19, 2017
1 parent 70905b3 commit c9c5324
Showing 9 changed files with 191 additions and 163 deletions.
29 changes: 24 additions & 5 deletions AutOSINT.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python
"""A tool to automate some OSINT tasks"""
"""A tool to automate some OSINT tasks and put results into a docx report"""
#By @arbitrary_code
#https://github.com/bharshbarger/AutOSINT

@@ -27,14 +27,14 @@
from modules.credleaks import Credleaks
from modules.pyfoca import Pyfoca
from modules.webscrape import Scraper
from resources.reportgen import Reportgen
from modules.reportgen import Reportgen

class Autosint:
"""autosint class"""
def __init__(self, args, parser):

#version
self.version = 'v2-09.18.17'
self.version = 'v2-09.19.17'

#defaults
self.lookup_list = []
@@ -99,7 +99,7 @@ def banner(self):
/_/ \_\__,_|\__|\___/|____/___|_| \_| |_|\n''')

if self.args.verbose is True:
print('AutOSINT.py {}: A way to automate various OSINT tasks\n'.format(self.version))
print('AutOSINT.py {}: A way to automate various OSINT tasks and place results into a docx\n'.format(self.version))
if self.args.verbose is True:
print(self.args)

@@ -120,7 +120,7 @@ def check_arguments(self):
os.makedirs(self.report_directory+l)

if self.args.verbose is True:
print '[+] Lookup Values: '+', '.join(self.lookup_list)
print ('[+] Lookup Values: '+', '.join(self.lookup_list))

#check for a supplied client name and exit if none provided
if self.args.client is None:
@@ -135,15 +135,34 @@ def check_arguments(self):

def run_queries(self):
"""invoke all the queries. assumption is that every run will want all data"""

#verified
self.whois_result = self.whois_query_module.run(self.args, self.lookup_list, self.report_directory)

#verified
self.dns_result = self.dns_query_module.run(self.args, self.lookup_list, self.report_directory)

#needs work
self.haveibeenpwned_result = self.haveibeenpwned_api_module.run(self.args, self.lookup_list, self.report_directory)

#verified
self.google_dork_result = self.google_dork_module.run(self.args, self.lookup_list, self.report_directory)

#verified
self.shodan_query_result = self.shodan_search_module.run(self.args, self.lookup_list, self.report_directory, self.api_key_directory)

#verified
self.pastebin_scrape_urls_result = self.pastebin_scrape_module.run(self.args, self.lookup_list, self.report_directory, self.api_key_directory)

#verified
self.theharvester_module_result = self.theharvester_module.run(self.args, self.lookup_list, self.report_directory)

self.cred_leak_search_result = self.cred_leaks_module.run(self.args, self.lookup_list, self.start_time, self.report_directory)

#needs work
self.scrape_result = self.web_scraper_module.run(self.args, self.lookup_list, self.report_directory, self.api_key_directory)

#pyfoca has to be present
self.pyfoca_module_result = self.pyfoca_module.run(self.args, self.lookup_list, self.report_directory)

def report(self):
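run_queries assumes every module exposes the same entry point — run(args, lookup, report_directory), returning a list of results. A minimal sketch of that convention, for orientation only; Newmodule and its report file name are hypothetical, not part of this commit:

#!/usr/bin/env python
"""sketch of the module convention run_queries relies on (hypothetical module)"""

class Newmodule():
    """example module following the AutOSINT run() convention"""
    def __init__(self):
        self.result = []

    def run(self, args, lookup, report_directory):
        #iterate the lookup list and write one report file per domain
        for i, l in enumerate(lookup):
            report_file = open(report_directory + l + '/' + l + '_newmodule.txt', 'w')
            finding = '[i] placeholder result for %s' % l
            report_file.writelines(finding + '\r\n')
            self.result.append(finding)
            report_file.close()
        #spew if verbose
        if args.verbose is True:
            for r in self.result:
                print(r)
        return self.result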
34 changes: 20 additions & 14 deletions modules/googledork.py
@@ -12,47 +12,53 @@
# https://stackoverflow.com/questions/4082966/what-are-the-alternatives-now-that-the-google-web-search-api-has-been-deprecated/11206266#11206266

class Googledork():
def __init__(self):
self.google_result = []



def run(self, args, lookup, reportDir):
#need a default dork list

self.args = args

#C58EA28C-18C0-4a97-9AF2-036E93DDAFB3 is string for open OWA attachments, for example
#init lists
googleResult = []
dorks = args.dorks

#iterate the lookup list
for i, l in enumerate(lookup):
for d in dorks:
for d in self.args.dorks:

googleResult.append('[i] Google query for: "%s site:%s"' % (str(d),str(l)))
#add header to result
self.google_result.append('[i] Google query for: "%s site:%s"' % (str(d),str(l)))

#open a file for each domain searched
googleFile=open(reportDir+l+'/'+l+'_google_dork.txt','w')

#show user what is being searched
print ('[+] Google query %s for %s site:%s' % (str(i + 1),str(d),str(l)))
print('[+] Results:')

try:
#iterate url results from search of password(for now) and site:current list value
for url in search(str(dorks)+' site:'+str(l), stop = 20):

for url in search(str(self.args.dorks)+' site:'+str(l), stop = 20):
#append results together
googleResult.append(url)
self.google_result.append(url)

#rate limit to 1 per second
time.sleep(1)
#rate limit with 2 second delay
time.sleep(2)
#catch exceptions
except Exception as e:
print ('[!] Error encountered: %s' % e)
pass
#iterate results
for r in googleResult:
for r in self.google_result:
#write results on newlines
googleFile.writelines(r + '\r\n')

#verbosity flag
if args.verbose is True:
for r in googleResult: print (''.join(r))
if self.args.verbose is True:
for r in self.google_result: print (''.join(r))

#return results list
return googleResult
return self.google_result
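For orientation, a standalone sketch of the dork loop above, assuming the third-party google package whose search() generator the module appears to use (the import path varies by release — googlesearch in newer ones; dork and domain values are placeholders):

import time
from google import search  # third-party package the module appears to use

def dork_domain(dork, domain, stop=20, delay=2):
    """collect up to `stop` Google hits for '<dork> site:<domain>',
    sleeping `delay` seconds between results to rate limit"""
    results = ['[i] Google query for: "%s site:%s"' % (dork, domain)]
    try:
        for url in search('%s site:%s' % (dork, domain), stop=stop):
            results.append(url)
            #rate limit with a delay, as the commit bumps 1s to 2s
            time.sleep(delay)
    except Exception as e:
        print('[!] Error encountered: %s' % e)
    return results

#usage: dork_domain('password', 'example.com')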

35 changes: 20 additions & 15 deletions modules/hibp.py
@@ -1,5 +1,8 @@
#!/usr/bin/env python

"""module to perform an API search against haveibeenpwned.com based on supplied domain"""
#https://haveibeenpwned.com/API/v2
#https://haveibeenpwned.com/Pastes/Latest
# needs work...
import json
import requests

@@ -8,38 +11,40 @@
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

class Haveibeenpwned():
"""haveibeenpwned module class"""
def __init__(self):
self.haveibeenpwned_json_result = []


#https://haveibeenpwned.com/API/v2
#https://haveibeenpwned.com/Pastes/Latest
def run(self, args, lookup, reportDir):
"""main function"""
userAgent = {'user-agent': 'Pwnage Checker for AutOSINT'}

for i,l in enumerate(lookup):
print '[+] Searching haveibeenpwned.com via API for %s' % l
scrapeFile=open(reportDir+l+'/'+l+'_haveibeenpwned.txt','w')
for i, l in enumerate(lookup):
print('[+] Searching haveibeenpwned.com via API for {}'.format(l))
scrapeFile=open(reportDir + l + '/' + l + '_haveibeenpwned.txt','w')
#altered HIBP URL
url = 'https://haveibeenpwned.com/api/v2/breaches?domain=%s' % l

url = 'https://haveibeenpwned.com/api/v2/breaches?domain={}'.format(l)

if args.verbose is True:print '[+] Searching haveibeenpwned.com for %s' % (l.split('.')[0])
if args.verbose is True:
print('[+] Searching haveibeenpwned.com for {}'.format((l.split('.')[0])))

#http://docs.python-guide.org/en/latest/scenarios/scrape/
try:
page = requests.get(url, headers = userAgent, verify=False)
page = requests.get(url, headers=userAgent, verify=False)
#build html tree
#save HIBP data to file
json.dump((page.json()),scrapeFile)
#append to a result list
self.haveibeenpwned_json_result.append(json.dumps(page.json()))
#add in error checking (placeholder) maybe more efficient than main try loop
try:
if page.status_code == 503:
page.raise_for_status()
except requests.exceptions.HTTPError as e:
if e.response.status_code == 503:
print 'Service unavailable'
print ('Service unavailable')
continue

except:
print '[-] Connection error or no result on ' + url +':'
print '[-] Status code %s' % page.status_code
print ('[-] Connection error or no result on {} :'.format(url))
print ('[-] Status code {}'.format(page.status_code))
continue
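The 503 handling above can lean on requests' own exception types rather than attributes that do not exist on the response (page.exceptions, e.page). A sketch, assuming the same v2 breaches?domain= endpoint the module queries:

import json
import requests

def query_hibp(domain, out_path):
    """query haveibeenpwned v2 for breaches against a domain, save the JSON"""
    url = 'https://haveibeenpwned.com/api/v2/breaches?domain={}'.format(domain)
    user_agent = {'user-agent': 'Pwnage Checker for AutOSINT'}
    try:
        page = requests.get(url, headers=user_agent, verify=False)
        #raises requests.exceptions.HTTPError on 4xx/5xx, including 503
        page.raise_for_status()
    except requests.exceptions.HTTPError as e:
        if e.response.status_code == 503:
            print('Service unavailable')
        return None
    except requests.exceptions.RequestException:
        print('[-] Connection error or no result on {}'.format(url))
        return None
    with open(out_path, 'w') as out_file:
        json.dump(page.json(), out_file)
    return page.json()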
53 changes: 27 additions & 26 deletions modules/pastebinscrape.py
@@ -6,22 +6,22 @@

class Pastebinscrape():

#right now this just google dorks a supplied arg for site:pastebin.com
#need to implement scraping api http://pastebin.com/api_scraping_faq
#that would necessitate a more ongoing program however, not one-off usage of autosint
#scraping url is here http://pastebin.com/api_scraping.php
"""right now this just google dorks a supplied arg for site:pastebin.com
need to implement scraping api http://pastebin.com/api_scraping_faq
that would necessitate a more ongoing program however, not one-off usage of autosint
scraping url is here http://pastebin.com/api_scraping.php"""
def run(self, args, lookup, reportDir, apiKeyDir):

#set a UA
userAgent = {'User-agent': 'Mozilla/5.0'}

#defaults and init
pasteScrapeUrl = []
pasteScrapeContent = []
pasteScrapeResult =[]
paste_scrape_url = []
paste_scrape_content = []
paste_scrape_results =[]
dorks=args.dorks
scrapeURL = []
scrapeContent = []
scrape_url = []
scrape_content = []

#iterate the lookup list
for i, l in enumerate(lookup):
@@ -32,42 +32,43 @@ def run(self, args, lookup, reportDir, apiKeyDir):
pasteUrlFile=open(reportDir+l+'/'+l+'_pastebin_urls.txt','w')

#show user what is being searched
print('[+] Searching Pastebin for public pastes containing %s' % (l))
print('[i] May require a Pastebin Pro account for IP whitelisting')
print('[+] Searching Pastebin via Google for public pastes containing {}'.format(l))
#print('[i] May require a Pastebin Pro account for IP whitelisting')


#run google query code
try:
#iterate url results from search of dork arg and supplied lookup value against pastebin. return top 20 hits
for url in search(str(d) +' '+ str(l) + ' site:pastebin.com', stop = 20):
#delay 1s to be polite
time.sleep(1)
for url in search(str(d) +' '+ str(l) + ' site:pastebin.com', stop=20):
#delay 2s to be polite
time.sleep(2)
#append results together
scrapeURL.append(url)
scrape_url.append(url)
if args.verbose is True:
print ('[+] Paste containing "%s" and "%s" found at: %s' % (d,l,url))
print('[+] Paste containing "{}" and "{}" found at: {}'.format(d,l,url))
except Exception as e:
print('[-] Error dorking pastebin URLs: %s, skipping...' % e)
pasteScrapeResult.append('Error scraping Pastebin')
print('[-] Error dorking pastebin URLs: {}, skipping...'.format(e))
paste_scrape_results.append('Error scraping Pastebin')
continue

for u in scrapeURL:
#ok, urls matching the dork found. what's in the paste? I'm certain this could be VASTLY improved
for u in scrape_url:
#http://docs.python-guide.org/en/latest/scenarios/scrape/
try:
page = requests.get(u, headers = userAgent)
page = requests.get(u, headers=userAgent)
pasteUrlFile.writelines(u+'\n')
pasteScrapeResult.append(u+'\n')
paste_scrape_results.append(u+'\n')
except:
print ('[-] Error opening ' + u +':')
pasteScrapeResult.append('Error opening %s' % u)
paste_scrape_results.append('Error opening {}'.format(u))
continue


#build html tree
tree = html.fromstring(page.content)

#if verbose spit out url, search term and domain searched
if args.verbose is True:print ('[+] Looking for instances of %s and %s in %s' % (d,l,u))
if args.verbose is True:
print ('[+] Looking for instances of {} and {} in {}'.format(d,l,u))
#grab raw paste data from the textarea
rawPasteData = tree.xpath('//textarea[@class="paste_code"]/text()')

@@ -80,5 +81,5 @@ def run(self, args, lookup, reportDir, apiKeyDir):
if d in line:
#print str(line)
scrapedFile.writelines(str(line.encode('utf8')))
#print pasteScrapeResult
return pasteScrapeResult
#print paste_scrape_results
return paste_scrape_results
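A condensed sketch of the scrape step above: fetch a dorked URL and pull the raw paste out of pastebin's paste_code textarea, keeping only lines that contain the dork (the url and needle values are placeholders):

import requests
from lxml import html

def scrape_paste(url, needle):
    """return the lines of a pastebin paste that contain `needle`"""
    page = requests.get(url, headers={'User-agent': 'Mozilla/5.0'})
    #build html tree
    tree = html.fromstring(page.content)
    #grab raw paste data from the textarea, as the module does
    raw_paste_data = tree.xpath('//textarea[@class="paste_code"]/text()')
    hits = []
    for blob in raw_paste_data:
        for line in blob.splitlines():
            if needle in line:
                hits.append(line)
    return hits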
15 changes: 10 additions & 5 deletions modules/pyfoca.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python
#https://github.com/altjx/ipwn

import re
import subprocess

class Pyfoca():
@@ -11,6 +12,7 @@ def run(self, args, lookup, reportDir):

#init lists
pyfocaResult=[]
ansi_escape = re.compile(r'(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]')

#based on domain or ip, enumerate with index and value
for i, l in enumerate(lookup):
@@ -20,20 +22,23 @@

#run pyfoca with -d domain. should automagically do metadata
try:
print '[+] Running pyfoca -d %s' % l
print ('[+] Running pyfoca -d %s' % l)
pyfocaCmd = subprocess.Popen(['pyfoca', '-d', str(l)], stdout = subprocess.PIPE).communicate()[0].split('\r\n')
except:
print '[-] Error running pyfoca. Make sure it is in your PATH and you are connected to the Internet'
print ('[-] Error running pyfoca. Make sure it is in your PATH and you are connected to the Internet')
pyfocaResult.append('Error running pyfoca')
pyfocaFile.writelines('Error running pyfoca')
continue


#pyfocaCmd = ansi_escape.sub('', pyfocaCmd, re.S)

#append output
pyfocaFile.writelines(pyfocaCmd)
pyfocaResult.append(pyfocaCmd)
pyfocaFile.writelines(str(pyfocaCmd))
pyfocaResult.append(str(pyfocaCmd))

#spew if verbose
if args.verbose is True:
for p in pyfocaResult:print '\n'.join(p)
for p in pyfocaResult:print ''.join(p)

return pyfocaResult
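The commit adds an ansi_escape pattern but leaves the substitution commented out — and the commented call passes re.S where Pattern.sub() expects a replacement count. A sketch of how the cleanup could be applied to the pyfoca output:

import re
import subprocess

ansi_escape = re.compile(r'(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]')

def run_pyfoca(domain):
    """run pyfoca and return its output with ANSI color codes stripped"""
    raw = subprocess.Popen(['pyfoca', '-d', str(domain)],
                           stdout=subprocess.PIPE).communicate()[0]
    #decode bytes before the regex substitution (Python 3)
    if isinstance(raw, bytes):
        raw = raw.decode('utf-8', errors='replace')
    #sub() with no count argument strips every escape sequence
    return ansi_escape.sub('', raw).split('\r\n')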
10 changes: 6 additions & 4 deletions modules/reportgen.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
"""module to generate a docx report based on osint findings"""

#https://python-docx.readthedocs.io/en/latest/user/text.html
#https://python-docx.readthedocs.io/en/latest/user/quickstart.html
@@ -238,10 +239,11 @@ def run(self, args, reportDir, lookup, whoisResult, dnsResult, googleResult, sho
#content
paragraph = document.add_paragraph()
for sr in scrapeResult:
runParagraph = paragraph.add_run(sr)
font=runParagraph.font
font.name = 'Arial'
font.size = Pt(10)
for line in sr:
runParagraph = paragraph.add_run(line)
font=runParagraph.font
font.name = 'Arial'
font.size = Pt(10)

document.add_page_break()
