diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 39cc9a1ae..8551da363 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,7 +9,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7, 3.9] + python-version: [3.8, 3.9] os: ['ubuntu-latest', 'windows-latest'] steps: diff --git a/CHANGELOG.md b/CHANGELOG.md index d9460881a..4677bafa5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Support non-default network interface - Remove unused dependencies (urllib3, cryptography, cffi, idna, chardet) - Load targets from a Nmap XML report +- Add `--async` option to enable asynchronous mode (use coroutines instead of threads) ## [0.4.3] - October 2nd, 2022 - Automatically detect the URI scheme (`http` or `https`) if no scheme is provided diff --git a/README.md b/README.md index c8d3b4a74..a21a3879e 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Table of Contents Installation & Usage ------------ -**Requirement: python 3.7 or higher** +**Requirement: Python 3.8 or higher** Choose one of these installation options: diff --git a/dirsearch.py b/dirsearch.py index 60cef5161..05af81c1f 100755 --- a/dirsearch.py +++ b/dirsearch.py @@ -19,8 +19,7 @@ # Author: Mauro Soria import sys - -from pkg_resources import DistributionNotFound, VersionConflict +import warnings from lib.core.data import options from lib.core.exceptions import FailedDependenciesInstallation @@ -28,10 +27,14 @@ from lib.core.settings import OPTIONS_FILE from lib.parse.config import ConfigParser -if sys.version_info < (3, 7): - sys.stderr.write("Sorry, dirsearch requires Python 3.7 or higher\n") +if sys.version_info < (3, 8): + sys.stderr.write("Sorry, dirsearch requires Python 3.8 or higher\n") sys.exit(1) +# silence pkg_resources deprecation warnings +warnings.simplefilter("ignore", DeprecationWarning) +from pkg_resources import DistributionNotFound, VersionConflict # noqa: E402 + def main(): config = ConfigParser() diff --git 
a/lib/connection/requester.py b/lib/connection/requester.py index 366253dba..c1fb386ce 100755 --- a/lib/connection/requester.py +++ b/lib/connection/requester.py @@ -16,33 +16,38 @@ # # Author: Mauro Soria +import asyncio import http.client -import socket import random import re -import requests +import socket +from ssl import SSLError import threading import time +from typing import Generator, Optional +from urllib.parse import urlparse +import httpx +import requests from requests.auth import AuthBase, HTTPBasicAuth, HTTPDigestAuth from requests.packages import urllib3 from requests_ntlm import HttpNtlmAuth +from httpx_ntlm import HttpNtlmAuth as HttpxNtlmAuth from requests_toolbelt.adapters.socket_options import SocketOptionsAdapter -from urllib.parse import urlparse +from lib.connection.dns import cached_getaddrinfo +from lib.connection.response import AsyncResponse, Response from lib.core.data import options from lib.core.decorators import cached from lib.core.exceptions import RequestException from lib.core.logger import logger from lib.core.settings import ( + PROXY_SCHEMES, RATE_UPDATE_DELAY, READ_RESPONSE_ERROR_REGEX, SCRIPT_PATH, - PROXY_SCHEMES, ) from lib.core.structures import CaseInsensitiveDict -from lib.connection.dns import cached_getaddrinfo -from lib.connection.response import Response from lib.utils.common import safequote from lib.utils.file import FileUtils from lib.utils.mimetype import guess_mimetype @@ -53,28 +58,28 @@ socket.getaddrinfo = cached_getaddrinfo -class HTTPBearerAuth(AuthBase): - def __init__(self, token): - self.token = token - - def __call__(self, request): - request.headers["Authorization"] = f"Bearer {self.token}" - return request - - -class Requester: +class BaseRequester: def __init__(self): self._url = None self._proxy_cred = None self._rate = 0 self.headers = CaseInsensitiveDict(options["headers"]) self.agents = [] - self.session = requests.Session() - self.session.verify = False - self.session.cert = ( - 
options["cert_file"], - options["key_file"], - ) + self.session = None + + self._cert = None + if options["cert_file"] and options["key_file"]: + self._cert = (options["cert_file"], options["key_file"]) + + self._socket_options = [] + if options["network_interface"]: + self._socket_options.append( + ( + socket.SOL_SOCKET, + socket.SO_BINDTODEVICE, + options["network_interface"].encode("utf-8"), + ) + ) if options["random_agents"]: self._fetch_agents() @@ -83,11 +88,67 @@ def __init__(self): if options["data"] and "content-type" not in self.headers: self.set_header("content-type", guess_mimetype(options["data"])) - socket_options = [] - if options["network_interface"]: - socket_options.append( - (socket.SOL_SOCKET, socket.SO_BINDTODEVICE, options["network_interface"].encode("utf-8")) - ) + def _fetch_agents(self) -> None: + self.agents = FileUtils.get_lines( + FileUtils.build_path(SCRIPT_PATH, "db", "user-agents.txt") + ) + + def set_url(self, url: str) -> None: + self._url = url + + def set_header(self, key: str, value: str) -> None: + self.headers[key] = value.lstrip() + + def set_proxy(self, proxy: str) -> None: + if not proxy: + return + + if not proxy.startswith(PROXY_SCHEMES): + proxy = f"http://{proxy}" + + if self._proxy_cred and "@" not in proxy: + # socks5://localhost:9050 => socks5://[credential]@localhost:9050 + proxy = proxy.replace("://", f"://{self._proxy_cred}@", 1) + + self.session.proxies = {"https": proxy} + if not proxy.startswith("https://"): + self.session.proxies["http"] = proxy + + def set_proxy_auth(self, credential: str) -> None: + self._proxy_cred = credential + + def is_rate_exceeded(self): + return self._rate >= options["max_rate"] > 0 + + def decrease_rate(self): + self._rate -= 1 + + def increase_rate(self): + self._rate += 1 + threading.Timer(1, self.decrease_rate).start() + + @property + @cached(RATE_UPDATE_DELAY) + def rate(self): + return self._rate + + +class HTTPBearerAuth(AuthBase): + def __init__(self, token): + self.token = 
token + + def __call__(self, request): + request.headers["Authorization"] = f"Bearer {self.token}" + return request + + +class Requester(BaseRequester): + def __init__(self): + super().__init__() + + self.session = requests.Session() + self.session.verify = False + self.session.cert = self._cert for scheme in ("http://", "https://"): self.session.mount( @@ -95,21 +156,10 @@ def __init__(self): SocketOptionsAdapter( max_retries=0, pool_maxsize=options["thread_count"], - socket_options=socket_options, - ) + socket_options=self._socket_options, + ), ) - def _fetch_agents(self): - self.agents = FileUtils.get_lines( - FileUtils.build_path(SCRIPT_PATH, "db", "user-agents.txt") - ) - - def set_url(self, url): - self._url = url - - def set_header(self, key, value): - self.headers[key] = value.lstrip() - def set_auth(self, type, credential): if type in ("bearer", "jwt"): self.session.auth = HTTPBearerAuth(credential) @@ -127,24 +177,6 @@ def set_auth(self, type, credential): else: self.session.auth = HttpNtlmAuth(user, password) - def set_proxy(self, proxy): - if not proxy: - return - - if not proxy.startswith(PROXY_SCHEMES): - proxy = f"http://{proxy}" - - if self._proxy_cred and "@" not in proxy: - # socks5://localhost:9050 => socks5://[credential]@localhost:9050 - proxy = proxy.replace("://", f"://{self._proxy_cred}@", 1) - - self.session.proxies = {"https": proxy} - if not proxy.startswith("https://"): - self.session.proxies["http"] = proxy - - def set_proxy_auth(self, credential): - self._proxy_cred = credential - # :path: is expected not to start with "/" def request(self, path, proxy=None): # Pause if the request rate exceeded the maximum @@ -211,7 +243,7 @@ def request(self, path, proxy=None): if proxy: err_msg = f"Error with the proxy: {proxy}" else: - err_msg = f"Error with the system proxy" + err_msg = "Error with the system proxy" # Prevent from re-using it in the future if proxy in options["proxies"] and len(options["proxies"]) > 1: 
options["proxies"].remove(proxy) @@ -229,23 +261,173 @@ def request(self, path, proxy=None): ): err_msg = f"Request timeout: {url}" else: - err_msg = ( - f"There was a problem in the request to: {url}" - ) + err_msg = f"There was a problem in the request to: {url}" raise RequestException(err_msg) - def is_rate_exceeded(self): - return self._rate >= options["max_rate"] > 0 - def decrease_rate(self): - self._rate -= 1 +class HTTPXBearerAuth(httpx.Auth): + def __init__(self, token: str) -> None: + self.token = token - def increase_rate(self): - self._rate += 1 - threading.Timer(1, self.decrease_rate).start() + def auth_flow(self, request: httpx.Request) -> Generator: + request.headers["Authorization"] = f"Bearer {self.token}" + yield request - @property - @cached(RATE_UPDATE_DELAY) - def rate(self): - return self._rate + +class ProxyRoatingTransport(httpx.AsyncBaseTransport): + def __init__(self, proxies, **kwargs) -> None: + self._transports = [ + httpx.AsyncHTTPTransport(proxy=proxy, **kwargs) for proxy in proxies + ] + + async def handle_async_request(self, request: httpx.Request) -> httpx.Response: + transport = random.choice(self._transports) + return await transport.handle_async_request(request) + + +class AsyncRequester(BaseRequester): + def __init__(self): + super().__init__() + + tpargs = { + "verify": False, + "cert": self._cert, + "limits": httpx.Limits(max_connections=options["thread_count"]), + "socket_options": self._socket_options, + } + transport = ( + ProxyRoatingTransport( + list(map(self.parse_proxy, options["proxies"])), **tpargs + ) + if options["proxies"] + else httpx.AsyncHTTPTransport(**tpargs) + ) + + self.session = httpx.AsyncClient( + mounts={"all://": transport}, + timeout=httpx.Timeout(options["timeout"]), + ) + self.replay_session = None + + def parse_proxy(self, proxy: str) -> str: + if not proxy: + return None + + if not proxy.startswith(PROXY_SCHEMES): + proxy = f"http://{proxy}" + + if self._proxy_cred and "@" not in proxy: + # 
socks5://localhost:9050 => socks5://[credential]@localhost:9050 + proxy = proxy.replace("://", f"://{self._proxy_cred}@", 1) + + return proxy + + def set_auth(self, type: str, credential: str) -> None: + if type in ("bearer", "jwt"): + self.session.auth = HTTPXBearerAuth(credential) + else: + try: + user, password = credential.split(":", 1) + except ValueError: + user = credential + password = "" + + if type == "basic": + self.session.auth = httpx.BasicAuth(user, password) + elif type == "digest": + self.session.auth = httpx.DigestAuth(user, password) + else: + self.session.auth = HttpxNtlmAuth(user, password) + + async def replay_request(self, path: str, proxy: str): + if self.replay_session is None: + transport = httpx.AsyncHTTPTransport( + verify=False, + cert=self._cert, + limits=httpx.Limits(max_connections=options["thread_count"]), + proxy=self.parse_proxy(proxy), + socket_options=self._socket_options, + ) + self.replay_session = httpx.AsyncClient( + mounts={"all://": transport}, + timeout=httpx.Timeout(options["timeout"]), + ) + return await self.request(path, self.replay_session) + + # :path: is expected not to start with "/" + async def request( + self, path: str, session: Optional[httpx.AsyncClient] = None + ) -> AsyncResponse: + while self.is_rate_exceeded(): + await asyncio.sleep(0.1) + + self.increase_rate() + + err_msg = None + + # Safe quote all special characters to prevent them from being encoded + url = safequote(self._url + path if self._url else path) + parsed_url = urlparse(url) + + session = session or self.session + for _ in range(options["max_retries"] + 1): + try: + if self.agents: + self.set_header("user-agent", random.choice(self.agents)) + + # Use "target" extension to avoid the URL path from being normalized + request = session.build_request( + options["http_method"], + url, + headers=self.headers, + data=options["data"], + ) + if p := parsed_url.path: + request.extensions = {"target": p.encode()} + + xresponse = await session.send( + 
request, + stream=True, + follow_redirects=options["follow_redirects"], + ) + response = await AsyncResponse.create(xresponse) + await xresponse.aclose() + + log_msg = f'"{options["http_method"]} {response.url}" {response.status} - {response.length}B' + + if response.redirect: + log_msg += f" - LOCATION: {response.redirect}" + + logger.info(log_msg) + + return response + + except Exception as e: + logger.exception(e) + + if isinstance(e, httpx.ConnectError): + if str(e).startswith("[Errno -2]"): + err_msg = "Couldn't resolve DNS" + else: + err_msg = f"Cannot connect to: {urlparse(url).netloc}" + elif isinstance(e, SSLError): + err_msg = "Unexpected SSL error" + elif isinstance(e, httpx.TooManyRedirects): + err_msg = f"Too many redirects: {url}" + elif isinstance(e, httpx.ProxyError): + err_msg = "Cannot establish the proxy connection" + elif isinstance(e, httpx.InvalidURL): + err_msg = f"Invalid URL: {url}" + elif isinstance(e, httpx.TimeoutException): + err_msg = f"Request timeout: {url}" + elif isinstance(e, httpx.ReadError) or isinstance(e, httpx.DecodingError): # not sure + err_msg = f"Failed to read response body: {url}" + else: + err_msg = f"There was a problem in the request to: {url}" + + raise RequestException(err_msg) + + def increase_rate(self) -> None: + self._rate += 1 + asyncio.get_running_loop().call_later(1, self.decrease_rate) diff --git a/lib/connection/response.py b/lib/connection/response.py index 40823cb77..b9a56fbc9 100755 --- a/lib/connection/response.py +++ b/lib/connection/response.py @@ -16,44 +16,30 @@ # # Author: Mauro Soria +import httpx + from lib.core.settings import ( - DEFAULT_ENCODING, ITER_CHUNK_SIZE, - MAX_RESPONSE_SIZE, UNKNOWN, + DEFAULT_ENCODING, + ITER_CHUNK_SIZE, + MAX_RESPONSE_SIZE, + UNKNOWN, ) from lib.parse.url import clean_path, parse_path from lib.utils.common import is_binary -class Response: +class BaseResponse: def __init__(self, response): - self.url = response.url - self.full_path = parse_path(response.url) + 
self.url = str(response.url) + self.full_path = parse_path(self.url) self.path = clean_path(self.full_path) self.status = response.status_code self.headers = response.headers self.redirect = self.headers.get("location") or "" - self.history = [res.url for res in response.history] + self.history = [str(res.url) for res in response.history] self.content = "" self.body = b"" - for chunk in response.iter_content(chunk_size=ITER_CHUNK_SIZE): - self.body += chunk - - if len(self.body) >= MAX_RESPONSE_SIZE or ( - "content-length" in self.headers and is_binary(self.body) - ): - break - - if not is_binary(self.body): - try: - self.content = self.body.decode( - response.encoding or DEFAULT_ENCODING, errors="ignore" - ) - except LookupError: - self.content = self.body.decode( - DEFAULT_ENCODING, errors="ignore" - ) - @property def type(self): if "content-type" in self.headers: @@ -77,3 +63,47 @@ def __eq__(self, other): other.body, other.redirect, ) + + +class Response(BaseResponse): + def __init__(self, response): + super().__init__(response) + + for chunk in response.iter_content(chunk_size=ITER_CHUNK_SIZE): + self.body += chunk + + if len(self.body) >= MAX_RESPONSE_SIZE or ( + "content-length" in self.headers and is_binary(self.body) + ): + break + + if not is_binary(self.body): + try: + self.content = self.body.decode( + response.encoding or DEFAULT_ENCODING, errors="ignore" + ) + except LookupError: + self.content = self.body.decode(DEFAULT_ENCODING, errors="ignore") + + +class AsyncResponse(BaseResponse): + @classmethod + async def create(cls, response: httpx.Response) -> "AsyncResponse": + self = cls(response) + async for chunk in response.aiter_bytes(chunk_size=ITER_CHUNK_SIZE): + self.body += chunk + + if len(self.body) >= MAX_RESPONSE_SIZE or ( + "content-length" in self.headers and is_binary(self.body) + ): + break + + if not is_binary(self.body): + try: + self.content = self.body.decode( + response.encoding or DEFAULT_ENCODING, errors="ignore" + ) + except 
LookupError: + self.content = self.body.decode(DEFAULT_ENCODING, errors="ignore") + + return self diff --git a/lib/controller/controller.py b/lib/controller/controller.py index 49207136f..8abae1871 100755 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ -16,8 +16,10 @@ # # Author: Mauro Soria +import asyncio import gc import os +import signal import psycopg import re import time @@ -26,7 +28,6 @@ from urllib.parse import urlparse from lib.connection.dns import cache_dns -from lib.connection.requester import Requester from lib.core.data import blacklists, options from lib.core.decorators import locked from lib.core.dictionary import Dictionary, get_blacklists @@ -38,7 +39,6 @@ QuitInterrupt, UnpicklingError, ) -from lib.core.fuzzer import Fuzzer from lib.core.logger import enable_logging, logger from lib.core.settings import ( BANNER, @@ -69,6 +69,19 @@ from lib.utils.schemedet import detect_scheme from lib.view.terminal import interface +if options["async_mode"]: + from lib.connection.requester import AsyncRequester as Requester + from lib.core.fuzzer import AsyncFuzzer as Fuzzer + + try: + import uvloop + asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) + except ImportError: + pass +else: + from lib.connection.requester import Requester + from lib.core.fuzzer import Fuzzer + class Controller: def __init__(self): @@ -140,6 +153,10 @@ def setup(self): self.errors = 0 self.consecutive_errors = 0 + if options["async_mode"]: + self.loop = asyncio.new_event_loop() + self.loop.add_signal_handler(signal.SIGINT, self.handle_pause) + if options["auth"]: self.requester.set_auth(options["auth_type"], options["auth"]) @@ -162,7 +179,7 @@ def setup(self): ) exit(1) - if options["autosave_report"] and not options["output"] : + if options["autosave_report"] and not options["output"]: self.report_path = options["output_path"] or FileUtils.build_path( SCRIPT_PATH, "reports" ) @@ -272,8 +289,14 @@ def start(self): interface.warning(msg) 
self.fuzzer.set_base_path(current_directory) - self.fuzzer.start() - self.process() + if options["async_mode"]: + # use a future to get exceptions from handle_pause + # https://stackoverflow.com/a/64230941 + self.pause_future = self.loop.create_future() + self.loop.run_until_complete(self._start_coroutines()) + else: + self.fuzzer.start() + self.process() except KeyboardInterrupt: pass @@ -285,6 +308,26 @@ def start(self): self.jobs_processed += 1 self.old_session = False + async def _start_coroutines(self): + task = self.loop.create_task(self.fuzzer.start()) + + try: + await asyncio.wait_for( + asyncio.wait( + [self.pause_future, task], + return_when=asyncio.FIRST_COMPLETED, + ), + timeout=options["max_time"] if options["max_time"] > 0 else None, + ) + except asyncio.TimeoutError: + raise SkipTargetInterrupt("Runtime exceeded the maximum set by the user") + + if self.pause_future.done(): + task.cancel() + await self.pause_future # propagate the exception, if raised + + await task # propagate the exception, if raised + def set_target(self, url): # If no scheme specified, unset it first if "://" not in url: @@ -463,7 +506,10 @@ def match_callback(self, response): if options["replay_proxy"]: # Replay the request with new proxy - self.requester.request(response.full_path, proxy=options["replay_proxy"]) + if options["async_mode"]: + self.loop.create_task(self.requester.replay_request(response.full_path, proxy=options["replay_proxy"])) + else: + self.requester.request(response.full_path, proxy=options["replay_proxy"]) if self.report: self.results.append(response) @@ -521,6 +567,14 @@ def handle_pause(self): option = input() if option.lower() == "q": + if options["async_mode"]: + quitexc = QuitInterrupt("Canceled by the user") + if options["async_mode"]: + self.pause_future.set_exception(quitexc) + break + else: + raise quitexc + interface.in_line("[s]ave / [q]uit without saving: ") option = input() @@ -535,9 +589,19 @@ def handle_pause(self): ) 
self._export(session_file) - raise QuitInterrupt(f"Session saved to: {session_file}") + quitexc = QuitInterrupt(f"Session saved to: {session_file}") + if options["async_mode"]: + self.pause_future.set_exception(quitexc) + break + else: + raise quitexc elif option.lower() == "q": - raise QuitInterrupt("Canceled by the user") + quitexc = QuitInterrupt("Canceled by the user") + if options["async_mode"]: + self.pause_future.set_exception(quitexc) + break + else: + raise quitexc elif option.lower() == "c": self.fuzzer.play() @@ -548,7 +612,12 @@ def handle_pause(self): break elif option.lower() == "s" and len(options["urls"]) > 1: - raise SkipTargetInterrupt("Target skipped by the user") + skipexc = SkipTargetInterrupt("Target skipped by the user") + if options["async_mode"]: + self.pause_future.set_exception(skipexc) + break + else: + raise skipexc def is_timed_out(self): return time.time() - self.start_time > options["max_time"] > 0 diff --git a/lib/core/data.py b/lib/core/data.py index 46292f4d4..9eff02734 100755 --- a/lib/core/data.py +++ b/lib/core/data.py @@ -83,6 +83,7 @@ "ip": None, "exit_on_error": False, "crawl": False, + "async_mode": False, "full_url": False, "redirects_history": False, "color": True, diff --git a/lib/core/fuzzer.py b/lib/core/fuzzer.py index 5e78ccd37..29a1519fe 100755 --- a/lib/core/fuzzer.py +++ b/lib/core/fuzzer.py @@ -16,14 +16,19 @@ # # Author: Mauro Soria +import asyncio import re import threading import time +from typing import Callable, Generator, Tuple +from lib.connection.requester import BaseRequester +from lib.connection.response import BaseResponse from lib.core.data import blacklists, options +from lib.core.dictionary import Dictionary from lib.core.exceptions import RequestException from lib.core.logger import logger -from lib.core.scanner import Scanner +from lib.core.scanner import AsyncScanner, BaseScanner, Scanner from lib.core.settings import ( DEFAULT_TEST_PREFIXES, DEFAULT_TEST_SUFFIXES, @@ -34,33 +39,126 @@ from 
lib.utils.crawl import Crawler -class Fuzzer: - def __init__(self, requester, dictionary, **kwargs): - self._threads = [] +class BaseFuzzer: + def __init__( + self, + requester: BaseRequester, + dictionary: Dictionary, + *, + match_callbacks: Tuple[Callable] = (), + not_found_callbacks: Tuple[Callable] = (), + error_callbacks: Tuple[Callable] = (), + ) -> None: self._scanned = set() self._requester = requester self._dictionary = dictionary - self._play_event = threading.Event() - self._quit_event = threading.Event() - self._pause_semaphore = threading.Semaphore(0) self._base_path = None self.exc = None - self.match_callbacks = kwargs.get("match_callbacks", []) - self.not_found_callbacks = kwargs.get("not_found_callbacks", []) - self.error_callbacks = kwargs.get("error_callbacks", []) + self.match_callbacks = match_callbacks + self.not_found_callbacks = not_found_callbacks + self.error_callbacks = error_callbacks - def setup_scanners(self): self.scanners = { "default": {}, "prefixes": {}, "suffixes": {}, } + def set_base_path(self, path: str) -> None: + self._base_path = path + + def get_scanners_for(self, path: str) -> Generator[BaseScanner, None, None]: + # Clean the path, so can check for extensions/suffixes + path = clean_path(path) + + for prefix in self.scanners["prefixes"]: + if path.startswith(prefix): + yield self.scanners["prefixes"][prefix] + + for suffix in self.scanners["suffixes"]: + if path.endswith(suffix): + yield self.scanners["suffixes"][suffix] + + for scanner in self.scanners["default"].values(): + yield scanner + + @staticmethod + def is_excluded(resp: BaseResponse) -> bool: + """Validate the response by different filters""" + + if resp.status in options["exclude_status_codes"]: + return True + + if ( + options["include_status_codes"] + and resp.status not in options["include_status_codes"] + ): + return True + + if resp.status in blacklists and any( + resp.path.endswith(lstrip_once(suffix, "/")) + for suffix in blacklists.get(resp.status) + ): 
+ return True + + if human_size(resp.length).rstrip() in options["exclude_sizes"]: + return True + + if resp.length < options["minimum_response_size"]: + return True + + if resp.length > options["maximum_response_size"] > 0: + return True + + if any(text in resp.content for text in options["exclude_texts"]): + return True + + if options["exclude_regex"] and re.search( + options["exclude_regex"], resp.content + ): + return True + + if options["exclude_redirect"] and ( + options["exclude_redirect"] in resp.redirect + or re.search(options["exclude_redirect"], resp.redirect) + ): + return True + + return False + + +class Fuzzer(BaseFuzzer): + def __init__( + self, + requester: BaseRequester, + dictionary: Dictionary, + *, + match_callbacks: Tuple[Callable] = (), + not_found_callbacks: Tuple[Callable] = (), + error_callbacks: Tuple[Callable] = (), + ) -> None: + super().__init__( + requester, + dictionary, + match_callbacks=match_callbacks, + not_found_callbacks=not_found_callbacks, + error_callbacks=error_callbacks, + ) + self._threads = [] + self._play_event = threading.Event() + self._quit_event = threading.Event() + self._pause_semaphore = threading.Semaphore(0) + + def setup_scanners(self): # Default scanners (wildcard testers) - self.scanners["default"].update({ - "index": Scanner(self._requester, path=self._base_path), - "random": Scanner(self._requester, path=self._base_path + WILDCARD_TEST_POINT_MARKER), - }) + self.scanners["default"].update( + { + "index": Scanner(self._requester, path=self._base_path), + "random": Scanner( + self._requester, path=self._base_path + WILDCARD_TEST_POINT_MARKER + ), + } + ) if options["exclude_response"]: self.scanners["default"]["custom"] = Scanner( @@ -69,14 +167,16 @@ def setup_scanners(self): for prefix in options["prefixes"] + DEFAULT_TEST_PREFIXES: self.scanners["prefixes"][prefix] = Scanner( - self._requester, tested=self.scanners, + self._requester, + tested=self.scanners, 
path=f"{self._base_path}{prefix}{WILDCARD_TEST_POINT_MARKER}", context=f"/{self._base_path}{prefix}***", ) for suffix in options["suffixes"] + DEFAULT_TEST_SUFFIXES: self.scanners["suffixes"][suffix] = Scanner( - self._requester, tested=self.scanners, + self._requester, + tested=self.scanners, path=f"{self._base_path}{WILDCARD_TEST_POINT_MARKER}{suffix}", context=f"/{self._base_path}***{suffix}", ) @@ -84,7 +184,8 @@ def setup_scanners(self): for extension in options["extensions"]: if "." + extension not in self.scanners["suffixes"]: self.scanners["suffixes"]["." + extension] = Scanner( - self._requester, tested=self.scanners, + self._requester, + tested=self.scanners, path=f"{self._base_path}{WILDCARD_TEST_POINT_MARKER}.{extension}", context=f"/{self._base_path}***.{extension}", ) @@ -98,21 +199,6 @@ def setup_threads(self): new_thread.daemon = True self._threads.append(new_thread) - def get_scanners_for(self, path): - # Clean the path, so can check for extensions/suffixes - path = clean_path(path) - - for prefix in self.scanners["prefixes"]: - if path.startswith(prefix): - yield self.scanners["prefixes"][prefix] - - for suffix in self.scanners["suffixes"]: - if path.endswith(suffix): - yield self.scanners["suffixes"][suffix] - - for scanner in self.scanners["default"].values(): - yield scanner - def start(self): self.setup_scanners() self.setup_threads() @@ -176,59 +262,11 @@ def scan(self, path, scanners): logger.info(f'THREAD-{threading.get_ident()}: crawling "/{path}"') for path_ in Crawler.crawl(response): if self._dictionary.is_valid(path_): - logger.info(f'THREAD-{threading.get_ident()}: found new path "/{path_}" in /{path}') + logger.info( + f'THREAD-{threading.get_ident()}: found new path "/{path_}" in /{path}' + ) self.scan(path_, self.get_scanners_for(path_)) - def is_excluded(self, resp): - """Validate the response by different filters""" - - if resp.status in options["exclude_status_codes"]: - return True - - if ( - options["include_status_codes"] - 
and resp.status not in options["include_status_codes"] - ): - return True - - if ( - resp.status in blacklists - and any( - resp.path.endswith(lstrip_once(suffix, "/")) - for suffix in blacklists.get(resp.status) - ) - ): - return True - - if human_size(resp.length).rstrip() in options["exclude_sizes"]: - return True - - if resp.length < options["minimum_response_size"]: - return True - - if resp.length > options["maximum_response_size"] > 0: - return True - - if any(text in resp.content for text in options["exclude_texts"]): - return True - - if options["exclude_regex"] and re.search(options["exclude_regex"], resp.content): - return True - - if ( - options["exclude_redirect"] - and ( - options["exclude_redirect"] in resp.redirect - or re.search(options["exclude_redirect"], resp.redirect) - ) - ): - return True - - return False - - def set_base_path(self, path): - self._base_path = path - def thread_proc(self): logger.info(f'THREAD-{threading.get_ident()} started"') @@ -258,3 +296,150 @@ def thread_proc(self): if self._quit_event.is_set(): break + + +class AsyncFuzzer(BaseFuzzer): + def __init__( + self, + requester: BaseRequester, + dictionary: Dictionary, + *, + match_callbacks: Tuple[Callable] = (), + not_found_callbacks: Tuple[Callable] = (), + error_callbacks: Tuple[Callable] = (), + ) -> None: + super().__init__( + requester, + dictionary, + match_callbacks=match_callbacks, + not_found_callbacks=not_found_callbacks, + error_callbacks=error_callbacks, + ) + self._play_event = asyncio.Event() + self._background_tasks = set() + + async def setup_scanners(self) -> None: + # Default scanners (wildcard testers) + self.scanners["default"].update( + { + "index": await AsyncScanner.create( + self._requester, path=self._base_path + ), + "random": await AsyncScanner.create( + self._requester, path=self._base_path + WILDCARD_TEST_POINT_MARKER + ), + } + ) + + if options["exclude_response"]: + self.scanners["default"]["custom"] = await AsyncScanner.create( + 
# The functions in this block are instance methods (each takes ``self``) of
# the asynchronous fuzzer class whose header precedes this chunk.


async def start(self) -> None:
    """Set up the scanners, resume the fuzzer and run all worker tasks.

    One task is spawned per dictionary entry. Every task acquires
    ``self.sem`` in ``task_proc``, so at most ``options["thread_count"]``
    of them do work at the same time.
    """
    # In Python 3.9 the Semaphore must be created inside the coroutine,
    # otherwise it may get bound to a different event loop.
    self.sem = asyncio.Semaphore(options["thread_count"])
    await self.setup_scanners()
    self.play()

    pending = self._background_tasks
    for _ in range(len(self._dictionary)):
        worker = asyncio.create_task(self.task_proc())
        pending.add(worker)
        # A finished task removes itself from the set of pending tasks.
        worker.add_done_callback(pending.discard)

    await asyncio.gather(*pending)


def is_finished(self) -> bool:
    """Return True when no background task is left.

    Raises:
        The exception stored in ``self.exc`` (set by ``scan`` when a match
        callback fails), if there is one.
    """
    if self.exc:
        raise self.exc

    remaining = len(self._background_tasks)
    return remaining == 0


def play(self) -> None:
    """Set the play event so tasks waiting on it can proceed."""
    self._play_event.set()


def pause(self) -> None:
    """Clear the play event; tasks block on it before taking a new path."""
    self._play_event.clear()


def quit(self) -> None:
    """Request cancellation of every background task."""
    for worker in self._background_tasks:
        worker.cancel()


async def scan(self, path: str, scanners: Generator) -> None:
    """Request ``path`` once and dispatch the response to the callbacks.

    Args:
        path: Path to request.
        scanners: Testers whose ``check(path, response)`` must all pass for
            the response to count as a match.
    """
    # A path that was already scanned is never scanned again.
    if path in self._scanned:
        return
    self._scanned.add(path)

    response = await self._requester.request(path)

    def report_not_found() -> None:
        for callback in self.not_found_callbacks:
            callback(response)

    # Rejected either by is_excluded() or by the first tester whose check
    # fails (i.e. the response is not unique but a wildcard).
    if self.is_excluded(response) or not all(
        tester.check(path, response) for tester in scanners
    ):
        report_not_found()
        return

    try:
        for callback in self.match_callbacks:
            callback(response)
    except Exception as error:
        # Stored rather than raised here; is_finished() re-raises it.
        self.exc = error

    if not options["crawl"]:
        return

    task = asyncio.current_task()
    logger.info(f'{task.get_name()}: crawling "/{path}"')
    for path_ in Crawler.crawl(response):
        if not self._dictionary.is_valid(path_):
            continue
        logger.info(
            f'{task.get_name()}: found new path "/{path_}" in /{path}'
        )
        await self.scan(path_, self.get_scanners_for(path_))


async def task_proc(self) -> None:
    """Worker body: scan the next dictionary entry while holding ``self.sem``."""
    async with self.sem:
        # Blocks here while the fuzzer is paused.
        await self._play_event.wait()

        try:
            word = next(self._dictionary)
            testers = self.get_scanners_for(word)
            await self.scan(self._base_path + word, testers)
        except StopIteration:
            # The dictionary has no entries left.
            pass
        except RequestException as error:
            for callback in self.error_callbacks:
                callback(error)
        finally:
            # The delay is applied after every attempt, successful or not.
            await asyncio.sleep(options["delay"])
self.setup() - - def setup(self): - """ - Generate wildcard response information containers, this will be - used to compare with other path responses - """ - - first_path = self.path.replace( - WILDCARD_TEST_POINT_MARKER, - rand_string(TEST_PATH_LENGTH), - ) - first_response = self.requester.request(first_path) - self.response = first_response - time.sleep(options["delay"]) - - duplicate = self.get_duplicate(first_response) - # Another test was performed before and has the same response as this - if duplicate: - self.content_parser = duplicate.content_parser - self.wildcard_redirect_regex = duplicate.wildcard_redirect_regex - logger.debug(f'Skipped the second test for "{self.context}"') - return - - second_path = self.path.replace( - WILDCARD_TEST_POINT_MARKER, - rand_string(TEST_PATH_LENGTH, omit=first_path), - ) - second_response = self.requester.request(second_path) - time.sleep(options["delay"]) - - if first_response.redirect and second_response.redirect: - self.wildcard_redirect_regex = self.generate_redirect_regex( - clean_path(first_response.redirect), - first_path, - clean_path(second_response.redirect), - second_path, - ) - logger.debug(f'Pattern (regex) to detect wildcard redirects for "{self.context}": {self.wildcard_redirect_regex}') - - self.content_parser = DynamicContentParser( - first_response.content, second_response.content - ) - - def get_duplicate(self, response): - for category in self.tested: - for tester in self.tested[category].values(): - if response == tester.response: - return tester - - return None - def is_wildcard(self, response): - """Check if response is similar to wildcard response""" - - # Compare 2 binary responses (Response.content is empty if the body is binary) - if not self.response.content and not response.content: - return self.response.body == response.body - - return self.content_parser.compare_to(response.content) - - def check(self, path, response): + def check(self, path: str, response: BaseResponse) -> bool: """ 
class Scanner(BaseScanner):
    """Blocking wildcard scanner that probes the target on construction."""

    def __init__(
        self,
        requester: Requester,
        path: str = "",
        tested: Optional[dict] = None,
        context: str = "all cases",
    ) -> None:
        """
        Args:
            requester: Synchronous requester used to send the test requests.
            path: Path template; WILDCARD_TEST_POINT_MARKER in it is replaced
                by a random string for each test request.
            tested: Scanners that were already set up, grouped by category
                (category -> {key: scanner}); searched for a scanner with an
                identical first response. Defaults to a new empty dict.
            context: Label for this scanner in debug log messages.
        """
        # Build a fresh dict per instance: a literal ``{}`` default is a
        # single object shared by every call that omits ``tested``.
        super().__init__(requester, path, {} if tested is None else tested, context)
        self.setup()

    def setup(self):
        """
        Generate wildcard response information containers, which are later
        compared with the responses of other paths.

        Sends one request for a random path and, unless an already tested
        scanner got the same response, a second one for another random path.
        Sleeps for options["delay"] after each request.
        """

        first_path = self.path.replace(
            WILDCARD_TEST_POINT_MARKER,
            rand_string(TEST_PATH_LENGTH),
        )
        first_response = self.requester.request(first_path)
        self.response = first_response
        time.sleep(options["delay"])

        duplicate = self.get_duplicate(first_response)
        # Another test was performed before and has the same response as this
        # one: reuse its results instead of sending the second request
        if duplicate:
            self.content_parser = duplicate.content_parser
            self.wildcard_redirect_regex = duplicate.wildcard_redirect_regex
            logger.debug(f'Skipped the second test for "{self.context}"')
            return

        second_path = self.path.replace(
            WILDCARD_TEST_POINT_MARKER,
            rand_string(TEST_PATH_LENGTH, omit=first_path),
        )
        second_response = self.requester.request(second_path)
        time.sleep(options["delay"])

        # Both random paths were redirected: derive a pattern that matches
        # such wildcard redirects
        if first_response.redirect and second_response.redirect:
            self.wildcard_redirect_regex = self.generate_redirect_regex(
                clean_path(first_response.redirect),
                first_path,
                clean_path(second_response.redirect),
                second_path,
            )
            logger.debug(
                f'Pattern (regex) to detect wildcard redirects for "{self.context}": {self.wildcard_redirect_regex}'
            )

        self.content_parser = DynamicContentParser(
            first_response.content, second_response.content
        )


class AsyncScanner(BaseScanner):
    """Coroutine-based wildcard scanner.

    ``__init__`` sends no requests; use ``await AsyncScanner.create(...)`` to
    get a scanner whose ``setup()`` has already run.
    """

    def __init__(
        self,
        requester: AsyncRequester,
        path: str = "",
        tested: Optional[dict] = None,
        context: str = "all cases",
    ) -> None:
        """
        Args:
            requester: Asynchronous requester used to send the test requests.
            path: Path template; WILDCARD_TEST_POINT_MARKER in it is replaced
                by a random string for each test request.
            tested: Scanners that were already set up, grouped by category
                (category -> {key: scanner}); searched for a scanner with an
                identical first response. Defaults to a new empty dict.
            context: Label for this scanner in debug log messages.
        """
        # Build a fresh dict per instance: a literal ``{}`` default is a
        # single object shared by every call that omits ``tested``.
        super().__init__(requester, path, {} if tested is None else tested, context)

    @classmethod
    async def create(
        cls,
        requester: AsyncRequester,
        *,
        path: str = "",
        tested: Optional[dict] = None,
        context: str = "all cases",
    ) -> "AsyncScanner":
        """Construct a scanner and await its ``setup()`` before returning it.

        Takes the same arguments as ``__init__``; everything except
        ``requester`` is keyword-only.
        """
        self = cls(requester, path=path, tested=tested, context=context)
        await self.setup()
        return self

    async def setup(self) -> None:
        """
        Generate wildcard response information containers, which are later
        compared with the responses of other paths.

        Sends one request for a random path and, unless an already tested
        scanner got the same response, a second one for another random path.
        Awaits asyncio.sleep(options["delay"]) after each request.
        """

        first_path = self.path.replace(
            WILDCARD_TEST_POINT_MARKER,
            rand_string(TEST_PATH_LENGTH),
        )
        first_response = await self.requester.request(first_path)
        self.response = first_response
        await asyncio.sleep(options["delay"])

        duplicate = self.get_duplicate(first_response)
        # Another test was performed before and has the same response as this
        # one: reuse its results instead of sending the second request
        if duplicate:
            self.content_parser = duplicate.content_parser
            self.wildcard_redirect_regex = duplicate.wildcard_redirect_regex
            logger.debug(f'Skipped the second test for "{self.context}"')
            return

        second_path = self.path.replace(
            WILDCARD_TEST_POINT_MARKER,
            rand_string(TEST_PATH_LENGTH, omit=first_path),
        )
        second_response = await self.requester.request(second_path)
        await asyncio.sleep(options["delay"])

        # Both random paths were redirected: derive a pattern that matches
        # such wildcard redirects
        if first_response.redirect and second_response.redirect:
            self.wildcard_redirect_regex = self.generate_redirect_regex(
                clean_path(first_response.redirect),
                first_path,
                clean_path(second_response.redirect),
                second_path,
            )
            logger.debug(
                f'Pattern (regex) to detect wildcard redirects for "{self.context}": {self.wildcard_redirect_regex}'
            )

        self.content_parser = DynamicContentParser(
            first_response.content, second_response.content
        )
host.find("ports").iter("port") if ( - port.get("protocol") == "tcp" # UDP is not used in HTTP because it is not a "reliable transport" + port.get("protocol") == "tcp" # UDP is not used in HTTP because it is not a "reliable transport" and port.find("state").get("state") == "open" and port.find("service").get("name") in ["http", "unknown"] ) diff --git a/requirements.txt b/requirements.txt index 0d30a9117..be90c2aef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,3 +15,5 @@ mysql-connector-python>=8.0.20 psycopg[binary]>=3.0 requests-toolbelt>=1.0.0 setuptools>=66.0.0 +httpx>=0.27.2 +httpx-ntlm>=1.4.0 diff --git a/setup.py b/setup.py index b0941d64e..bc7cfc127 100755 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ entry_points={"console_scripts": ["dirsearch=dirsearch.dirsearch:main"]}, package_data={"dirsearch": ["*", "db/*"]}, include_package_data=True, - python_requires=">=3.7", + python_requires=">=3.8", install_requires=get_dependencies(), classifiers=[ "Programming Language :: Python", @@ -39,7 +39,7 @@ "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", "Operating System :: OS Independent", "Topic :: Security", - "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", ], keywords=["infosec", "bug bounty", "pentesting", "security"], )