diff --git a/trafilatura/cli_utils.py b/trafilatura/cli_utils.py
index bb4601d0..d6208d44 100644
--- a/trafilatura/cli_utils.py
+++ b/trafilatura/cli_utils.py
@@ -231,12 +231,16 @@ def download_queue_processing(url_store, args, counter, config):
 
 def cli_discovery(args):
     "Group CLI functions dedicated to URL discovery."
-    url_store = load_input_dict(args)
     func = find_feed_urls if args.feed else sitemap_search
+
+    url_store = load_input_dict(args)
     input_urls = url_store.dump_urls()
     if args.list:
         url_store.reset()
-    ext = use_config(filename=args.config_file).getboolean('DEFAULT', 'EXTERNAL_URLS')
+
+    config = use_config(filename=args.config_file)
+    ext = config.getboolean('DEFAULT', 'EXTERNAL_URLS')
+    # sleep_time = config.getfloat('DEFAULT', 'SLEEP_TIME')
 
     # link discovery and storage
     with ThreadPoolExecutor(max_workers=args.parallel) as executor:
diff --git a/trafilatura/sitemaps.py b/trafilatura/sitemaps.py
index ca0e5b86..c21bdddb 100644
--- a/trafilatura/sitemaps.py
+++ b/trafilatura/sitemaps.py
@@ -8,12 +8,15 @@
 
 import logging
 import re
+
 from itertools import islice
+from time import sleep
 from typing import List, Set, Optional
 
 from courlan import (
     clean_url,
     extract_domain,
+    filter_urls,
     fix_relative_urls,
     get_hostinfo,
     lang_filter,
@@ -183,7 +186,7 @@ def process(self) -> None:
 
 
 def sitemap_search(
-    url: str, target_lang: Optional[str] = None, external: bool = False
+    url: str, target_lang: Optional[str] = None, external: bool = False, sleep_time: int = 2
 ) -> List[str]:
     """Look for sitemaps for the given URL and gather links.
 
@@ -194,6 +197,7 @@ def sitemap_search(
             (two-letter string, ISO 639-1 format).
         external: Similar hosts only or external URLs
             (boolean, defaults to False).
+        sleep_time: Wait between requests to the same website.
 
     Returns:
         The extracted links as a list (sorted list of unique links).
@@ -208,10 +212,15 @@ def sitemap_search(
         LOGGER.warning("base URL unreachable, dropping sitemap: %s", url)
         return []
 
+    urlfilter = None
+
     if url.endswith((".gz", "sitemap", ".xml")):
         sitemapurls = [url]
     else:
         sitemapurls = []
+        # set url filter to target subpages
+        if len(url) > len(baseurl) + 2:
+            urlfilter = url
 
     sitemap = SitemapObject(baseurl, domainname, sitemapurls, target_lang, external)
 
@@ -222,7 +231,7 @@ def sitemap_search(
         ]
 
     # iterate through nested sitemaps and results
-    while sitemap.sitemap_urls and len(sitemap.seen) < MAX_SITEMAPS_SEEN:
+    while sitemap.sitemap_urls:
         sitemap.current_url = sitemap.sitemap_urls.pop()
         sitemap.fetch()
         sitemap.process()
@@ -231,6 +240,14 @@ def sitemap_search(
             s for s in sitemap.sitemap_urls if s not in sitemap.seen
         ]
+        if len(sitemap.seen) < MAX_SITEMAPS_SEEN:
+            sleep(sleep_time)
+        else:
+            break
+
+    if urlfilter:
+        sitemap.urls = filter_urls(sitemap.urls, urlfilter)
+
     LOGGER.debug("%s sitemap links found for %s", len(sitemap.urls), domainname)
     return sitemap.urls
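
For reference, a minimal usage sketch of the patched function, not part of the diff: the call signature follows the hunks above, while the URL and parameter values are illustrative assumptions.

    # Illustrative sketch only; URL and values are made up.
    from trafilatura.sitemaps import sitemap_search

    # A URL deeper than the homepage triggers the new subpage filter,
    # and sleep_time throttles successive sitemap fetches on the same host.
    links = sitemap_search("https://www.example.org/blog/", target_lang="en", sleep_time=1)
    print(len(links), "links")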