-
Notifications
You must be signed in to change notification settings - Fork 33
/
Copy pathgenerate-vhost-names-dict.sh
32 lines (32 loc) · 1.66 KB
/
generate-vhost-names-dict.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/usr/bin/env bash
# Script to build a list of VHOST names based on sub domain names.
## References:
# See https://blog.majestic.com/development/alexa-top-1-million-sites-retired-heres-majestic-million/
# See https://github.com/danielmiessler/SecLists/issues/654
# See https://github.com/danielmiessler/SecLists/pull/671
# Add more top-level domains to the expression below to grab more results.
# The ones below were selected to try to get French, English and Luxembourgish names.
# Extended regular expression (grep -E) matched against each domain name;
# only domains ending with one of these top-level domains are kept.
TLD_WANTED_EXPR='\.(fr|uk|lu)$'
echo "[+] Download Majestic CSV file..."
# Stop right away when the download fails: every later step reads this file
# and would otherwise silently build an empty dictionary.
if ! wget -q -O /tmp/majestic.csv https://downloads.majestic.com/majestic_million.csv; then
  echo "[!] Unable to download the Majestic CSV file." >&2
  exit 1
fi
wc -l /tmp/majestic.csv
echo "[+] Extract wanted domains..."
# Field 3 of the CSV is used as the domain name. The pattern is quoted so the
# shell passes it to grep as a single, unmodified argument.
cut -d',' -f3 /tmp/majestic.csv | grep -E -- "$TLD_WANTED_EXPR" > /tmp/domains.txt
wc -l /tmp/domains.txt
echo "[+] Extract sub domains via Certificate Transparency logs (https://crt.sh)..."
# Start from an empty result file: the loop below only appends, so without this
# reset a re-run would mix new raw names with the output of a previous run.
: > /tmp/subdomains.txt
# Query crt.sh once per domain listed in /tmp/domains.txt.
while IFS= read -r line; do
  # "\r" rewrites the same terminal line to act as a progress indicator.
  printf "\rDomain: %-60s" "$line"
  # jq -R reads the response as raw text and "fromjson?" skips anything that is
  # not valid JSON; cut then keeps only the left-most label of each name.
  # NOTE(review): -k disables TLS certificate verification - confirm whether it
  # is really needed for crt.sh and drop it if not.
  curl -sk "https://crt.sh/?q=${line}&output=json" \
    | jq -r -R 'fromjson? | .[].name_value' \
    | cut -d'.' -f1 >> /tmp/subdomains.txt
done < /tmp/domains.txt
echo ""
echo "[+] Filter duplicates and apply cleanup..."
#######################################
# Turn raw certificate name labels into dictionary entries.
# Reads one candidate per line on stdin and writes the cleaned list on stdout.
# Steps: drop entries containing "@", a space or "*"; keep only the text before
# the first "-"; drop entries containing a digit and empty entries; sort and
# de-duplicate.
# Arguments: none
# Outputs:   cleaned, sorted, unique entries on stdout
#######################################
clean_subdomain_labels() {
  # -F treats the three patterns as literal strings, so "*" needs no escaping.
  grep -v -F -e '@' -e ' ' -e '*' \
    | cut -d'-' -f1 \
    | grep -v -E '[0-9]|^$' \
    | sort -u
}
# The input and output paths are identical, so write to a scratch file first
# and only replace the original when the cleanup could run.
if clean_subdomain_labels < /tmp/subdomains.txt > /tmp/subdomains.tmp; then
  mv /tmp/subdomains.tmp /tmp/subdomains.txt
fi
# Remove the intermediate files of the earlier steps; -f keeps this quiet when
# they are already gone.
rm -f /tmp/majestic.csv /tmp/domains.txt
wc -l /tmp/subdomains.txt