-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdomaingenerator.py
107 lines (81 loc) · 3.15 KB
/
domaingenerator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import whois
import json
import sys
import operator
import os.path
from numpy.random import choice
# settings
# Language code: selects 'statistics/syllables_<language>.json' as input and
# becomes part of the output file name.
language = 'en'
# Suffix appended to every generated name for the whois lookup, the log
# output and the lines written to the output file.
tld = '.com'
# Number of syllables chained together for each generated name; also part of
# the output file name.
syllable_count = 3
# When true, log() prints its status messages to the console.
verbose = True
def main():
    """Generate random domains forever and persist the available ones.

    Loads the syllable statistics for ``language``, resumes from a previously
    written output file (if any), then loops: generate a candidate, skip it
    if it was already tried during this run, check its availability, and
    record available candidates together with their score.

    Whenever the number of recorded domains is a multiple of five, the whole
    collection is rewritten to the output file, one ``<name><tld> <score>``
    entry per line, sorted by descending score.

    The loop has no exit condition; the script runs until it is interrupted
    or an error propagates.
    """
    output_file = 'output/domains_%s_%i.txt' % (language, syllable_count)

    with open('statistics/syllables_%s.json' % language) as stats_file:
        stats = json.load(stats_file)

    # Create the output directory up front so the first save cannot fail
    # (and lose the lookups done so far) just because it is missing.
    output_dir = os.path.dirname(output_file)
    if output_dir and not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    domains = re_open(output_file)
    generated = set()  # names already tried during this run

    while True:
        domain, score = generate_domain(stats)

        if domain in generated:
            continue
        generated.add(domain)

        if not is_available(domain):
            log(' ' + domain + tld + "\t\t[taken] " + str(round(score, 5)))
            continue

        log("> " + domain + tld + "\t\t[available] " + str(round(score, 5)))
        domains[domain] = score

        # store available domains every fifth iteration
        if len(domains) % 5 == 0:
            sorted_domains = sorted(domains.items(), key=operator.itemgetter(1), reverse=True)
            with open(output_file, 'w') as out:
                # Format name, tld and score in a single % operation: '%' binds
                # tighter than '+', so formatting only the trailing ' %f' piece
                # with the (name, score) pair raises a TypeError.
                out.write('\n'.join('%s%s %f' % (name, tld, value)
                                    for name, value in sorted_domains))
def generate_domain(stats):
    """Assemble a random domain name from the given language statistics.

    The first syllable is drawn from ``stats['syllables']`` weighted by its
    count. Each following syllable is found by drawing a next character from
    ``stats['combinations'][<last character of the previous syllable>]`` and
    then drawing, weighted by syllable count, one of the syllables listed in
    ``stats['by_char'][<next character>]``. If the previous syllable's last
    character has no entry in ``stats['combinations']``, generation restarts
    from scratch.

    Returns a ``(domain, score)`` tuple. The score is the sum, over all
    picked syllables, of the syllable's count divided by the total count of
    the candidates it was drawn from, divided by ``syllable_count``.
    """
    syllable_counts = stats['syllables']
    candidates = list(syllable_counts.keys())
    frequencies = list(syllable_counts.values())

    current = choice(candidates, p=normalize(frequencies))
    name = current
    total_score = syllable_counts[current] / sum(frequencies)

    for _ in range(1, syllable_count):
        tail = current[-1]

        # start over if no combination can be made
        if tail not in stats['combinations']:
            return generate_domain(stats)

        followers = stats['combinations'][tail]
        next_char = choice(list(followers.keys()),
                           p=normalize(list(followers.values())))

        candidates = list(stats['by_char'][next_char].keys())
        frequencies = [syllable_counts[candidate] for candidate in candidates]

        current = choice(candidates, p=normalize(frequencies))
        name += current
        total_score += (syllable_counts[current] / sum(frequencies))

    return (name, total_score / float(syllable_count))
def normalize(values):
    """Scale the given values so that they sum to one.

    Args:
        values: Iterable of numbers (any iterable, including one-shot
            iterators, is accepted).

    Returns:
        A list of floats with the same length and order as ``values``; an
        empty list for empty input.

    Raises:
        ZeroDivisionError: If plain Python numbers are given and they sum to
            zero.
    """
    # Materialise once so iterators survive both the sum and the division.
    values = list(values)
    # Compute the total a single time instead of once per element.
    total = sum(values)
    return [float(value) / total for value in values]
def is_available(domain):
    """Return whether the given domain is available.

    Performs a whois lookup for ``domain + tld``. The domain counts as
    available when the response has no 'domain_name' entry or that entry is
    None. Any error raised during the lookup or while inspecting the response
    is treated as "not available".

    Args:
        domain: Domain name without the top-level-domain suffix.

    Returns:
        True if the lookup indicates the domain is free, False otherwise
        (including when the lookup fails).
    """
    try:
        response = whois.whois(domain + tld)
        return ('domain_name' not in response) or (response.domain_name is None)
    except Exception:
        # Deliberately best-effort: a failed lookup is reported as taken.
        # Only Exception is caught so that KeyboardInterrupt / SystemExit
        # still propagate and the endless generation loop can be stopped.
        # NOTE(review): if the installed whois library raises for
        # unregistered domains, those would be reported as taken here —
        # confirm against the library version in use.
        return False
def re_open(file):
    """Return the contents of the given file, or an empty dictionary.

    Each line is expected to look like ``<name><tld> <score>``. The result
    maps the name (the part before the first occurrence of ``tld``) to the
    score as a float. Lines with fewer than two whitespace-separated fields,
    such as blank lines, are skipped.

    Args:
        file: Path of a previously written output file.

    Returns:
        Dictionary of domain name to score; empty if ``file`` does not exist.

    Raises:
        ValueError: If the score field of a line is not a valid float.
    """
    domains = {}
    if not os.path.isfile(file):
        return domains

    with open(file) as lines:
        for line in lines:
            # Split on any run of whitespace so trailing newlines, tabs or
            # doubled spaces do not produce empty fields.
            fields = line.split()
            if len(fields) < 2:
                continue
            domains[fields[0].split(tld)[0]] = float(fields[1])
    return domains
def log(message):
    """Write a status message to the console, unless verbosity is off."""
    if not verbose:
        return
    print(message)
# Start the generator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()