-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuniform_generator.py
32 lines (25 loc) · 1.01 KB
/
uniform_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from nltk.corpus import brown
from numpy.random import choice,seed
# Unique lower-cased Brown-corpus tokens, kept in first-occurrence order
# (dict keys preserve insertion order, so this de-duplicates stably).
wordlist = list(dict.fromkeys(token.lower() for token in brown.words()))
# Sampling alphabet: the 26 lowercase ASCII letters followed by a space.
charlist = list('abcdefghijklmnopqrstuvwxyz ')
def get_text(grams, avg_length):
    """Build a random string by concatenating uniformly sampled grams.

    Grams are drawn one at a time with ``numpy.random.choice`` (uniform, with
    replacement) and joined with no separator until the text is at least
    ``avg_length`` characters long.

    Args:
        grams: Non-empty sequence of strings to sample from.
        avg_length: Length threshold in characters. Despite the name this is
            a lower bound, not an average: sampling stops as soon as the text
            reaches it, so the result may overshoot by less than the length
            of the last gram drawn.

    Returns:
        The generated text as a built-in ``str``. At least one gram is always
        drawn, even when ``avg_length`` is zero or negative.

    Raises:
        ValueError: If ``grams`` is empty (raised by ``choice``), or if every
            gram is an empty string while ``avg_length`` is positive, since
            the threshold could then never be reached.
    """
    # Without this guard, all-empty grams would spin forever in the loop below.
    if avg_length > 0 and not any(len(gram) for gram in grams):
        raise ValueError('grams must contain at least one non-empty string')

    # Collect pieces and join once instead of re-copying a growing string;
    # choice() is still called once per gram, so seeded output is unchanged.
    pieces = [choice(grams)]
    total_length = len(pieces[0])
    while total_length < avg_length:
        gram = choice(grams)
        pieces.append(gram)
        total_length += len(gram)
    return ''.join(pieces)
# Write 1000 random-character "essays" as a tab-separated augmentation file.
# The 'augment/' directory must already exist; open() does not create it.
with open('augment/aug_random_char_1000.txt', 'w', encoding='utf-8') as out_file:
    # NOTE(review): the header names 'essay_score' twice -- confirm that the
    # downstream reader expects two identically named score columns.
    out_file.write('Id\tEssaySet\tessay_score\tessay_score\tEssayText\n')
    # Fixed seed so every run regenerates the same file.
    seed(10)
    for row_number in range(1000):
        # Each row is a string of at least 44 characters drawn from charlist.
        sentence = get_text(charlist, 44)
        # Id is '10100' plus the zero-padded row number; EssaySet and both
        # score columns are always written as 0.
        out_file.write('10100{:03}\t0\t0\t0\t{}\n'.format(row_number, sentence))
# No explicit close(): the with-statement closes the file on exit.