-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrandbyrand.py
138 lines (110 loc) · 3.97 KB
/
randbyrand.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import requests
import zipfile
import os
import random
import logging
import numpy as np
import pandas as pd
# Location of the zipped digits file on rand.org fetched by download_digits().
URL_MILLION_RANDOM_DIGITS = "https://www.rand.org/content/dam/rand/pubs/monograph_reports/MR1418/MR1418.digits.txt.zip"
# Number of digits expected in the table; rand_digit() wraps its index modulo N.
N = 1000000
# Name of the text file extracted from the archive and parsed by get_digits().
DIGITS_FILENAME = "digits.txt"
def download_digits():
    """Download the digits in the original format to a digits.txt file.

    The zip archive is fetched from ``URL_MILLION_RANDOM_DIGITS``, written to
    a temporary ``digits.zip`` in the working directory, and the member named
    ``DIGITS_FILENAME`` is extracted next to it. The temporary archive is
    removed afterwards, whether or not extraction succeeded.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server stops responding.
    """
    archive_path = "digits.zip"
    # A timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(url=URL_MILLION_RANDOM_DIGITS, timeout=60)
    # Fail loudly on an HTTP error instead of saving an error page as a zip.
    response.raise_for_status()
    try:
        with open(archive_path, "wb") as archive_file:
            archive_file.write(response.content)
        with zipfile.ZipFile(archive_path, mode="r") as archive:
            archive.extract(DIGITS_FILENAME)
    finally:
        # Never leave the temporary archive behind, even on a failed extract.
        if os.path.exists(archive_path):
            os.remove(archive_path)
def get_digits(format="list"):
    """Return the digits in the preferred format (list, numpy, pandas).

    The source file is downloaded first when it is not present in the working
    directory.

    Args:
        format: ``"list"`` for a Python list of ints, ``"numpy"`` for a NumPy
            array, or ``"pandas"`` for a single-column DataFrame. The name
            shadows the builtin but is kept so keyword callers still work.

    Returns:
        The digits, in file order, in the requested container.

    Raises:
        ValueError: If ``format`` is not one of the supported names.
    """
    # Reject an unknown format up front rather than silently returning None
    # after doing all the download and parsing work.
    if format not in ("list", "numpy", "pandas"):
        raise ValueError(
            "Unsupported format {!r}; expected 'list', 'numpy' or 'pandas'.".format(format)
        )

    # If the source file is missing, download it
    if not os.path.exists(DIGITS_FILENAME):
        download_digits()

    chunks = []
    with open(DIGITS_FILENAME, "rt") as source:
        for line in source:
            # Ignore the first group of digits for each line, it is just the
            # row number; everything after the first space is payload.
            _, separator, payload = line.partition(" ")
            if not separator:
                # Blank or separator-less line: nothing to extract.
                continue
            # Remove any remaining spaces and newline characters.
            chunks.append(payload.replace(" ", "").replace("\n", ""))

    # Join once instead of growing a string line by line.
    list_digits = [int(digit) for digit in "".join(chunks)]

    if format == "numpy":
        return np.array(list_digits)
    if format == "pandas":
        return pd.DataFrame(list_digits)
    return list_digits
def rand_digit(start_index=None):
    """
    Yield random digits in order from the list.

    Exactly N digits are produced: iteration begins at ``start_index`` and
    wraps around to the beginning of the table (index modulo N) until every
    position has been visited once.
    If no start_index is specified, then it is picked (pseudo)-randomly.

    Being a generator, nothing here runs (including loading the digits via
    get_digits()) until the first item is requested.
    """
    if start_index is None:
        # randrange excludes N. randint(0, N) could return N, which wraps to
        # index 0 and made that starting point twice as likely as any other.
        start_index = random.randrange(N)
    logging.debug("Starting index for random digits generator is {}".format(start_index))
    digits = get_digits()
    # Yield all digits, then stop
    for position in range(start_index, start_index + N):
        yield digits[position % N]
def main():
    """Run the module self-test, printing one line per check to stdout.

    Any existing digits file is deleted first so that the download path is
    exercised. The checks cover: loading the digits, their count, the first
    and last five digits, the numpy and pandas formats, and the rand_digit
    generator. The function returns at the first failed check.
    """
    print("Testing the module randbyrand.")
    heading_digits = [1, 0, 0, 9, 7]
    tailing_digits = [4, 1, 9, 8, 8]
    # Index where the expected tail begins, derived from N rather than
    # hard-coded.
    tail_start = N - len(tailing_digits)

    # Test download
    if os.path.exists(DIGITS_FILENAME):
        os.remove(DIGITS_FILENAME)
    try:
        digits = get_digits()
    except Exception:
        # Exception (not a bare except) so Ctrl-C and SystemExit still work.
        print("Downloading or reading digits failed.")
        return
    print("Downloading and reading digits succeded.")

    # Test consistency
    n = len(digits)
    if n != N:
        print("There should be one million digits, but only {} have been loaded.".format(n))
        return
    print("One million digits have been retrieved correctly.")

    if digits[:5] != heading_digits or digits[tail_start:] != tailing_digits:
        print("Heading and tailing digits do NOT match.")
        return
    print("Heading and tailing digits match.")

    # Test formats other than list
    try:
        get_digits("numpy")
    except Exception:
        print("Digits failed to load as Numpy array.")
        return
    print("Digits successfully loaded as Numpy array.")

    try:
        get_digits("pandas")
    except Exception:
        print("Digits failed to load as Pandas DataFrame.")
        return
    print("Digits successfully loaded as Pandas DataFrame.")

    # Test random digit generator
    generator = rand_digit()
    print("Random digit generator loaded successfully.")

    # Count the items without materializing a throwaway list.
    n = sum(1 for _ in generator)
    if n != N:
        print("The function rand_digit only generated {} digits instead of one million.".format(n))
        return
    print("The function rand_digit correctly generated one million digits.")

    digits = list(rand_digit(start_index=0))
    if digits[:5] != heading_digits or digits[tail_start:] != tailing_digits:
        print("Heading and tailing digits of the function rand_digit do NOT match.")
        return
    print("Heading and tailing digits of the function rand_digit match.")

    # Test end
    print("All tests completed successfully.")


# Run the self-test only when executed as a script, not when imported.
if __name__ == "__main__":
    main()