forked from BenediktRiegel/quantum-no-free-lunch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbarren_plateau_solve.py
238 lines (195 loc) · 10.2 KB
/
barren_plateau_solve.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
from scipy.optimize import fsolve
from classic_training import cost_func as loss
from barren_plateau_hessian import *
from itertools import product
from concurrent.futures import ProcessPoolExecutor
def cartesian(arrays, out=None, dtype=np.float64):
arrays = [np.asarray(x) for x in arrays]
n = np.prod([x.size for x in arrays])
if out is None:
out = np.zeros([n, len(arrays)], dtype=dtype)
# out = np.array([[1e-1[[1e-16]*len(arrays) for _ in range(n)])
# print(out)
m = int(n / arrays[0].size)
out[:,0] = np.repeat(arrays[0], m)
if arrays[1:]:
cartesian(arrays[1:], out=out[0:m, 1:])
for j in range(1, arrays[0].size):
out[j*m:(j+1)*m, 1:] = out[0:m, 1:]
return out
def get_offset_list(list):
list_of_offset = [el for el in product(list, repeat=4)]
# print('Full offset', list_of_offset)
def compute_gradient(p_idx, qnn, X, y_conj):
original_params = deepcopy(qnn.params)
r = 1
s = np.pi / (4 * r)
qnn.params[p_idx[0]][p_idx[1:]] = original_params[p_idx[0]][p_idx[1:]] + s
f_p_s = loss(X, y_conj, qnn)
qnn.params[p_idx[0]][p_idx[1:]] = original_params[p_idx[0]][p_idx[1:]] - s
f_m_s = loss(X, y_conj, qnn)
sum = f_p_s - f_m_s
return sum
def get_cost_function(qnn, X, U):
param_indices = get_param_indices(qnn.params)
y_conj = torch.matmul(U, X).conj()
def cost_func(params):
#try to get zero grad to find critical points
for i in range(len(param_indices)):
qnn.params[param_indices[i][0]][param_indices[i][1:]] = params[i]
return [compute_gradient(p_idx, qnn, X, y_conj) for p_idx in param_indices]
return cost_func
def neighbourhood_gradients(qnn, X, U, num_samples, sample_step_size, param_indices, sample_values, neighbourhood_params):
y_conj = torch.matmul(U, X).conj()
result = np.empty((len(neighbourhood_params), len(param_indices)))
org_params = deepcopy(qnn.params)
for n_p_idx in range(len(neighbourhood_params)):
offsets = neighbourhood_params[n_p_idx]
for i in range(len(param_indices)):
qnn.params[param_indices[i]] = org_params[param_indices[i]] + offsets[i]
for i in range(len(param_indices)):
result[n_p_idx, i] = compute_gradient(param_indices[i], qnn, X, y_conj)
qnn.params = org_params
#return result.reshape([len(sample_values)]*len(param_indices)+[len(param_indices)])
return result
def torch_neighbourhood_gradients(qnn, X, U, num_samples, sample_step_size, param_indices, sample_values, neighbourhood_params):
y_conj = torch.matmul(U, X).conj()
#print(len(neighbourhood_params))
result = np.empty((len(neighbourhood_params), len(param_indices)))
org_params = qnn.params
for n_p_idx in range(len(neighbourhood_params)):
offsets = neighbourhood_params[n_p_idx]
new_params = torch.empty(org_params.shape, dtype=torch.float64)
for i in range(len(param_indices)):
new_params[param_indices[i][0]][param_indices[i][1:]] = org_params[param_indices[i]] + offsets[i]
# qnn.params = torch.tensor(new_params, requires_grad=True, dtype=torch.float64)
qnn.params = new_params.requires_grad_(True)
grads = torch_gradients(qnn, X, y_conj)
for i in range(len(param_indices)):
result[n_p_idx][i] = np.array(grads[param_indices[i][0]][param_indices[i][1:]])
qnn.params = org_params
# [len(sample_values), ...,len(sample_values), len(param_indices)]
#return result.reshape([len(sample_values)]*len(param_indices)+[len(param_indices)]
return result
def neighbourhood_loss(qnn, X, U, num_samples, sample_step_size, param_indices, sample_values, neighbourhood_params):
y_conj = torch.matmul(U, X).conj()
# print("neighbourhood_params:", neighbourhood_params)
result = np.empty((len(neighbourhood_params),))
org_params = qnn.params
for n_p_idx in range(len(neighbourhood_params)):
offsets = neighbourhood_params[n_p_idx]
new_params = torch.empty(qnn.params.shape, dtype=torch.float64)
for i in range(len(param_indices)):
new_params[param_indices[i][0]][param_indices[i][1:]] = org_params[param_indices[i][0]][param_indices[i][1:]] + offsets[i]
qnn.params = new_params
result[n_p_idx] = loss(X, y_conj, qnn)
qnn.params = org_params
#return result.reshape([len(sample_values)]*len(param_indices))
return result.flatten()
def get_eigenvalues(x):
#x = np.matrix(x)
return np.linalg.eigvals(x)
def indefinite(x):
eigenvalues = get_eigenvalues(x)
if np.all(eigenvalues >=0):
if np.all(eigenvalues > 0):
return 'pos_def'
else:
return 'pos_semidef'
elif np.all(eigenvalues <= 0 ):
if np.all(eigenvalues < 0):
return 'neg_def'
else:
return 'neg_semidef'
else:
return 'indef'
def simple_plateau_search(process_id, x_qbits, num_layers, sample_step_size, grad_tol,
loss_tol, QNNs, schmidt_rank, num_points, num_samples, static_samples):
for element in product(x_qbits, num_layers, sample_step_size, grad_tol, loss_tol, QNNs):
x_qbits_prod = element[0]
num_layers_prod = element[1]
sample_step_size_prod = element[2]
grad_tol_prod = element[3]
loss_tol_prod = element[4]
qnn_name = element[5]
U = torch.tensor(random_unitary_matrix(x_qbits_prod), dtype=torch.complex128)
X = torch.tensor(
np.array(uniform_random_data(schmidt_rank, num_points, x_qbits_prod, int(np.ceil(np.log2(schmidt_rank))))),
dtype=torch.complex128)
X = X.reshape((X.shape[0], int(X.shape[1] / U.shape[0]), U.shape[0])).permute(0, 2, 1)
qnn = get_qnn(qnn_name, list(range(x_qbits_prod)), num_layers_prod, device='cpu')
if isinstance(qnn.params, list):
qnn.params = [el.detach() for el in qnn.params]
else:
qnn.params = qnn.params.detach()
param_indices = get_param_indices(qnn.params)
cost_func = get_cost_function(qnn, X, U)
init_p = np.array([qnn.params[p_idx[0]][p_idx[1:]].item() for p_idx in get_param_indices(qnn.params)])
out = fsolve(cost_func, init_p, xtol=1e-13, maxfev=1000)
for i in range(len(out)):
qnn.params[param_indices[i][0]][param_indices[i][1:]] = out[i]
print(f'{process_id}: test hessian for indefiniteness')
# hessian = calc_hessian(qnn, X, U)
hessian = torch_calc_hessian(qnn, X, U)
if (indefinite(hessian) == 'indef'):
y_conj = torch.matmul(U, X).conj()
p_loss = loss(X, y_conj, qnn).item()
param_indices = get_param_indices(qnn.params)
sample_values = np.linspace(-num_samples * sample_step_size_prod, num_samples * sample_step_size_prod,
num=2 * num_samples + 1, endpoint=True, dtype=np.float64)
# grid sampling
# neighbourhood_params = [el for el in product(sample_values, repeat=len(param_indices))]
# static sampling
neighbourhood_params = [
np.random.uniform(-num_samples * sample_step_size_prod, num_samples * sample_step_size_prod,
size=len(param_indices)) for i in range(static_samples)]
# print(neighbourhood_params)
n_gradients = torch_neighbourhood_gradients(qnn, X, U, num_samples, sample_step_size_prod, param_indices,
sample_values, neighbourhood_params)
n_loss = neighbourhood_loss(qnn, X, U, num_samples, sample_step_size_prod, param_indices, sample_values,
neighbourhood_params)
grad_mags = np.array([np.linalg.norm(grad) for grad in n_gradients])
if (grad_mags <= grad_tol_prod).all():
if (np.abs(n_loss - p_loss) <= loss_tol_prod).all():
print("found plateau")
with open(f'./experimental_results/plateau/plateau_results_{process_id}.txt', 'a') as f:
f.write(
f"plateau_type=strong, qnn_params={str(qnn.params.tolist()).replace(' ', '')}, unitary={str(U.tolist()).replace(' ', '')}, data_points={str(X.tolist()).replace(' ', '')}, QNN architecture={str(qnn_name)}, x_qbits={str(x_qbits_prod)}, num_layers={str(num_layers_prod)}, sample_step_size={str(sample_step_size_prod)}, grad_tol={str(grad_tol_prod)}, loss_tol={str(loss_tol_prod)}\n")
f.close()
else:
with open(f'./experimental_results/plateau/plateau_results_{process_id}.txt', 'a') as f:
f.write(
f"plateau_type=weak, qnn_params={str(qnn.params.tolist()).replace(' ', '')}, unitary={str(U.tolist()).replace(' ', '')}, data_points={str(X.tolist()).replace(' ', '')}, QNN architecture={str(qnn_name)}, x_qbits={str(x_qbits_prod)}, num_layers={str(num_layers_prod)}, sample_step_size={str(sample_step_size_prod)}, grad_tol={str(grad_tol_prod)}, loss_tol={str(loss_tol_prod)}\n")
f.close()
print(f"{process_id}: found weak plateau")
print(f"{process_id}: no plateau found at saddle point")
def process_execution(process_id):
# x_qbits = [1,2]
x_qbits = [4]
num_layers = [3,4,5]
schmidt_rank = 1
num_points = 2
num_samples = 1
static_samples = 1000
sample_step_size = [1e-12, 1e-10, 1e-08]
grad_tol = [1e-12, 1e-10, 1e-08]
loss_tol = [1e-12, 1e-10, 1e-08]
# QNNs = ['CudaPennylane']
QNNs = ['CudaCircuit6']
num_trials = 100
for current_trials in range(num_trials):
simple_plateau_search(process_id, x_qbits, num_layers, sample_step_size, grad_tol,
loss_tol, QNNs, schmidt_rank, num_points, num_samples, static_samples)
# for current_trials in range(num_trials):
# cheat_plateau_search()
def main():
num_processes = 8
ppe = ProcessPoolExecutor(max_workers=num_processes)
worker_args = list(range(num_processes))
results = ppe.map(process_execution, worker_args)
for res in results:
print(res)
if __name__ == '__main__':
# torch.manual_seed(56789)
# np.random.seed(56789)
main()