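"""Hessian computation for QNN cost landscapes (barren-plateau analysis).

The Hessian of the training cost is computed in two ways: analytically via the
parameter-shift rule (calc_hessian) and via torch autograd (torch_calc_hessian).
A small sanity check runs under __main__.
"""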
import numpy as np
import torch
from copy import deepcopy
from qnns.qnn import get_qnn
from data import uniform_random_data, random_unitary_matrix
from classic_training import cost_func
from torch.autograd.functional import hessian


def get_param_indices(params):
    """Enumerate every scalar parameter as a tuple index into qnn.params."""
    indices = []
    if isinstance(params, torch.Tensor):
        if len(params.shape) == 1:
            return [(i,) for i in range(params.shape[0])]
    for i in range(len(params)):
        indices += [(i,) + el for el in get_param_indices(params[i])]
    return indices
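
# Illustrative expected output (a sketch, assuming standard torch shapes):
#   get_param_indices(torch.zeros(3))     -> [(0,), (1,), (2,)]
#   get_param_indices(torch.zeros(2, 2))  -> [(0, 0), (0, 1), (1, 0), (1, 1)]
#   get_param_indices([torch.zeros(2), torch.zeros(1, 2)])
#                                         -> [(0, 0), (0, 1), (1, 0, 0), (1, 0, 1)]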


def get_shift_derv(x_i, x_j, qnn, X, y_conj):
    """Second derivative of the cost w.r.t. parameters x_i, x_j via the parameter-shift rule.

    With the shift rule f'(x) = r * [f(x + s) - f(x - s)], s = pi/(4r), applied
    twice (here r = 1, so no normalisation factor remains):
        d^2 f / dx_i^2     = f(x_i + 2s) - 2 f(x_i) + f(x_i - 2s)
        d^2 f / dx_i dx_j  = f(+s, +s) - f(+s, -s) - f(-s, +s) + f(-s, -s)
    """
    original_params = deepcopy(qnn.params)
    r = 1
    s = np.pi / (4 * r)
    if x_i == x_j:
        f = cost_func(X, y_conj, qnn)
        qnn.params[x_i[0]][x_i[1:]] = original_params[x_i[0]][x_i[1:]] + 2 * s
        f_p_2si = cost_func(X, y_conj, qnn)
        qnn.params[x_i[0]][x_i[1:]] = original_params[x_i[0]][x_i[1:]] - 2 * s
        f_m_2si = cost_func(X, y_conj, qnn)
        second_derv = f_p_2si - 2 * f + f_m_2si
    else:
        # x_i + s and x_j + s
        qnn.params[x_i[0]][x_i[1:]] = original_params[x_i[0]][x_i[1:]] + s
        qnn.params[x_j[0]][x_j[1:]] = original_params[x_j[0]][x_j[1:]] + s
        f_p_si_p_sj = cost_func(X, y_conj, qnn)
        # x_i + s and x_j - s
        qnn.params[x_j[0]][x_j[1:]] = original_params[x_j[0]][x_j[1:]] - s
        f_p_si_m_sj = cost_func(X, y_conj, qnn)
        # x_i - s and x_j - s
        qnn.params[x_i[0]][x_i[1:]] = original_params[x_i[0]][x_i[1:]] - s
        f_m_si_m_sj = cost_func(X, y_conj, qnn)
        # x_i - s and x_j + s
        qnn.params[x_j[0]][x_j[1:]] = original_params[x_j[0]][x_j[1:]] + s
        f_m_si_p_sj = cost_func(X, y_conj, qnn)
        second_derv = f_p_si_p_sj - f_p_si_m_sj - f_m_si_p_sj + f_m_si_m_sj
    qnn.params = original_params
    return second_derv.item()
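

# Hedged cross-check for get_shift_derv: a plain central finite difference over
# the same cost. This helper is purely illustrative and is not called anywhere
# else in this file; the step size h is an assumption.
def finite_diff_second_derv(x_i, x_j, qnn, X, y_conj, h=1e-4):
    original_params = deepcopy(qnn.params)

    def cost_at(s_i, s_j):
        # shift parameter x_i (and x_j, if distinct), evaluate, then restore
        qnn.params[x_i[0]][x_i[1:]] = original_params[x_i[0]][x_i[1:]] + s_i
        if x_j != x_i:
            qnn.params[x_j[0]][x_j[1:]] = original_params[x_j[0]][x_j[1:]] + s_j
        val = cost_func(X, y_conj, qnn).item()
        qnn.params = deepcopy(original_params)
        return val

    if x_i == x_j:
        return (cost_at(h, 0.0) - 2 * cost_at(0.0, 0.0) + cost_at(-h, 0.0)) / h**2
    return (cost_at(h, h) - cost_at(h, -h) - cost_at(-h, h) + cost_at(-h, -h)) / (4 * h**2)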


def calc_hessian(qnn, X, U):
    """Full Hessian of the cost via the parameter-shift rule, exploiting symmetry."""
    # qnn.params is sometimes a list of tensors, and those tensors can have
    # several dimensions, so single parameters are addressed by index tuples
    if isinstance(qnn.params, list):
        qnn.params = [el.detach() for el in qnn.params]
    else:
        qnn.params = qnn.params.detach()
    indices = get_param_indices(qnn.params)
    hessian_matrix = np.zeros((len(indices), len(indices)))
    # conjugated target states (U X)^*, as expected by cost_func
    y_conj = torch.matmul(U, X).conj()
    for x_i in range(len(indices)):
        # the Hessian is symmetric: mirror the entries computed in earlier rows
        for x_j in range(x_i):
            hessian_matrix[x_i][x_j] = hessian_matrix[x_j][x_i]
        for x_j in range(x_i, len(indices)):
            derv = get_shift_derv(indices[x_i], indices[x_j], qnn, X, y_conj)
            hessian_matrix[x_i][x_j] = derv
    return hessian_matrix.T
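
# Usage sketch: the returned matrix is real symmetric, so its spectrum can be
# inspected directly for barren-plateau diagnostics, e.g.
#   H = calc_hessian(qnn, X, U)
#   eigvals = np.linalg.eigvalsh(H)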


def calc_hessian_list_params(qnn, X, U):
    """Hessian via torch autograd for list-valued qnn.params."""
    params = qnn.params
    param_shape = [el.shape for el in params]
    flat_params = []
    num_params_list = []
    for el in params:
        num_params_list.append(len(get_param_indices(el)))
        flat_params.extend(el.reshape((num_params_list[-1],)).tolist())
    y_conj = torch.matmul(U, X).conj()

    def func(params):
        # rebuild the list structure from the flat vector; the slices must stay
        # attached to the autograd graph (no detach/clone), otherwise the
        # Hessian comes out as all zeros
        qnn.params = []
        current_i = 0
        for i in range(len(num_params_list)):
            num_params = num_params_list[i]
            qnn.params.append(params[current_i:current_i + num_params].reshape(param_shape[i]))
            current_i += num_params
        return cost_func(X, y_conj, qnn)

    return hessian(func, torch.tensor(flat_params, dtype=torch.float64))


def calc_hessian_no_list_params(qnn, X, U):
    """Hessian via torch autograd for tensor-valued qnn.params."""
    params = qnn.params
    param_shape = params.shape
    num_params = len(get_param_indices(params))
    flat_params = params.reshape((num_params,))
    y_conj = torch.matmul(U, X).conj()

    def func(params):
        qnn.params = params.reshape(param_shape)
        return cost_func(X, y_conj, qnn)

    # detach().clone() instead of torch.tensor(tensor), which warns and copies
    return hessian(func, flat_params.detach().clone())


def torch_calc_hessian(qnn, X, U):
    if isinstance(qnn.params, list):
        return calc_hessian_list_params(qnn, X, U)
    else:
        return calc_hessian_no_list_params(qnn, X, U)
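
# Consistency sketch (assumption: both routes approximate the same matrix,
# which requires the r = 1 shift convention to match the QNN's gates):
#   H_shift = calc_hessian(qnn, X, U)                         # parameter shift
#   H_torch = torch_calc_hessian(qnn, X, U).detach().numpy()  # torch autograd
#   assert np.allclose(H_shift, H_torch, atol=1e-6)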


def torch_gradients(qnn, X, y_conj):
    """Gradient of the cost w.r.t. qnn.params (tensor-valued params assumed)."""
    loss = cost_func(X, y_conj, qnn)
    qnn.params.retain_grad()
    loss.backward()
    gradients = qnn.params.grad
    return gradients
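
# Usage sketch (assumes tensor-valued qnn.params that track gradients):
#   qnn.params.requires_grad_(True)
#   y_conj = torch.matmul(U, X).conj()
#   grads = torch_gradients(qnn, X, y_conj)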


if __name__ == '__main__':
    x_qbits = 1
    num_layers = 1
    schmidt_rank = 1
    num_points = 1
    r_qbits = int(np.ceil(np.log2(schmidt_rank)))
    qnn = get_qnn('CudaPennylane', range(x_qbits), num_layers, 'cpu')
    qnn.params = qnn.params.detach()
    X = torch.from_numpy(np.array(uniform_random_data(schmidt_rank, num_points, x_qbits, r_qbits)))
    X = X.reshape((X.shape[0], int(X.shape[1] / 2**x_qbits), 2**x_qbits)).permute(0, 2, 1)
    # overwrite the random sample with the basis state |0> for a reproducible check
    X[0, 0, 0] = 1
    X[0, 1, 0] = 0
    print(X)
    U = torch.tensor(random_unitary_matrix(x_qbits), dtype=torch.complex128, device='cpu')
    # replace the random unitary with the identity for this sanity check
    U = torch.eye(U.shape[0], dtype=torch.complex128)
    print(U)
    # zero out all parameters
    indices = get_param_indices(qnn.params)
    for idx in indices:
        qnn.params[idx[0]][idx[1:]] = 0
    print(qnn.params)
    print(qnn.get_tensor_V())
    # named hessian_matrix to avoid shadowing torch.autograd.functional.hessian
    hessian_matrix = calc_hessian(qnn, X, U)
    print(hessian_matrix)