-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbackend-nbd.cpp
370 lines (303 loc) · 9.81 KB
/
backend-nbd.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
#include <cassert>
#include <cinttypes>
#include <cstring>
#include <fcntl.h>
#include <unistd.h>
#if defined(__MINGW32__)
#include <winsock2.h>
#include <ws2tcpip.h>
#else
#include <netdb.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#endif
#include "backend-nbd.h"
#include "log.h"
#include "utils.h"
// NBD command codes; one of these is sent (in network byte order) in the
// `type` field of every request built by backend_nbd::invoke_nbd().
#define NBD_CMD_READ 0
#define NBD_CMD_WRITE 1
#define NBD_CMD_FLUSH 3
#define NBD_CMD_TRIM 4
// NBD error codes; the `error` field of a reply is compared against these in
// backend_nbd::invoke_nbd() to produce a readable log message.
#define NBD_EPERM 1 // Operation not permitted.
#define NBD_EIO 5 // Input/output error.
#define NBD_ENOMEM 12 // Cannot allocate memory.
#define NBD_EINVAL 22 // Invalid argument.
#define NBD_ENOSPC 28 // No space left on device.
#define NBD_EOVERFLOW 75 // Value too large.
#define NBD_ENOTSUP 95 // Operation not supported.
#define NBD_ESHUTDOWN 108 // Server is in the process of being shut down.
// Remember the address of the NBD server. The base class is constructed with
// the string "<host>:<port>". No connection is made here: fd starts at -1,
// which is the "not connected" marker used throughout this class.
backend_nbd::backend_nbd(const std::string & host, const int port) :
    backend(myformat("%s:%d", host.c_str(), port)),
    host(host),
    port(port),
    fd(-1)
{
}
// Close the connection to the NBD server, if there is one.
backend_nbd::~backend_nbd()
{
    const bool socket_open = fd != -1;

    if (socket_open)
        close(fd);
}
bool backend_nbd::begin()
{
return connect(false);
}
// Make sure there is a connection to the NBD server.
//
// When a socket is already open this returns true immediately. Otherwise the
// host name is resolved (IPv4 only), every returned address is tried in turn
// and, once a TCP connection exists, the 152 byte hello block of the server is
// read and its "NBDMAGIC" signature verified. Only after a valid hello are
// dev_size (taken from the hello) and the no-delay socket option set.
//
// retry: when true, keep trying (pausing one second between attempts) until a
//        connection has been established; when false, try only once.
//
// Returns true when connected, false when the single attempt failed.
bool backend_nbd::connect(const bool retry)
{
    if (fd != -1)
        return true;

    bool first_attempt = true;

    do {
        // Back off between attempts so that an unreachable server does not
        // cause a busy loop.
        if (!first_attempt)
            sleep(1);
        first_attempt = false;

        addrinfo *res = nullptr;

        addrinfo hints { };
        hints.ai_family   = AF_INET;
        hints.ai_socktype = SOCK_STREAM;

        // large enough for any value of an int, including sign and terminator
        char port_str[16] { 0 };
        snprintf(port_str, sizeof port_str, "%d", port);

        int rc = getaddrinfo(host.c_str(), port_str, &hints, &res);
        if (rc != 0) {
            DOLOG(logging::ll_error, "backend_nbd::connect", identifier, "Cannot resolve \"%s\"", host.c_str());
            continue;  // re-evaluates the loop condition; fd is still -1
        }

        // Try each resolved address until one accepts the connection.
        for(addrinfo *p = res; p != nullptr; p = p->ai_next) {
            if ((fd = socket(p->ai_family, p->ai_socktype, p->ai_protocol)) == -1) {
                DOLOG(logging::ll_error, "backend_nbd::connect", identifier, "Failed to create socket");
                continue;
            }

            if (::connect(fd, p->ai_addr, p->ai_addrlen) == -1) {
                DOLOG(logging::ll_error, "backend_nbd::connect", identifier, "Failed to connect");
                close(fd);
                fd = -1;
                continue;
            }

            break;
        }

        freeaddrinfo(res);

        if (fd == -1)
            continue;

        // Hello block as sent by the server directly after connecting.
        struct __attribute__ ((packed)) {
            uint8_t  magic1[8];
            uint8_t  magic2[8];
            uint64_t size;
            uint32_t flags;
            uint8_t  padding[124];
        } nbd_hello { };

        if (READ(fd, reinterpret_cast<uint8_t *>(&nbd_hello), sizeof nbd_hello) != sizeof nbd_hello) {
            DOLOG(logging::ll_error, "backend_nbd::connect", identifier, "NBD_HELLO receive failed");
            close(fd);
            fd = -1;
        }
        else if (memcmp(nbd_hello.magic1, "NBDMAGIC", 8) != 0) {
            DOLOG(logging::ll_error, "backend_nbd::connect", identifier, "NBD_HELLO magic failed");
            close(fd);
            fd = -1;
        }
        else {
            // Only trust the announced size once the hello has been validated,
            // so a failed handshake cannot clobber a previously known size.
            dev_size = my_NTOHLL(nbd_hello.size);

            socket_set_nodelay(fd);
        }
    }
    while(fd == -1 && retry);

    if (fd != -1)
        DOLOG(logging::ll_debug, "backend_nbd::connect", identifier, "Connected to NBD server");

    return fd != -1;
}
// Size of the device expressed in blocks: dev_size divided by the block size
// (integer division, so a trailing partial block is not counted).
uint64_t backend_nbd::get_size_in_blocks() const
{
    const uint64_t bytes_per_block = get_block_size();

    return dev_size / bytes_per_block;
}
// Block size, in bytes, used by this backend; it is a fixed value.
uint64_t backend_nbd::get_block_size() const
{
    constexpr uint64_t fixed_block_size = 4096;

    return fixed_block_size;
}
bool backend_nbd::invoke_nbd(const uint32_t command, const uint64_t offset, const uint32_t n_bytes, uint8_t *const data)
{
auto start = get_micros();
do {
if (!connect(true)) {
DOLOG(logging::ll_debug, "backend_nbd::invoke_nbd", identifier, "(re-)connect");
sleep(1);
continue;
}
struct __attribute__ ((packed)) {
uint32_t magic;
uint32_t type;
uint64_t handle;
uint64_t offset;
uint32_t length;
} nbd_request { };
nbd_request.magic = ntohl(0x25609513);
nbd_request.type = htonl(command);
nbd_request.offset = my_HTONLL(offset);
nbd_request.length = htonl(n_bytes);
if (WRITE(fd, reinterpret_cast<const uint8_t *>(&nbd_request), sizeof nbd_request) != sizeof nbd_request) {
DOLOG(logging::ll_error, "backend_nbd::invoke_nbd", identifier, "backend_nbd::invoke_nbd: problem sending request");
close(fd);
fd = -1;
sleep(1);
continue;
}
if (command == NBD_CMD_WRITE) {
if (WRITE(fd, reinterpret_cast<const uint8_t *>(data), n_bytes) != ssize_t(n_bytes)) {
DOLOG(logging::ll_error, "backend_nbd::invoke_nbd", identifier, "backend_nbd::invoke_nbd: problem sending payload");
close(fd);
fd = -1;
sleep(1);
continue;
}
}
struct __attribute__ ((packed)) {
uint32_t magic;
uint32_t error;
uint64_t handle;
} nbd_reply;
if (READ(fd, reinterpret_cast<uint8_t *>(&nbd_reply), sizeof nbd_reply) != sizeof nbd_reply) {
DOLOG(logging::ll_error, "backend_nbd::invoke_nbd", identifier, "backend_nbd::invoke_nbd: problem receiving reply header");
close(fd);
fd = -1;
sleep(1);
continue;
}
if (ntohl(nbd_reply.magic) != 0x67446698) {
DOLOG(logging::ll_error, "backend_nbd::invoke_nbd", identifier, "backend_nbd::invoke_nbd: bad reply header %08x", nbd_reply.magic);
close(fd);
fd = -1;
sleep(1);
continue;
}
int error = ntohl(nbd_reply.error);
if (error) {
std::string error_str;
if (error == NBD_EPERM)
error_str = "NBD_EPERM";
else if (error == NBD_EIO)
error_str = "NBD_EIO";
else if (error == NBD_ENOMEM)
error_str = "NBD_ENOMEM";
else if (error == NBD_ENOSPC)
error_str = "NBD_EINVAL";
else if (error == NBD_EINVAL)
error_str = "NBD_ENOSPC";
else if (error == NBD_EOVERFLOW)
error_str = "NBD_EOVERFLOW";
else if (error == NBD_ENOTSUP)
error_str = "NBD_ENOTSUP";
else if (error == NBD_ESHUTDOWN)
error_str = "NBD_ESHUTDOWN";
else
error_str = myformat("%d", error);
DOLOG(logging::ll_error, "backend_nbd::invoke_nbd", identifier, "backend_nbd::invoke_nbd: NBD server indicated error: %s", error_str.c_str());
return false;
}
if (command == NBD_CMD_READ) {
if (READ(fd, data, n_bytes) != ssize_t(n_bytes)) {
DOLOG(logging::ll_error, "backend_nbd::invoke_nbd", identifier, "backend_nbd::invoke_nbd: problem receiving payload");
close(fd);
fd = -1;
sleep(1);
continue;
}
}
}
while(fd == -1);
auto end = get_micros();
bs.io_wait += end-start;
return fd != -1;
}
// Ask the NBD server to flush (NBD_CMD_FLUSH with offset and length 0).
// The sync counter and the last-access timestamp are updated before the
// request is sent. Returns the result of the flush request.
bool backend_nbd::sync()
{
    ++bs.n_syncs;

    ts_last_acces = get_micros();

    const bool flushed = invoke_nbd(NBD_CMD_FLUSH, 0, 0, nullptr);

    return flushed;
}
// Write n_blocks blocks, starting at block block_nr, from data to the NBD
// server. The block range is locked for the duration of the request.
//
// block_nr: first block to write
// n_blocks: number of blocks to write
// data:     n_blocks * block size bytes to store
//
// Returns the result of the NBD write request. The write statistics are
// updated regardless of that result.
bool backend_nbd::write(const uint64_t block_nr, const uint32_t n_blocks, const uint8_t *const data)
{
    const uint64_t block_size = get_block_size();
    // 64 bit on purpose: off_t can be 32 bit, which would truncate large offsets
    const uint64_t offset     = block_nr * block_size;
    const uint64_t n_bytes    = n_blocks * block_size;

    DOLOG(logging::ll_debug, "backend_nbd::write", identifier, "block %" PRIu64 " (%" PRIu64 "), %u blocks, block size: %" PRIu64, block_nr, offset, n_blocks, block_size);

    auto lock_list = lock_range(block_nr, n_blocks);

    // invoke_nbd() only reads from the buffer for NBD_CMD_WRITE, hence the const_cast
    bool rc = invoke_nbd(NBD_CMD_WRITE, offset, static_cast<uint32_t>(n_bytes), const_cast<uint8_t *>(data));

    unlock_range(lock_list);

    ts_last_acces = get_micros();

    bs.bytes_written += n_bytes;
    bs.n_writes++;

    return rc;
}
// Ask the NBD server to trim (discard) n_blocks blocks starting at block
// block_nr. The block range is locked while the request(s) are outstanding.
//
// The length field of an NBD request is 32 bits wide, so a range whose byte
// length does not fit is sent as several consecutive trim requests instead of
// being silently truncated.
//
// Returns true when every request succeeded, false as soon as one fails.
bool backend_nbd::trim(const uint64_t block_nr, const uint32_t n_blocks)
{
    const uint64_t block_size = get_block_size();
    // 64 bit on purpose: off_t can be 32 bit, which would truncate large offsets
    const uint64_t offset     = block_nr * block_size;

    DOLOG(logging::ll_debug, "backend_nbd::trim", identifier, "block %" PRIu64 " (%" PRIu64 "), %u blocks, block size: %" PRIu64, block_nr, offset, n_blocks, block_size);

    // largest number of blocks whose byte count still fits in 32 bits
    const uint64_t max_blocks_per_request = UINT32_MAX / block_size;
    assert(max_blocks_per_request > 0);

    auto lock_list = lock_range(block_nr, n_blocks);

    bool     rc         = true;
    uint64_t cur_offset = offset;
    uint32_t blocks_left = n_blocks;

    // do/while: a request is sent even when n_blocks is 0
    do {
        const uint32_t chunk_blocks = blocks_left > max_blocks_per_request ? static_cast<uint32_t>(max_blocks_per_request) : blocks_left;
        const uint64_t chunk_bytes  = chunk_blocks * block_size;

        rc = invoke_nbd(NBD_CMD_TRIM, cur_offset, static_cast<uint32_t>(chunk_bytes), nullptr);

        cur_offset  += chunk_bytes;
        blocks_left -= chunk_blocks;
    }
    while(rc && blocks_left > 0);

    unlock_range(lock_list);

    ts_last_acces = get_micros();

    bs.n_trims += n_blocks;

    return rc;
}
// Read n_blocks blocks, starting at block block_nr, from the NBD server into
// data. The block range is locked for the duration of the request.
//
// block_nr: first block to read
// n_blocks: number of blocks to read
// data:     buffer receiving n_blocks * block size bytes
//
// Returns the result of the NBD read request. The read statistics are updated
// regardless of that result.
bool backend_nbd::read(const uint64_t block_nr, const uint32_t n_blocks, uint8_t *const data)
{
    const uint64_t block_size = get_block_size();
    // 64 bit on purpose: off_t can be 32 bit, which would truncate large offsets
    const uint64_t offset     = block_nr * block_size;
    const uint64_t n_bytes    = n_blocks * block_size;

    DOLOG(logging::ll_debug, "backend_nbd::read", identifier, "block %" PRIu64 " (%" PRIu64 "), %u blocks, block size: %" PRIu64, block_nr, offset, n_blocks, block_size);

    auto lock_list = lock_range(block_nr, n_blocks);

    bool rc = invoke_nbd(NBD_CMD_READ, offset, static_cast<uint32_t>(n_bytes), data);

    unlock_range(lock_list);

    ts_last_acces = get_micros();

    bs.bytes_read += n_bytes;
    bs.n_reads++;

    return rc;
}
// Compare-and-write: write data_write to the given block range only when the
// range currently holds exactly data_compare.
//
// The range is locked for the whole operation. The current contents are read
// back from the NBD server one block at a time and compared against
// data_compare; the first read error or mismatch stops the comparison. Only
// when every block matched is data_write sent in a single write request.
//
// block_nr:     first block of the range
// n_blocks:     number of blocks in the range
// data_write:   n_blocks * block size bytes to store on success
// data_compare: n_blocks * block size bytes the range is expected to contain
//
// Returns CWR_OK, CWR_READ_ERROR, CWR_MISMATCH or CWR_WRITE_ERROR.
backend::cmpwrite_result_t backend_nbd::cmpwrite(const uint64_t block_nr, const uint32_t n_blocks, const uint8_t *const data_write, const uint8_t *const data_compare)
{
    auto lock_list = lock_range(block_nr, n_blocks);

    const uint64_t block_size = get_block_size();
    // 64 bit on purpose: off_t can be 32 bit, which would truncate large offsets
    const uint64_t offset     = block_nr * block_size;
    const uint64_t n_bytes    = n_blocks * block_size;

    DOLOG(logging::ll_debug, "backend_nbd::cmpwrite", identifier, "block %" PRIu64 " (%" PRIu64 "), %u blocks (%" PRIu64 "), block size: %" PRIu64, block_nr, offset, n_blocks, n_bytes, block_size);

    cmpwrite_result_t result = cmpwrite_result_t::CWR_OK;

    // scratch space for one block read back from the server
    uint8_t *buffer = new uint8_t[block_size]();

    for(uint32_t i=0; i<n_blocks; i++) {
        // read
        const uint64_t block_offset = offset + i * block_size;

        if (invoke_nbd(NBD_CMD_READ, block_offset, static_cast<uint32_t>(block_size), buffer) == false) {
            DOLOG(logging::ll_error, "backend_nbd::cmpwrite", identifier, "error reading");
            result = cmpwrite_result_t::CWR_READ_ERROR;
            break;
        }

        bs.bytes_read += block_size;

        // compare
        if (memcmp(buffer, &data_compare[i * block_size], block_size) != 0) {
            DOLOG(logging::ll_warning, "backend_nbd::cmpwrite", identifier, "data mismatch");
            result = cmpwrite_result_t::CWR_MISMATCH;
            break;
        }
    }

    delete [] buffer;

    if (result == cmpwrite_result_t::CWR_OK) {
        // write; invoke_nbd() only reads from the buffer for NBD_CMD_WRITE
        bool rc = invoke_nbd(NBD_CMD_WRITE, offset, static_cast<uint32_t>(n_bytes), const_cast<uint8_t *>(data_write));

        if (rc == false) {
            DOLOG(logging::ll_error, "backend_nbd::cmpwrite", identifier, "ERROR writing");
            result = cmpwrite_result_t::CWR_WRITE_ERROR;
        }
        else {
            // the whole range was written, not just a single block
            bs.bytes_written += n_bytes;

            ts_last_acces = get_micros();
        }
    }

    unlock_range(lock_list);

    bs.n_reads++;
    bs.n_writes++;

    return result;
}
// The serial of this backend is its identifier.
std::string backend_nbd::get_serial() const
{
    std::string serial(identifier);

    return serial;
}