-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhamr_buffer.h
838 lines (742 loc) · 37 KB
/
hamr_buffer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
#ifndef buffer_h
#define buffer_h
#include "hamr_config.h"
#include "hamr_buffer_allocator.h"
#include "hamr_buffer_transfer.h"
#include "hamr_stream.h"
#include <memory>
#include <type_traits>
/// heterogeneous accelerator memory resource
namespace hamr
{
/** @brief A technology agnostic buffer that manages memory on the host, GPUs,
* and other accelerators.
* @details The buffer mediates between different accelerator and platform
* portability technologies' memory models. Examples of platform portability
* technologies are HIP, OpenMP, OpenCL, SYCL, and Kokkos. Examples of
* accelerator technologies are CUDA and ROCm. Other accelerator and platform
* portability technologies exist and can be supported. Data can be left in
* place until it is consumed. The consumer of the data can get a pointer that
* is accessible in the technology that will be used to process the data. If
* the data is already accessible in that technology access is a NOOP,
* otherwise the data will be moved such that it is accessible. Smart pointers
* take care of destruction of temporary buffers if needed.
*/
template <typename T>
class HAMR_EXPORT buffer
{
public:
/** An enumeration for the type of allocator to use for memory allocations.
* See ::buffer_allocator.
*/
using allocator = buffer_allocator;
/** An enumeration for the types of transfer supported. See
* ::buffer_transfer
*/
using transfer = buffer_transfer;
/** Construct an empty buffer.
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] strm a ::stream object used to order operations
* @param[in] sync a ::buffer_transfer specifies synchronous or
* asynchronous behavior.
*/
buffer(allocator alloc, const hamr::stream &strm, transfer sync = transfer::async);
/** Construct an empty buffer. This constructor will result in the default
* stream for the chosen technology with transfer::async.
*
* Delegates to the (alloc, strm, sync) constructor, passing a default
* constructed stream and transfer::async.
*
* @note This constructor is not declared explicit, so a ::buffer_allocator
* value can implicitly convert to a buffer.
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
*/
buffer(allocator alloc) : buffer(alloc, stream(), transfer::async) {}
/** Construct a buffer with storage allocated but uninitialized.
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] strm a ::stream object used to order operations
* @param[in] sync a ::buffer_transfer specifies synchronous or
* asynchronous behavior.
* @param[in] n_elem the initial size of the new buffer
*/
buffer(allocator alloc, const hamr::stream &strm, transfer sync, size_t n_elem);
/** Construct a buffer configured for asynchronous data transfers, with
* storage allocated, but uninitialized.
*
* Delegates to the (alloc, strm, sync, n_elem) constructor with
* sync = transfer::async.
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] strm a ::stream object used to order operations
* @param[in] n_elem the initial size of the new buffer
*/
buffer(allocator alloc, const hamr::stream &strm, size_t n_elem)
: buffer(alloc, strm, transfer::async, n_elem) {}
/** Construct a buffer with storage allocated but uninitialized. This
* constructor will result in the default stream for the chosen technology
* with transfer::async mode.
*
* Delegates to the (alloc, strm, sync, n_elem) constructor, passing a
* default constructed stream and transfer::async.
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] n_elem the initial size of the new buffer
*/
buffer(allocator alloc, size_t n_elem) :
buffer(alloc, stream(), transfer::async, n_elem) {}
/** Construct a buffer with storage allocated and initialized to a single
* value.
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] strm a ::stream object used to order operations
* @param[in] sync a ::buffer_transfer specifies synchronous or
* asynchronous behavior.
* @param[in] n_elem the initial size of the new buffer
* @param[in] val a single value used to initialize the buffer
* contents
*/
buffer(allocator alloc, const hamr::stream &strm,
transfer sync, size_t n_elem, const T &val);
/** Construct a buffer configured for asynchronous data movement, with
* storage allocated, and initialized to a single value.
*
* Delegates to the (alloc, strm, sync, n_elem, val) constructor with
* sync = transfer::async.
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] strm a ::stream object used to order operations
* @param[in] n_elem the initial size of the new buffer
* @param[in] val a single value used to initialize the buffer
* contents
*/
buffer(allocator alloc, const hamr::stream &strm, size_t n_elem, const T &val)
: buffer(alloc, strm, transfer::async, n_elem, val) {}
/** Construct a buffer with storage allocated and initialized to a single
* value. This constructor will result in the default stream for the chosen
* technology with transfer::async mode.
*
* Delegates to the (alloc, strm, sync, n_elem, val) constructor, passing a
* default constructed stream and transfer::async.
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] n_elem the initial size of the new buffer
* @param[in] val a single value used to initialize the buffer
* contents
*/
buffer(allocator alloc, size_t n_elem, const T &val) :
buffer(alloc, stream(), transfer::async, n_elem, val) {}
/** Construct a buffer with storage allocated and initialized to the array
* of values. This array is always assumed to be accessible on the host. Use
* one of the zero-copy constructors if the data is already accessible on
* the device.
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] strm a ::stream object used to order operations
* @param[in] sync a ::buffer_transfer specifies synchronous or
* asynchronous behavior.
* @param[in] n_elem the initial size of the new buffer and number of
* elements in the array pointed to by vals
* @param[in] vals an array of values accessible on the host used to
* initialize the buffer contents
*/
buffer(allocator alloc, const hamr::stream &strm,
transfer sync, size_t n_elem, const T *vals);
/** Construct a buffer configured for asynchronous data movement, with
* storage allocated, and initialized to the array of values. This array is
* always assumed to be accessible on the host. Use one of the zero-copy
* constructors if the data is already accessible on the device.
*
* Delegates to the (alloc, strm, sync, n_elem, vals) constructor with
* sync = transfer::async.
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] strm a ::stream object used to order operations
* @param[in] n_elem the initial size of the new buffer and number of
* elements in the array pointed to by vals
* @param[in] vals an array of values accessible on the host used to
* initialize the buffer contents
*/
buffer(allocator alloc, const hamr::stream &strm, size_t n_elem, const T *vals)
: buffer(alloc, strm, transfer::async, n_elem, vals) {}
/** Construct a buffer with storage allocated and initialized to the array
* of values. This array is always assumed to be accessible on the host. Use
* one of the zero-copy constructors if the data is already accessible on
* the device. This constructor will result in the default stream for the
* chosen technology with transfer::async mode.
*
* Delegates to the (alloc, strm, sync, n_elem, vals) constructor, passing a
* default constructed stream and transfer::async.
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] n_elem the initial size of the new buffer and number of
* elements in the array pointed to by vals
* @param[in] vals an array of values accessible on the host used to
* initialize the buffer contents
*/
buffer(allocator alloc, size_t n_elem, const T *vals) :
buffer(alloc, stream(), transfer::async, n_elem, vals) {}
/** Construct by directly providing the buffer contents. This can be used
* for zero-copy transfer of data. One must also name the allocator type
* and device owning the data. In addition for new allocations the
* allocator type and owner are used internally to know how to
* automatically move data during inter technology transfers.
*
* @param[in] alloc a ::buffer_allocator indicating the technology
* backing the pointer
* @param[in] strm a ::stream object used to order operations
* @param[in] sync a ::buffer_transfer specifies synchronous or
* asynchronous behavior.
* @param[in] size the number of elements in the array pointed to by ptr
* @param[in] owner the device owning the memory, -1 for host. if the
* allocator is a GPU allocator and -1 is passed the
* driver API is used to determine the device that
* allocated the memory.
* @param[in] ptr a pointer to the array
* @param[in] df a function `void df(void*ptr)` used to delete the array
* when this instance is finished.
*/
template <typename delete_func_t>
buffer(allocator alloc, const hamr::stream &strm, transfer sync,
size_t size, int owner, T *ptr, delete_func_t df);
/** Construct by directly providing the buffer contents. This can be used
* for zero-copy transfer of data. One must also name the allocator type
* and device owning the data. In addition for new allocations the
* allocator type and owner are used internally to know how to
* automatically move data during inter technology transfers. The buffer is
* configured for asynchronous data transfers.
*
* Delegates to the (alloc, strm, sync, size, owner, ptr, df) constructor
* with sync = transfer::async.
*
* @tparam delete_func_t the type of the deleter df
*
* @param[in] alloc a ::buffer_allocator indicating the technology
* backing the pointer
* @param[in] strm a ::stream object used to order operations
* @param[in] size the number of elements in the array pointed to by ptr
* @param[in] owner the device owning the memory, -1 for host. if the
* allocator is a GPU allocator and -1 is passed the
* driver API is used to determine the device that
* allocated the memory.
* @param[in] ptr a pointer to the array
* @param[in] df a function `void df(void*ptr)` used to delete the array
* when this instance is finished.
*/
template <typename delete_func_t>
buffer(allocator alloc, const hamr::stream &strm, size_t size,
int owner, T *ptr, delete_func_t df)
: buffer(alloc, strm, transfer::async, size, owner, ptr, df) {}
/** Construct by directly providing the buffer contents. This can be used
* for zero-copy transfer of data. One must also name the allocator type
* and device owning the data. In addition for new allocations the
* allocator type and owner are used internally to know how to
* automatically move data during inter technology transfers. This
* constructor will result in the default stream for the chosen technology
* with transfer::async mode.
*
* Delegates to the (alloc, strm, sync, size, owner, ptr, df) constructor,
* passing a default constructed stream and transfer::async.
*
* @tparam delete_func_t the type of the deleter df
*
* @param[in] alloc a ::buffer_allocator indicating the technology
* backing the pointer
* @param[in] size the number of elements in the array pointed to by ptr
* @param[in] owner the device owning the memory, -1 for host. if the
* allocator is a GPU allocator and -1 is passed the
* driver API is used to determine the device that
* allocated the memory.
* @param[in] ptr a pointer to the array
* @param[in] df a function `void df(void*ptr)` used to delete the array
* when this instance is finished.
*/
template <typename delete_func_t>
buffer(allocator alloc, size_t size, int owner, T *ptr, delete_func_t df)
: buffer(alloc, stream(), transfer::async, size, owner, ptr, df) {}
/** Construct by directly providing the buffer contents. This can be used
* for zero-copy transfer of data. One must also name the allocator type
* and device owning the data. In addition for new allocations the
* allocator type and owner are used internally to know how to
* automatically move data during inter technology transfers.
* The passed ::buffer_allocator is used to create the deleter that will be
* called when this instance is finished with the memory. Use this
* constructor to transfer ownership of the array.
*
* @param[in] alloc a ::buffer_allocator indicating the technology
* backing the pointer
* @param[in] strm a ::stream object used to order operations
* @param[in] sync a ::buffer_transfer specifies synchronous or
* asynchronous behavior.
* @param[in] size the number of elements in the array pointed to by ptr
* @param[in] owner the device owning the memory, -1 for host. if the
* allocator is a GPU allocator and -1 is passed the
* driver API is used to determine the device that
* allocated the memory.
* @param[in] ptr a pointer to the array
* @param[in] take set non-zero if the buffer should delete the passed
* memory using the named allocator
*/
buffer(allocator alloc, const hamr::stream &strm,
transfer sync, size_t size, int owner, T *ptr, int take = 1);
/** Construct by directly providing the buffer contents. This can be used
* for zero-copy transfer of data. One must also name the allocator type
* and device owning the data. In addition for new allocations the
* allocator type and owner are used internally to know how to
* automatically move data during inter technology transfers.
* The passed ::buffer_allocator is used to create the deleter that will be
* called when this instance is finished with the memory. Use this
* constructor to transfer ownership of the array. The buffer is configured
* for asynchronous data transfers.
*
* Delegates to the (alloc, strm, sync, size, owner, ptr, take) constructor
* with sync = transfer::async and take left at its default value of 1, so
* the buffer deletes the passed memory using the named allocator.
*
* @param[in] alloc a ::buffer_allocator indicating the technology
* backing the pointer
* @param[in] strm a ::stream object used to order operations
* @param[in] size the number of elements in the array pointed to by ptr
* @param[in] owner the device owning the memory, -1 for host. if the
* allocator is a GPU allocator and -1 is passed the
* driver API is used to determine the device that
* allocated the memory.
* @param[in] ptr a pointer to the array
*/
buffer(allocator alloc, const hamr::stream &strm, size_t size, int owner, T *ptr)
: buffer(alloc, strm, transfer::async, size, owner, ptr) {}
/** Construct by directly providing the buffer contents. This can be used
* for zero-copy transfer of data. One must also name the allocator type
* and device owning the data. In addition for new allocations the
* allocator type and owner are used internally to know how to
* automatically move data during inter technology transfers. The passed
* ::buffer_allocator is used to create the deleter that will be called
* when this instance is finished with the memory. Use this constructor to
* transfer ownership of the array. This constructor will result in the
* default stream for the chosen technology with transfer::async mode.
*
* Delegates to the (alloc, strm, sync, size, owner, ptr, take) constructor,
* passing a default constructed stream and transfer::async, with take left
* at its default value of 1, so the buffer deletes the passed memory using
* the named allocator.
*
* @param[in] alloc a ::buffer_allocator indicating the technology
* backing the pointer
* @param[in] size the number of elements in the array pointed to by ptr
* @param[in] owner the device owning the memory, -1 for host. if the
* allocator is a GPU allocator and -1 is passed the
* driver API is used to determine the device that
* allocated the memory.
* @param[in] ptr a pointer to the array
*/
buffer(allocator alloc, size_t size, int owner, T *ptr) :
buffer(alloc, stream(), transfer::async, size, owner, ptr) {}
/** Construct by directly providing the buffer contents. This can be used
* for zero-copy transfer of data. One must also name the allocator type
* and device owning the data. In addition for new allocations the
* allocator type and owner are used internally to know how to
* automatically move data during inter technology transfers.
*
* @param[in] alloc a ::buffer_allocator indicating the technology
* backing the pointer
* @param[in] strm a ::stream object used to order operations
* @param[in] sync a ::buffer_transfer specifies synchronous or
* asynchronous behavior.
* @param[in] size the number of elements in the array managed by data
* @param[in] owner the device owning the memory, -1 for host. if the
* allocator is a GPU allocator and -1 is passed the
* driver API is used to determine the device that
* allocated the memory.
* @param[in] data a shared pointer managing the data
*/
buffer(allocator alloc, const hamr::stream &strm, transfer sync,
size_t size, int owner, const std::shared_ptr<T> &data);
/** Construct by directly providing the buffer contents. This can be used
* for zero-copy transfer of data. One must also name the allocator type
* and device owning the data. In addition for new allocations the
* allocator type and owner are used internally to know how to
* automatically move data during inter technology transfers. The buffer is
* configured for asynchronous data transfers.
*
* Delegates to the (alloc, strm, sync, size, owner, data) constructor with
* sync = transfer::async.
*
* @param[in] alloc a ::buffer_allocator indicating the technology
* backing the pointer
* @param[in] strm a ::stream object used to order operations
* @param[in] size the number of elements in the array managed by data
* @param[in] owner the device owning the memory, -1 for host. if the
* allocator is a GPU allocator and -1 is passed the
* driver API is used to determine the device that
* allocated the memory.
* @param[in] data a shared pointer managing the data
*/
buffer(allocator alloc, const hamr::stream &strm,
size_t size, int owner, const std::shared_ptr<T> &data)
: buffer(alloc, strm, transfer::async, size, owner, data) {}
/** Construct by directly providing the buffer contents. This can be used
* for zero-copy transfer of data. One must also name the allocator type
* and device owning the data. In addition for new allocations the
* allocator type and owner are used internally to know how to
* automatically move data during inter technology transfers. This
* constructor will result in the default stream for the chosen technology
* with transfer::async mode.
*
* Delegates to the (alloc, strm, sync, size, owner, data) constructor,
* passing a default constructed stream and transfer::async.
*
* @param[in] alloc a ::buffer_allocator indicating the technology
* backing the pointer
* @param[in] size the number of elements in the array managed by data
* @param[in] owner the device owning the memory, -1 for host. if the
* allocator is a GPU allocator and -1 is passed the
* driver API is used to determine the device that
* allocated the memory.
* @param[in] data a shared pointer managing the data
*/
buffer(allocator alloc, size_t size, int owner, const std::shared_ptr<T> &data)
: buffer(alloc, stream(), transfer::async, size, owner, data) {}
/// copy construct from the passed buffer
template <typename U>
buffer(const buffer<U> &other);
/// copy construct from the passed buffer
buffer(const buffer<T> &other);
/** Copy construct from the passed buffer, while specifying a potentially
* different allocator, stream, and synchronization behavior.
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] strm a ::stream object used to order operations
* @param[in] sync a ::buffer_transfer specifies synchronous or
* asynchronous behavior.
*/
template <typename U>
buffer(allocator alloc, const hamr::stream &strm,
transfer sync, const buffer<U> &other);
/** Copy construct from the passed buffer, while specifying a potentially
* different allocator and stream. The buffer is configured for asynchronous
* data transfers.
*
* Delegates to the (alloc, strm, sync, other) copy constructor with
* sync = transfer::async.
*
* @tparam U the element type of the buffer being copied
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] strm a ::stream object used to order operations
* @param[in] other the buffer to copy from
*/
template <typename U>
buffer(allocator alloc, const hamr::stream &strm, const buffer<U> &other)
: buffer(alloc, strm, transfer::async, other) {}
/** Copy construct from the passed buffer using the specified allocator.
* The stream and sync mode are obtained from the copied instance.
*
* Delegates to the (alloc, strm, sync, other) copy constructor, forwarding
* other.m_stream and other.m_sync.
*
* @tparam U the element type of the buffer being copied
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] other the buffer to copy from; also supplies the stream and
* transfer mode of the new buffer
*/
template <typename U>
buffer(allocator alloc, const buffer<U> &other) :
buffer(alloc, other.m_stream, other.m_sync, other) {}
#if !defined(SWIG)
/// Move construct from the passed buffer.
buffer(buffer<T> &&other);
/** Move construct from the passed buffer, while specifying a potentially
* different allocator, owner, stream, and synchronization behavior. The
* move occurs only if the allocators and owners match, otherwise a copy is
* made. For non-host allocators, the active device is used to set the owner
* of the new object prior to the attempted move.
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] strm a ::stream object used to order operations
* @param[in] sync a ::buffer_transfer specifies synchronous or
* asynchronous behavior.
*/
buffer(allocator alloc, const hamr::stream &strm, transfer sync, buffer<T> &&other);
/** Move construct from the passed buffer, while specifying a potentially
* different allocator, owner, and stream. The move occurs only if the
* allocators and owners match, otherwise a copy is made. For non-host
* allocators, the active device is used to set the owner of the new object
* prior to the attempted move. The buffer is configured for asynchronous
* data transfers.
*
* Delegates to the (alloc, strm, sync, other) move constructor with
* sync = transfer::async.
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] strm a ::stream object used to order operations
* @param[in] other the buffer to move (or copy) from
*/
buffer(allocator alloc, const hamr::stream &strm, buffer<T> &&other)
: buffer(alloc, strm, transfer::async, std::move(other)) {}
/** Move construct from the passed buffer, while specifying a potentially
* different allocator. The move occurs only if the allocators and owners
* match, otherwise a copy is made. For non-host allocators, the active
* device is used to set the owner of the new object prior to the attempted
* move. The stream and transfer mode of the new buffer are taken from the
* passed buffer.
*
* Delegates to the (alloc, strm, sync, other) move constructor, forwarding
* other.m_stream and other.m_sync.
*
* @param[in] alloc a ::buffer_allocator indicates what technology
* manages the data internally
* @param[in] other the buffer to move (or copy) from; also supplies the
* stream and transfer mode of the new buffer
*/
buffer(allocator alloc, buffer<T> &&other) :
buffer(alloc, other.m_stream, other.m_sync, std::move(other)) {}
/** move assign from the other buffer. The target buffer's allocator,
* stream, and device transfer mode are preserved. if this and the passed
* buffer have the same type, allocator, and owner the passed buffer is
* moved. If this and the passed buffer have different allocators or owners
* this allocator is used to allocate space and the data will be copied.
* if this and the passed buffer have different types elements are cast to
* this type as they are copied.
*/
void operator=(buffer<T> &&other);
#endif
/** Allocate space and copy the contents of another buffer. The allocator,
* owner, stream, and synchronization mode of the receiving object are
* unmodified by this operation. Thus one may move data around the system
* using copy assignment.
*/
template <typename U>
void operator=(const buffer<U> &other);
void operator=(const buffer<T> &other);
/// swap the contents of the two buffers
void swap(buffer<T> &other);
/** This is used to change the location of the buffer contents in place.
* For GPU based allocators, the new allocation is made on the device
* active at the time the call is made. If the new allocator and owner are
* the same as the current allocator and owner, then the call is a NOOP.
* Otherwise the data is reallocated and moved.
*
* @param[in] alloc the new allocator
* @returns zero if the operation was successful
*/
int move(allocator alloc);
/** @name reserve
* allocates space for n_elems of data
*/
///@{
/// reserve n_elem of memory
int reserve(size_t n_elem);
/// reserve n_elem of memory and initialize them to val
int reserve(size_t n_elem, const T &val);
///@}
/** @name resize
* resizes storage for n_elems of data
*/
///@{
/// resize the buffer to hold n_elem of memory
int resize(size_t n_elem);
/** resize the buffer to hold n_elem of memory and initialize new elements
* to val */
int resize(size_t n_elem, const T &val);
///@}
/// free all internal storage
int free();
/// returns the number of elements of storage allocated to the buffer
size_t size() const { return m_size; }
/** @name assign
* Copies data into the buffer resizing the buffer.
*/
///@{
/// assign the range from the passed array (src is always on the host)
template<typename U>
int assign(const U *src, size_t src_start, size_t n_vals);
/// assign the range from the passed buffer
template<typename U>
int assign(const buffer<U> &src, size_t src_start, size_t n_vals);
/// assign the passed buffer
template<typename U>
int assign(const buffer<U> &src);
///@}
/** @name append
* insert values at the back of the buffer, growing as needed
*/
///@{
/** appends n_vals from src starting at src_start to the end of the buffer,
* extending the buffer as needed. (src is always on the host)
*/
template <typename U>
int append(const U *src, size_t src_start, size_t n_vals);
/** appends n_vals from src starting at src_start to the end of the buffer,
* extending the buffer as needed.
*/
template <typename U>
int append(const buffer<U> &src, size_t src_start, size_t n_vals);
/** appends to the end of the buffer, extending the buffer as needed.
*/
template <typename U>
int append(const buffer<U> &src);
///@}
/** @name set
* sets a range of elements in the buffer
*/
///@{
/** sets n_vals elements starting at dest_start from the passed array's
* elements starting at src_start (src is always on the host)*/
template <typename U>
int set(size_t dest_start, const U *src, size_t src_start, size_t n_vals);
/** Sets the leading elements of this buffer from the entire contents of
* the passed buffer. This is a convenience wrapper that forwards to the
* ranged overload with dest_start = 0, src_start = 0, and
* n_vals = src.size().
*
* @param[in] src the buffer to copy elements from
* @returns the value returned by the ranged set overload
*/
template <typename U>
int set(const buffer<U> &src)
{
    const size_t n_vals = src.size();
    return this->set(0, src, 0, n_vals);
}
/** sets n_vals elements starting at dest_start from the passed buffer's
* elements starting at src_start */
template <typename U>
int set(size_t dest_start, const buffer<U> &src, size_t src_start, size_t n_vals);
///@}
/** @name get
* gets a range of values from the buffer
*/
///@{
/** gets n_vals elements starting at src_start into the passed array
* elements starting at dest_start (dest is always on the host)*/
template <typename U>
int get(size_t src_start, U *dest, size_t dest_start, size_t n_vals) const;
/** gets n_vals elements starting at src_start into the passed buffer's
* elements starting at dest_start */
template <typename U>
int get(size_t src_start, buffer<U> &dest, size_t dest_start, size_t n_vals) const;
/** Gets the entire contents of this buffer into the leading elements of
* the passed buffer. This is a convenience wrapper that forwards to the
* ranged overload with src_start = 0, dest_start = 0, and
* n_vals = this->size().
*
* @param[out] dest the buffer that receives the elements
* @returns the value returned by the ranged get overload
*/
template <typename U>
int get(buffer<U> &dest) const
{
    const size_t n_vals = this->size();
    return this->get(0, dest, 0, n_vals);
}
///@}
#if !defined(SWIG)
/** @returns a read only pointer to the contents of the buffer accessible on
* the host. If the buffer is currently accessible by codes running on the
* host then this call is a NOOP. If the buffer is not currently accessible
* by codes running on the host then a temporary buffer is allocated and the
* data is moved to the host. The returned shared_ptr deals with
* deallocation of the temporary if needed.
*/
std::shared_ptr<const T> get_host_accessible() const;
#endif
/// returns true if the data is accessible from codes running on the host
int host_accessible() const;
#if !defined(SWIG)
/** @returns a read only pointer to the contents of the buffer accessible
* from the active CUDA device. If the buffer is currently accessible on
* the active CUDA device then this call is a NOOP. If the buffer is not
* currently accessible on the active CUDA device then a temporary buffer
* is allocated and the data is moved. The returned shared_ptr deals with
* deallocation of the temporary if needed.
*/
std::shared_ptr<const T> get_cuda_accessible() const;
#endif
/// returns true if the data is accessible from CUDA codes
int cuda_accessible() const;
#if !defined(SWIG)
/** @returns a read only pointer to the contents of the buffer accessible
* from the active HIP device. If the buffer is currently accessible on
* the active HIP device then this call is a NOOP. If the buffer is not
* currently accessible on the active HIP device then a temporary buffer is
* allocated and the data is moved. The returned shared_ptr deals with
* deallocation of the temporary if needed.
*/
std::shared_ptr<const T> get_hip_accessible() const;
#endif
/// returns true if the data is accessible from HIP codes
int hip_accessible() const;
#if !defined(SWIG)
/** @name get_openmp_accessible
* @returns a read only pointer to the contents of the buffer accessible
* from the active OpenMP off load device. If the buffer is currently
* accessible on the active OpenMP off load device then this call is a
* NOOP. If the buffer is not currently accessible on the active OpenMP
* off load device then a temporary buffer is allocated and the data is
* moved. The returned shared_ptr deals with deallocation of the temporary
* if needed.
*/
///@{
/** returns a pointer to the contents of the buffer accessible from within
* OpenMP off load
*/
std::shared_ptr<const T> get_openmp_accessible() const;
///@}
#endif
/// returns true if the data is accessible from OpenMP off load codes
int openmp_accessible() const;
#if !defined(SWIG)
/** Get read only access to the contents of the buffer from the active
 * device using the technology most suitable with the current build
 * configuration. If the buffer is currently accessible on the active device
 * then this call is a NOOP. If the buffer is not currently accessible on the
 * active device then a temporary buffer is allocated and the data is moved.
 * The returned shared_ptr deals with deallocation of the temporary if
 * needed. This method is only declared when SWIG is not defined.
 *
 * @returns a read only pointer to the contents of the buffer accessible
 *          from the active device.
 */
std::shared_ptr<const T> get_device_accessible() const;
#endif
/** @returns true if the data is accessible from device codes using the
 * technology most suitable with the current build configuration. The result
 * is reported as an int.
 */
int device_accessible() const;
/** @name data
 * Raw pointer access to the buffer contents. Use these to modify the buffer
 * contents, or when you know that the buffer contents are accessible by the
 * code operating on them, to save the cost of a std::shared_ptr copy
 * construct. No check of accessibility and no data movement is made here,
 * the pointer held by the buffer is handed back as is.
 */
///@{
/// @returns a writable pointer to the buffer contents
T *data()
{
    return this->m_data.get();
}

/// @returns a read only pointer to the buffer contents
const T *data() const
{
    return this->m_data.get();
}
///@}
/** @name pointer
* @returns the smart pointer managing the buffer contents. Use this when you
* know that the buffer contents are accessible by the code operating on
* them to save the costs of the logic that determines if a temporary is
* needed
*/
///@{
/// @returns a pointer to the buffer contents
std::shared_ptr<T> &pointer() { return m_data; }
/// @returns a const pointer to the buffer contents
const std::shared_ptr<T> &pointer() const { return m_data; }
///@}
/** Query the allocator in use by this buffer.
 * @returns the allocator type enum
 */
allocator get_allocator() const
{
    return this->m_alloc;
}
/// @returns the device id where the memory was allocated
int get_owner() const { return m_owner; }
/// @returns a read only reference to the active stream
const hamr::stream &get_stream() const
{
    return this->m_stream;
}

/// @returns a modifiable reference to the active stream
hamr::stream &get_stream()
{
    return this->m_stream;
}
/** Sets the active stream and data transfer synchrnonization mode. See
* buffer_transfer.
*
* @param[in] strm a ::stream object used to order operations
* @param[in] sync a ::buffer_transfer specifies synchronous or
* asynchronous behavior.
*/
void set_stream(const stream &strm, transfer sync = transfer::async)
{
m_stream = strm;
m_sync = sync;
}
/** Set the transfer mode to asynchronous. One must manually synchronize
* before data access when needed. See ::synchronize
*/
void set_transfer_asynchronous() { m_sync = transfer::async; }
/** Set the transfer mode to synchronize automatically after data movement
 * from the GPU to the host (transfer::sync_host).
 */
void set_transfer_synchronous_host() { m_sync = transfer::sync_host; }
/** Misspelled name of set_transfer_synchronous_host. It is retained so that
 * existing callers continue to compile and it behaves identically. Prefer
 * the correctly spelled name in new code.
 */
void set_transfer_sycnhronous_host() { set_transfer_synchronous_host(); }
/** Set the transfer mode to synchronize every data transfer
 * (transfer::sync). This mode should not be used except for debugging.
 */
void set_transfer_synchronous() { m_sync = transfer::sync; }
/** Misspelled name of set_transfer_synchronous. It is retained so that
 * existing callers continue to compile and it behaves identically. Prefer
 * the correctly spelled name in new code.
 */
void set_transfer_sycnhronous() { set_transfer_synchronous(); }
/** Query the data transfer synchronization mode.
 * @returns the current ::buffer_transfer mode
 */
transfer get_transfer_mode() const
{
    return this->m_sync;
}
/** Synchronizes with the current stream. This ensures that asynchronous
 * data transfers have completed before you access the data.
 *
 * @returns an int status. NOTE(review): presumably 0 if successful, as
 *          documented for set_owner - confirm against the implementation.
 */
int synchronize() const;
/** Prints the contents to the stderr stream.
 *
 * @returns an int status. NOTE(review): presumably 0 if successful -
 *          confirm against the implementation.
 */
int print() const;
protected:
/** Grow the buffer if needed. Doubles in size.
 *
 * @param[in] n_vals NOTE(review): presumably the number of values about to
 *                   be appended - confirm against the implementation.
 * @returns an int status. NOTE(review): presumably 0 if successful -
 *          confirm against the implementation.
 */
int reserve_for_append(size_t n_vals);
/** Allocate space for n_elem.
 *
 * @param[in] n_elem the number of elements to allocate space for
 * @returns a smart pointer to the allocated space
 */
std::shared_ptr<T> allocate(size_t n_elem);
/** Allocate space for n_elem initialized to val.
 *
 * @param[in] n_elem the number of elements to allocate space for
 * @param[in] val    the value the elements are initialized to
 * @returns a smart pointer to the allocated space
 */
std::shared_ptr<T> allocate(size_t n_elem, const T &val);
/** Allocate space for n_elem initialized with an array of values. The
 * element type U of the array may differ from the buffer's element type T.
 *
 * @param[in] n_elem the number of elements to allocate space for
 * @param[in] vals   the array of initial values. NOTE(review): presumably
 *                   at least n_elem values must be readable - confirm
 *                   against the implementation.
 * @returns a smart pointer to the allocated space
 */
template <typename U>
std::shared_ptr<T> allocate(size_t n_elem, const U *vals);
/** Allocate space initialized with the values held in another buffer. The
 * element type U of the other buffer may differ from this buffer's element
 * type T.
 *
 * @param[in] vals the buffer providing the initial values. NOTE(review):
 *                 presumably the allocation is sized to match vals -
 *                 confirm against the implementation.
 * @returns a smart pointer to the allocated space
 */
template <typename U>
std::shared_ptr<T> allocate(const buffer<U> &vals);
/** Set the device where the buffer is located to the active device or the
 * host. The allocator is used to determine which.
 *
 * @returns 0 if successful.
 */
int set_owner();
/** Set the device where the buffer is located by querying the driver API, or
 * to the host. The allocator is used to determine which.
 *
 * @param[in] ptr NOTE(review): presumably the pointer whose location is
 *                queried through the driver API - confirm against the
 *                implementation.
 * @returns 0 if successful.
 */
int set_owner(const T *ptr);
/** Get the active device id associated with the current allocator.
 *
 * @param[out] dev_id NOTE(review): presumably receives the active device
 *                    id - confirm against the implementation.
 * @returns an int status. NOTE(review): presumably 0 if successful -
 *          confirm against the implementation.
 */
int get_active_device(int &dev_id);
private:
/// the allocator type enum, reported by get_allocator
allocator m_alloc;
/// the smart pointer managing the buffer contents, see data and pointer
std::shared_ptr<T> m_data;
/// NOTE(review): presumably the number of elements currently in use - confirm
size_t m_size;
/// NOTE(review): presumably the number of elements space is allocated for - confirm
size_t m_capacity;
/// the device id where the memory was allocated, reported by get_owner
int m_owner;
/// the active stream, see get_stream and set_stream
hamr::stream m_stream;
/// the current transfer mode, see get_transfer_mode
transfer m_sync;
/// buffers of any other element type U are friends and may access these members
template<typename U> friend class buffer;
};
}
#if !defined(HAMR_SEPARATE_IMPL)
#include "hamr_buffer_impl.h"
#endif
#endif