Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable user-customization of packing #549

Merged
merged 28 commits into from
Dec 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
204e52f
Simplify the l3 oapi by removing the rather unnecessary macro layer.
devinamatthews Aug 15, 2021
905b266
Completely cull the framework code for syrk/herk/syr2k/her2k and just…
devinamatthews Aug 15, 2021
eba0777
Implement user-defined packing microkernels.
devinamatthews Sep 4, 2021
f57edcc
Fix induced method handling for gemmt/syr2k/her2k/syrk/herk.
devinamatthews Sep 10, 2021
2ba5109
Merge branch 'l3_oapi_simplification' into obj_t_makeover
devinamatthews Sep 10, 2021
123677d
Major update of l3 packing code:
devinamatthews Sep 13, 2021
2eba282
Moved packed object initialization and pack buffer acquisition to bli…
devinamatthews Sep 13, 2021
05c9ee5
Merge bli_gemm_int and bli_trsm_int into one happy function.
devinamatthews Sep 14, 2021
3f32e87
Enable 1m in the "fast" testsuite.
devinamatthews Sep 15, 2021
57f116c
The packing kernel (variant) can now be overridden by the user.
devinamatthews Sep 15, 2021
dfc1267
Fix race condition in packing allocation.
devinamatthews Sep 28, 2021
184cb76
`bli_packm_struc_cxk_1er` doesn't handle stored sub-parts of triangul…
devinamatthews Sep 29, 2021
6051056
Fixes for GEMM-MD.
devinamatthews Sep 29, 2021
d58d4c0
Add explicit handling for beta == 0 in armsve sd and armv7a d gemm ukrs.
devinamatthews Sep 29, 2021
dc30955
Apply patch from @xrq-phys.
devinamatthews Oct 2, 2021
c4fadf2
Make `bli_pba_rntm_set_pba` inline, instead of forcing export on Wind…
devinamatthews Oct 2, 2021
c493032
Make error checking level a thread-local variable.
devinamatthews Oct 2, 2021
ef64cdf
Fix data race in the testsuite.
devinamatthews Oct 2, 2021
edffd67
Fix problem in the gemmtrsm ukr test driver.
devinamatthews Oct 2, 2021
69915e9
VSCode keeps inserting headers.
devinamatthews Oct 2, 2021
ca93314
Export `bli_pba_query`.
devinamatthews Oct 3, 2021
288b551
Fix symbol visibility problems.
devinamatthews Oct 4, 2021
ec7d24e
Revert "Fix data race in the testsuite."
devinamatthews Oct 5, 2021
58df338
Merge branch 'master' into obj_t_makeover_phase1
devinamatthews Oct 5, 2021
9056d4f
Merge branch 'master' into obj_t_makeover_phase1
devinamatthews Nov 12, 2021
d79f75e
Merge branch 'master' into obj_t_makeover_phase1
devinamatthews Nov 12, 2021
a099d87
Handle .root correctly in bli_obj_swap().
fgvanzee Nov 30, 2021
9f5b215
Removed old calls to bli_obj_set_as_root().
fgvanzee Dec 2, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion build/libblis-symbols.def
Original file line number Diff line number Diff line change
Expand Up @@ -1307,7 +1307,6 @@ bli_pba_init_pools
bli_pba_pool_size
bli_pba_query
bli_pba_release
bli_pba_rntm_set_pba
bli_memsys_finalize
bli_memsys_init
bli_mkherm
Expand Down
18 changes: 10 additions & 8 deletions frame/1m/bli_l1m_ft_ker.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,23 @@
typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \
( \
struc_t strucc, \
doff_t diagoffc, \
diag_t diagc, \
uplo_t uploc, \
conj_t conjc, \
pack_t schema, \
bool invdiag, \
dim_t m_panel, \
dim_t n_panel, \
dim_t m_panel_max, \
dim_t n_panel_max, \
dim_t panel_dim, \
dim_t panel_len, \
dim_t panel_dim_max, \
dim_t panel_len_max, \
dim_t panel_dim_off, \
dim_t panel_len_off, \
ctype* restrict kappa, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
ctype* restrict p, inc_t rs_p, inc_t cs_p, \
ctype* restrict c, inc_t incc, inc_t ldc, \
ctype* restrict p, inc_t ldp, \
inc_t is_p, \
cntx_t* cntx \
cntx_t* cntx, \
void* params \
);

INSERT_GENTDEF( packm )
Expand Down
1 change: 1 addition & 0 deletions frame/1m/bli_l1m_oft_var.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ typedef void (*PASTECH(opname,_var_oft)) \
obj_t* a, \
obj_t* p, \
cntx_t* cntx, \
rntm_t* rntm, \
cntl_t* cntl, \
thrinfo_t* thread \
);
Expand Down
8 changes: 5 additions & 3 deletions frame/1m/packm/bli_packm.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@

*/

#include "bli_packm_alloc.h"
#include "bli_packm_cntl.h"
#include "bli_packm_check.h"
#include "bli_packm_init.h"
#include "bli_packm_int.h"
#include "bli_packm_scalar.h"

#include "bli_packm_part.h"

#include "bli_packm_var.h"

#include "bli_packm_struc_cxk.h"
#include "bli_packm_struc_cxk_1er.h"

Expand All @@ -50,6 +50,8 @@

// Mixed datatype support.
#ifdef BLIS_ENABLE_GEMM_MD
#include "bli_packm_md.h"
#include "bli_packm_struc_cxk_md.h"
#endif

#include "bli_packm_blk_var1.h"

139 changes: 64 additions & 75 deletions frame/1m/packm/bli_packm_var.h → frame/1m/packm/bli_packm_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
libraries.

Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Copyright (C) 2016, Hewlett Packard Enterprise Development LP

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
Expand Down Expand Up @@ -33,78 +33,67 @@

*/

//
// Prototype object-based interfaces.
//

// GENPROT( opname ) declares one exported (BLIS_EXPORT_BLIS) function whose
// name is produced by PASTEMAC0(opname). The declared function returns void
// and takes two obj_t operands (c and p), a context, a control tree node,
// and a thrinfo_t pointer.
// NOTE(review): presumably c is the source object and p the packed
// destination -- confirm against the variant definitions.
#undef GENPROT
#define GENPROT( opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC0(opname) \
( \
obj_t* c, \
obj_t* p, \
cntx_t* cntx, \
cntl_t* cntl, \
thrinfo_t* t \
);

// Emit the object-based prototypes for the two packm variants.
GENPROT( packm_unb_var1 )
GENPROT( packm_blk_var1 )

//
// Prototype BLAS-like interfaces with void pointer operands.
//

// GENTPROT( ctype, ch, varname ) declares a void function named by
// PASTEMAC(ch,varname). The operands kappa, c, and p are passed as void
// pointers; c and p each carry a row stride (rs_*) and column stride (cs_*).
// Note that ctype is not used in the expansion below.
// NOTE(review): m_max/n_max presumably describe the padded dimensions of p --
// confirm against the definition of packm_unb_var1.
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
struc_t strucc, \
doff_t diagoffc, \
diag_t diagc, \
uplo_t uploc, \
trans_t transc, \
dim_t m, \
dim_t n, \
dim_t m_max, \
dim_t n_max, \
void* kappa, \
void* c, inc_t rs_c, inc_t cs_c, \
void* p, inc_t rs_p, inc_t cs_p, \
cntx_t* cntx \
);

// NOTE(review): INSERT_GENTPROT_BASIC0 presumably instantiates GENTPROT once
// per basic datatype -- confirm against its definition.
INSERT_GENTPROT_BASIC0( packm_unb_var1 )

// Redefines GENTPROT for the blocked variant. Compared with the
// packm_unb_var1 prototype, this signature additionally takes a pack schema,
// the invdiag/revifup/reviflo flags, is_p, pd_p/ps_p, a packm kernel function
// pointer (void_fp), and a thrinfo_t pointer. Note that ctype is not used in
// the expansion below.
// NOTE(review): is_p, pd_p, and ps_p presumably describe the imaginary
// stride, panel dimension, and panel stride of p -- confirm against the
// definition of packm_blk_var1.
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
struc_t strucc, \
doff_t diagoffc, \
diag_t diagc, \
uplo_t uploc, \
trans_t transc, \
pack_t schema, \
bool invdiag, \
bool revifup, \
bool reviflo, \
dim_t m, \
dim_t n, \
dim_t m_max, \
dim_t n_max, \
void* kappa, \
void* c, inc_t rs_c, inc_t cs_c, \
void* p, inc_t rs_p, inc_t cs_p, \
inc_t is_p, \
dim_t pd_p, inc_t ps_p, \
void_fp packm_ker, \
cntx_t* cntx, \
thrinfo_t* thread \
);

// NOTE(review): INSERT_GENTPROT_BASIC0 presumably instantiates GENTPROT once
// per basic datatype -- confirm against its definition.
INSERT_GENTPROT_BASIC0( packm_blk_var1 )
#include "blis.h"

// Returns the address of a pack buffer of at least size_needed bytes that is
// cached in the mem_t entry of the given control tree node.
//
// If the cached block is already allocated and large enough, its buffer is
// returned directly. Otherwise the chief thread releases the stale block (if
// any), acquires a new one of the type recorded in the control tree node, and
// shares the resulting mem_t entry with the other threads, each of which
// stores a copy in its own control tree node.
void* bli_packm_alloc
     (
       siz_t      size_needed,
       rntm_t*    rntm,
       cntl_t*    cntl,
       thrinfo_t* thread
     )
{
	// The control tree node supplies both the kind of pack buffer to request
	// and the location of the currently cached mem_t entry.
	packbuf_t buf_type = bli_cntl_packm_params_pack_buf_type( cntl );
	mem_t*    node_mem = bli_cntl_pack_mem( cntl );

	// An unallocated entry is treated as having zero capacity.
	siz_t have = bli_mem_is_alloc( node_mem ) ? bli_mem_size( node_mem ) : 0;

	// Nothing to do when the cached block is already big enough.
	if ( have >= size_needed )
		return bli_mem_buffer( node_mem );

	// Only the chief thread's copy of this entry is ever filled in; the
	// other threads merely contribute its address to the broadcast.
	mem_t chief_mem;

	if ( bli_thread_am_ochief( thread ) )
	{
		// Drop the undersized block before requesting its replacement.
		if ( bli_mem_is_alloc( node_mem ) )
			bli_pba_release( rntm, node_mem );

		bli_pba_acquire_m( rntm, size_needed, buf_type, &chief_mem );
	}

	// Every thread learns the address of the chief's mem_t entry...
	mem_t* shared_mem = bli_thread_broadcast( thread, &chief_mem );

	// ...and copies it into the mem_t field of its own control tree node.
	*node_mem = *shared_mem;

	// The chief's entry lives on its stack, so nobody may leave this function
	// until all threads have finished reading it.
	bli_thread_barrier( thread );

	return bli_mem_buffer( node_mem );
}

17 changes: 7 additions & 10 deletions frame/3/bli_l3_packm.h → frame/1m/packm/bli_packm_alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
libraries.

Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
Expand Down Expand Up @@ -33,13 +32,11 @@

*/

// Prototype for bli_l3_packm, which returns nothing and takes two obj_t
// operands plus the context, runtime, control tree node, and thread info.
// NOTE(review): presumably packs x into x_pack -- the definition is not
// visible here, so confirm before relying on this.
void bli_l3_packm
(
obj_t* x,
obj_t* x_pack,
cntx_t* cntx,
rntm_t* rntm,
cntl_t* cntl,
thrinfo_t* thread
);
// Exported prototype for bli_packm_alloc. Returns the buffer address held by
// the mem_t entry of the control tree node cntl, first (re)acquiring a block
// of size_needed when the cached block is unallocated or smaller than
// size_needed. All threads sharing `thread` are expected to call this
// together, since the reallocation path broadcasts and then barriers.
BLIS_EXPORT_BLIS void* bli_packm_alloc
(
siz_t size_needed,
rntm_t* rntm,
cntl_t* cntl,
thrinfo_t* thread
);

Loading