Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ADBDEV-6156 Count startup memory of each process when using resource groups #1023

Open
wants to merge 30 commits into
base: adb-6.x-dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
635f0da
Count startup memory of each process when using resource groups
Aug 23, 2024
f075158
Try to fix diff caused by gpstop
Oct 21, 2024
f8bd708
Merge branch 'adb-6.x-dev' into ADBDEV-6156
Oct 23, 2024
da24513
Use the actual number of startup chunks instead of a hardcoded value
Oct 24, 2024
c1acd81
Merge branch 'adb-6.x-dev' into ADBDEV-6156
Oct 24, 2024
fb14e74
Add resGroupPallocIgnoreStartup which copies old Add resGroupPalloc
Oct 30, 2024
cdf357a
Add new test case which tests redzone
Oct 31, 2024
ec269c9
Ignore generated file
Oct 31, 2024
8c71e1c
Ignore function creation output
Oct 31, 2024
4579e61
Add a comment for resGroupPallocIgnoreStartup
Oct 31, 2024
ef84cdb
Consider startup memory only if allocation was successful
Nov 1, 2024
cf1b560
Bring out allocation logic to a dedicated function
Nov 14, 2024
562ac84
Reword comments so they would be more clear
Nov 14, 2024
94328c9
Merge branch 'adb-6.x-dev' into ADBDEV-6156
Nov 14, 2024
73582b3
Remove redundant test
Nov 14, 2024
4a9360e
Make resGroupPallocImpl static
Nov 15, 2024
475f3cb
Fix library path for test output
Nov 22, 2024
ef3a7e5
Merge branch 'adb-6.x-dev' into ADBDEV-6156
Nov 22, 2024
524afa5
new test
KnightMurloc Dec 26, 2024
021c829
fix test
KnightMurloc Dec 27, 2024
bd7ab6e
rework test
KnightMurloc Jan 14, 2025
c765b67
rework resGroupPalloc
KnightMurloc Jan 15, 2025
0c1831d
improve the test
KnightMurloc Jan 16, 2025
caa4e57
improve the test
KnightMurloc Jan 16, 2025
492e4bd
Merge branch 'adb-6.x-dev' into ADBDEV-6156
KnightMurloc Jan 16, 2025
c4f373c
reduce diff
KnightMurloc Jan 17, 2025
bdf674a
make test more stable and make sql in the test lowercase.
KnightMurloc Jan 29, 2025
da15bdd
rework test
KnightMurloc Jan 30, 2025
cc03728
add function removal
KnightMurloc Jan 31, 2025
19f9105
Merge branch 'adb-6.x-dev' into ADBDEV-6156
KnightMurloc Jan 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/backend/utils/mmgr/vmem_tracker.c
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,12 @@ VmemTracker_ReleaseVmem(int64 toBeFreedRequested)
}
}

/*
 * Report the current value of startupChunks, i.e. the chunk count that
 * VmemTracker_RegisterStartupMemory adds to the vmem counters.
 *
 * Returns the count as an int32; the value is read, never modified.
 */
int32
VmemTracker_GetStartupChunks(void)
{
	int32		chunks = startupChunks;

	return chunks;
}

/*
* Register the startup memory to vmem tracker.
*
Expand Down Expand Up @@ -670,6 +676,8 @@ VmemTracker_RegisterStartupMemory(int64 bytes)
pg_atomic_add_fetch_u32((pg_atomic_uint32 *) segmentVmemChunks,
startupChunks);

ResGroupProcAddStartupChunks(startupChunks);

/*
* Step 2, check if an OOM error should be raised by allocating 0 chunk.
*/
Expand All @@ -692,6 +700,8 @@ VmemTracker_UnregisterStartupMemory(void)
pg_atomic_sub_fetch_u32((pg_atomic_uint32 *) &MySessionState->sessionVmem,
startupChunks);

ResGroupProcAddStartupChunks(-startupChunks);
andr-sokolov marked this conversation as resolved.
Show resolved Hide resolved

trackedBytes -= startupBytes;
trackedVmemChunks -= startupChunks;

Expand Down
11 changes: 11 additions & 0 deletions src/backend/utils/resgroup/resgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -1594,6 +1594,17 @@ selfDetachResGroup(ResGroupData *group, ResGroupSlotData *slot)
selfUnsetGroup();
}

/*
 * Adjust self->memUsage by the given number of startup chunks.
 *
 * Startup memory is accounted here before a resgroup is assigned; this
 * memory will later be added to the resgroup via selfAttachResGroup.
 * The vmem tracker passes a positive count when startup memory is
 * registered and the negated count when it is unregistered, so a negative
 * value reduces the usage.
 *
 * Does nothing when resource groups are not enabled.
 */
void
ResGroupProcAddStartupChunks(int32 chunks)
{
	/* Nothing to account for unless resource groups are in use. */
	if (!IsResGroupEnabled())
		return;

	self->memUsage += chunks;
}

/*
* Initialize the members of a slot
*/
Expand Down
2 changes: 2 additions & 0 deletions src/include/utils/resgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,8 @@ extern Oid ResGroupGetGroupIdBySessionId(int sessionId);
extern char *getCpuSetByRole(const char *cpuset);
extern void checkCpuSetByRole(const char *cpuset);

extern void ResGroupProcAddStartupChunks(int32 chunks);

/*
 * Forward the arguments to elog() only when Debug_resource_group is set;
 * otherwise the macro does nothing.
 *
 * NOTE(review): the expansion ends with a semicolon after while(false), so
 * "LOG_RESGROUP_DEBUG(...);" expands to two statements and cannot be used as
 * the unbraced body of an if that has an else. Dropping that semicolon would
 * first require confirming that every caller supplies its own.
 */
#define LOG_RESGROUP_DEBUG(...) \
do {if (Debug_resource_group) elog(__VA_ARGS__); } while(false);

Expand Down
1 change: 1 addition & 0 deletions src/include/utils/vmem_tracker.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ extern void VmemTracker_UnregisterStartupMemory(void);
extern void VmemTracker_RequestWaiver(int64 waiver_bytes);
extern void VmemTracker_ResetWaiver(void);
extern int64 VmemTracker_Fault(int32 reason, int64 arg);
extern int32 VmemTracker_GetStartupChunks(void);

extern int32 RedZoneHandler_GetRedZoneLimitChunks(void);
extern int32 RedZoneHandler_GetRedZoneLimitMB(void);
Expand Down
105 changes: 105 additions & 0 deletions src/test/isolation2/input/resgroup/resgroup_startup_memory.source
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
-- start_ignore
drop table if exists t1;
drop role if exists test1;
drop role if exists test2;
drop resource group rg1;
drop resource group rg2;
drop resource group rg3;
andr-sokolov marked this conversation as resolved.
Show resolved Hide resolved
-- end_ignore

create extension if not exists gp_inject_fault;

create or replace function resGroupPalloc(float) returns int as '@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc' language C reads sql data;
create or replace function hold_memory_by_percent_on_qe(float) returns int as $$
select resGroupPalloc($1) from gp_dist_random('gp_id')
$$ language sql;

create or replace function repeatPalloc(int, int) returns int as
'@abs_builddir@/../regress/regress@DLSUFFIX@', 'repeatPalloc'
language C reads sql data;

create or replace function hold_memory(int) returns int as $$
select * from repeatPalloc(1, $1)
$$ language sql;
andr-sokolov marked this conversation as resolved.
Show resolved Hide resolved

alter resource group admin_group set memory_spill_ratio 0;
alter resource group default_group set memory_spill_ratio 0;
alter resource group admin_group set memory_limit 0;
alter resource group default_group set memory_limit 0;

create resource group rg1 with (cpu_rate_limit=20, memory_limit=15, memory_shared_quota=100);
create resource group rg2 with (cpu_rate_limit=20, memory_limit=85, memory_shared_quota=100);

create role test1 with resource group rg1;
create role test2 with resource group rg2;

1: set role test1;
1: create table t1 (a int) distributed randomly;
-- Add exactly one row to each segment.
0U: insert into t1 values (1);
1U: insert into t1 values (2);
2U: insert into t1 values (3);
0Uq:
1Uq:
2Uq:

-- Force nested loop join to avoid unnecessary memory allocations.
1: set enable_nestloop = on;
1: set enable_hashjoin = off;

1: create view sleep_view as select a, pg_sleep(60) is null from t1;
1: create view alloc_view as select * from sleep_view where hold_memory(40) is not null;

set role test2;
begin;

-- Allocate all the memory in the second resource group.
select hold_memory_by_percent_on_qe(1);
-- Wait for all backends to run before executing the query. This is necessary
-- to avoid executing hold_memory before all backends are up.
select gp_inject_fault_infinite('executor_pre_tuple_processed', 'suspend', dbid)
from gp_segment_configuration where role = 'p' and content > -1;
-- The first resource group has 102 MB of memory available. 6 slices consume
andr-sokolov marked this conversation as resolved.
Show resolved Hide resolved
-- 12 * 6 = 72 MB, plus one slice explicitly allocates 40 MB of memory.
-- It is expected that the query will also be canceled due to lack of memory in
-- the resource group and not in the Vmem tracker.
1&: select * from alloc_view a1
andr-sokolov marked this conversation as resolved.
Show resolved Hide resolved
join sleep_view a2 using(a)
join sleep_view a3 using(a)
join sleep_view a4 using(a)
join sleep_view a5 using(a);
select gp_wait_until_triggered_fault('executor_pre_tuple_processed', 6, dbid)
from gp_segment_configuration where role = 'p' and content > -1;
select gp_inject_fault('executor_pre_tuple_processed', 'reset', dbid)
from gp_segment_configuration where role = 'p' and content > -1;
1<:
1q:
rollback;
reset role;
drop view alloc_view;
drop view sleep_view;

-- Free up some memory for the default resource group.
alter resource group rg2 set memory_limit 75;
-- Test that the starting memory is visible to the resource group.
1: set role test1;
1&: select count(*) from t1 where pg_sleep(1) is not null;
andr-sokolov marked this conversation as resolved.
Show resolved Hide resolved

select segment, mem.* from gp_toolkit.gp_resgroup_status, json_object_keys(memory_usage)
andr-sokolov marked this conversation as resolved.
Show resolved Hide resolved
as segment, json_to_record(memory_usage -> segment) mem (used int) where rsgname = 'rg1';

1<:
1q:

drop table t1;
reset role;
drop role test1;
drop role test2;
drop resource group rg1;
drop resource group rg2;

alter resource group default_group set memory_limit 30;
alter resource group default_group set memory_spill_ratio 10;

alter resource group default_group set memory_limit 10;
alter resource group default_group set memory_spill_ratio 10;
andr-sokolov marked this conversation as resolved.
Show resolved Hide resolved
2 changes: 2 additions & 0 deletions src/test/isolation2/isolation2_resgroup_schedule
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,6 @@ test: resgroup/resgroup_dumpinfo
# test large group id
test: resgroup/resgroup_large_group_id

test: resgroup/resgroup_startup_memory
RekGRpth marked this conversation as resolved.
Show resolved Hide resolved

test: resgroup/disable_resgroup
22 changes: 11 additions & 11 deletions src/test/isolation2/output/resgroup/resgroup_bypass.source
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 0.0
rg_bypass_test | 1 | 12.0
rg_bypass_test | 1 | 24.0
bandetto marked this conversation as resolved.
Show resolved Hide resolved
(2 rows)
61: SELECT * FROM eat_memory_on_qd_large;
ERROR: Out of memory
Expand Down Expand Up @@ -202,8 +202,8 @@ BEGIN
SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 4.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 0 | 16.0
rg_bypass_test | 1 | 12.0
(2 rows)
61: SELECT * FROM eat_memory_on_one_slice;
count
Expand All @@ -213,8 +213,8 @@ SELECT * FROM memory_result;
SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 8.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 0 | 20.0
rg_bypass_test | 1 | 12.0
(2 rows)
61: SELECT * FROM eat_memory_on_one_slice;
ERROR: Out of memory (seg0 slice1 127.0.0.1:25432 pid=336)
Expand All @@ -234,7 +234,7 @@ SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 0.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 1 | 12.0
(2 rows)
61q: ... <quitting>

Expand All @@ -256,8 +256,8 @@ BEGIN
SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 4.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 0 | 16.0
rg_bypass_test | 1 | 12.0
(2 rows)
61: SELECT * FROM eat_memory_on_slices;
count
Expand All @@ -267,8 +267,8 @@ SELECT * FROM memory_result;
SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 8.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 0 | 20.0
rg_bypass_test | 1 | 12.0
(2 rows)
61: SELECT * FROM eat_memory_on_slices;
ERROR: Out of memory (seg0 slice2 127.0.0.1:25432 pid=354)
Expand All @@ -288,7 +288,7 @@ SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 0.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 1 | 12.0
(2 rows)
61q: ... <quitting>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
-----------------+----------+---------
rg1_memory_test | 0 | 0.0
rg1_memory_test | 1 | 20.0
rg1_memory_test | 1 | 30.0
rg2_memory_test | 0 | 0.0
rg2_memory_test | 1 | 0.0
(4 rows)
Expand Down Expand Up @@ -144,8 +144,8 @@ BEGIN
SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
-----------------+----------+---------
rg1_memory_test | 0 | 20.0
rg1_memory_test | 1 | 0.0
rg1_memory_test | 0 | 30.0
rg1_memory_test | 1 | 10.0
rg2_memory_test | 0 | 0.0
rg2_memory_test | 1 | 0.0
(4 rows)
Expand Down
Loading