add config support delete shard from fe only
Signed-off-by: drake_wang <[email protected]>
wxl24life committed Dec 5, 2024
1 parent 9627535 commit 7fbac72
Showing 3 changed files with 113 additions and 9 deletions.
10 changes: 10 additions & 0 deletions fe/fe-core/src/main/java/com/starrocks/common/Config.java
@@ -2538,6 +2538,16 @@ public class Config extends ConfigBase {
@ConfField(mutable = true)
public static boolean meta_sync_force_delete_shard_meta = false;

/**
 * Quickly delete large volumes of shard metadata from the FE to reduce GC overhead in a shared-data cluster.
 * The shard count can grow rapidly when partitions are added and dropped frequently.
 *
 * When enabled, shard metadata is deleted from the FE only.
 * Users may need to delete the corresponding tablet data from storage manually.
 */
@ConfField(mutable = true)
public static boolean meta_sync_delete_shard_meta_in_fe_only = false;

// ***********************************************************
// * BEGIN: Cloud native meta server related configurations
// ***********************************************************
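Note (not part of the diff): because the new flag is declared with @ConfField(mutable = true), it can be changed at runtime, for example with ADMIN SET FRONTEND CONFIG ("meta_sync_delete_shard_meta_in_fe_only" = "true"), rather than only in fe.conf. The Java sketch below is illustrative only — the helper class and method names are made up — and shows the save/override/restore pattern that the new test in this commit also uses:

import com.starrocks.common.Config;

// Hypothetical helper, not part of this commit: toggle the mutable flag around a
// cleanup task and restore the previous value afterwards.
public class FeOnlyShardDeleteToggle {
    public static void runWithFeOnlyDelete(Runnable cleanupTask) {
        boolean saved = Config.meta_sync_delete_shard_meta_in_fe_only;
        Config.meta_sync_delete_shard_meta_in_fe_only = true;      // delete shard meta from FE only
        try {
            cleanupTask.run();                                      // e.g. one meta sync cycle
        } finally {
            Config.meta_sync_delete_shard_meta_in_fe_only = saved;  // do not leak the setting
        }
    }
}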
33 changes: 24 additions & 9 deletions fe/fe-core/src/main/java/com/starrocks/lake/StarMgrMetaSyncer.java
@@ -153,7 +153,6 @@ public static void dropTabletAndDeleteShard(List<Long> shardIds, StarOSAgent sta
                 }
             } catch (DdlException e) {
                 LOG.warn("failed to delete shard from starMgr");
-                continue;
             }
         }
     }
@@ -197,17 +196,33 @@ private void deleteUnusedShardAndShardGroup() {
         long nowMs = System.currentTimeMillis();
         List<Long> emptyShardGroup = new ArrayList<>();
         for (long groupId : diffList) {
-            if (Config.shard_group_clean_threshold_sec * 1000L + Long.parseLong(groupToCreateTimeMap.get(groupId)) < nowMs) {
-                try {
-                    List<Long> shardIds = starOSAgent.listShard(groupId);
-                    if (shardIds.isEmpty()) {
-                        emptyShardGroup.add(groupId);
-                    } else {
-                        dropTabletAndDeleteShard(shardIds, starOSAgent);
-                    }
-                } catch (Exception e) {
-                    continue;
-                }
-            }
+            if (Config.shard_group_clean_threshold_sec * 1000L + Long.parseLong(groupToCreateTimeMap.get(groupId)) >= nowMs) {
+                continue;
+            }
+            try {
+                List<Long> shardIds = starOSAgent.listShard(groupId);
+                if (shardIds.isEmpty()) {
+                    emptyShardGroup.add(groupId);
+                    continue;
+                }
+
+                // delete shard from star manager only, not considering tablet data on be/cn
+                if (Config.meta_sync_delete_shard_meta_in_fe_only) {
+                    LOG.debug("delete shards from starMgr only, shard group: {}", groupId);
+                    try {
+                        starOSAgent.deleteShards(new HashSet<>(shardIds));
+                    } catch (DdlException e) {
+                        LOG.warn("failed to delete shards from starMgr");
+                    }
+                    continue;
+                }
+                // drop meta and data
+                long start = System.currentTimeMillis();
+                dropTabletAndDeleteShard(shardIds, starOSAgent);
+                LOG.debug("delete shards from starMgr and FE, shard group: {}, cost: {} ms", groupId,
+                        (System.currentTimeMillis() - start));
+            } catch (Exception e) {
+                LOG.warn("delete shards from starMgr and FE failed, shard group: {}, {}", groupId, e.getMessage());
+            }
         }

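For readers skimming the hunk above, the new control flow in deleteUnusedShardAndShardGroup boils down to: skip groups younger than the clean threshold, collect empty groups for deletion, and for the rest either delete the shard metadata only (new fast path) or drop the tablets and then delete the metadata (existing path). Below is a standalone, simplified sketch of that decision; ShardMetaStore is a hypothetical stand-in for StarOSAgent plus the static dropTabletAndDeleteShard helper, not an interface from the codebase.

import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Simplified, self-contained sketch of the cleanup decision introduced by this commit.
public class ShardCleanupSketch {
    interface ShardMetaStore {
        List<Long> listShard(long groupId) throws Exception;
        void deleteShards(Set<Long> shardIds) throws Exception;              // meta only
        void dropTabletAndDeleteShard(List<Long> shardIds) throws Exception; // data + meta
    }

    static boolean deleteShardMetaInFeOnly = false; // mirrors Config.meta_sync_delete_shard_meta_in_fe_only

    static void cleanGroup(ShardMetaStore store, long groupId) {
        try {
            List<Long> shardIds = store.listShard(groupId);
            if (shardIds.isEmpty()) {
                return; // empty group; the caller collects it for deleteShardGroup
            }
            if (deleteShardMetaInFeOnly) {
                // fast path: forget the shards in FE/StarMgr, leave tablet files on storage
                store.deleteShards(new HashSet<>(shardIds));
                return;
            }
            // default path: drop tablets on BE/CN first, then delete the shard meta
            store.dropTabletAndDeleteShard(shardIds);
        } catch (Exception e) {
            // swallow and move on; the next sync run retries this group
        }
    }
}

The operational consequence of the fast path is that tablet files stay behind on storage, which matches the warning added to the Config comment.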
@@ -629,6 +629,85 @@ Future<DeleteTabletResponse> deleteTablet(DeleteTabletRequest request) throws Ex
Config.meta_sync_force_delete_shard_meta = false;
}

@Test
public void testDeleteSyncerMetaOnly() {
boolean originalConfig = Config.meta_sync_delete_shard_meta_in_fe_only;
Config.shard_group_clean_threshold_sec = 0;
long groupIdToClear = shardGroupId + 1;
// build shardGroupInfos
List<Long> allShardIds = Stream.of(1000L, 1001L, 1002L, 1003L).collect(Collectors.toList());
int numOfShards = allShardIds.size();
List<ShardGroupInfo> shardGroupInfos = new ArrayList<>();
ShardGroupInfo info = ShardGroupInfo.newBuilder()
.setGroupId(groupIdToClear)
.putProperties("createTime", String.valueOf(System.currentTimeMillis() - 86400 * 1000))
.addAllShardIds(allShardIds)
.build();
shardGroupInfos.add(info);

new MockUp<StarOSAgent>() {
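// Group/shard listing and deletion in this mock operate on the in-memory lists built above.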
@Mock
public void deleteShardGroup(List<Long> groupIds) throws StarClientException {
for (long groupId : groupIds) {
shardGroupInfos.removeIf(item -> item.getGroupId() == groupId);
}
}
@Mock
public List<ShardGroupInfo> listShardGroup() {
return shardGroupInfos;
}

@Mock
public List<Long> listShard(long groupId) throws DdlException {
if (groupId == groupIdToClear) {
return allShardIds;
} else {
return Lists.newArrayList();
}
}

@Mock
public void deleteShards(Set<Long> shardIds) throws DdlException {
allShardIds.removeAll(shardIds);
}
};

new MockUp<BrpcProxy>() {
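// Route LakeService RPCs to the in-process pseudo backend so deleteTablet can be stubbed per scenario.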
@Mock
public LakeService getLakeService(String host, int port) throws RpcException {
return new PseudoBackend.PseudoLakeService();
}
};

// scenario 1: the deleteTablet RPC fails, so dropTabletAndDeleteShard cannot remove the shard meta
{
new MockUp<PseudoBackend.PseudoLakeService>() {
@Mock
Future<DeleteTabletResponse> deleteTablet(DeleteTabletRequest request) {
DeleteTabletResponse resp = new DeleteTabletResponse();
resp.status = new StatusPB();
resp.status.statusCode = TStatusCode.INTERNAL_ERROR.getValue();
resp.failedTablets = new ArrayList<>(request.tabletIds);
return CompletableFuture.completedFuture(resp);
}
};

Config.meta_sync_delete_shard_meta_in_fe_only = false;
Deencapsulation.invoke(starMgrMetaSyncer, "deleteUnusedShardAndShardGroup");
Assert.assertEquals(numOfShards, allShardIds.size());
}

// scenario 2: with meta_sync_delete_shard_meta_in_fe_only enabled, shard meta is deleted from FE even though tablet deletion still fails
{
Config.meta_sync_delete_shard_meta_in_fe_only = true;
Deencapsulation.invoke(starMgrMetaSyncer, "deleteUnusedShardAndShardGroup");
Assert.assertEquals(0, allShardIds.size());
}

Config.meta_sync_delete_shard_meta_in_fe_only = originalConfig;
}

@Test
public void testSyncTableMetaInternal() throws Exception {
long dbId = 100;
