
Merge pull request #5907 from janekmi/call_stacks-1st-validation-step
common: pass the validation step
janekmi authored Nov 10, 2023
2 parents 3d0f0ee + c557f67 commit d4349f5
Showing 7 changed files with 366 additions and 33 deletions.
1 change: 1 addition & 0 deletions utils/call_stacks_analysis/.gitignore
@@ -2,3 +2,4 @@
*.json
*.txt
!stack_usage.txt
!white_list.json
34 changes: 20 additions & 14 deletions utils/call_stacks_analysis/README.md
@@ -2,29 +2,33 @@

> XXX This document requires more details.
1. Generate stack usage file using `stack_usage_stats.sh`.
2. Generate call stack graph using `cflow.sh`.
3. Generate all possible call stacks given the data provided.
## Pre-requisites

- built PMDK
- the `cflow` command available on the system. Available [here](https://www.gnu.org/software/cflow/).

## Generating call stacks

```sh
# -u, --stack-usage-stat-file
# -f, --cflow-output-file
# -i, --config-file
./generate_call_stacks.py \
-u stack-usage-nondebug.txt \
-f cflow.txt \
-e extra_calls.json \
-w white_list.json
./make_stack_usage.sh
./make_api.sh
./make_extra.py
./make_cflow.sh
./generate_call_stacks.py
```

If successful, it produces:

- `call_stacks_all.json` with call stacks ordered descending by call stack consumption.
- `stack_usage.json` with the data extracted from the provided `src/stats/stack-usage-nondebug.txt`.
- `stack_usage.json` with the stack usage of each function.

**Note**: If too many functions would have to be added to the white list, it might be useful to ignore functions whose stack usage is at or below a certain threshold. Please see the `-t` option to set the desired value.
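
For illustration, a minimal sketch of combining the white list with a threshold; the file name is the script's default and the threshold value is an arbitrary example:

```sh
# Regenerate the call stacks while ignoring functions whose own stack usage
# is 128 bytes or less (value chosen only for illustration).
./generate_call_stacks.py \
        -w white_list.json \
        -t 128
```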

4. (Optional) Break down a call stack's stack consumption per function. Use the `stack_usage.json` as produced in the previous step and extract a single call stack and put it into a file (name `call_stack.json` below). Please see the examples directory for an example.
## Optional

### Call stack's stack consumption per function

Use the `stack_usage.json` produced in the previous step, extract a single call stack, and put it into a file (named `call_stack.json` below). Please see the examples directory for an example.
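
As a purely hypothetical illustration (the authoritative format is whatever the files in the examples directory use; the `stack`/`size` keys and the function names below are assumptions), such a file could be created like this:

```sh
# Hypothetical call_stack.json holding a single call stack; the schema and
# the function names are illustrative assumptions, not the verified format.
cat > call_stack.json << 'EOF'
{
    "stack": [
        "pmemobj_create",
        "palloc_operation",
        "out_snprintf"
    ],
    "size": 1234
}
EOF
```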

```sh
# -s, --stack-usage-file
@@ -49,7 +53,9 @@ If successful, it prints out on the screen a list of functions along with their
224 out_snprintf
```

5. (Optional) List all API calls which call stacks contains a given function. Use the `stack_usage.json` as produced in the previous step.
### List all API calls whose call stacks contain a given function

Use the `stack_usage.json` as produced in the previous step.

```sh
# -a, --all-call-stacks-file
20 changes: 10 additions & 10 deletions utils/call_stacks_analysis/generate_call_stacks.py
@@ -20,7 +20,7 @@
PARSER.add_argument('-f', '--cflow-output-file', default='cflow.txt')
PARSER.add_argument('-e', '--extra-calls', default='extra_calls.json')
PARSER.add_argument('-a', '--api-file', default='api.txt')
PARSER.add_argument('-w', '--white-list') # XXX
PARSER.add_argument('-w', '--white-list', default='white_list.json')
PARSER.add_argument('-i', '--api-filter-file')
PARSER.add_argument('-d', '--dump', action='store_true', help='Dump debug files')
PARSER.add_argument('-t', '--skip-threshold', type=int, default=0,
@@ -29,8 +29,9 @@
# API: TypeAlias = List[str] # for Python >= 3.10
API = List[str] # for Python < 3.8

# WhiteList: TypeAlias = List[str] # for Python >= 3.10
WhiteList = List[str] # for Python < 3.8
# class WhiteList(TypedDict): # for Python >= 3.8
# not_called: List[str]
WhiteList = Dict[str, List[str]] # for Python < 3.8

# APIFilter: TypeAlias = List[str] # for Python >= 3.10
APIFilter = List[str] # for Python < 3.8
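
For context, the new `WhiteList` type is a dictionary keyed by `not_called`, matching the `white_list['not_called']` lookups in `validate()` below. A minimal sketch of a matching `white_list.json` (the function names are placeholders):

```sh
# Sketch of the white_list.json layout implied by the new WhiteList type;
# the "not_called" key comes from the code, the names are placeholders.
cat > white_list.json << 'EOF'
{
    "not_called": [
        "some_never_called_helper",
        "another_unreferenced_function"
    ]
}
EOF
```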
@@ -188,7 +189,7 @@ def validate(stack_usage: StackUsage, calls:Calls, api: API, white_list: WhiteLi
continue
if k in api:
continue
if k in white_list:
if k in white_list['not_called']:
continue
if v['size'] <= skip_threshold:
continue
@@ -203,7 +204,7 @@ def validate(stack_usage: StackUsage, calls:Calls, api: API, white_list: WhiteLi
for k, v in stack_usage.items():
if k in api:
continue
if k in white_list:
if k in white_list['not_called']:
continue
if v['size'] <= skip_threshold:
continue
@@ -266,12 +267,10 @@ def generate_call_stacks(func: str, stack_usage: StackUsage, rcalls: RCalls, api
def call_stack_key(e):
return e['size']

def generate_all_call_stacks(stack_usage: StackUsage, calls: Calls, rcalls: RCalls, api: API, white_list: WhiteList, debug: bool = False) -> List[CallStack]:
def generate_all_call_stacks(stack_usage: StackUsage, calls: Calls, rcalls: RCalls, api: API, debug: bool = False) -> List[CallStack]:
call_stacks = []
# loop over called functions
for func in rcalls.keys():
if func in white_list:
continue
# if a function calls something else, call stack generation will start from its callees
if func in calls.keys():
continue
@@ -304,7 +303,8 @@ def main():
dump(api, 'api')
print('Load API - done')

white_list = []
white_list = load_from_json(args.white_list)
print('White List - done')

stack_usage = parse_stack_usage(args.stack_usage_file)
# dumping stack_usage.json to allow further processing
@@ -322,7 +322,7 @@ def main():
rcalls = prepare_rcalls(calls)
print('Reverse calls - done')

call_stacks = generate_all_call_stacks(stack_usage, calls, rcalls, api, white_list)
call_stacks = generate_all_call_stacks(stack_usage, calls, rcalls, api)
dump(call_stacks, 'call_stacks_all', True)
print('Number of found call stacks: {}'.format(len(call_stacks)))
print('Call stack generation - done')
8 changes: 7 additions & 1 deletion utils/call_stacks_analysis/make_api.sh
@@ -18,6 +18,12 @@ for link in $LINK_FILES; do
fi
done

API=$WD/api.txt

grep ";" $LINK_FILES | \
grep -v -e'*' -e'}' -e'_pobj_cache' | \
gawk -F "[;\t]" '{ print $3 }' | sort | uniq > $WD/api.txt
gawk -F "[;\t]" '{ print $3 }' | sort | uniq > $API

for api in libpmem_init libpmem_fini libpmemobj_init libpmemobj_fini; do
echo $api >> $API
done
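
A quick sanity-check sketch, assuming the script is run from `utils/call_stacks_analysis` (the directory it writes `api.txt` to):

```sh
# The four module init/fini entry points appended above should show up
# at the end of the generated api.txt.
./make_api.sh
tail -4 api.txt
```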
9 changes: 7 additions & 2 deletions utils/call_stacks_analysis/make_cflow.sh
@@ -13,14 +13,19 @@ WD=$(realpath $(dirname "$0"))
SRC=$(realpath $WD/../../src)

API=$WD/api.txt

if [ ! -f "$API" ]; then
echo "$API is missing"
exit 1
fi

EXTRA_ENTRY_POINTS=$WD/extra_entry_points.txt
if [ ! -f "$EXTRA_ENTRY_POINTS" ]; then
echo "$EXTRA_ENTRY_POINTS is missing"
exit 1
fi

STARTS=
for start in `cat $API`; do
for start in `cat $API $EXTRA_ENTRY_POINTS`; do
STARTS="$STARTS --start $start"
done

utils/call_stacks_analysis/make_extra.py
@@ -162,6 +162,10 @@ def pmem_function_pointers(calls: Calls) -> Calls:
memsetfuncs['nt']['empty'] + \
memsetfuncs['t']['empty']

is_pmem_all = ['is_pmem_never', 'is_pmem_always', 'is_pmem_detect']

calls = dict_extend(calls, 'pmem_is_pmem', is_pmem_all)

return calls

def pmemobj_function_pointers(calls: Calls) -> Calls:
@@ -175,8 +179,28 @@ def pmemobj_function_pointers(calls: Calls) -> Calls:

calls = dict_extend(calls, 'bucket_insert_block', insert_all)

calls = dict_extend(calls, 'bucket_remove_block', get_rm_exact_all)

calls = dict_extend(calls, 'bucket_alloc_block', get_rm_bestfit_all)

callers = [
'bucket_attach_run',
'bucket_detach_run'
]
for caller in callers:
calls = dict_extend(calls, caller, rm_all_all)

calls = dict_extend(calls, 'bucket_fini', destroy_all)

compare_all = ['ravl_interval_compare']

callers = [
'ravl_emplace',
'ravl_find'
]
for caller in callers:
calls = dict_extend(calls, caller, compare_all)

# memory_block_ops
block_size_all = ['huge_block_size', 'run_block_size']
prep_hdr_all = ['huge_prep_operation_hdr', 'run_prep_operation_hdr']
@@ -200,8 +224,74 @@ def pmemobj_function_pointers(calls: Calls) -> Calls:
get_bitmap_all = ['run_get_bitmap']
fill_pct_all = ['huge_fill_pct', 'run_fill_pct']

calls = dict_extend(calls, 'heap_free_chunk_reuse', prep_hdr_all)
calls = dict_extend(calls, 'palloc_heap_action_exec', prep_hdr_all)
callers = [
'memblock_header_none_get_size',
'block_get_real_size',
'memblock_from_offset_opt',
]
for caller in callers:
calls = dict_extend(calls, caller, block_size_all)

callers = [
'heap_free_chunk_reuse',
'palloc_heap_action_exec',
]
for caller in callers:
calls = dict_extend(calls, caller, prep_hdr_all)

callers = [
'bucket_attach_run',
'heap_run_into_free_chunk',
'palloc_reservation_create',
'palloc_defer_free_create',
'palloc_defrag',
'recycler_element_new',
]
for caller in callers:
calls = dict_extend(calls, caller, get_lock_all)

callers = [
'container_ravl_insert_block',
'block_invalidate',
'alloc_prep_block',
'palloc_heap_action_on_cancel',
'palloc_heap_action_on_process',
'palloc_first',
'palloc_next',
'palloc_vg_register_alloc',
]
for caller in callers:
calls = dict_extend(calls, caller, get_user_data_all)

callers = [
'bucket_insert_block',
'memblock_header_legacy_get_size',
'memblock_header_compact_get_size',
'memblock_header_legacy_get_extra',
'memblock_header_compact_get_extra',
'memblock_header_legacy_get_flags',
'memblock_header_compact_get_flags',
'memblock_header_legacy_write',
'memblock_header_compact_write',
'memblock_header_legacy_invalidate',
'memblock_header_compact_invalidate',
'memblock_header_legacy_reinit',
'memblock_header_compact_reinit',
'block_get_user_data',
]
for caller in callers:
calls = dict_extend(calls, caller, get_real_data_all)

callers = [
'block_invalidate',
'alloc_prep_block',
'palloc_operation',
'palloc_defrag',
'palloc_usable_size',
'palloc_vg_register_alloc',
]
for caller in callers:
calls = dict_extend(calls, caller, get_user_size_all)

calls = dict_extend(calls, 'alloc_prep_block', write_header_all)

@@ -213,12 +303,22 @@ def pmemobj_function_pointers(calls: Calls) -> Calls:

calls = dict_extend(calls, 'heap_vg_open', vg_init_all)

calls = dict_extend(calls, 'palloc_defrag', get_extra_all)
calls = dict_extend(calls, 'palloc_extra', get_extra_all)

calls = dict_extend(calls, 'palloc_defrag', get_flags_all)
calls = dict_extend(calls, 'palloc_flags', get_flags_all)

calls = dict_extend(calls, 'bucket_attach_run', iterate_free_all)

calls = dict_extend(calls, 'heap_zone_foreach_object', iterate_used_all)

calls = dict_extend(calls, 'heap_reclaim_zone_garbage', reinit_chunk_all)

calls = dict_extend(calls, 'recycler_element_new', calc_free_all)

calls = dict_extend(calls, 'palloc_defrag', fill_pct_all)

# memblock_header_ops
get_size_all = ['memblock_header_legacy_get_size', 'memblock_header_compact_get_size', 'memblock_header_none_get_size']
get_extra_all = ['memblock_header_legacy_get_extra', 'memblock_header_compact_get_extra', 'memblock_header_none_get_extra']
@@ -227,6 +327,15 @@ def pmemobj_function_pointers(calls: Calls) -> Calls:
invalidate_all = ['memblock_header_legacy_invalidate', 'memblock_header_compact_invalidate', 'memblock_header_none_invalidate']
reinit_all = ['memblock_header_legacy_reinit', 'memblock_header_compact_reinit', 'memblock_header_none_reinit']

callers = [
'block_get_real_size',
'memblock_from_offset_opt',
]
for caller in callers:
calls = dict_extend(calls, caller, get_size_all)

calls = dict_extend(calls, 'block_get_extra', get_extra_all)
calls = dict_extend(calls, 'block_get_flags', get_flags_all)
calls = dict_extend(calls, 'block_write_header', write_all)
calls = dict_extend(calls, 'block_invalidate', invalidate_all)
calls = dict_extend(calls, 'block_reinit_header', reinit_all)
@@ -237,6 +346,8 @@ def pmemobj_function_pointers(calls: Calls) -> Calls:
on_process_all = ['palloc_heap_action_on_process', 'palloc_mem_action_noop']
on_unlock_all = ['palloc_heap_action_on_unlock', 'palloc_mem_action_noop']

calls = dict_extend(calls, 'palloc_exec_actions', exec_all)

calls = dict_extend(calls, 'palloc_cancel', on_cancel_all)

calls = dict_extend(calls, 'palloc_exec_actions', on_process_all + on_unlock_all)
@@ -290,10 +401,14 @@ def pmemobj_function_pointers(calls: Calls) -> Calls:
calls = dict_extend(calls, 'obj_norep_drain', drain_local_all)
calls = dict_extend(calls, 'obj_rep_drain', drain_local_all)

calls = dict_extend(calls, 'obj_norep_memcpy', memcpy_local_all)
calls = dict_extend(calls, 'obj_rep_memcpy', memcpy_local_all)
calls = dict_extend(calls, 'obj_rep_flush', memcpy_local_all)
calls = dict_extend(calls, 'obj_replicas_check_basic', memcpy_local_all)
callers = [
'obj_norep_memcpy',
'obj_rep_memcpy',
'obj_rep_flush',
'obj_replicas_check_basic',
]
for caller in callers:
calls = dict_extend(calls, caller, memcpy_local_all)

calls = dict_extend(calls, 'obj_norep_memmove', memmove_local_all)
calls = dict_extend(calls, 'obj_rep_memmove', memmove_local_all)
Expand All @@ -303,12 +418,25 @@ def pmemobj_function_pointers(calls: Calls) -> Calls:

return calls

def get_callees(calls):
callees = []
for _, v in calls.items():
callees.extend(v)
return list(set(callees))

def main():
extra_calls = inlines({})
extra_calls = pmem_function_pointers(extra_calls)
extra_calls = pmemobj_function_pointers(extra_calls)
with open("extra_calls.json", "w") as outfile:
json.dump(extra_calls, outfile, indent = 4)

# All functions accessed via function pointers have to be provided
# on top of regular API calls for cflow to process their call stacks.
extra_entry_points = get_callees(extra_calls)
extra_entry_points.sort()
with open("extra_entry_points.txt", "w") as outfile:
outfile.write("\n".join(extra_entry_points))

if __name__ == '__main__':
main()
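
A minimal end-to-end sketch of how the two generated files are consumed, assuming the commands are run from `utils/call_stacks_analysis` and `./make_api.sh` has already produced `api.txt`:

```sh
# make_extra.py writes both outputs into the current directory;
# make_cflow.sh then adds every name from extra_entry_points.txt as an
# additional --start entry point next to the regular API from api.txt.
./make_extra.py
ls extra_calls.json extra_entry_points.txt
./make_cflow.sh
```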
