Skip to content

Commit

Permalink
HAFix initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
ThomasShiyu committed Jan 14, 2025
0 parents commit 16eac87
Show file tree
Hide file tree
Showing 118 changed files with 264,795 additions and 0 deletions.
30 changes: 30 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
readbymyself.txt

.idea/
analysis/inspect_model_output/
analysis/result_csv/
analysis/result_pictures/
analysis/RQ3
analysis/**/*.png

backup/

dataset/src/test.py

evaluation/**/*.png

model_inference/codellama_13b_instruct_hf_instruct/
model_inference/codellama_34b_instruct_hf
model_inference/codellama_34b_python_hf
model_inference/codellama_70b_instruct_hf
model_inference/codellama_70b_python_hf
model_inference/examples
model_inference/log
model_inference/test
model_inference/codellama_13b*
model_inference/codellama_34b*
model_inference/codellama_70b*

subject_projects/

**/__pycache__/
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
HAFix: History-Augmented Large Language Models for Bug Fixing
164 changes: 164 additions & 0 deletions analysis/draw_figure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
def main():
    """Render every two-set Venn diagram for the hard-coded case-ID sets below.

    Each set is a collection of case IDs stored as numeric strings.  The
    diagrams are produced in a fixed order by the two sibling renderers and
    are saved under the picture name given as the last element of each job.
    """
    # Case-ID sets, grouped by the experiment they are named after.
    instruct_baseline = {'3', '5', '6', '7', '8', '9', '23', '24', '26', '27', '28', '36', '37', '38', '42', '48', '49', '54', '66', '67'}
    instruct_combine = {'2', '3', '5', '6', '7', '8', '9', '10', '17', '23', '24', '26', '27', '28', '29', '32', '36', '37', '38', '42', '46', '47', '48', '49', '53', '54', '60', '66', '67'}
    instruct_6 = {'2', '3', '5', '6', '7', '8', '9', '10', '17', '23', '24', '26', '27', '29', '32', '36', '38', '42', '48', '49', '54', '66'}

    instruct_label_baseline = {'3', '5', '6', '7', '8', '9', '23', '24', '26', '27', '28', '32', '36', '37', '41', '42', '44', '49', '54', '67'}
    instruct_label_combine = {'3', '6', '7', '8', '9', '10', '23', '24', '26', '27', '28', '32', '36', '37', '41', '42', '44', '46', '49', '53', '54', '60', '67'}

    infill_baseline = {'2', '23', '24', '26', '47', '48', '49'}
    infill_combine = {'2', '5', '23', '26', '41', '47', '48', '49'}

    union_baseline = {'2', '3', '5', '6', '7', '8', '9', '23', '24', '26', '27', '28', '32', '36', '37', '38', '41', '42', '44', '47', '48', '49', '54', '66', '67'}
    union_aggregation = {'2', '3', '5', '6', '7', '8', '9', '10', '17', '23', '24', '26', '27', '28', '29', '32', '36', '37', '38', '41', '42', '44', '46', '47', '48', '49', '53', '54', '60', '66', '67'}

    # One row per picture: (renderer, first set, second set, first label, second label, picture name).
    jobs = (
        (create_2_set_venn_percentage, instruct_baseline, instruct_combine, 'Baseline', 'HAFix-Agg', 'instruct_1_HAFix_percentage'),
        (create_2_set_venn_percentage, instruct_baseline, instruct_6, 'baseline', 'FLN-all', 'instruct_6_FLN-all_percentage'),
        (create_2_set_venn, instruct_baseline, instruct_combine, 'baseline', 'aggregation', 'instruct'),
        (create_2_set_venn, instruct_label_baseline, instruct_label_combine, 'baseline', 'aggregation', 'instruct_label'),
        (create_2_set_venn_percentage, instruct_label_baseline, instruct_label_combine, 'baseline', 'aggregation', 'instruct_label_percentage'),
        (create_2_set_venn, infill_baseline, infill_combine, 'baseline', 'aggregation', 'infill'),
        (create_2_set_venn_percentage, infill_baseline, infill_combine, 'baseline', 'aggregation', 'infill_percentage'),
        (create_2_set_venn, union_baseline, union_aggregation, 'baseline', 'aggregation', 'union'),
        (create_2_set_venn_percentage, union_baseline, union_aggregation, 'baseline', 'aggregation', 'union_percentage'),
    )

    for render, first, second, first_label, second_label, picture_name in jobs:
        render(first, second, first_label, second_label, picture_name)



def create_2_set_venn(set1, set2, set1_label, set2_label, picture_name):
    """Save a two-set Venn diagram whose regions list the IDs they contain.

    Args:
        set1: First set of IDs; every ID must be a string accepted by ``int``
            because the IDs are sorted numerically.
        set2: Second set of IDs, same format as ``set1``.
        set1_label: Caption for the first circle.
        set2_label: Caption for the second circle.
        picture_name: Output file name without extension; ``.png`` is appended.
    """
    import matplotlib.pyplot as plt
    from matplotlib_venn import venn2

    def format_label_text(numbers):
        """Join the IDs into lines holding 1, 2, 3, ... items, sorted numerically."""
        ordered = sorted(numbers, key=int)
        lines = []
        start = 0
        width = 1
        while start < len(ordered):
            lines.append(", ".join(ordered[start:start + width]))
            start += width
            width += 1  # each following line holds one more item
        return "\n".join(lines)

    # Create the Venn diagram
    venn = venn2([set1, set2], (set1_label, set2_label))

    # Region id -> the IDs that belong in that region.
    regions = {
        '10': set1 - set2,
        '01': set2 - set1,
        '11': set1 & set2,
    }
    for region_id, members in regions.items():
        label = venn.get_label_by_id(region_id)
        # matplotlib-venn can return None for a region it did not draw
        # (e.g. when one set is a subset of the other), so skip those
        # instead of failing with AttributeError.
        if label is None:
            continue
        label.set_text(format_label_text(members))
        label.set_fontsize(8)  # small font keeps long ID lists readable

    plt.savefig(f'{picture_name}.png', bbox_inches='tight')
    plt.close()


def create_2_set_venn_percentage(set1, set2, set1_label, set2_label, picture_name):
    """Save a two-set Venn diagram whose regions show a count and a percentage.

    Percentages are relative to the size of the union of both sets.

    Args:
        set1: First set of IDs.
        set2: Second set of IDs.
        set1_label: Caption for the first circle.
        set2_label: Caption for the second circle.
        picture_name: Output file name without extension; ``.png`` is appended.
    """
    import matplotlib.pyplot as plt
    from matplotlib_venn import venn2

    def calculate_percentage(subset, total):
        """Return the size of ``subset`` as a percentage of ``total`` (0.0 if total is 0)."""
        if not total:
            # Both input sets are empty; avoid ZeroDivisionError.
            return 0.0
        return len(subset) / total * 100

    total_elements = len(set1 | set2)

    only_in_set1 = set1 - set2
    only_in_set2 = set2 - set1
    in_both = set1 & set2

    # Calculate percentages for each subset
    only_set1 = calculate_percentage(only_in_set1, total_elements)
    only_set2 = calculate_percentage(only_in_set2, total_elements)
    intersection = calculate_percentage(in_both, total_elements)

    # Create the Venn diagram
    venn = venn2([set1, set2], (set1_label, set2_label))

    # Text per region id.  An empty set1-only region is left blank, as in the
    # original; the other two regions always show their count.
    # NOTE(review): only the '10' region is blanked when empty -- confirm
    # whether '01' and '11' should be treated the same way.
    region_texts = {
        '10': f'{len(only_in_set1)}\n\n{only_set1:.2f}%' if only_in_set1 else '',
        '01': f'{len(only_in_set2)}\n\n{only_set2:.2f}%',
        '11': f'{len(in_both)}\n\n{intersection:.2f}%',
    }
    for region_id, text in region_texts.items():
        label = venn.get_label_by_id(region_id)
        # matplotlib-venn can return None for a region it did not draw.
        if label is None:
            continue
        label.set_text(text)
        label.set_fontsize(8)  # small font for better readability

    # Change the colors of the circle areas if needed
    # venn_patches = venn.patches
    # venn_patches[0].set_facecolor('red')
    # venn_patches[1].set_facecolor('blue')
    # venn_patches[2].set_facecolor('purple')  # Overlapping area color if needed

    plt.savefig(f'{picture_name}.png', bbox_inches='tight')
    plt.close()


def create_3_set_venn():
    """Placeholder for a three-set Venn diagram; currently does nothing.

    The commented-out sketch below shows a possible implementation based on
    ``matplotlib_venn.venn3`` with example sets; it is not executed.
    """
    # import matplotlib.pyplot as plt
    # from matplotlib_venn import venn3
    #
    # # Define your sets with case IDs
    # set1 = {'ID1', 'ID2', 'ID3', 'ID4'}
    # set2 = {'ID3', 'ID4', 'ID5', 'ID6'}
    # set3 = {'ID4', 'ID6', 'ID7', 'ID8'}
    #
    # # Create the Venn diagram
    # venn = venn3([set1, set2, set3], ('Set1', 'Set2', 'Set3'))
    #
    # # Customize the labels inside the circles
    # venn.get_label_by_id('100').set_text('\n'.join(set1 - set2 - set3))
    # venn.get_label_by_id('010').set_text('\n'.join(set2 - set1 - set3))
    # venn.get_label_by_id('001').set_text('\n'.join(set3 - set1 - set2))
    # venn.get_label_by_id('110').set_text('\n'.join(set1 & set2 - set3))
    # venn.get_label_by_id('101').set_text('\n'.join(set1 & set3 - set2))
    # venn.get_label_by_id('011').set_text('\n'.join(set2 & set3 - set1))
    # venn.get_label_by_id('111').set_text('\n'.join(set1 & set2 & set3))
    #
    # # Optionally set the font size for better readability
    # for subset in ['100', '010', '001', '110', '101', '011', '111']:
    #     venn.get_label_by_id(subset).set_fontsize(8)
    #
    #
    # # Save the plot
    # plt.savefig('venn3.png', bbox_inches='tight')
    # # Display the plot
    # plt.show()
    # plt.close()
    pass


# Generate the figures only when this file is executed as a script.
if __name__ == '__main__':
    main()
51 changes: 51 additions & 0 deletions analysis/inference_time_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import re


def get_nucleus_sampling_start_time(file_path):
    """Print the "Finish generation" timestamp that precedes each finished bug.

    The log is scanned for a ``Current time <timestamp>: Finish generation``
    line followed (non-greedily, across lines) by ``finish the bug: <id>``.
    Each pair is printed as ``bug_id=<id> start_at=<timestamp>``.
    NOTE(review): the "Finish generation" time is reported as the start of
    nucleus sampling -- presumably an earlier generation step ends there;
    confirm against the log format.

    Args:
        file_path: Path of the inference log file to read.

    Returns:
        The list of ``(timestamp, bug_id)`` string tuples in log order, so the
        values can be reused instead of only being printed.
    """
    with open(file_path, 'r') as file:
        text = file.read()

    # Capture the timestamp of a "Finish generation" line and the id from the
    # next "finish the bug" line; DOTALL lets ".*?" span multiple lines.
    pattern = re.compile(
        r'Current time (\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}): Finish generation.*?finish the bug: (\d+)',
        re.DOTALL,
    )
    matches = pattern.findall(text)

    for start_at, bug_id in matches:
        print(f"bug_id={bug_id} start_at={start_at}")

    return matches


def get_nucleus_sampling_end_time(file_path):
    """Print the first "Start generation" timestamp that follows each finished bug.

    The log is scanned for ``finish the bug: <id>`` followed (non-greedily,
    across lines) by ``Current time <timestamp>: Start generation``.  Each
    pair is printed as ``bug_id=<id> finish_at=<timestamp>``.
    NOTE(review): the next "Start generation" time is reported as the finish
    time of the preceding bug -- confirm against the log format.

    Args:
        file_path: Path of the inference log file to read.

    Returns:
        The list of ``(bug_id, timestamp)`` string tuples in log order, so the
        values can be reused instead of only being printed.
    """
    with open(file_path, 'r') as file:
        text = file.read()

    # Capture the bug id and the timestamp of the next "Start generation"
    # line; DOTALL lets ".*?" span multiple lines.  (The original compiled a
    # second, unused pattern first; that dead assignment is removed.)
    pattern = re.compile(
        r'finish the bug: (\d+).*?Current time (\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}): Start generation',
        re.DOTALL,
    )
    matches = pattern.findall(text)

    for bug_id, finish_at in matches:
        print(f"bug_id={bug_id} finish_at={finish_at}")

    return matches


def main(log_dir="/home/22ys22/project/fm-apr-replay/model_inference/log/old_prompt", log_count=8):
    """Print the extracted start and end timestamps for each numbered log file.

    Args:
        log_dir: Directory holding ``codellama_7b_instruct_<n>.log`` files.
            Defaults to the location that was previously hard-coded.
        log_count: Number of log files to process, numbered from 1.
            Defaults to 8, the previously hard-coded count.
    """
    for log_number in range(1, log_count + 1):
        file_path = f"{log_dir}/codellama_7b_instruct_{log_number}.log"
        print(f"==========================log {log_number}==========================")
        get_nucleus_sampling_start_time(file_path)
        print("=============================================================")
        get_nucleus_sampling_end_time(file_path)


# Analyse the logs only when this file is executed as a script.
if __name__ == '__main__':
    main()
64 changes: 64 additions & 0 deletions analysis/inspect_model_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import os
import json

from util import HistoryCategory

# Absolute path of the directory that contains this script.
CURRENT_DIR_PATH = os.path.abspath(os.path.dirname(__file__))
# Parent directory of the script directory, used as the project root.
PROJECT_DIR_BASE = os.path.abspath(os.path.join(CURRENT_DIR_PATH, '../'))
# <project root>/backup/model_inference: per-model inference JSON files are read from here.
MODEL_INFERENCE_BASE_PATH = os.path.abspath(os.path.join(PROJECT_DIR_BASE, 'backup', 'model_inference'))
# <project root>/backup/evaluation: per-model unit-test result JSON files are read from here.
MODEL_EVALUATION_BASE_PATH = os.path.abspath(os.path.join(PROJECT_DIR_BASE, 'backup', 'evaluation'))


def main():
    """Write one text report per bug with each model's greedy-search output.

    For bug ids 1..68 a file ``inspect_model_output/model_output_bug_<id>.txt``
    (next to this script) is opened in append mode.  For every model and
    history setting the report receives the greedy-search result flag and the
    generated code; for setting ``'1'`` the buggy code and the ground-truth
    fix are written first.  Bugs missing from either JSON file are skipped
    for that model/setting.

    Each JSON file is parsed once and kept in memory for the whole run, and
    every file handle is closed via ``with``.

    Raises:
        FileNotFoundError: If an expected inference or evaluation JSON file
            does not exist.
    """
    model_name_path = [
        'codellama_7b_instruct_hf',
        'codellama_7b_instruct_hf_infill',
        'codellama_13b_instruct_hf',
        'codellama_13b_instruct_hf_infill',
        'codellama_34b_instruct_hf'
    ]
    history_settings_ids = [1, 2, 3, 4, 5, 6, 7, 8]
    # Infill runs have their own directory, but their file names use the
    # model name without the '_infill' suffix.
    infill_models = {'codellama_7b_instruct_hf_infill', 'codellama_13b_instruct_hf_infill'}

    inspect_dir = f"{CURRENT_DIR_PATH}/inspect_model_output"
    os.makedirs(inspect_dir, exist_ok=True)

    loaded_json = {}

    def load_json(path):
        """Parse ``path`` on first use and reuse the result afterwards."""
        if path not in loaded_json:
            with open(path, 'r') as handle:
                loaded_json[path] = json.load(handle)
        return loaded_json[path]

    for bug_id in range(1, 69):
        bug_id_str = str(bug_id)
        report_path = f"{inspect_dir}/model_output_bug_{bug_id}.txt"
        with open(report_path, 'a') as report:
            report.write(f"####################inspect model-generated code for bug {bug_id}####################\n")
            for model_name in model_name_path:
                report.write(
                    f"####################inspect {model_name}-generated code####################\n")
                if model_name in infill_models:
                    file_stem = model_name.replace('_infill', '')
                else:
                    file_stem = model_name

                for history_id in history_settings_ids:
                    history_flag = str(history_id)
                    setting_name = HistoryCategory(history_flag).name
                    inference_path = f"{MODEL_INFERENCE_BASE_PATH}/{model_name}/{file_stem}_{setting_name}.json"
                    evaluate_path = f"{MODEL_EVALUATION_BASE_PATH}/{model_name}/unittest_result_{file_stem}_{setting_name}.json"

                    inference_json = load_json(inference_path)
                    evaluate_result: dict = load_json(evaluate_path)

                    if bug_id_str not in inference_json or bug_id_str not in evaluate_result:
                        continue

                    if history_flag == '1':
                        # The bug's input and reference fix are printed once,
                        # together with the first setting.
                        buggy_code = inference_json[bug_id_str]['input']['buggy_code']
                        ground_fixed_code = inference_json[bug_id_str]['ground_fixed_code']
                        report.write("#1. buggy code\n")
                        report.write(f"{buggy_code}\n\n")
                        report.write("#2. ground fixed code\n")
                        report.write(f"{ground_fixed_code}\n\n")

                    model_generated_code = inference_json[bug_id_str]['output']['greedy_search']
                    result = evaluate_result[bug_id_str]['greedy_search_flag']
                    report.write(f"#setting_{history_flag}: {setting_name}\n")
                    report.write(f"{result}\n")
                    report.write(f"{model_generated_code}\n\n")


# Build the inspection reports only when this file is executed as a script.
if __name__ == '__main__':
    main()
55 changes: 55 additions & 0 deletions analysis/result_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import json
import os
import csv
from util import HistoryCategory


def main():
    """Flatten per-setting unit-test results into one CSV of pass/fail flags.

    Every ``unittest_result_<setting name>.json`` file in ``source_path`` is
    read.  For each bug a row is appended through ``save_result`` with
    ``test_result`` 1 or 0:

    * ``k == 1``: 1 when ``greedy_search_flag`` equals ``'Pass'``;
    * otherwise: 1 when any of the first ``k`` entries of
      ``nucleus_sampling_flags`` equals ``'Pass'``.

    An existing output CSV is deleted first so that rows are not duplicated.
    """
    model_size = '7b'  # 34b 13b 7b
    # prompt_style = 'instruct'

    k = 5  # 1, 5, 10

    # NOTE(review): "[email protected]" looks like an e-mail-obfuscation
    # artefact of the page this file was copied from; the real file name
    # (probably containing "@" and a ".csv" suffix) must be restored by hand.
    save_csv_file = f"/home/22ys22/project/fm-apr-replay/analysis/[email protected]"
    source_path = "/home/22ys22/project/fm-apr-replay/backup/evaluation/codellama_7b_instruct_hf_infill/"
    if os.path.exists(save_csv_file):
        os.remove(save_csv_file)

    model_id = f"codellama_{model_size}_instruct"

    for file_name in os.listdir(source_path):
        if not file_name.endswith('.json'):
            continue
        for setting in HistoryCategory:
            if file_name != f'unittest_result_{setting.name}.json':
                continue

            with open(os.path.join(source_path, file_name), 'r') as handle:
                results_by_bug = json.load(handle)

            for bug_id, result in results_by_bug.items():
                if k == 1:
                    passed = result['greedy_search_flag'] == 'Pass'
                else:
                    # Passes if any of the first k sampled candidates passes.
                    sampled_flags = list(result['nucleus_sampling_flags'])[:k]
                    passed = any(sample_flag == 'Pass' for sample_flag in sampled_flags)
                save_result(save_csv_file, model_id, setting.value, setting.name,
                            bug_id, 1 if passed else 0)


def save_result(save_csv_file, model_id, setting_id, setting_name, bug_id, test_result):
    """Append one result row to a CSV file, writing the header if the file is new.

    The file is opened with ``newline=''`` as the ``csv`` module requires, so
    that no extra blank rows appear on platforms that translate line endings.

    Args:
        save_csv_file: Path of the CSV file to create or append to.
        model_id: Value for the ``model_id`` column.
        setting_id: Value for the ``setting_id`` column.
        setting_name: Value for the ``setting_name`` column.
        bug_id: Value for the ``bug_id`` column.
        test_result: Value for the ``test_result`` column.
    """
    # Decide before opening: opening in append mode creates the file.
    needs_header = not os.path.exists(save_csv_file)
    with open(save_csv_file, 'a', newline='') as f:
        csv_write = csv.writer(f)
        if needs_header:
            csv_write.writerow(["model_id", "setting_id", "setting_name", "bug_id", "test_result"])
        csv_write.writerow([model_id, setting_id, setting_name, bug_id, test_result])


# Produce the CSV only when this file is executed as a script.
if __name__ == "__main__":
    main()
Loading

0 comments on commit 16eac87

Please sign in to comment.