Skip to content

Commit

Permalink
Add visualization
Browse files Browse the repository at this point in the history
  • Loading branch information
threonorm committed Nov 22, 2024
1 parent d4a47a8 commit 0791691
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 3 deletions.
5 changes: 4 additions & 1 deletion run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
gcc $1.c -o $1
objdump -D $1 > $1.dump

rm log$1
touch log$1
for i in {1..100}; do
./$1
./$1 >> log$1
done
cat log$1 | python stats.py
7 changes: 5 additions & 2 deletions serializeOff.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ int __attribute__ ((noinline)) withspecKind1(int* a,volatile int** c){
__asm__ volatile(
"serialize\n"
// "lfence\n"
// "rdtsc\n"
// "rdtscp\n"
// "addq %%rax, %%rdi\n"
// "subq %%rax, %%rdi\n"
"movq (%%rdi), %%rax\n"
Expand Down Expand Up @@ -111,7 +111,8 @@ void test_withspecKind1() {
// return 0 for total elapsed time if we got interrupted by
// system in the middle of our measurement.
total_elapsed += end -start;
}
}
else { total_elapsed = -1; }

printf(" %llu ", total_elapsed);
}
Expand Down Expand Up @@ -175,6 +176,7 @@ void test_withspecKind2() {
// system in the middle of our measurement.
total_elapsed += end -start;
}
else { total_elapsed = -1; }

printf(" %llu ", total_elapsed);
}
Expand Down Expand Up @@ -237,6 +239,7 @@ void ref(int incache){
if (end - start < 1000) {
total_elapsed += end -start;
}
else { total_elapsed = -1; }
printf(" %llu ", total_elapsed);
}

Expand Down
128 changes: 128 additions & 0 deletions stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import json
import sys
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np

# Configuration parameters
MAX_VALUE = 400 # Maximum value threshold

def read_json_lines():
"""Read JSON lines from stdin and collect values for each field."""
data = defaultdict(list)
filtered_counts = defaultdict(int)

for line in sys.stdin:
try:
json_obj = json.loads(line.strip())
# Collect values for each field, applying threshold
for key, value in json_obj.items():
if value <= MAX_VALUE:
data[key].append(value)
else:
filtered_counts[key] += 1
except json.JSONDecodeError as e:
print(f"Warning: Skipping invalid JSON line: {line.strip()}")
print(f"Error: {e}")
except Exception as e:
print(f"Warning: Unexpected error processing line: {line.strip()}")
print(f"Error: {e}")
return data, filtered_counts

def create_histograms(data, filtered_counts):
"""Create histograms for each field in the data using the same bins."""
n_fields = len(data)
n_rows = (n_fields + 1) // 2

fig, axes = plt.subplots(n_rows, 2, figsize=(15, 5*n_rows))
fig.suptitle(f'Distribution of Values by Field (Values ≤ {MAX_VALUE})', fontsize=16, y=0.95)

# Flatten axes array for easier iteration
axes = axes.flatten() if n_fields > 2 else [axes] if n_fields == 1 else axes

# Calculate global min and max for consistent bins
all_values = []
for values in data.values():
all_values.extend(values)
global_min = min(all_values)
global_max = min(MAX_VALUE, max(all_values))

# Calculate optimal number of bins using Sturge's rule on the total dataset
n_bins = int(np.log2(len(all_values)) + 1)

# Create bins that will be used for all histograms
bins = np.linspace(global_min, global_max, n_bins + 1)

# Create a histogram for each field
for idx, (field, values) in enumerate(sorted(data.items())):
ax = axes[idx]

# Create histogram with the common bins
counts, bins, _ = ax.hist(values, bins=bins, edgecolor='black', alpha=0.7)

# Add mean and median lines
mean_val = np.mean(values)
median_val = np.median(values)
ax.axvline(mean_val, color='red', linestyle='dashed', linewidth=2, label=f'Mean: {mean_val:.2f}')
ax.axvline(median_val, color='green', linestyle='dashed', linewidth=2, label=f'Median: {median_val:.2f}')

# Create title with filtered count information
filtered_msg = f"\nFiltered out: {filtered_counts[field]} values > {MAX_VALUE}" if filtered_counts[field] > 0 else ""
ax.set_title(f'Distribution of {field}\n(n={len(values)}{filtered_msg})')

ax.set_xlabel('Value')
ax.set_ylabel('Frequency')
ax.grid(True, alpha=0.3)
ax.legend()

# Add value annotations above each bar
for i in range(len(counts)):
if counts[i] > 0: # Only annotate non-empty bars
ax.text(bins[i], counts[i], str(int(counts[i])),
horizontalalignment='center', verticalalignment='bottom')

# Set the same x-axis limits for all plots
ax.set_xlim(global_min - (global_max - global_min)*0.05,
global_max + (global_max - global_min)*0.05)

# Hide empty subplots if any
for idx in range(len(data), len(axes)):
axes[idx].set_visible(False)

# Adjust layout to prevent overlap
plt.tight_layout()
return fig

def print_statistics(data, filtered_counts):
"""Print basic statistics for each field."""
print("\nBasic Statistics:")
print("-" * 80)
print(f"{'Field':<15} {'Count':>8} {'Filtered':>8} {'Mean':>10} {'Median':>10} {'Min':>8} {'Max':>8}")
print("-" * 80)

for field, values in sorted(data.items()):
print(f"{field:<15} {len(values):8d} {filtered_counts[field]:8d} {np.mean(values):10.2f} "
f"{np.median(values):10.2f} {min(values):8d} {max(values):8d}")

def main():
print(f"Reading JSON data from stdin... (filtering values > {MAX_VALUE})")
data, filtered_counts = read_json_lines()

if not data:
print("No valid JSON data received!")
sys.exit(1)

# Print statistics
print_statistics(data, filtered_counts)

# Create and save histograms
fig = create_histograms(data, filtered_counts)
output_file = 'distributions.png'
fig.savefig(output_file, dpi=300, bbox_inches='tight')
print(f"\nHistograms saved to: {output_file}")

# Close the figure to free memory
plt.close(fig)

if __name__ == "__main__":
main()

0 comments on commit 0791691

Please sign in to comment.