diff --git a/run.sh b/run.sh index adc6031..6ad411f 100755 --- a/run.sh +++ b/run.sh @@ -2,6 +2,9 @@ gcc $1.c -o $1 objdump -D $1 > $1.dump +rm log$1 +touch log$1 for i in {1..100}; do - ./$1 + ./$1 >> log$1 done +cat log$1 | python stats.py diff --git a/serializeOff.c b/serializeOff.c index 83c6443..3038fbd 100644 --- a/serializeOff.c +++ b/serializeOff.c @@ -25,7 +25,7 @@ int __attribute__ ((noinline)) withspecKind1(int* a,volatile int** c){ __asm__ volatile( "serialize\n" // "lfence\n" -// "rdtsc\n" +// "rdtscp\n" // "addq %%rax, %%rdi\n" // "subq %%rax, %%rdi\n" "movq (%%rdi), %%rax\n" @@ -111,7 +111,8 @@ void test_withspecKind1() { // return 0 for total elapsed time if we got interrupted by // system in the middle of our measurement. total_elapsed += end -start; - } + } + else { total_elapsed = -1; } printf(" %llu ", total_elapsed); } @@ -175,6 +176,7 @@ void test_withspecKind2() { // system in the middle of our measurement. total_elapsed += end -start; } + else { total_elapsed = -1; } printf(" %llu ", total_elapsed); } @@ -237,6 +239,7 @@ void ref(int incache){ if (end - start < 1000) { total_elapsed += end -start; } + else { total_elapsed = -1; } printf(" %llu ", total_elapsed); } diff --git a/stats.py b/stats.py new file mode 100644 index 0000000..43fdb9c --- /dev/null +++ b/stats.py @@ -0,0 +1,128 @@ +import json +import sys +from collections import defaultdict +import matplotlib.pyplot as plt +import numpy as np + +# Configuration parameters +MAX_VALUE = 400 # Maximum value threshold + +def read_json_lines(): + """Read JSON lines from stdin and collect values for each field.""" + data = defaultdict(list) + filtered_counts = defaultdict(int) + + for line in sys.stdin: + try: + json_obj = json.loads(line.strip()) + # Collect values for each field, applying threshold + for key, value in json_obj.items(): + if value <= MAX_VALUE: + data[key].append(value) + else: + filtered_counts[key] += 1 + except json.JSONDecodeError as e: + print(f"Warning: Skipping invalid JSON line: {line.strip()}") + print(f"Error: {e}") + except Exception as e: + print(f"Warning: Unexpected error processing line: {line.strip()}") + print(f"Error: {e}") + return data, filtered_counts + +def create_histograms(data, filtered_counts): + """Create histograms for each field in the data using the same bins.""" + n_fields = len(data) + n_rows = (n_fields + 1) // 2 + + fig, axes = plt.subplots(n_rows, 2, figsize=(15, 5*n_rows)) + fig.suptitle(f'Distribution of Values by Field (Values ≤ {MAX_VALUE})', fontsize=16, y=0.95) + + # Flatten axes array for easier iteration + axes = axes.flatten() if n_fields > 2 else [axes] if n_fields == 1 else axes + + # Calculate global min and max for consistent bins + all_values = [] + for values in data.values(): + all_values.extend(values) + global_min = min(all_values) + global_max = min(MAX_VALUE, max(all_values)) + + # Calculate optimal number of bins using Sturge's rule on the total dataset + n_bins = int(np.log2(len(all_values)) + 1) + + # Create bins that will be used for all histograms + bins = np.linspace(global_min, global_max, n_bins + 1) + + # Create a histogram for each field + for idx, (field, values) in enumerate(sorted(data.items())): + ax = axes[idx] + + # Create histogram with the common bins + counts, bins, _ = ax.hist(values, bins=bins, edgecolor='black', alpha=0.7) + + # Add mean and median lines + mean_val = np.mean(values) + median_val = np.median(values) + ax.axvline(mean_val, color='red', linestyle='dashed', linewidth=2, label=f'Mean: {mean_val:.2f}') + ax.axvline(median_val, color='green', linestyle='dashed', linewidth=2, label=f'Median: {median_val:.2f}') + + # Create title with filtered count information + filtered_msg = f"\nFiltered out: {filtered_counts[field]} values > {MAX_VALUE}" if filtered_counts[field] > 0 else "" + ax.set_title(f'Distribution of {field}\n(n={len(values)}{filtered_msg})') + + ax.set_xlabel('Value') + ax.set_ylabel('Frequency') + ax.grid(True, alpha=0.3) + ax.legend() + + # Add value annotations above each bar + for i in range(len(counts)): + if counts[i] > 0: # Only annotate non-empty bars + ax.text(bins[i], counts[i], str(int(counts[i])), + horizontalalignment='center', verticalalignment='bottom') + + # Set the same x-axis limits for all plots + ax.set_xlim(global_min - (global_max - global_min)*0.05, + global_max + (global_max - global_min)*0.05) + + # Hide empty subplots if any + for idx in range(len(data), len(axes)): + axes[idx].set_visible(False) + + # Adjust layout to prevent overlap + plt.tight_layout() + return fig + +def print_statistics(data, filtered_counts): + """Print basic statistics for each field.""" + print("\nBasic Statistics:") + print("-" * 80) + print(f"{'Field':<15} {'Count':>8} {'Filtered':>8} {'Mean':>10} {'Median':>10} {'Min':>8} {'Max':>8}") + print("-" * 80) + + for field, values in sorted(data.items()): + print(f"{field:<15} {len(values):8d} {filtered_counts[field]:8d} {np.mean(values):10.2f} " + f"{np.median(values):10.2f} {min(values):8d} {max(values):8d}") + +def main(): + print(f"Reading JSON data from stdin... (filtering values > {MAX_VALUE})") + data, filtered_counts = read_json_lines() + + if not data: + print("No valid JSON data received!") + sys.exit(1) + + # Print statistics + print_statistics(data, filtered_counts) + + # Create and save histograms + fig = create_histograms(data, filtered_counts) + output_file = 'distributions.png' + fig.savefig(output_file, dpi=300, bbox_inches='tight') + print(f"\nHistograms saved to: {output_file}") + + # Close the figure to free memory + plt.close(fig) + +if __name__ == "__main__": + main()