Add visualization

VCA-EPFL · Nov 22, 2024 · 0791691 · 0791691
1 parent d4a47a8
commit 0791691
Show file tree

Hide file tree

Showing 3 changed files with 137 additions and 3 deletions.
diff --git a/run.sh b/run.sh
@@ -2,6 +2,9 @@
 gcc $1.c -o $1
 objdump -D $1 > $1.dump
 
+rm log$1
+touch log$1
 for i in {1..100}; do
-	./$1
+	./$1 >> log$1
 done
+cat log$1 | python stats.py
diff --git a/serializeOff.c b/serializeOff.c
@@ -25,7 +25,7 @@ int __attribute__ ((noinline)) withspecKind1(int* a,volatile int** c){
         __asm__ volatile(
 		"serialize\n"
 //              "lfence\n"
-//              "rdtsc\n"
+//              "rdtscp\n"
 //              "addq %%rax, %%rdi\n"
 //		"subq %%rax, %%rdi\n"
                 "movq (%%rdi), %%rax\n"
@@ -111,7 +111,8 @@ void test_withspecKind1() {
 		// return 0 for total elapsed time if we got interrupted by
 		// system in the middle of our measurement.
 	        total_elapsed += end -start;
-	}
+	} 
+	else { total_elapsed = -1; }
 
     printf(" %llu ", total_elapsed);
    }
@@ -175,6 +176,7 @@ void test_withspecKind2() {
 		// system in the middle of our measurement.
 	        total_elapsed += end -start;
 	}
+	else { total_elapsed = -1; }
 
     printf(" %llu ", total_elapsed);
 }
@@ -237,6 +239,7 @@ void ref(int incache){
 	if (end - start < 1000) {
 	        total_elapsed += end -start;
 	}
+	else { total_elapsed = -1; }
     printf(" %llu ", total_elapsed);
 }
 

diff --git a/stats.py b/stats.py
@@ -0,0 +1,128 @@
+import json
+import sys
+from collections import defaultdict
+import matplotlib.pyplot as plt
+import numpy as np
+
+# Configuration parameters
+# Upper bound for accepted values: read_json_lines() keeps values that are
+# <= MAX_VALUE and only counts the rest as "filtered"; the plot titles and
+# the startup message quote the same threshold.
+MAX_VALUE = 400  # Maximum value threshold
+
+def read_json_lines():
+    """Read JSON lines from stdin and collect values for each field."""
+    data = defaultdict(list)
+    filtered_counts = defaultdict(int)
+
+    for line in sys.stdin:
+        try:
+            json_obj = json.loads(line.strip())
+            # Collect values for each field, applying threshold
+            for key, value in json_obj.items():
+                if value <= MAX_VALUE:
+                    data[key].append(value)
+                else:
+                    filtered_counts[key] += 1
+        except json.JSONDecodeError as e:
+            print(f"Warning: Skipping invalid JSON line: {line.strip()}")
+            print(f"Error: {e}")
+        except Exception as e:
+            print(f"Warning: Unexpected error processing line: {line.strip()}")
+            print(f"Error: {e}")
+    return data, filtered_counts
+
+def create_histograms(data, filtered_counts):
+    """Create histograms for each field in the data using the same bins."""
+    n_fields = len(data)
+    n_rows = (n_fields + 1) // 2
+
+    fig, axes = plt.subplots(n_rows, 2, figsize=(15, 5*n_rows))
+    fig.suptitle(f'Distribution of Values by Field (Values ≤ {MAX_VALUE})', fontsize=16, y=0.95)
+
+    # Flatten axes array for easier iteration
+    axes = axes.flatten() if n_fields > 2 else [axes] if n_fields == 1 else axes
+
+    # Calculate global min and max for consistent bins
+    all_values = []
+    for values in data.values():
+        all_values.extend(values)
+    global_min = min(all_values)
+    global_max = min(MAX_VALUE, max(all_values))
+
+    # Calculate optimal number of bins using Sturge's rule on the total dataset
+    n_bins = int(np.log2(len(all_values)) + 1)
+
+    # Create bins that will be used for all histograms
+    bins = np.linspace(global_min, global_max, n_bins + 1)
+
+    # Create a histogram for each field
+    for idx, (field, values) in enumerate(sorted(data.items())):
+        ax = axes[idx]
+
+        # Create histogram with the common bins
+        counts, bins, _ = ax.hist(values, bins=bins, edgecolor='black', alpha=0.7)
+
+        # Add mean and median lines
+        mean_val = np.mean(values)
+        median_val = np.median(values)
+        ax.axvline(mean_val, color='red', linestyle='dashed', linewidth=2, label=f'Mean: {mean_val:.2f}')
+        ax.axvline(median_val, color='green', linestyle='dashed', linewidth=2, label=f'Median: {median_val:.2f}')
+
+        # Create title with filtered count information
+        filtered_msg = f"\nFiltered out: {filtered_counts[field]} values > {MAX_VALUE}" if filtered_counts[field] > 0 else ""
+        ax.set_title(f'Distribution of {field}\n(n={len(values)}{filtered_msg})')
+
+        ax.set_xlabel('Value')
+        ax.set_ylabel('Frequency')
+        ax.grid(True, alpha=0.3)
+        ax.legend()
+
+        # Add value annotations above each bar
+        for i in range(len(counts)):
+            if counts[i] > 0:  # Only annotate non-empty bars
+                ax.text(bins[i], counts[i], str(int(counts[i])), 
+                       horizontalalignment='center', verticalalignment='bottom')
+
+        # Set the same x-axis limits for all plots
+        ax.set_xlim(global_min - (global_max - global_min)*0.05, 
+                   global_max + (global_max - global_min)*0.05)
+
+    # Hide empty subplots if any
+    for idx in range(len(data), len(axes)):
+        axes[idx].set_visible(False)
+
+    # Adjust layout to prevent overlap
+    plt.tight_layout()
+    return fig
+
+def print_statistics(data, filtered_counts):
+    """Print basic statistics for each field."""
+    print("\nBasic Statistics:")
+    print("-" * 80)
+    print(f"{'Field':<15} {'Count':>8} {'Filtered':>8} {'Mean':>10} {'Median':>10} {'Min':>8} {'Max':>8}")
+    print("-" * 80)
+
+    for field, values in sorted(data.items()):
+        print(f"{field:<15} {len(values):8d} {filtered_counts[field]:8d} {np.mean(values):10.2f} "
+              f"{np.median(values):10.2f} {min(values):8d} {max(values):8d}")
+
+def main():
+    print(f"Reading JSON data from stdin... (filtering values > {MAX_VALUE})")
+    data, filtered_counts = read_json_lines()
+
+    if not data:
+        print("No valid JSON data received!")
+        sys.exit(1)
+
+    # Print statistics
+    print_statistics(data, filtered_counts)
+
+    # Create and save histograms
+    fig = create_histograms(data, filtered_counts)
+    output_file = 'distributions.png'
+    fig.savefig(output_file, dpi=300, bbox_inches='tight')
+    print(f"\nHistograms saved to: {output_file}")
+
+    # Close the figure to free memory
+    plt.close(fig)
+
+if __name__ == "__main__":
+    main()