From 0ed16aa126d7f3125a46a19a2a7b85fd76a7c27c Mon Sep 17 00:00:00 2001 From: JonatanEnes Date: Fri, 6 Mar 2020 11:19:26 +0100 Subject: [PATCH] Files from original subproject adapted to act as a standalone project --- .gitignore | 4 + README.md | 1 + __init__.py | 0 conf/report_generator_config.ini | 30 ++ .../metagenomics/report_generator_config.ini | 30 ++ .../energy/metagenomics/timeseries_config.ini | 3 + .../metagenomics/timestamping_config.ini | 7 + .../report_generator_config.ini | 31 ++ .../microbenchmarks/timeseries_config.ini | 3 + .../microbenchmarks/timestamping_config.ini | 7 + .../streaming/report_generator_config.ini | 31 ++ .../energy/streaming/timeseries_config.ini | 3 + .../energy/streaming/timestamping_config.ini | 7 + .../report_generator_config_example.ini | 21 + .../report_generator_config_hadoop.ini | 32 ++ .../report_generator_config_spark.ini | 32 ++ .../report_generator_config_streaming.ini | 32 ++ .../concurrent/report_generator_config.ini | 21 + .../concurrent/timestamping_config.ini | 7 + .../hybrid/report_generator_config.ini | 18 + .../hybrid/timestamping_config.ini | 7 + .../report_generator_config.ini | 18 + .../microbenchmarks/timestamping_config.ini | 7 + .../streaming/report_generator_config.ini | 18 + .../streaming/timestamping_config.ini | 7 + conf/timeseries_config.ini | 3 + conf/timestamping_config.ini | 7 + requirements.txt | 6 + scripts/convert_to_png.sh | 42 ++ scripts/energy/generate_metagenomics.sh | 70 +++ scripts/energy/generate_microbenchmarks.sh | 56 +++ scripts/energy/generate_streaming.sh | 19 + scripts/generate_example.sh | 3 + scripts/generate_report.sh | 19 + scripts/installation/install-dependencies.sh | 2 + .../serverless/generate_microbenchmarks.sh | 3 + scripts/serverless/generate_serverless.sh | 26 ++ scripts/serverless/generate_streaming.sh | 3 + set_pythonpath.sh | 4 + src/__init__.py | 0 src/opentsdb/__init__.py | 0 src/opentsdb/bdwatchdog.py | 161 +++++++ src/plotting/__init__.py | 0 src/plotting/barplots.py | 364 ++++++++++++++++ src/plotting/timeseries_plots.py | 321 ++++++++++++++ src/plotting/utils.py | 279 ++++++++++++ src/report_generator.py | 64 +++ src/reporting/ExperimentReporter.py | 166 +++++++ src/reporting/TestReporter.py | 403 +++++++++++++++++ src/reporting/__init__.py | 0 src/reporting/config.py | 406 ++++++++++++++++++ src/reporting/latex_output.py | 49 +++ src/reporting/utils.py | 335 +++++++++++++++ templates/simple_report.template | 360 ++++++++++++++++ 54 files changed, 3548 insertions(+) create mode 100644 .gitignore create mode 100755 README.md create mode 100755 __init__.py create mode 100755 conf/report_generator_config.ini create mode 100755 conf/templates/energy/metagenomics/report_generator_config.ini create mode 100755 conf/templates/energy/metagenomics/timeseries_config.ini create mode 100755 conf/templates/energy/metagenomics/timestamping_config.ini create mode 100755 conf/templates/energy/microbenchmarks/report_generator_config.ini create mode 100755 conf/templates/energy/microbenchmarks/timeseries_config.ini create mode 100755 conf/templates/energy/microbenchmarks/timestamping_config.ini create mode 100755 conf/templates/energy/streaming/report_generator_config.ini create mode 100755 conf/templates/energy/streaming/timeseries_config.ini create mode 100755 conf/templates/energy/streaming/timestamping_config.ini create mode 100755 conf/templates/serverless/report_generator_config_example.ini create mode 100755 conf/templates/serverless/report_generator_config_hadoop.ini create mode 100755 
conf/templates/serverless/report_generator_config_spark.ini create mode 100755 conf/templates/serverless/report_generator_config_streaming.ini create mode 100755 conf/templates/serverless_revision/concurrent/report_generator_config.ini create mode 100755 conf/templates/serverless_revision/concurrent/timestamping_config.ini create mode 100755 conf/templates/serverless_revision/hybrid/report_generator_config.ini create mode 100755 conf/templates/serverless_revision/hybrid/timestamping_config.ini create mode 100755 conf/templates/serverless_revision/microbenchmarks/report_generator_config.ini create mode 100755 conf/templates/serverless_revision/microbenchmarks/timestamping_config.ini create mode 100755 conf/templates/serverless_revision/streaming/report_generator_config.ini create mode 100755 conf/templates/serverless_revision/streaming/timestamping_config.ini create mode 100755 conf/timeseries_config.ini create mode 100755 conf/timestamping_config.ini create mode 100755 requirements.txt create mode 100755 scripts/convert_to_png.sh create mode 100755 scripts/energy/generate_metagenomics.sh create mode 100755 scripts/energy/generate_microbenchmarks.sh create mode 100755 scripts/energy/generate_streaming.sh create mode 100755 scripts/generate_example.sh create mode 100755 scripts/generate_report.sh create mode 100644 scripts/installation/install-dependencies.sh create mode 100755 scripts/serverless/generate_microbenchmarks.sh create mode 100755 scripts/serverless/generate_serverless.sh create mode 100755 scripts/serverless/generate_streaming.sh create mode 100755 set_pythonpath.sh create mode 100755 src/__init__.py create mode 100755 src/opentsdb/__init__.py create mode 100755 src/opentsdb/bdwatchdog.py create mode 100755 src/plotting/__init__.py create mode 100755 src/plotting/barplots.py create mode 100755 src/plotting/timeseries_plots.py create mode 100755 src/plotting/utils.py create mode 100755 src/report_generator.py create mode 100755 src/reporting/ExperimentReporter.py create mode 100755 src/reporting/TestReporter.py create mode 100755 src/reporting/__init__.py create mode 100755 src/reporting/config.py create mode 100755 src/reporting/latex_output.py create mode 100755 src/reporting/utils.py create mode 100755 templates/simple_report.template diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0f3985d --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +/pandoc_reports/* +.pyc +**/__pycache__/** +.idea/ diff --git a/README.md b/README.md new file mode 100755 index 0000000..8b96a6f --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# Report Generator diff --git a/__init__.py b/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/conf/report_generator_config.ini b/conf/report_generator_config.ini new file mode 100755 index 0000000..25cc0d1 --- /dev/null +++ b/conf/report_generator_config.ini @@ -0,0 +1,30 @@ +[DEFAULT] +PRINT_NODE_INFO = false +MAX_DIFF_TIME = 9 +PRINT_MISSING_INFO_REPORT = true +PRINT_TEST_BASIC_INFORMATION = true + +GENERATE_EXPERIMENT_PLOT = true +GENERATE_APP_PLOTS = true +GENERATE_NODES_PLOTS = false + +NODES_LIST = "aux0,aux1,pre0,pre1,pre2,pre3,comp0,comp1,comp2,comp3,comp4,comp5,comp6,comp7,comp8,comp9" +APPS_LIST = "aux_user0,pre_user0,comp_user0" + +#STATIC_LIMITS = false +STATIC_LIMITS = true +XLIM =aux_user0:3300,pre_user0:3300,comp_user0:1700 +#2500 +YLIM = "cpu:default:15000,energy:default:600,cpu:aux_user0:1800,energy:aux_user0:200,cpu:pre_user0:3500,energy:pre_user0:300,cpu:comp_user0:9000,energy:comp_user0:600" +XTICKS_STEP = 
400 + +REPORTED_RESOURCES = "cpu,energy" +EXPERIMENT_TYPE = "energy" + +#PLOTTING_FORMATS= png +PLOTTING_FORMATS = svg +#PLOTTING_FORMATS = "png,svg" + +PRINT_ENERGY_MAX=true + +DOWNSAMPLE = 20 diff --git a/conf/templates/energy/metagenomics/report_generator_config.ini b/conf/templates/energy/metagenomics/report_generator_config.ini new file mode 100755 index 0000000..2fd9249 --- /dev/null +++ b/conf/templates/energy/metagenomics/report_generator_config.ini @@ -0,0 +1,30 @@ +[DEFAULT] +PRINT_NODE_INFO = false +MAX_DIFF_TIME = 9 +PRINT_MISSING_INFO_REPORT = true +PRINT_TEST_BASIC_INFORMATION = true + +GENERATE_EXPERIMENT_PLOT = true +GENERATE_APP_PLOTS = true +GENERATE_NODES_PLOTS = false + +NODES_LIST = "aux0,aux1,pre0,pre1,pre2,pre3,comp0,comp1,comp2,comp3,comp4,comp5,comp6,comp7,comp8,comp9" +APPS_LIST = "aux_user0,pre_user0,comp_user0" + +#STATIC_LIMITS = false +STATIC_LIMITS = true +__XLIM__ ={XLIM} +#2500 +YLIM = "cpu:default:15000,energy:default:600,cpu:aux_user0:1800,energy:aux_user0:200,cpu:pre_user0:3500,energy:pre_user0:300,cpu:comp_user0:9000,energy:comp_user0:600" +XTICKS_STEP = 400 + +REPORTED_RESOURCES = "cpu,energy" +EXPERIMENT_TYPE = "energy" + +#PLOTTING_FORMATS= png +PLOTTING_FORMATS = svg +#PLOTTING_FORMATS = "png,svg" + +__PRINT_ENERGY_MAX__={PRINT_ENERGY_MAX} + +DOWNSAMPLE = 20 diff --git a/conf/templates/energy/metagenomics/timeseries_config.ini b/conf/templates/energy/metagenomics/timeseries_config.ini new file mode 100755 index 0000000..c7bcdb4 --- /dev/null +++ b/conf/templates/energy/metagenomics/timeseries_config.ini @@ -0,0 +1,3 @@ +[DEFAULT] +OPENTSDB_IP = 192.168.50.61 +OPENTSDB_PORT = 4242 \ No newline at end of file diff --git a/conf/templates/energy/metagenomics/timestamping_config.ini b/conf/templates/energy/metagenomics/timestamping_config.ini new file mode 100755 index 0000000..bc99da2 --- /dev/null +++ b/conf/templates/energy/metagenomics/timestamping_config.ini @@ -0,0 +1,7 @@ +[DEFAULT] +MONGODB_IP = 192.168.50.61 +MONGODB_PORT = 8000 +MONGODB_USER = root +TESTS_DATABASE_NAME = tests +EXPERIMENTS_DATABASE_NAME = experiments +MAX_CONNECTION_TRIES = 10 \ No newline at end of file diff --git a/conf/templates/energy/microbenchmarks/report_generator_config.ini b/conf/templates/energy/microbenchmarks/report_generator_config.ini new file mode 100755 index 0000000..05b832d --- /dev/null +++ b/conf/templates/energy/microbenchmarks/report_generator_config.ini @@ -0,0 +1,31 @@ +[DEFAULT] +PRINT_NODE_INFO = false +MAX_DIFF_TIME = 9 +PRINT_MISSING_INFO_REPORT = true +PRINT_TEST_BASIC_INFORMATION = true + +GENERATE_EXPERIMENT_PLOT = false +GENERATE_APP_PLOTS = true +GENERATE_NODES_PLOTS = false + +NODES_LIST = "node0,node1,node2,node3,node4,node5,node6,node7,node8,node9,node10,node11,node12,node13,node14,node15,node16,node17,node18,node19,node20,node21,node22,node23,node24,node25,node26,node27,node28,node29,node30,node31" +APPS_LIST = "app1" + +#STATIC_LIMITS = false +STATIC_LIMITS = true +__XLIM__ ={XLIM} +#2500 +YLIM = "cpu:default:25000,energy:default:2000" +__XTICKS_STEP__ = {XTICKS_STEP} + +REPORTED_RESOURCES = "cpu,energy" +EXPERIMENT_TYPE = "energy" + +#PLOTTING_FORMATS= png +PLOTTING_FORMATS = svg +#PLOTTING_FORMATS = "png,svg" + +__PRINT_ENERGY_MAX__={PRINT_ENERGY_MAX} + +#DOWNSAMPLE = 10 +__DOWNSAMPLE__ = {DOWNSAMPLE} \ No newline at end of file diff --git a/conf/templates/energy/microbenchmarks/timeseries_config.ini b/conf/templates/energy/microbenchmarks/timeseries_config.ini new file mode 100755 index 0000000..c7bcdb4 --- /dev/null +++ 
b/conf/templates/energy/microbenchmarks/timeseries_config.ini @@ -0,0 +1,3 @@ +[DEFAULT] +OPENTSDB_IP = 192.168.50.61 +OPENTSDB_PORT = 4242 \ No newline at end of file diff --git a/conf/templates/energy/microbenchmarks/timestamping_config.ini b/conf/templates/energy/microbenchmarks/timestamping_config.ini new file mode 100755 index 0000000..bc99da2 --- /dev/null +++ b/conf/templates/energy/microbenchmarks/timestamping_config.ini @@ -0,0 +1,7 @@ +[DEFAULT] +MONGODB_IP = 192.168.50.61 +MONGODB_PORT = 8000 +MONGODB_USER = root +TESTS_DATABASE_NAME = tests +EXPERIMENTS_DATABASE_NAME = experiments +MAX_CONNECTION_TRIES = 10 \ No newline at end of file diff --git a/conf/templates/energy/streaming/report_generator_config.ini b/conf/templates/energy/streaming/report_generator_config.ini new file mode 100755 index 0000000..e4432a2 --- /dev/null +++ b/conf/templates/energy/streaming/report_generator_config.ini @@ -0,0 +1,31 @@ +[DEFAULT] +PRINT_NODE_INFO = false +MAX_DIFF_TIME = 9 +PRINT_MISSING_INFO_REPORT = true +PRINT_TEST_BASIC_INFORMATION = true + +GENERATE_EXPERIMENT_PLOT = true +GENERATE_USER_PLOTS = true +GENERATE_APP_PLOTS = false +GENERATE_NODES_PLOTS = false + + +NODES_LIST = "master0,master1,hibench0,hibench0,kafka0,kafka1,kafka2,kafka3,slave0,slave1,slave2,slave3,slave4,slave5,slave6,slave7,slave8,slave9" +APPS_LIST = "hibenches_user0,kafkas_user0,spark_user0,hibenches_user1,kafkas_user1,spark_user1" +USERS_LIST = "user0,user1" + +#STATIC_LIMITS = false +STATIC_LIMITS = true +__XLIM__ ={XLIM} +__YLIM__ = {YLIM} +__XTICKS_STEP__ = {XTICKS_STEP} +REPORTED_RESOURCES = cpu,energy +EXPERIMENT_TYPE = energy + +#PLOTTING_FORMATS= png +PLOTTING_FORMATS = svg +#PLOTTING_FORMATS = "png,svg" + +PRINT_ENERGY_MAX = true + +DOWNSAMPLE = 20 diff --git a/conf/templates/energy/streaming/timeseries_config.ini b/conf/templates/energy/streaming/timeseries_config.ini new file mode 100755 index 0000000..c7bcdb4 --- /dev/null +++ b/conf/templates/energy/streaming/timeseries_config.ini @@ -0,0 +1,3 @@ +[DEFAULT] +OPENTSDB_IP = 192.168.50.61 +OPENTSDB_PORT = 4242 \ No newline at end of file diff --git a/conf/templates/energy/streaming/timestamping_config.ini b/conf/templates/energy/streaming/timestamping_config.ini new file mode 100755 index 0000000..8d18e0a --- /dev/null +++ b/conf/templates/energy/streaming/timestamping_config.ini @@ -0,0 +1,7 @@ +[DEFAULT] +MONGODB_IP = 192.168.50.61 +MONGODB_PORT = 8000 +MONGODB_USER = root +TESTS_DATABASE_NAME = tests +EXPERIMENTS_DATABASE_NAME = experiments +MAX_CONNECTION_TRIES = 10 diff --git a/conf/templates/serverless/report_generator_config_example.ini b/conf/templates/serverless/report_generator_config_example.ini new file mode 100755 index 0000000..9f16922 --- /dev/null +++ b/conf/templates/serverless/report_generator_config_example.ini @@ -0,0 +1,21 @@ +[DEFAULT] +MONGODB_IP = mongodb +MONGODB_PORT = 8000 +TESTS_DATABASE_NAME = tests +EXPERIMENTS_DATABASE_NAME = experiments +MAX_CONNECTION_TRIES = 10 +PRINT_NODE_INFO = false +MAX_DIFF_TIME = 9 +PRINT_MISSING_INFO_REPORT = true +PRINT_TEST_BASIC_INFORMATION = false + +GENERATE_APP_PLOTS = true +GENERATE_NODES_PLOTS = true + +ADD_APP_PLOTS_TO_REPORT = false +ADD_NODES_PLOTS_TO_REPORT = false +NODES_LIST = "node1,node2,node3,node4,node5,node6,node7,node8,node9,node10,node11" + + +NUM_BASE_EXPERIMENTS = 2 +TEST_TYPE_STEPPING = 2 diff --git a/conf/templates/serverless/report_generator_config_hadoop.ini b/conf/templates/serverless/report_generator_config_hadoop.ini new file mode 100755 index 0000000..a5ee612 --- 
/dev/null +++ b/conf/templates/serverless/report_generator_config_hadoop.ini @@ -0,0 +1,32 @@ +[DEFAULT] +MONGODB_IP = mongodb +MONGODB_PORT = 8000 +TESTS_DATABASE_NAME = tests +EXPERIMENTS_DATABASE_NAME = experiments +MAX_CONNECTION_TRIES = 10 +PRINT_NODE_INFO = false +MAX_DIFF_TIME = 9 +PRINT_MISSING_INFO_REPORT = true +PRINT_TEST_BASIC_INFORMATION = false + +GENERATE_APP_PLOTS = true +GENERATE_NODES_PLOTS = true + +ADD_APP_PLOTS_TO_REPORT = false +ADD_NODES_PLOTS_TO_REPORT = false +NODES_LIST = "node1,node2,node3,node4,node5,node6,node7,node8,node9,node10,node11" + +#NUM_BASE_EXPERIMENTS = 2 +#TEST_TYPE_STEPPING = 2 + +# HADOOP +NUM_BASE_EXPERIMENTS = 7 +TEST_TYPE_STEPPING = 11 + +# SPARK +#NUM_BASE_EXPERIMENTS = 19 +#TEST_TYPE_STEPPING = 19 + +# STREAMING +#NUM_BASE_EXPERIMENTS = 4 +#TEST_TYPE_STEPPING = 4 diff --git a/conf/templates/serverless/report_generator_config_spark.ini b/conf/templates/serverless/report_generator_config_spark.ini new file mode 100755 index 0000000..cf73110 --- /dev/null +++ b/conf/templates/serverless/report_generator_config_spark.ini @@ -0,0 +1,32 @@ +[DEFAULT] +MONGODB_IP = mongodb +MONGODB_PORT = 8000 +TESTS_DATABASE_NAME = tests +EXPERIMENTS_DATABASE_NAME = experiments +MAX_CONNECTION_TRIES = 10 +PRINT_NODE_INFO = false +MAX_DIFF_TIME = 9 +PRINT_MISSING_INFO_REPORT = true +PRINT_TEST_BASIC_INFORMATION = false + +GENERATE_APP_PLOTS = true +GENERATE_NODES_PLOTS = true + +ADD_APP_PLOTS_TO_REPORT = false +ADD_NODES_PLOTS_TO_REPORT = false +NODES_LIST = "node1,node2,node3,node4,node5,node6,node7,node8,node9,node10,node11" + +#NUM_BASE_EXPERIMENTS = 2 +#TEST_TYPE_STEPPING = 2 + +# HADOOP +#NUM_BASE_EXPERIMENTS = 8 +#TEST_TYPE_STEPPING = 13 + +# SPARK +NUM_BASE_EXPERIMENTS = 19 +TEST_TYPE_STEPPING = 19 + +# STREAMING +#NUM_BASE_EXPERIMENTS = 4 +#TEST_TYPE_STEPPING = 4 diff --git a/conf/templates/serverless/report_generator_config_streaming.ini b/conf/templates/serverless/report_generator_config_streaming.ini new file mode 100755 index 0000000..1a72b7e --- /dev/null +++ b/conf/templates/serverless/report_generator_config_streaming.ini @@ -0,0 +1,32 @@ +[DEFAULT] +MONGODB_IP = mongodb +MONGODB_PORT = 8000 +TESTS_DATABASE_NAME = tests +EXPERIMENTS_DATABASE_NAME = experiments +MAX_CONNECTION_TRIES = 10 +PRINT_NODE_INFO = false +MAX_DIFF_TIME = 9 +PRINT_MISSING_INFO_REPORT = true +PRINT_TEST_BASIC_INFORMATION = false + +GENERATE_APP_PLOTS = true +GENERATE_NODES_PLOTS = true + +ADD_APP_PLOTS_TO_REPORT = false +ADD_NODES_PLOTS_TO_REPORT = false +NODES_LIST = "node1,node2,node3,node4,node5,node6,node7,node8,node9" + +#NUM_BASE_EXPERIMENTS = 2 +#TEST_TYPE_STEPPING = 2 + +# HADOOP +#NUM_BASE_EXPERIMENTS = 8 +#TEST_TYPE_STEPPING = 13 + +# SPARK +#NUM_BASE_EXPERIMENTS = 19 +#TEST_TYPE_STEPPING = 19 + +# STREAMING +NUM_BASE_EXPERIMENTS = 4 +TEST_TYPE_STEPPING = 4 diff --git a/conf/templates/serverless_revision/concurrent/report_generator_config.ini b/conf/templates/serverless_revision/concurrent/report_generator_config.ini new file mode 100755 index 0000000..792c3d6 --- /dev/null +++ b/conf/templates/serverless_revision/concurrent/report_generator_config.ini @@ -0,0 +1,21 @@ +[DEFAULT] +PRINT_NODE_INFO = false +MAX_DIFF_TIME = 9 +PRINT_MISSING_INFO_REPORT = true +PRINT_TEST_BASIC_INFORMATION = true + +GENERATE_EXPERIMENT_PLOT = true +GENERATE_APP_PLOTS = true +GENERATE_NODES_PLOTS = false + +NODES_LIST = 
"node0,node1,node2,node3,node4,node5,node6,node7,node8,node9,node10,node11,node12,node13,node14,node15,node16,node17,node18,node19,node20,node21,node22,node23,node24,node25,node26,node27,node28,node29,node30,node31" +APPS_LIST = "app1" + +STATIC_LIMITS = true +XLIM = "app1:1900" +YLIM = "cpu:default:20000" +XTICKS_STEP = 200 +REPORTED_RESOURCES = "cpu" +EXPERIMENT_TYPE = "serverless" +PLOTTING_FORMATS="png,svg" + diff --git a/conf/templates/serverless_revision/concurrent/timestamping_config.ini b/conf/templates/serverless_revision/concurrent/timestamping_config.ini new file mode 100755 index 0000000..e0157af --- /dev/null +++ b/conf/templates/serverless_revision/concurrent/timestamping_config.ini @@ -0,0 +1,7 @@ +[DEFAULT] +MONGODB_IP = times-hybrid +MONGODB_PORT = 8000 +MONGODB_USER = jonatan +TESTS_POST_ENDPOINT = tests +EXPERIMENTS_POST_ENDPOINT = experiments +MAX_CONNECTION_TRIES = 10 \ No newline at end of file diff --git a/conf/templates/serverless_revision/hybrid/report_generator_config.ini b/conf/templates/serverless_revision/hybrid/report_generator_config.ini new file mode 100755 index 0000000..9e18de5 --- /dev/null +++ b/conf/templates/serverless_revision/hybrid/report_generator_config.ini @@ -0,0 +1,18 @@ +[DEFAULT] +PRINT_NODE_INFO = false +MAX_DIFF_TIME = 9 +PRINT_MISSING_INFO_REPORT = true +PRINT_TEST_BASIC_INFORMATION = true + +GENERATE_EXPERIMENT_PLOT = true +GENERATE_APP_PLOTS = true +GENERATE_NODES_PLOTS = false + +NODES_LIST = "node0,node1,node2,node3,node4,node5,node6,node7,node8,node9,node10,node11,node12,node13,node14,node15,node16,node17,node18,node19,node20,node21,node22,node23,node24,node25,node26,node27,node28,node29,node30,node31" +APPS_LIST = "app1" + +STATIC_LIMITS = true +XLIM = "app1:3400" +YLIM = "cpu:default:20000" +XTICKS_STEP = 400 +REPORTED_RESOURCES = "cpu" diff --git a/conf/templates/serverless_revision/hybrid/timestamping_config.ini b/conf/templates/serverless_revision/hybrid/timestamping_config.ini new file mode 100755 index 0000000..e0157af --- /dev/null +++ b/conf/templates/serverless_revision/hybrid/timestamping_config.ini @@ -0,0 +1,7 @@ +[DEFAULT] +MONGODB_IP = times-hybrid +MONGODB_PORT = 8000 +MONGODB_USER = jonatan +TESTS_POST_ENDPOINT = tests +EXPERIMENTS_POST_ENDPOINT = experiments +MAX_CONNECTION_TRIES = 10 \ No newline at end of file diff --git a/conf/templates/serverless_revision/microbenchmarks/report_generator_config.ini b/conf/templates/serverless_revision/microbenchmarks/report_generator_config.ini new file mode 100755 index 0000000..fe80b64 --- /dev/null +++ b/conf/templates/serverless_revision/microbenchmarks/report_generator_config.ini @@ -0,0 +1,18 @@ +[DEFAULT] +PRINT_NODE_INFO = false +MAX_DIFF_TIME = 9 +PRINT_MISSING_INFO_REPORT = true +PRINT_TEST_BASIC_INFORMATION = true + +GENERATE_EXPERIMENT_PLOT = false +GENERATE_APP_PLOTS = true +GENERATE_NODES_PLOTS = false + +NODES_LIST = "node0,node1,node2,node3,node4,node5,node6,node7,node8,node9,node10,node11,node12,node13,node14,node15,node16,node17,node18,node19,node20,node21,node22,node23,node24,node25,node26,node27,node28,node29,node30,node31" +APPS_LIST = "app1" + +STATIC_LIMITS = true +XLIM = "app1:1350" +YLIM = "cpu:default:20000" +XTICKS_STEP = 100 +REPORTED_RESOURCES = "cpu" diff --git a/conf/templates/serverless_revision/microbenchmarks/timestamping_config.ini b/conf/templates/serverless_revision/microbenchmarks/timestamping_config.ini new file mode 100755 index 0000000..e0157af --- /dev/null +++ b/conf/templates/serverless_revision/microbenchmarks/timestamping_config.ini 
@@ -0,0 +1,7 @@ +[DEFAULT] +MONGODB_IP = times-hybrid +MONGODB_PORT = 8000 +MONGODB_USER = jonatan +TESTS_POST_ENDPOINT = tests +EXPERIMENTS_POST_ENDPOINT = experiments +MAX_CONNECTION_TRIES = 10 \ No newline at end of file diff --git a/conf/templates/serverless_revision/streaming/report_generator_config.ini b/conf/templates/serverless_revision/streaming/report_generator_config.ini new file mode 100755 index 0000000..8b063c1 --- /dev/null +++ b/conf/templates/serverless_revision/streaming/report_generator_config.ini @@ -0,0 +1,18 @@ +[DEFAULT] +PRINT_NODE_INFO = false +MAX_DIFF_TIME = 9 +PRINT_MISSING_INFO_REPORT = true +PRINT_TEST_BASIC_INFORMATION = true + +GENERATE_EXPERIMENT_PLOT = true +GENERATE_APP_PLOTS = true +GENERATE_NODES_PLOTS = false + +NODES_LIST = "slave0,slave1,slave2,slave3,slave4,slave5,slave6,slave7,slave8,slave9,slave10,slave11,slave12,slave13,slave14,slave15,slave16,slave17,slave18,slave19" +APPS_LIST = "slaves,hibench,kafka" + +STATIC_LIMITS = true +XLIM = "slaves:900,hibench:1000,kafka:1000" +YLIM = "cpu:default:14000" +XTICKS_STEP = 100 +REPORTED_RESOURCES = "cpu" diff --git a/conf/templates/serverless_revision/streaming/timestamping_config.ini b/conf/templates/serverless_revision/streaming/timestamping_config.ini new file mode 100755 index 0000000..e0157af --- /dev/null +++ b/conf/templates/serverless_revision/streaming/timestamping_config.ini @@ -0,0 +1,7 @@ +[DEFAULT] +MONGODB_IP = times-hybrid +MONGODB_PORT = 8000 +MONGODB_USER = jonatan +TESTS_POST_ENDPOINT = tests +EXPERIMENTS_POST_ENDPOINT = experiments +MAX_CONNECTION_TRIES = 10 \ No newline at end of file diff --git a/conf/timeseries_config.ini b/conf/timeseries_config.ini new file mode 100755 index 0000000..c7bcdb4 --- /dev/null +++ b/conf/timeseries_config.ini @@ -0,0 +1,3 @@ +[DEFAULT] +OPENTSDB_IP = 192.168.50.61 +OPENTSDB_PORT = 4242 \ No newline at end of file diff --git a/conf/timestamping_config.ini b/conf/timestamping_config.ini new file mode 100755 index 0000000..bc99da2 --- /dev/null +++ b/conf/timestamping_config.ini @@ -0,0 +1,7 @@ +[DEFAULT] +MONGODB_IP = 192.168.50.61 +MONGODB_PORT = 8000 +MONGODB_USER = root +TESTS_DATABASE_NAME = tests +EXPERIMENTS_DATABASE_NAME = experiments +MAX_CONNECTION_TRIES = 10 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100755 index 0000000..1143968 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +pandas +matplotlib +numpy +requests +configparser +tabulate \ No newline at end of file diff --git a/scripts/convert_to_png.sh b/scripts/convert_to_png.sh new file mode 100755 index 0000000..b918fa0 --- /dev/null +++ b/scripts/convert_to_png.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -e +DPI=300 +PARALLELL_DEGREE=4 + +function convert { + i=0 + for filename in $(find *.svg 2> /dev/null); do + new_file=$(basename $filename .svg) + + # No parallel option + #inkscape -z -e $new_file.png -d $DPI $filename -D &> /dev/null + + # Parallelized + inkscape -z -e $new_file.png -d $DPI $filename -D &> /dev/null & + pids[${i}]=$! + i=$(($i+1)) + if (( $i % $PARALLELL_DEGREE == 0 )) + then + for pid in ${pids[*]}; do + wait $pid + done + fi + done + for pid in ${pids[*]}; do + wait $pid + done +} + +function folder_convert { + for d in $1_*/ ; do + echo "Converting figures in $d to $DPI dpi" + cd $d + convert + cd .. 
+ done + +} + +folder_convert pagerank + +echo "FINISHED" diff --git a/scripts/energy/generate_metagenomics.sh b/scripts/energy/generate_metagenomics.sh new file mode 100755 index 0000000..ca4d984 --- /dev/null +++ b/scripts/energy/generate_metagenomics.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash + +function baseline1 { + cp conf/templates/energy/metagenomics/report_generator_config.ini conf/report_generator_config.ini + sed -i 's/__XLIM__/XLIM/g' conf/report_generator_config.ini + sed -i 's/{XLIM}/aux_user0:3300,pre_user0:3300,comp_user0:1600/g' conf/report_generator_config.ini + sed -i 's/__PRINT_ENERGY_MAX__/PRINT_ENERGY_MAX/g' conf/report_generator_config.ini + sed -i 's/{PRINT_ENERGY_MAX}/false/g' conf/report_generator_config.ini + bash scripts/generate_report.sh 00:00_MG + bash scripts/generate_report.sh 01:00_MG + bash scripts/generate_report.sh 02:00_MG + bash scripts/generate_report.sh 03:00_MG +} + +function serverless1 { + cp conf/templates/energy/metagenomics/report_generator_config.ini conf/report_generator_config.ini + sed -i 's/__XLIM__/XLIM/g' conf/report_generator_config.ini + sed -i 's/{XLIM}/aux_user0:3300,pre_user0:3300,comp_user0:1600/g' conf/report_generator_config.ini + sed -i 's/__PRINT_ENERGY_MAX__/PRINT_ENERGY_MAX/g' conf/report_generator_config.ini + sed -i 's/{PRINT_ENERGY_MAX}/true/g' conf/report_generator_config.ini + bash scripts/generate_report.sh 10:00_MG + bash scripts/generate_report.sh 11:00_MG + bash scripts/generate_report.sh 12:00_MG +} + +function baseline2 { + cp conf/templates/energy/metagenomics/report_generator_config.ini conf/report_generator_config.ini + sed -i 's/__XLIM__/XLIM/g' conf/report_generator_config.ini + sed -i 's/{XLIM}/aux_user0:3300,pre_user0:3300,comp_user0:1700/g' conf/report_generator_config.ini + sed -i 's/__PRINT_ENERGY_MAX__/PRINT_ENERGY_MAX/g' conf/report_generator_config.ini + sed -i 's/{PRINT_ENERGY_MAX}/false/g' conf/report_generator_config.ini + #bash scripts/generate_report.sh 20:00_MG + #bash scripts/generate_report.sh 21:00_MG + #bash scripts/generate_report.sh 22:00_MG + bash scripts/generate_report.sh 23:00_MG + #bash scripts/generate_report.sh 24:00_MG + #bash scripts/generate_report.sh 25:00_MG + #bash scripts/generate_report.sh 26:00_MG + #bash scripts/generate_report.sh 27:00_MG +} + + + +function serverless2 { + cp conf/templates/energy/metagenomics/report_generator_config.ini conf/report_generator_config.ini + sed -i 's/__XLIM__/XLIM/g' conf/report_generator_config.ini + sed -i 's/{XLIM}/aux_user0:3300,pre_user0:3300,comp_user0:1700/g' conf/report_generator_config.ini + sed -i 's/__PRINT_ENERGY_MAX__/PRINT_ENERGY_MAX/g' conf/report_generator_config.ini + sed -i 's/{PRINT_ENERGY_MAX}/true/g' conf/report_generator_config.ini + #bash scripts/generate_report.sh 30:00_MG + #bash scripts/generate_report.sh 31:00_MG + #bash scripts/generate_report.sh 32:00_MG + #bash scripts/generate_report.sh 33:00_MG + #bash scripts/generate_report.sh 34:00_MG + #bash scripts/generate_report.sh 35:00_MG + #bash scripts/generate_report.sh 36:00_MG + bash scripts/generate_report.sh 37:00_MG + bash scripts/generate_report.sh 38:00_MG + #bash scripts/generate_report.sh 39:00_MG + +} + +cp conf/templates/energy/metagenomics/timestamping_config.ini conf/timestamping_config.ini +cp conf/templates/energy/metagenomics/timeseries_config.ini conf/timeseries_config.ini +baseline2 +serverless2 + + + + diff --git a/scripts/energy/generate_microbenchmarks.sh b/scripts/energy/generate_microbenchmarks.sh new file mode 100755 index 0000000..68b6e1a 
--- /dev/null +++ b/scripts/energy/generate_microbenchmarks.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +cp conf/templates/energy/microbenchmarks/timestamping_config.ini conf/timestamping_config.ini +cp conf/templates/energy/microbenchmarks/timeseries_config.ini conf/timeseries_config.ini + +#cp conf/templates/energy/microbenchmarks/report_generator_config.ini conf/report_generator_config.ini +#sed -i 's/__XTICKS_STEP__/XTICKS_STEP/g' conf/report_generator_config.ini +#sed -i 's/{XTICKS_STEP}/200/g' conf/report_generator_config.ini +#sed -i 's/__XLIM__/XLIM/g' conf/report_generator_config.ini +#sed -i 's/{XLIM}/default:2000/g' conf/report_generator_config.ini +#sed -i 's/__PRINT_ENERGY_MAX__/PRINT_ENERGY_MAX/g' conf/report_generator_config.ini +#sed -i 's/{PRINT_ENERGY_MAX}/false/g' conf/report_generator_config.ini +#sed -i 's/{__DOWNSAMPLE__}/DOWNSAMPLE/g' conf/report_generator_config.ini +#sed -i 's/{DOWNSAMPLE}/10/g' conf/report_generator_config.ini +#bash scripts/generate_report.sh 00:00_EPR #CHOSEN +# +# +#cp conf/templates/energy/microbenchmarks/report_generator_config.ini conf/report_generator_config.ini +#sed -i 's/__XTICKS_STEP__/XTICKS_STEP/g' conf/report_generator_config.ini +#sed -i 's/{XTICKS_STEP}/200/g' conf/report_generator_config.ini +#sed -i 's/__XLIM__/XLIM/g' conf/report_generator_config.ini +#sed -i 's/{XLIM}/default:2000/g' conf/report_generator_config.ini +#sed -i 's/__PRINT_ENERGY_MAX__/PRINT_ENERGY_MAX/g' conf/report_generator_config.ini +#sed -i 's/{PRINT_ENERGY_MAX}/true/g' conf/report_generator_config.ini +#sed -i 's/{__DOWNSAMPLE__}/DOWNSAMPLE/g' conf/report_generator_config.ini +#sed -i 's/{DOWNSAMPLE}/10/g' conf/report_generator_config.ini +##bash scripts/generate_report.sh 01:00_EPR +#bash scripts/generate_report.sh 02:00_EPR #CHOSEN +##bash scripts/generate_report.sh 03:00_EPR + + +cp conf/templates/energy/microbenchmarks/report_generator_config.ini conf/report_generator_config.ini +sed -i 's/__XTICKS_STEP__/XTICKS_STEP/g' conf/report_generator_config.ini +sed -i 's/{XTICKS_STEP}/200/g' conf/report_generator_config.ini +sed -i 's/__XLIM__/XLIM/g' conf/report_generator_config.ini +sed -i 's/{XLIM}/default:1200/g' conf/report_generator_config.ini +sed -i 's/__PRINT_ENERGY_MAX__/PRINT_ENERGY_MAX/g' conf/report_generator_config.ini +sed -i 's/{PRINT_ENERGY_MAX}/false/g' conf/report_generator_config.ini +sed -i 's/{__DOWNSAMPLE__}/DOWNSAMPLE/g' conf/report_generator_config.ini +sed -i 's/{DOWNSAMPLE}/5/g' conf/report_generator_config.ini +bash scripts/generate_report.sh 00:00_EKM #CHOSEN + +cp conf/templates/energy/microbenchmarks/report_generator_config.ini conf/report_generator_config.ini +sed -i 's/__XTICKS_STEP__/XTICKS_STEP/g' conf/report_generator_config.ini +sed -i 's/{XTICKS_STEP}/200/g' conf/report_generator_config.ini +sed -i 's/__XLIM__/XLIM/g' conf/report_generator_config.ini +sed -i 's/{XLIM}/default:1200/g' conf/report_generator_config.ini +sed -i 's/__PRINT_ENERGY_MAX__/PRINT_ENERGY_MAX/g' conf/report_generator_config.ini +sed -i 's/{PRINT_ENERGY_MAX}/true/g' conf/report_generator_config.ini +sed -i 's/{__DOWNSAMPLE__}/DOWNSAMPLE/g' conf/report_generator_config.ini +sed -i 's/{DOWNSAMPLE}/5/g' conf/report_generator_config.ini +bash scripts/generate_report.sh 01:00_EKM #CHOSEN +#bash scripts/generate_report.sh 02:00_EKM + + + + diff --git a/scripts/energy/generate_streaming.sh b/scripts/energy/generate_streaming.sh new file mode 100755 index 0000000..578d847 --- /dev/null +++ b/scripts/energy/generate_streaming.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash 
+cp conf/templates/energy/streaming/timestamping_config.ini conf/timestamping_config.ini
+cp conf/templates/energy/microbenchmarks/timeseries_config.ini conf/timeseries_config.ini
+
+cp conf/templates/energy/streaming/report_generator_config.ini conf/report_generator_config.ini
+sed -i 's/__XTICKS_STEP__/XTICKS_STEP/g' conf/report_generator_config.ini
+sed -i 's/{XTICKS_STEP}/500/g' conf/report_generator_config.ini
+sed -i 's/__XLIM__/XLIM/g' conf/report_generator_config.ini
+sed -i 's/{XLIM}/default:3700/g' conf/report_generator_config.ini
+sed -i 's/__YLIM__/YLIM/g' conf/report_generator_config.ini
+sed -i 's/{YLIM}/cpu:default:6000,energy:default:900/g' conf/report_generator_config.ini
+
+#bash scripts/generate_report.sh 00:00_ST
+bash scripts/generate_report.sh 01:00_ST #CHOSEN
+#bash scripts/generate_report.sh 02:00_ST
+#bash scripts/generate_report.sh 03:00_ST
+#bash scripts/generate_report.sh 04:00_ST
+#bash scripts/generate_report.sh 05:00_ST
+
diff --git a/scripts/generate_example.sh b/scripts/generate_example.sh
new file mode 100755
index 0000000..d20d02b
--- /dev/null
+++ b/scripts/generate_example.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+cp conf/report_generator_config_example.ini conf/report_generator_config.ini
+bash generate_report.sh 18-12-14-15:13_Hadoop
\ No newline at end of file
diff --git a/scripts/generate_report.sh b/scripts/generate_report.sh
new file mode 100755
index 0000000..bc8740b
--- /dev/null
+++ b/scripts/generate_report.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+export REPORT_GENERATOR_PATH=$HOME/development/ReportGenerator
+source $REPORT_GENERATOR_PATH/set_pythonpath.sh
+echo $PYTHONPATH
+
+echo "Generating report for experiment $1"
+mkdir -p $REPORT_GENERATOR_PATH/pandoc_reports/$1
+cd $REPORT_GENERATOR_PATH/pandoc_reports/$1
+python3 ${REPORT_GENERATOR_PATH}/src/report_generator.py $1 > $1.txt
+if [[ $? -eq 0 ]]
+then
+    pandoc $1.txt --latex-engine=xelatex --variable=fontsize:8pt --number-sections --toc --template $REPORT_GENERATOR_PATH/templates/simple_report.template -o $1.pdf
+    if [[ $? -eq 0 ]]
+    then
+        echo "Successfully generated report"
+    fi
+    rm *.eps
+fi
+cd $REPORT_GENERATOR_PATH
\ No newline at end of file
diff --git a/scripts/installation/install-dependencies.sh b/scripts/installation/install-dependencies.sh
new file mode 100644
index 0000000..dca4726
--- /dev/null
+++ b/scripts/installation/install-dependencies.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+sudo apt install pandoc texlive-xetex
\ No newline at end of file
diff --git a/scripts/serverless/generate_microbenchmarks.sh b/scripts/serverless/generate_microbenchmarks.sh
new file mode 100755
index 0000000..f4f72d3
--- /dev/null
+++ b/scripts/serverless/generate_microbenchmarks.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+cp conf/microbenchmarks.ini conf/report_generator_config.ini
+bash generate_report.sh 99-99-99-99:99_Microbenchmarks
diff --git a/scripts/serverless/generate_serverless.sh b/scripts/serverless/generate_serverless.sh
new file mode 100755
index 0000000..02d864f
--- /dev/null
+++ b/scripts/serverless/generate_serverless.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+cp conf/templates/serverless_revision/microbenchmarks/report_generator_config.ini conf/report_generator_config.ini
+cp conf/templates/serverless_revision/microbenchmarks/timestamping_config.ini conf/timestamping_config.ini
+#bash scripts/generate_report.sh 00:00_PR
+#bash scripts/generate_report.sh 01:00_PR
+#bash scripts/generate_report.sh 03:00_PR
+bash scripts/generate_report.sh 02:00_PR #CHOSEN
+
+cp conf/templates/serverless_revision/streaming/report_generator_config.ini conf/report_generator_config.ini
+cp conf/templates/serverless_revision/streaming/timestamping_config.ini conf/timestamping_config.ini
+#bash scripts/generate_report.sh 00:00_FW
+#bash scripts/generate_report.sh 01:00_FW
+bash scripts/generate_report.sh 02:00_FW #CHOSEN
+
+cp conf/templates/serverless_revision/hybrid/report_generator_config.ini conf/report_generator_config.ini
+cp conf/templates/serverless_revision/hybrid/timestamping_config.ini conf/timestamping_config.ini
+#bash scripts/generate_report.sh 00:00_HYBRID
+#bash scripts/generate_report.sh 01:00_HYBRID
+#bash scripts/generate_report.sh 02:00_HYBRID
+bash scripts/generate_report.sh 03:00_HYBRID #CHOSEN
+
+cp conf/templates/serverless_revision/concurrent/report_generator_config.ini conf/report_generator_config.ini
+cp conf/templates/serverless_revision/concurrent/timestamping_config.ini conf/timestamping_config.ini
+#bash scripts/generate_report.sh 00:00_CONCURRENT
+bash scripts/generate_report.sh 01:00_CONCURRENT #CHOSEN
diff --git a/scripts/serverless/generate_streaming.sh b/scripts/serverless/generate_streaming.sh
new file mode 100755
index 0000000..004cbcd
--- /dev/null
+++ b/scripts/serverless/generate_streaming.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+cp conf/streaming.ini conf/report_generator_config.ini
+bash generate_report.sh 99-99-99-99:99_Streaming
diff --git a/set_pythonpath.sh b/set_pythonpath.sh
new file mode 100755
index 0000000..f8b9339
--- /dev/null
+++ b/set_pythonpath.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+export BDWATCHDOG_PATH=$HOME/development/BDWatchdog
+export REPORT_GENERATOR_PATH=$HOME/development/ReportGenerator
+export PYTHONPATH=$BDWATCHDOG_PATH:$REPORT_GENERATOR_PATH
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/src/opentsdb/__init__.py b/src/opentsdb/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/src/opentsdb/bdwatchdog.py b/src/opentsdb/bdwatchdog.py
new file mode 100755
index 0000000..8adaa67
--- /dev/null
+++ b/src/opentsdb/bdwatchdog.py
@@ -0,0 +1,161 @@
+# Copyright (c) 2019 Universidade da Coruña
+# Authors:
+# - Jonatan Enes [main](jonatan.enes@udc.es, jonatan.enes.alvarez@gmail.com)
+# - Roberto R. Expósito
+# - Juan Touriño
+#
+# This file is part of the BDWatchdog framework, from
+# now on referred to as BDWatchdog.
+#
+# BDWatchdog is free software: you can redistribute it
+# and/or modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, either version 3
+# of the License, or (at your option) any later version.
+#
+# BDWatchdog is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with BDWatchdog. If not, see <http://www.gnu.org/licenses/>.
+import sys
+
+import requests
+import json
+
+
+def eprint(*args, **kwargs):
+    print(*args, file=sys.stderr, **kwargs)
+
+
+class BDWatchdog:
+    NO_METRIC_DATA_DEFAULT_VALUE = 0  # -1
+
+    def __init__(self, config):
+        self.server = "http://{0}:{1}{2}".format(
+            config.getIp(),
+            str(int(config.getPort())),
+            config.getSubdir())
+        self.session = requests.Session()
+
+    def get_points(self, query, tries=3):
+        try:
+            r = self.session.post(self.server + "/api/query",
+                                  data=json.dumps(query),
+                                  headers={'content-type': 'application/json', 'Accept': 'application/json'})
+            if r.status_code == 200:
+                try:
+                    return json.loads(r.text)
+                except json.decoder.JSONDecodeError:
+                    eprint("Error decoding the response from OpenTSDB. Text retrieved is next:")
+                    eprint(r.text)
+                    return []
+            else:
+                r.raise_for_status()
+        except requests.ConnectionError as e:
+            tries -= 1
+            if tries <= 0:
+                raise e
+            else:
+                return self.get_points(query, tries)
+
+    def get_structure_timeseries(self, structure_name, start, end, retrieve_metrics, downsample=5):
+        usages = dict()
+        subquery = list()
+        for metric in retrieve_metrics:
+            metric_name = metric[0]
+            metric_tag = metric[1]
+            usages[metric_name] = dict()
+            subquery.append(dict(aggregator='zimsum', metric=metric_name, tags={metric_tag: structure_name},
+                                 downsample=str(downsample) + "s-avg"))
+
+        query = dict(start=start, end=end, queries=subquery)
+        result = self.get_points(query)
+        if result:
+            for metric in result:
+                dps = metric["dps"]
+                metric_name = metric["metric"]
+                usages[metric_name] = dps
+
+        return usages
+
+    @staticmethod
+    def perform_hysteresis_aggregation(timeseries):
+        hysteresis_count = 0
+        points = list(timeseries.items())
+        if points:
+            # Perform the differentiation
+            previous_time = int(points[0][0])
+            previous_value = points[0][1]
+            for point in points[1:]:
+                time = int(point[0])
+                value = point[1]
+                diff_time = time - previous_time
+                diff_value = abs(value - previous_value)
+                hysteresis_count += diff_value / diff_time
+                previous_time = time
+                previous_value = value
+        return hysteresis_count
+
+    @staticmethod
+    def perform_timeseries_range_apply(timeseries, ymin=0, ymax=None):
+        check_range = True
+        try:
+            if ymin:
+                int(ymin)
+            if ymax:
+                int(ymax)
+            if (not ymax and ymin == 0) or ymin >= ymax:
+                check_range = False
+        except ValueError:
+            check_range = False
+
+        if check_range:
+            points = list(timeseries.items())
+            for point in points:
+                key = point[0]
+                value = point[1]
+                if value > ymax:
+                    timeseries[key] = ymax
+                elif ymin and value < ymin:
+                    timeseries[key] = ymin
+        return timeseries
+
+    @staticmethod
+    def perform_check_for_missing_metric_info(timeseries, max_diff_time=30):
+        misses = list()
+        if timeseries:
+            points = list(timeseries.items())
+            previous_timestamp = int(points[0][0])
+            for point in points[1:]:
+                timestamp = int(point[0])
+                diff_time = timestamp - previous_timestamp
+                if diff_time >= max_diff_time:
+                    misses.append({"time": previous_timestamp, "diff_time": diff_time})
+                previous_timestamp = timestamp
+        return misses
+
+    @staticmethod
+    def perform_structure_metrics_aggregations(start, end, metrics):
+        usages = dict()
+        for metric in metrics:
+            summatory = 0
+            points = list(metrics[metric].items())
+            if points:
+                # Perform the integration through trapezoidal steps
+                previous_time = int(points[0][0])
+                previous_value = points[0][1]
+                for point in points[1:]:
+                    time = int(point[0])
+                    value = point[1]
+                    diff_time = time - previous_time
+                    added_value = value + previous_value
+                    summatory += (added_value / 2) * diff_time
+                    previous_time = time
+                    previous_value = value
+            average = summatory / (end - start)
+            usages[metric] = dict()
+            usages[metric]["AVG"] = average
+            usages[metric]["SUM"] = summatory
+        return usages
diff --git a/src/plotting/__init__.py b/src/plotting/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/src/plotting/barplots.py b/src/plotting/barplots.py
new file mode 100755
index 0000000..07ae05f
--- /dev/null
+++ b/src/plotting/barplots.py
@@ -0,0 +1,364 @@
+# Copyright (c) 2019 Universidade da Coruña
+# Authors:
+# - Jonatan Enes [main](jonatan.enes@udc.es, jonatan.enes.alvarez@gmail.com)
+# - Roberto R. Expósito
+# - Juan Touriño
+#
+# This file is part of the BDWatchdog framework, from
+# now on referred to as BDWatchdog.
+#
+# BDWatchdog is free software: you can redistribute it
+# and/or modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, either version 3
+# of the License, or (at your option) any later version.
+#
+# BDWatchdog is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with BDWatchdog. If not, see <http://www.gnu.org/licenses/>.
+ + +from __future__ import print_function +import pandas as pd +import matplotlib.pyplot as plt + +from src.plotting.utils import translate_test_run_name_by_conf_number, \ + BARPLOTS_FIGURE_SIZE, OVERHEAD_VALUE_SIZE, save_figure, get_y_limit +from src.reporting.config import ReporterConfig +from src.reporting.utils import translate_metric, some_test_has_missing_aggregate_information + +# Get the config +cfg = ReporterConfig() + + +def translate_shares_to_vcore_minutes(bars): + return [x / (100 * 60) for x in bars] + + +def translate_MBseconds_to_GBminutes(bars): + return [x / (1024 * 60) for x in bars] + + +ylabels = {"cpu": "CPU (Vcore-minutes)", "mem": "Memory (GB-minutes)"} +convert_functions = {"cpu": translate_shares_to_vcore_minutes, "mem": translate_MBseconds_to_GBminutes} + + +def save_barplot_figure(figure_name, fig, benchmark_type): + figure_filepath_directory = "resource_barplots/{0}".format(benchmark_type) + save_figure(figure_filepath_directory, figure_name, fig) + + +def plot_tests_resource_usage(tests): + width, height = int(len(tests) / 3), 8 + figure_size = (width, height) + benchmark_type = tests[0]["test_name"].split("_")[0] + resource_list = ["structure.cpu.current", + "structure.cpu.usage", + "structure.mem.current", + "structure.mem.usage", + "structure.energy.max", + "structure.energy.usage"] + + for resource in resource_list: + labels = [] + values_sum, values_avg = [], [] + splits = resource.split(".") + resource_label, resource_metric = splits[1], splits[2] + + for test in tests: + labels.append(test["test_name"].split("_")[1]) + if test["resource_aggregates"] == "n/a": + values_sum.append(0) + values_avg.append(0) + else: + resource_aggregate = test["resource_aggregates"]["ALL"][resource] + if resource_label == "cpu": + values_sum.append(resource_aggregate["SUM"] / (100 * 60)) + values_avg.append(resource_aggregate["AVG"] / 100) + elif resource_label == "mem": + values_sum.append(resource_aggregate["SUM"] / (1024 * 60)) + values_avg.append(resource_aggregate["AVG"] / 1024) + elif resource_label == "energy": + values_sum.append(resource_aggregate["SUM"]) + values_avg.append(resource_aggregate["AVG"]) + else: + values_sum.append(resource_aggregate["SUM"]) + values_avg.append(resource_aggregate["AVG"]) + + # Plot the data + df = pd.DataFrame({'SUM': values_sum, 'AVG': values_avg}, index=labels) + ax = df.plot.bar( + rot=0, + title=[translate_metric(resource), ""], + subplots=True, + figsize=figure_size, + sharex=False) + + # Set the labels + if resource_label == "cpu": + ax[0].set_ylabel("Vcore-minutes") + ax[1].set_ylabel("Vcore-seconds/second") + elif resource_label == "mem": + ax[0].set_ylabel("GB-minutes") + ax[1].set_ylabel("GB-second/second") + elif resource_label == "energy": + ax[0].set_ylabel("Watts·h") + ax[1].set_ylabel("Watts·h/s") + else: + ax[0].set_ylabel("Unknown") + ax[1].set_ylabel("Unknown") + ax[0].set_xlabel("# test-run") + ax[1].set_xlabel("# test-run") + + # Set the Y limits + top, bottom = get_y_limit("resource_usage", max(values_sum), + benchmark_type=benchmark_type, resource_label=resource, static_limits=False) + ax[0].set_ylim(top=top, bottom=bottom) + + # Save the plot + figure_name = "{0}_{1}.{2}".format(resource_label, resource_metric, "svg") + fig = ax[0].get_figure() + save_barplot_figure(figure_name, fig, benchmark_type) + plt.close() + + +def plot_tests_resource_usage_with_stepping(tests, num_base_experiments): + resource_tuples = [("cpu used", "structure.cpu.usage"), + ("cpu allocated", "structure.cpu.current"), + ("mem used", 
"structure.mem.usage"), + ("mem allocated", "structure.mem.current")] + + labels = [] + bars, overheads, base_values = dict(), dict(), dict() + benchmark_type = tests[0]["test_name"].split("_")[0] + + if some_test_has_missing_aggregate_information(tests): + return + + # Compute the overheads for the baseline tests + for resource_tuple in resource_tuples: + resource, usage_metric = resource_tuple + if resource not in bars: + bars[resource] = [] + base_values[resource] = 0 + overheads[resource] = [] + for test in tests[:num_base_experiments]: + base_values[resource] += test["resource_aggregates"]["ALL"][usage_metric]["SUM"] + + base_values[resource] = base_values[resource] / num_base_experiments + bars[resource].append(base_values[resource]) + overheads[resource].append(0) + + configuration = 0 + labels.append(translate_test_run_name_by_conf_number(configuration, benchmark_type)) + configuration += 1 + + # Compute the overheads for the remaining tests using the step (number of same configuration tests) + index = num_base_experiments + step = cfg.TEST_TYPE_STEPPING + while index < len(tests): + for resource_tuple in resource_tuples: + resource, usage_metric = resource_tuple + summary = 0 + for test in tests[index:index + step]: + summary += test["resource_aggregates"]["ALL"][usage_metric]["SUM"] + summary = summary / step + overhead = summary / (base_values[resource]) + overheads[resource].append(str(int((overhead - 1) * 100)) + "%") + bars[resource].append(summary) + + labels.append(translate_test_run_name_by_conf_number(configuration, benchmark_type)) + configuration += 1 + index += step + + for resource in ["cpu", "mem"]: + + # Transform the data + bars["allocated"] = convert_functions[resource](bars["{0} allocated".format(resource)]) + bars["used"] = convert_functions[resource](bars["{0} used".format(resource)]) + + # Plot the data + df = pd.DataFrame({"Allocated": bars["allocated"], 'Used': bars["used"]}, index=labels) + ax = df.plot.bar( + rot=0, + title="", + figsize=BARPLOTS_FIGURE_SIZE, + legend=True) + + # Set the labels + ax.set_ylabel(ylabels[resource]) + if resource == "cpu": + ax.set_xlabel("CPU usage") + elif resource == "mem": + ax.set_xlabel("Memory usage") + else: + ax.set_xlabel("") + + # Set the Y limits + top, bottom = get_y_limit("resource_usage_with_stepping", max(bars["allocated"]), + benchmark_type=benchmark_type, resource_label=resource, + static_limits=cfg.STATIC_LIMITS) + ax.set_ylim(top=top, bottom=bottom) + + # Set the numbers for the used and allocated values + bars_positions = dict() + bars_positions["allocated"] = [x for x in range(len(bars["allocated"]))] + bars_positions["used"] = [x + 0.22 for x in range(len(bars["used"]))] + for i in range(len(bars["used"]))[1:]: + plt.text(x=bars_positions["used"][i], + y=bars["used"][i], + s=overheads["{0} used".format(resource)][i], + size=OVERHEAD_VALUE_SIZE) + plt.text(x=bars_positions["allocated"][i], + y=bars["allocated"][i], + s=overheads["{0} allocated".format(resource)][i], + size=OVERHEAD_VALUE_SIZE) + + # Generate the utilization ratios + bars["utilization"] = [] + for i in range(len(bars["allocated"])): + bars["utilization"].append(int(100 * bars["used"][i] / bars["allocated"][i])) + + # Plot utilization ratios + df = pd.DataFrame({'Utilization': bars["utilization"]}, index=labels) + ax = df['Utilization'].plot( + secondary_y=True, + color='k', + marker='o', + label="Utilization", + legend=True + ) + + # Plot utilization numeric ratios for each point + bars["utilization_string"] = [str(x) + "%" for x in 
bars["utilization"]] # Convert to string labels + ax.set_ylabel('Utilization (%)', style="italic", weight="bold") + plt.ylim(top=100, bottom=0) + + if cfg.STATIC_LIMITS: + if benchmark_type == "terasort": + plt.xlim(left=-0.5, right=4.75) + elif benchmark_type == "fixwindow": + plt.xlim(left=-0.5, right=4.75) + elif benchmark_type == "pagerank": + plt.xlim(left=-0.5, right=1.5) + + else: + plt.xlim(left=-0.5, right=len(bars["utilization_string"])) + + for i in range(len(bars["utilization"])): + plt.text(x=bars_positions["used"][i], + y=bars["utilization"][i], + s=bars["utilization_string"][i], + style="italic", + weight="bold", + size=OVERHEAD_VALUE_SIZE) + + # Save the plot + figure_name = "{0}_{1}.{2}".format(resource, "grouped", "svg") + fig = ax.get_figure() + save_barplot_figure(figure_name, fig, benchmark_type) + plt.close() + + +def plot_tests_times_with_stepping(tests, num_base_experiments, basetime): + overheads, durations, bars, labels = [], [], [], [] + configuration = 0 + benchmark_type = tests[0]["test_name"].split("_")[0] + + if some_test_has_missing_aggregate_information(tests): + return + + labels.append(translate_test_run_name_by_conf_number(configuration, benchmark_type)) + configuration += 1 + + bars.append(basetime) + overheads.append(0) + durations.append(basetime) + + index = num_base_experiments + while index < len(tests): + duration = 0 + for test in tests[index:index + cfg.TEST_TYPE_STEPPING]: + duration += test["duration"] + average_duration = duration / cfg.TEST_TYPE_STEPPING + overhead = str(int((average_duration / basetime - 1) * 100)) + "%" + + durations.append(average_duration) + overheads.append(overhead) + bars.append(average_duration) + labels.append(translate_test_run_name_by_conf_number(configuration, benchmark_type)) + + configuration += 1 + index += cfg.TEST_TYPE_STEPPING + + # Translate from seconds to minutes + bars = [x / 60 for x in bars] + + # Plot the data + df = pd.DataFrame(bars, index=labels) + ax = df.plot.bar( + rot=0, + title="", + figsize=BARPLOTS_FIGURE_SIZE, + legend=False) + + # Set the labels + ax.set_ylabel("Time (minutes)") + ax.set_xlabel("Overhead") + # ax.set_xlabel(translate_benchmark(benchmark_type)) + + # Set the Y limits + top, bottom = get_y_limit("times_with_stepping", max(bars), benchmark_type=benchmark_type, + static_limits=cfg.STATIC_LIMITS) + ax.set_ylim(top=top, bottom=bottom) + + # Label the overheads with a number + bars_positions = [x for x in range(len(bars))] + for i in range(len(bars))[1:]: + plt.text(x=bars_positions[i], # x=bars_positions[i] - 0.15, + y=bars[i], # y=bars[i] + 0.5 + s=overheads[i], + size=OVERHEAD_VALUE_SIZE) + + # Save the plot + figure_name = "{0}_{1}.{2}".format("times", "grouped", "svg") + fig = ax.get_figure() + save_barplot_figure(figure_name, fig, benchmark_type) + plt.close() + + +def plot_tests_times(tests): + labels, durations_seconds, durations_minutes = [], [], [] + width, height = 8, int(len(tests) / 3) + figure_size = (width, height) + benchmark_type = tests[0]["test_name"].split("_")[0] + + for test in tests: + seconds, minutes, overhead = 0, 0, 0 + labels.append(test["test_name"].split("_")[1]) + if test["duration"] != "n/a": + seconds = test["duration"] + minutes = "{:.2f}".format((test["duration"]) / 60) + + durations_seconds.append(seconds) + durations_minutes.append(minutes) + + # Plot the data + df = pd.DataFrame({'time': durations_seconds}, index=labels) + ax = df.plot.barh( + rot=0, + title="Time and overheads", + figsize=figure_size) + + # Set the labels + 
ax.set_ylabel("test-run") + ax.set_xlabel("Time (seconds)") + + # Save the plot + figure_name = "{0}.{1}".format("times", "svg") + fig = ax.get_figure() + save_barplot_figure(figure_name, fig, benchmark_type) + plt.close() diff --git a/src/plotting/timeseries_plots.py b/src/plotting/timeseries_plots.py new file mode 100755 index 0000000..0d7147b --- /dev/null +++ b/src/plotting/timeseries_plots.py @@ -0,0 +1,321 @@ +# Copyright (c) 2019 Universidade da Coruña +# Authors: +# - Jonatan Enes [main](jonatan.enes@udc.es, jonatan.enes.alvarez@gmail.com) +# - Roberto R. Expósito +# - Juan Touriño +# +# This file is part of the BDWatchdog framework, from +# now on referred to as BDWatchdog. +# +# BDWatchdog is free software: you can redistribute it +# and/or modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, either version 3 +# of the License, or (at your option) any later version. +# +# BDWatchdog is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with BDWatchdog. If not, see . + +from __future__ import print_function + +import sys + +import numpy as np +import matplotlib.pyplot as plt + +from src.opentsdb import bdwatchdog +from src.plotting.utils import translate_plot_name_to_ylabel, line_style, dashes_dict, \ + line_marker, save_figure, TIMESERIES_FIGURE_SIZE, LEGEND_FONTSIZE +from src.reporting.config import ReporterConfig, OpenTSDBConfig +from src.reporting.utils import translate_metric + +# Get the config +cfg = ReporterConfig() + +# initialize the OpenTSDB handler +bdwatchdog_handler = bdwatchdog.BDWatchdog(OpenTSDBConfig()) + + +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + + +def plot_document(doc, structure, plots, start_time, end_time, plotted_resources): + if "test_name" in doc: + doc_name = doc["test_name"] + benchmark_type = doc["test_name"].split("_")[0] + else: + doc_name = doc["experiment_id"] + benchmark_type = "EXPERIMENT" + + structure_name, structure_type = structure + + for resource in plots: + if resource not in plotted_resources: + continue + + # Pre-Check for empty plot (no timeseries for all the metrics) + empty_plot = True + for metric in plots[resource]: + metric_name = metric[0] + if metric_name not in doc["resources"][structure_name] or doc["resources"][structure_name][metric_name]: + empty_plot = False + if empty_plot: + eprint("Plot '{0}' for doc {1} has no data, skipping".format(resource, doc_name)) + continue + + fig = plt.figure(figsize=TIMESERIES_FIGURE_SIZE) + ax1 = fig.add_subplot(111) + + # TODO This should be moved to a function "trim" + # Values used for trimming time series if necessary # + check_range, ymin, ymax = False, 0, None + if resource == "disk": + check_range, ymin, ymax = True, 0, 200 + if structure_type == "node": + check_range, ymin, ymax = True, 0, 200 + elif structure_type == "app": + check_range, ymin, ymax = True, 0, 1200 + ##################################################### + + # Values used for setting the X and Y limits, without depending on actual time series values #### + if cfg.STATIC_LIMITS: + if structure_name not in cfg.XLIM: + max_x_ts_point_value = cfg.XLIM["default"] + else: + max_x_ts_point_value = cfg.XLIM[structure_name] + if structure_name not in cfg.YLIM or resource not in 
cfg.YLIM[structure_name]: + max_y_ts_point_value = cfg.YLIM["default"][resource] + else: + max_y_ts_point_value = cfg.YLIM[structure_name][resource] + else: + max_y_ts_point_value, max_x_ts_point_value = 0, 0 + ########################################################### + + for metric in plots[resource]: + metric_name = metric[0] + structure_resources = doc["resources"][structure_name] + + # Get the time series data + if metric_name not in structure_resources or not structure_resources[metric_name]: + continue + + timeseries = structure_resources[metric_name] + + # Apply range trimming if necessary + if check_range: + timeseries = bdwatchdog_handler.perform_timeseries_range_apply(timeseries, ymin, ymax) + + # Convert the time stamps to times relative to 0 (basetime) + basetime = int(list(timeseries.keys())[0]) + x = list(map(lambda point: int(point) - basetime, timeseries)) + + # # TODO HOTFIX + # PADDING = 0 + # if structure_name == "comp_user0": + # if len(plots[resource]) == 1 and ( + # metric_name == "structure.energy.usage" or metric_name == "structure.cpu.usage"): + # x = list(map(lambda point: 2800 + int(point) - basetime, timeseries)) + # PADDING = 2800 + # else: + # x = list(map(lambda point: 3100 + int(point) - basetime, timeseries)) + # PADDING = 3100 + + # Get the time series points and rebase them if necessary + if resource == "mem": + # Translate from MiB to GiB + y = list(map(lambda point: int(int(point) / 1024), timeseries.values())) + else: + y = list(timeseries.values()) + + # Set the maximum time series time and value points + # max_y_ts_point_value = max(max_y_ts_point_value, max(y)) + max_x_ts_point_value = max(max_x_ts_point_value, max(x)) + + # Plot a time series + linestyle = line_style[resource][metric_name] + + if len(plots[resource]) == 1 and metric_name == "structure.energy.usage": + # TODO Hotfix + ax1.plot(x, y, + label=translate_metric(metric_name), + linestyle=linestyle, + dashes=dashes_dict[linestyle], + marker=line_marker[resource][metric_name], + markersize=6, + markevery=5, + color="tab:orange") + else: + ax1.plot(x, y, + label=translate_metric(metric_name), + linestyle=linestyle, + dashes=dashes_dict[linestyle], + marker=line_marker[resource][metric_name], + markersize=6, + markevery=5) + + # Set x and y limits + top, bottom = max_y_ts_point_value, 0 + left, right = -30, max_x_ts_point_value + 30 + + # If not static limits apply an amplification factor or the max timeseries value will be at the plot "ceiling" + if not cfg.STATIC_LIMITS: + top = int(float(top * cfg.Y_AMPLIFICATION_FACTOR)) + + plt.xlim(left=left, right=right) + plt.ylim(top=top, bottom=bottom) + + # Set properties to the whole plot + plt.xlabel('Time(s)', fontsize=11) + #plt.xlabel('Time(s)', fontsize=13) + plt.ylabel(translate_plot_name_to_ylabel(resource), style="italic", weight="bold", fontsize=13) + plt.title('') + plt.grid(True) + plt.legend(loc='upper right', + shadow=False, + fontsize=LEGEND_FONTSIZE, + fancybox=True, + facecolor='#afeeee', + labelspacing=0.15, + handletextpad=0.18, + borderpad=0.22) + + if cfg.STATIC_LIMITS: + plt.xticks(np.arange(0, right, step=cfg.XTICKS_STEP)) + else: + # May be inaccurate up to +- 'downsample' seconds, + # because the data may start a little after the specified 'start' time or end + # a little before the specified 'end' time + plt.xticks(np.arange(0, int(end_time) - int(start_time), step=cfg.XTICKS_STEP)) + + # Save the plot + if "svg" in cfg.PLOTTING_FORMATS: + figure_name = "{0}_{1}.{2}".format(structure_name, resource, "svg") + 
figure_filepath_directory = "{0}/{1}/{2}".format("timeseries_plots", benchmark_type, doc_name) + save_figure(figure_filepath_directory, figure_name, fig, format="svg") + + # Save the plot + if "png" in cfg.PLOTTING_FORMATS: + figure_name = "{0}_{1}.{2}".format(structure_name, resource, "png") + figure_filepath_directory = "{0}/{1}/{2}".format("timeseries_plots", benchmark_type, doc_name) + save_figure(figure_filepath_directory, figure_name, fig, format="png") + plt.close() + + +def plot_user(doc, user, plots, start_time, end_time, plotted_resources): + if "test_name" in doc: + doc_name = doc["test_name"] + benchmark_type = doc["test_name"].split("_")[0] + else: + doc_name = doc["experiment_id"] + benchmark_type = "EXPERIMENT" + + for resource in plots: + if resource not in plotted_resources: + continue + + # Pre-Check for empty plot (no timeseries for all the metrics) + empty_plot = True + for metric in plots[resource]: + metric_name = metric[0] + if metric_name not in doc["users"][user] or doc["users"][user][metric_name]: + empty_plot = False + if empty_plot: + eprint("Plot '{0}' for doc {1} has no data, skipping".format(resource, doc_name)) + continue + + fig = plt.figure(figsize=TIMESERIES_FIGURE_SIZE) + ax1 = fig.add_subplot(111) + + ##################################################### + + # Values used for setting the X and Y limits, without depending on actual time series values #### + max_x_ts_point_value = 0 + if cfg.STATIC_LIMITS: + max_y_ts_point_value = cfg.YLIM["default"][resource] + else: + max_y_ts_point_value = 0 + ########################################################### + + for metric in plots[resource]: + metric_name = metric[0] + user_resources = doc["users"][user] + + # Get the time series data + if metric_name not in user_resources or not user_resources[metric_name]: + continue + + ts = user_resources[metric_name] + timeseries = bdwatchdog_handler.perform_timeseries_range_apply(ts, 0, None) + + # Convert the time stamps to times relative to 0 (basetime) + basetime = int(list(timeseries.keys())[0]) + x = list(map(lambda point: int(point) - basetime, timeseries)) + + y = list(timeseries.values()) + + # Set the maximum time series time and value points + # max_y_ts_point_value = max(max_y_ts_point_value, max(y)) + max_x_ts_point_value = max(max_x_ts_point_value, max(x)) + + # Plot a time series + linestyle = line_style[resource][metric_name] + + ax1.plot(x, y, + label=translate_metric(metric_name), + linestyle=linestyle, + dashes=dashes_dict[linestyle], + marker=line_marker[resource][metric_name], + markersize=6, + markevery=5) + + # Set x and y limits + top, bottom = max_y_ts_point_value, 0 + left, right = -30, max_x_ts_point_value + 30 + + # If not static limits apply an amplification factor or the max timeseries value will be at the plot "ceiling" + if not cfg.STATIC_LIMITS: + top = int(float(top * cfg.Y_AMPLIFICATION_FACTOR)) + + plt.xlim(left=left, right=right) + plt.ylim(top=top, bottom=bottom) + + # Set properties to the whole plot + plt.xlabel('Time(s)', fontsize=11) + #plt.xlabel('Time(s)', fontsize=13) + plt.ylabel(translate_plot_name_to_ylabel(resource), style="italic", weight="bold", fontsize=13) + plt.title('') + plt.grid(True) + plt.legend(loc='upper right', + shadow=False, + fontsize=LEGEND_FONTSIZE, + fancybox=True, + facecolor='#afeeee', + labelspacing=0.15, + handletextpad=0.18, + borderpad=0.22) + + if cfg.STATIC_LIMITS: + plt.xticks(np.arange(0, right, step=cfg.XTICKS_STEP)) + else: + # May be inaccurate up to +- 'downsample' seconds, + # because 
the data may start a little after the specified 'start' time or end + # a little before the specified 'end' time + plt.xticks(np.arange(0, int(end_time) - int(start_time), step=cfg.XTICKS_STEP)) + + # Save the plot + if "svg" in cfg.PLOTTING_FORMATS: + figure_name = "{0}_{1}.{2}".format(user, resource, "svg") + figure_filepath_directory = "{0}/{1}/{2}".format("timeseries_plots", benchmark_type, doc_name) + save_figure(figure_filepath_directory, figure_name, fig, format="svg") + + # Save the plot + if "png" in cfg.PLOTTING_FORMATS: + figure_name = "{0}_{1}.{2}".format(user, resource, "png") + figure_filepath_directory = "{0}/{1}/{2}".format("timeseries_plots", benchmark_type, doc_name) + save_figure(figure_filepath_directory, figure_name, fig, format="png") + plt.close() diff --git a/src/plotting/utils.py b/src/plotting/utils.py new file mode 100755 index 0000000..a3ee46d --- /dev/null +++ b/src/plotting/utils.py @@ -0,0 +1,279 @@ +# Copyright (c) 2019 Universidade da Coruña +# Authors: +# - Jonatan Enes [main](jonatan.enes@udc.es, jonatan.enes.alvarez@gmail.com) +# - Roberto R. Expósito +# - Juan Touriño +# +# This file is part of the BDWatchdog framework, from +# now on referred to as BDWatchdog. +# +# BDWatchdog is free software: you can redistribute it +# and/or modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, either version 3 +# of the License, or (at your option) any later version. +# +# BDWatchdog is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with BDWatchdog. If not, see . 
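For reference, the STATIC_LIMITS branch used by plot_document and plot_user above resolves the axis ceilings from the per-structure XLIM/YLIM dictionaries built by ReporterConfig, falling back to their "default" entries. A minimal sketch of that lookup (the helper name resolve_static_limits is illustrative and not part of the patch):

# Illustrative sketch (not part of the patch): how the STATIC_LIMITS branch of
# plot_document/plot_user picks the axis ceilings once ReporterConfig has parsed
# the XLIM and YLIM configuration strings into dictionaries.
def resolve_static_limits(cfg, structure_name, resource):
    # X limit: per-structure entry, falling back to the "default" entry
    if structure_name in cfg.XLIM:
        x_max = cfg.XLIM[structure_name]
    else:
        x_max = cfg.XLIM["default"]
    # Y limit: per-structure, per-resource entry, falling back to the defaults
    if structure_name in cfg.YLIM and resource in cfg.YLIM[structure_name]:
        y_max = cfg.YLIM[structure_name][resource]
    else:
        y_max = cfg.YLIM["default"][resource]
    return x_max, y_max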
+ + +from __future__ import print_function +import pathlib + +from src.reporting.config import ReporterConfig + +OVERHEAD_VALUE_SIZE = 10 +BARPLOTS_FIGURE_SIZE = (6, 4) +#TIMESERIES_FIGURE_SIZE = (8, 2.5) +TIMESERIES_FIGURE_SIZE = (8, 2.1) +LEGEND_FONTSIZE="medium" + +# Get the config +cfg = ReporterConfig() + +line_marker = { + "cpu": { + "user.cpu.current": "x", + "user.cpu.usage": "o", + "structure.cpu.current": "x", + "structure.cpu.usage": "o", + "limit.cpu.lower": "v", + "limit.cpu.upper": "^", + "proc.cpu.user": "*", + "proc.cpu.kernel": "*"}, + "mem": { + "structure.mem.current": "x", + "structure.mem.usage": "o", + "limit.mem.lower": "v", + "limit.mem.upper": "^", + "proc.mem.resident": "o"}, + "disk": { + "structure.disk.current": "x", + "structure.disk.usage": "*", + "limit.disk.lower": "v", + "limit.disk.upper": "^", + "proc.disk.writes.mb": "*", + "proc.disk.reads.mb": "*"}, + "net": { + "structure.net.current": "x", + "structure.net.usage": "*", + "limit.net.lower": "v", + "limit.net.upper": "^", + "proc.net.tcp.in.mb": "*", + "proc.net.tcp.out.mb": "*"}, + "energy": { + "structure.energy.max": "x", + "structure.energy.usage": "o", + "user.energy.max": "x", + "user.energy.used": "o" + } +} + +dashes_dict = {"-": (1, 0), "--": (5, 7)} +line_style = { + "cpu": { + "user.cpu.usage": "-", + "user.cpu.current": "-", + "structure.cpu.current": "-", + "structure.cpu.usage": "-", + "limit.cpu.lower": "--", + "limit.cpu.upper": "--", + "proc.cpu.user": "-", + "proc.cpu.kernel": "-"}, + "mem": { + "structure.mem.current": "-", + "structure.mem.usage": "-", + "limit.mem.lower": "--", + "limit.mem.upper": "--", + "proc.mem.resident": "-"}, + "disk": { + "structure.disk.current": "-", + "structure.disk.usage": "-", + "limit.disk.lower": "--", + "limit.disk.upper": "--", + "proc.disk.writes.mb": "-", + "proc.disk.reads.mb": "-"}, + "net": { + "structure.net.current": "-", + "structure.net.usage": "-", + "limit.net.lower": "--", + "limit.net.upper": "--", + "proc.net.tcp.in.mb": "-", + "proc.net.tcp.out.mb": "-"}, + "energy": { + "structure.energy.max": "-", + "structure.energy.usage": "-", + "user.energy.max": "-", + "user.energy.used": "-" + } +} + + +def translate_test_run_name_by_conf_number(conf_number, benchmark_type): + workload_map = { + "terasort": {0: "baseline", 1: "cpu_mem", 2: "CPU_mem", 3: "cpu_MEM", 4: "CPU_MEM"}, + "pagerank": {0: "baseline", 1: "CPU_MEM"}, + "fixwindow": {0: "baseline", 1: "cpu_mem", 2: "CPU_mem", 3: "cpu_MEM", 4: "CPU_MEM"} + } + try: + return workload_map[benchmark_type][conf_number] + except KeyError: + return conf_number + + +def get_x_limit(plotting_method, max_x_limit, benchmark_type=None, static_limits=False): + left, right = (-30, max_x_limit) + if static_limits: + if plotting_method == "plot_structure": + left, right = -30, 1000 + if benchmark_type == "terasort": + left, right = (left, 800) + elif benchmark_type == "pagerank": + left, right = (left, 1200) + elif benchmark_type == "fixwindow": + left, right = (left, 1000) + elif benchmark_type == "EXPERIMENT": + pass + + return left, right + + +def get_y_limit(plotting_method, max_y_limit, benchmark_type=None, resource_label=None, static_limits=False): + if static_limits: + if plotting_method == "resource_usage_with_stepping": + top, bottom = 100, 100 + if resource_label == "cpu": + if benchmark_type == "terasort": + top, bottom = (320, 0) + elif benchmark_type == "pagerank": + top, bottom = (500, 0) + elif benchmark_type == "fixwindow": + top, bottom = (350, 0) + elif resource_label == "mem": + if 
benchmark_type == "terasort": + top, bottom = (1600, 0) + elif benchmark_type == "pagerank": + top, bottom = (2600, 0) + elif benchmark_type == "fixwindow": + top, bottom = (2600, 0) + return top, bottom + elif plotting_method == "times_with_stepping": + top, bottom = 100, 100 + if benchmark_type == "terasort": + top, bottom = (15, 0) + elif benchmark_type == "pagerank": + top, bottom = (23, 0) + elif benchmark_type == "fixwindow": + top, bottom = (20, 0) + return top, bottom + + limit = max_y_limit * cfg.Y_AMPLIFICATION_FACTOR + top, bottom = (limit, 0) + return top, bottom + + +def translate_plot_name_to_ylabel(plot_name): + if plot_name == "cpu": + return "CPU (shares)" + elif plot_name == "mem": + return "Memory (GiB)" + elif plot_name == "energy": + return "Energy (J)" + else: + return plot_name + + +def save_figure(figure_filepath_directory, figure_name, figure, format="svg"): + figure_filepath = "{0}/{1}".format(figure_filepath_directory, figure_name) + create_output_directory(figure_filepath_directory) + # figure.savefig(figure_filepath, transparent=True, bbox_inches='tight', pad_inches=0, format=format) + # figure.savefig(figure_filepath, transparent=True, bbox_inches='tight', pad_inches=0, format=format) + figure.savefig(figure_filepath, bbox_inches='tight', pad_inches=0, format=format) + + +def create_output_directory(figure_filepath_directory): + pathlib.Path(figure_filepath_directory).mkdir(parents=True, exist_ok=True) + + +def get_plots(): + plots = dict() + plots["user"] = dict() + + plots["user"]["untreated"] = {"cpu": [], "energy": []} + plots["user"]["energy"] = {"cpu": [], "energy": []} + plots["user"]["serverless"] = {"cpu": [], "energy": []} + + plots["user"]["untreated"]["cpu"] = [('user.cpu.current', 'structure'), ('user.cpu.usage', 'structure')] + plots["user"]["serverless"]["cpu"] = plots["user"]["untreated"]["cpu"] + plots["user"]["energy"]["cpu"] = plots["user"]["untreated"]["cpu"] + + plots["user"]["untreated"]["energy"] = [('user.energy.max', 'user'), ('user.energy.used', 'user')] + plots["user"]["serverless"]["energy"] = plots["user"]["untreated"]["energy"] + plots["user"]["energy"]["energy"] = plots["user"]["untreated"]["energy"] + + plots["app"] = dict() + + plots["app"]["untreated"] = {"cpu": [], "mem": [], "disk": [], "net": [], "energy": []} + plots["app"]["serverless"] = {"cpu": [], "mem": [], "disk": [], "net": [], "energy": []} + plots["app"]["energy"] = {"cpu": [], "mem": [], "disk": [], "net": [], "energy": []} + + plots["app"]["untreated"]["cpu"] = [('structure.cpu.current', 'structure'), ('structure.cpu.usage', 'structure')] + plots["app"]["serverless"]["cpu"] = plots["app"]["untreated"]["cpu"] + plots["app"]["energy"]["cpu"] = plots["app"]["untreated"]["cpu"] + + plots["app"]["untreated"]["mem"] = [('structure.mem.current', 'structure'), ('structure.mem.usage', 'structure')] + plots["app"]["serverless"]["mem"] = plots["app"]["untreated"]["mem"] + plots["app"]["energy"]["mem"] = plots["app"]["untreated"]["mem"] + + plots["app"]["untreated"]["disk"] = [('structure.disk.current', 'structure'), ('structure.disk.usage', 'structure')] + plots["app"]["serverless"]["disk"] = plots["app"]["untreated"]["disk"] + plots["app"]["energy"]["disk"] = plots["app"]["untreated"]["disk"] + + plots["app"]["untreated"]["net"] = [('structure.net.current', 'structure'), ('structure.net.usage', 'structure')] + plots["app"]["serverless"]["net"] = plots["app"]["untreated"]["net"] + plots["app"]["energy"]["net"] = plots["app"]["untreated"]["net"] + + if cfg.PRINT_ENERGY_MAX: 
+ plots["app"]["untreated"]["energy"] = [('structure.energy.max', 'structure')] + plots["app"]["untreated"]["energy"].append(('structure.energy.usage', 'structure')) + plots["app"]["serverless"]["energy"] = plots["app"]["untreated"]["energy"] + plots["app"]["energy"]["energy"] = plots["app"]["untreated"]["energy"] + + plots["node"] = dict() + plots["node"]["untreated"] = {"cpu": [], "mem": [], "disk": [], "net": [], "energy": []} + plots["node"]["untreated"] = {"cpu": [], "mem": [], "disk": [], "net": [], "energy": []} + plots["node"]["serverless"] = {"cpu": [], "mem": [], "disk": [], "net": [], "energy": []} + plots["node"]["energy"] = {"cpu": [], "mem": [], "disk": [], "net": [], "energy": []} + + plots["node"]["untreated"]["cpu"] = [('structure.cpu.current', 'structure'), ('structure.cpu.usage', 'structure') + # ('proc.cpu.user', 'host'),('proc.cpu.kernel', 'host') + ] + plots["node"]["serverless"]["cpu"] = [('structure.cpu.current', 'structure'), ('structure.cpu.usage', 'structure'), + # ('proc.cpu.user', 'host'),('proc.cpu.kernel', 'host'), + ('limit.cpu.lower', 'structure'), ('limit.cpu.upper', 'structure')] + plots["node"]["energy"]["cpu"] = plots["node"]["untreated"]["cpu"] + + plots["node"]["untreated"]["mem"] = [('structure.mem.current', 'structure'), ('structure.mem.usage', 'structure')] + # ('proc.mem.resident', 'host')] + plots["node"]["serverless"]["mem"] = [('structure.mem.current', 'structure'), ('structure.mem.usage', 'structure'), + ('limit.mem.lower', 'structure'), ('limit.mem.upper', 'structure')] + # ('proc.mem.resident', 'host'), + plots["node"]["energy"]["mem"] = plots["node"]["untreated"]["mem"] + + plots["node"]["untreated"]["disk"] = [('structure.disk.current', 'structure'), ('proc.disk.reads.mb', 'host'), + ('proc.disk.writes.mb', 'host')] + plots["node"]["serverless"]["disk"] = plots["node"]["untreated"]["disk"] + plots["node"]["energy"]["disk"] = plots["node"]["untreated"]["disk"] + + plots["node"]["untreated"]["net"] = [('structure.net.current', 'structure'), ('proc.net.tcp.in.mb', 'host'), + ('proc.net.tcp.out.mb', 'host')] + plots["node"]["serverless"]["net"] = plots["node"]["untreated"]["net"] + plots["node"]["energy"]["net"] = plots["node"]["untreated"]["net"] + + plots["node"]["energy"]["energy"] = [('structure.energy.usage', 'structure')] + + return plots diff --git a/src/report_generator.py b/src/report_generator.py new file mode 100755 index 0000000..c52befc --- /dev/null +++ b/src/report_generator.py @@ -0,0 +1,64 @@ +# Copyright (c) 2019 Universidade da Coruña +# Authors: +# - Jonatan Enes [main](jonatan.enes@udc.es, jonatan.enes.alvarez@gmail.com) +# - Roberto R. Expósito +# - Juan Touriño +# +# This file is part of the BDWatchdog framework, from +# now on referred to as BDWatchdog. +# +# BDWatchdog is free software: you can redistribute it +# and/or modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, either version 3 +# of the License, or (at your option) any later version. +# +# BDWatchdog is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with BDWatchdog. If not, see . 
+ + +from __future__ import print_function + +import sys +import time + +from src.reporting.config import MongoDBConfig +from src.reporting.ExperimentReporter import ExperimentReporter +from TimestampsSnitch.src.mongodb.mongodb_agent import MongoDBTimestampAgent + + +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + + +mongoDBConfig = MongoDBConfig() +timestampingAgent = MongoDBTimestampAgent(mongoDBConfig.get_config_as_dict()) +experimentReporter = ExperimentReporter() + + +def report_all_experiments(): + experiments = timestampingAgent.get_all_experiments(mongoDBConfig.get_username()) + if experiments: + for exp in experiments: + time_start = time.time() + experimentReporter.report_experiment(exp) + time_end = time.time() + eprint("Reporting of experiment {0} took {1} seconds".format(exp["experiment_name"], time_end - time_start)) + + +if __name__ == '__main__': + eprint("[INFO] If you are running the 'generate_report.py', remember that the output is markdown for latex generation!!") + eprint("[INFO] To get a nice report in PDF format, run the script 'generate_report.sh' with the same input as this python") + if len(sys.argv) < 2: + print("Must specify an experiment name") + else: + experiment_name = sys.argv[1] + experiment = timestampingAgent.get_experiment(experiment_name, mongoDBConfig.get_username()) + if experiment: + experimentReporter.report_experiment(experiment) + else: + eprint("No experiment '{0}' found".format(experiment_name)) diff --git a/src/reporting/ExperimentReporter.py b/src/reporting/ExperimentReporter.py new file mode 100755 index 0000000..db7ac2d --- /dev/null +++ b/src/reporting/ExperimentReporter.py @@ -0,0 +1,166 @@ +# Copyright (c) 2019 Universidade da Coruña +# Authors: +# - Jonatan Enes [main](jonatan.enes@udc.es, jonatan.enes.alvarez@gmail.com) +# - Roberto R. Expósito +# - Juan Touriño +# +# This file is part of the BDWatchdog framework, from +# now on referred to as BDWatchdog. +# +# BDWatchdog is free software: you can redistribute it +# and/or modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, either version 3 +# of the License, or (at your option) any later version. +# +# BDWatchdog is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with BDWatchdog. If not, see . 
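As a usage note, the entry point above expects an experiment name as its only argument and writes the Markdown/LaTeX report to standard output. A hedged, programmatic sketch equivalent to invoking report_generator.py with an experiment name (the name "exp0" below is hypothetical):

# Hedged sketch: programmatic equivalent of the __main__ block of report_generator.py.
# The experiment name "exp0" is hypothetical; a real name registered through the
# timestamping agent must be used instead.
from src.reporting.config import MongoDBConfig
from src.reporting.ExperimentReporter import ExperimentReporter
from TimestampsSnitch.src.mongodb.mongodb_agent import MongoDBTimestampAgent

mongodb_config = MongoDBConfig()
timestamping_agent = MongoDBTimestampAgent(mongodb_config.get_config_as_dict())
experiment = timestamping_agent.get_experiment("exp0", mongodb_config.get_username())
if experiment:
    ExperimentReporter().report_experiment(experiment)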
+ + +from __future__ import print_function +import sys +import time + +from src.plotting.timeseries_plots import plot_document, plot_user +from src.plotting.utils import get_plots +from src.reporting.config import ReporterConfig, MongoDBConfig + +from src.reporting.latex_output import print_latex_section +from src.reporting.TestReporter import TestReporter +from TimestampsSnitch.src.mongodb.mongodb_agent import MongoDBTimestampAgent +from src.reporting.utils import generate_duration, print_basic_doc_info, split_tests_by_test_type, \ + generate_resources_timeseries + + +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + + +class ExperimentReporter(): + def __init__(self): + self.cfg = ReporterConfig() + mongoDBConfig = MongoDBConfig() + self.timestampingAgent = MongoDBTimestampAgent(mongoDBConfig.get_config_as_dict()) + + def process_experiment(self, exp): + exp = generate_duration(exp) + exp = generate_resources_timeseries(exp, self.cfg) + return exp + + def print_experiment_report(self, exp): + # PRINT EXPERIMENT INFO + print_latex_section("Experiment basic information") + print_basic_doc_info(exp) + + def report_tests_serverless(self, processed_tests): + testRepo = TestReporter() + + # PRINT TESTS RESOURCE INFO + # (durations with overheads, resource usages, utilization, overheads, hysteresis and basic each test info) + benchmarks = split_tests_by_test_type(processed_tests) + test_reports = [ + ("Resource usages", testRepo.print_tests_resource_usage, [], True), + ("Tests durations", testRepo.print_tests_times, [], True), + ("Tests basic information", testRepo.print_test_report, [self.cfg.PRINT_NODE_INFO], + self.cfg.PRINT_TEST_BASIC_INFORMATION), + ("Missing information report", testRepo.report_resources_missing_data, [], + self.cfg.PRINT_MISSING_INFO_REPORT), + ("Resource utilization", testRepo.print_tests_resource_utilization, [], True), + ("Tests durations and overheads", testRepo.print_summarized_tests_info, [self.cfg.NUM_BASE_EXPERIMENTS], + False), + ("Resource overheads", testRepo.print_tests_resource_overhead_report, [self.cfg.NUM_BASE_EXPERIMENTS], + False and self.cfg.NUM_BASE_EXPERIMENTS != 0)] + # ("Resource hysteresis", testRepo.print_tests_resource_hysteresis_report, [], False)] + + for test_type in benchmarks: + for report in test_reports: + report_name, report_function, report_function_extra, bool_apply = report + if bool_apply: + eprint("Doing {0} for {1} at {2}".format( + report_name, test_type, time.strftime("%D %H:%M:%S", time.localtime()))) + print_latex_section("{0} for {1}".format(report_name, test_type)) + args = tuple([benchmarks[test_type]] + report_function_extra) + report_function(*args) + + if self.cfg.GENERATE_APP_PLOTS or self.cfg.GENERATE_NODES_PLOTS: + for test_type in benchmarks: + eprint("Plotting resource plots for {0} at {1}".format( + test_type, time.strftime("%D %H:%M:%S", time.localtime()))) + testRepo.generate_test_resource_plot(benchmarks[test_type]) + + def report_tests_energy(self, processed_tests): + testRepo = TestReporter() + # PRINT TESTS RESOURCE INFO + # (durations with overheads, resource usages, utilization, overheads, hysteresis and basic each test info) + test_reports = [ + ("Resource usages", testRepo.print_tests_resource_usage, [], True), + ("Tests durations", testRepo.print_tests_times, [], True), + ("Tests basic information", testRepo.print_test_report, [self.cfg.PRINT_NODE_INFO], + self.cfg.PRINT_TEST_BASIC_INFORMATION), + ("Missing information report", testRepo.report_resources_missing_data, [], + 
self.cfg.PRINT_MISSING_INFO_REPORT), + ("Resource utilization", testRepo.print_tests_resource_utilization, [], True)] + + for report in test_reports: + report_name, report_function, report_function_extra, bool_apply = report + if bool_apply: + eprint("Doing {0} at {1}".format( + report_name, time.strftime("%D %H:%M:%S", time.localtime()))) + print_latex_section("{0}".format(report_name)) + args = tuple([processed_tests] + report_function_extra) + report_function(*args) + + if self.cfg.GENERATE_APP_PLOTS or self.cfg.GENERATE_NODES_PLOTS: + testRepo.generate_test_resource_plot(processed_tests) + + def report_experiment(self, exp): + testRepo = TestReporter() + report_type = self.cfg.EXPERIMENT_TYPE + + # GENERATE ALL ADDED INFO ABOUT EXPERIMENT + experiment = self.process_experiment(exp) + if self.cfg.GENERATE_EXPERIMENT_PLOT: + if "end_time" not in experiment or "start_time" not in experiment: + return + + start, end = experiment["start_time"], experiment["end_time"] + plots = get_plots() + + if self.cfg.GENERATE_NODES_PLOTS: + for node in self.cfg.NODES_LIST: + test_plots = plots["node"][report_type] + structure = (node, "node") + plot_document(experiment, structure, test_plots, start, end, self.cfg.REPORTED_RESOURCES) + + if self.cfg.GENERATE_APP_PLOTS: + for app in self.cfg.APPS_LIST + ["ALL"]: + app_plots = plots["app"][report_type] + structure = (app, "app") + plot_document(experiment, structure, app_plots, start, end, self.cfg.REPORTED_RESOURCES) + + if self.cfg.GENERATE_USER_PLOTS: + for user in self.cfg.USERS_LIST: + user_plots = plots["user"][report_type] + plot_user(experiment, user, user_plots, start, end, self.cfg.REPORTED_RESOURCES) + + # GENERATE ALL ADDED INFO ABOUT TESTS + tests = self.timestampingAgent.get_experiment_tests(experiment["experiment_id"], experiment["username"]) + + processed_tests = list() + for test in tests: + processed_tests.append(testRepo.process_test(test)) + + # PRINT BASIC EXPERIMENT INFO + eprint("Generating experiment info at {0}".format(time.strftime("%D %H:%M:%S", time.localtime()))) + self.print_experiment_report(experiment) + + if report_type == "serverless": + self.report_tests_serverless(processed_tests) + elif report_type == "energy": + self.report_tests_energy(processed_tests) + else: + pass diff --git a/src/reporting/TestReporter.py b/src/reporting/TestReporter.py new file mode 100755 index 0000000..9a8452f --- /dev/null +++ b/src/reporting/TestReporter.py @@ -0,0 +1,403 @@ +# Copyright (c) 2019 Universidade da Coruña +# Authors: +# - Jonatan Enes [main](jonatan.enes@udc.es, jonatan.enes.alvarez@gmail.com) +# - Roberto R. Expósito +# - Juan Touriño +# +# This file is part of the BDWatchdog framework, from +# now on referred to as BDWatchdog. +# +# BDWatchdog is free software: you can redistribute it +# and/or modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, either version 3 +# of the License, or (at your option) any later version. +# +# BDWatchdog is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with BDWatchdog. If not, see . 
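Each entry in the test_reports lists above follows a (section title, reporting function, extra positional arguments, enabled flag) convention, and disabled entries are simply skipped. A minimal, self-contained sketch of that dispatch (dummy_report and the empty test list are placeholders, not part of the patch):

# Illustrative only: how one entry of the test_reports lists is dispatched.
processed_tests = []  # placeholder list of processed test documents

def dummy_report(tests):  # hypothetical reporting function
    print("would report on {0} tests".format(len(tests)))

report = ("Dummy section", dummy_report, [], True)
report_name, report_function, report_function_extra, bool_apply = report
if bool_apply:
    args = tuple([processed_tests] + report_function_extra)
    report_function(*args)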
+ + +from __future__ import print_function + +import sys + +from src.opentsdb import bdwatchdog +from src.plotting.utils import get_plots +from src.reporting.config import ReporterConfig, OpenTSDBConfig +from src.reporting.latex_output import latex_print, print_latex_stress + +from src.plotting.barplots import plot_tests_resource_usage, plot_tests_times, \ + plot_tests_times_with_stepping +from src.plotting.timeseries_plots import plot_document + +from src.reporting.utils import generate_duration, translate_metric, format_metric, flush_table, \ + print_basic_doc_info, some_test_has_missing_aggregate_information, generate_resources_timeseries + + +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + + +class TestReporter: + def __init__(self): + # Get the config + self.cfg = ReporterConfig() + self.bdwatchdog_handler = bdwatchdog.BDWatchdog(OpenTSDBConfig()) + + def process_test(self, test): + test = generate_duration(test) + test = generate_resources_timeseries(test, self.cfg) + return test + + def generate_test_resource_plot(self, tests): + report_type = self.cfg.EXPERIMENT_TYPE + + for test in tests: + if "end_time" not in test or "start_time" not in test: + return + + start, end = test["start_time"], test["end_time"] + plots = get_plots() + + if self.cfg.GENERATE_NODES_PLOTS: + for node in self.cfg.NODES_LIST: + test_plots = plots["node"][report_type] + structure = (node, "node") + plot_document(test, structure, test_plots, start, end, self.cfg.REPORTED_RESOURCES) + + if self.cfg.GENERATE_APP_PLOTS: + for app in self.cfg.APPS_LIST + ["ALL"]: + app_plots = plots["app"][report_type] + structure = (app, "app") + plot_document(test, structure, app_plots, start, end, self.cfg.REPORTED_RESOURCES) + + # PRINT TEST RESOURCE USAGES + def print_test_resources(self, test, structures_list): + if not test["resource_aggregates"] or test["resource_aggregates"] == "n/a": + latex_print("RESOURCE INFO NOT AVAILABLE") + return + + max_columns = self.cfg.MAX_COLUMNS["print_test_resources"] + headers, rows, remaining_data, num_columns = ["structure", "aggregation"], dict(), False, 0 + for metric_name in self.cfg.PRINTED_METRICS: + headers.append(translate_metric(metric_name)) + for structure_name in structures_list: + + # Initialize + if structure_name not in rows: + rows[structure_name] = dict() + + for agg in ["SUM", "AVG"]: + if agg not in rows[structure_name]: + rows[structure_name][agg] = [structure_name, agg] + + try: + rows[structure_name][agg].append( + format_metric(test["resource_aggregates"][structure_name][metric_name][agg], metric_name, + agg)) + except KeyError: + rows[structure_name][agg].append("n/a") + + num_columns += 1 + remaining_data = True + if num_columns >= max_columns: + self.flush_rows_with_aggregations(rows, headers) + headers, rows, remaining_data, num_columns = ["structure", "aggregation"], dict(), False, 0 + + if remaining_data: + self.flush_rows_with_aggregations(rows, headers) + + def flush_rows_with_aggregations(self, rows, headers, table_caption=None): + final_rows = list() + for row in rows: + final_rows += list(rows[row].values()) + flush_table(final_rows, headers, table_caption) + + # PRINT TEST RESOURCE OVERHEAD + def print_tests_resource_overhead_report(self, tests, num_base_experiments=3, print_with_stepping=True): + table_caption = "TESTs resource overhead" + max_columns = self.cfg.MAX_COLUMNS["print_tests_resource_overhead_report"] + resource_tuples = self.cfg.METRICS_FOR_OVERHEAD_REPORT + + overheads, base_values, headers, num_columns, 
remaining_data = dict(), dict(), ["resource"], 0, False + + for test in tests[:num_base_experiments]: + headers.append(test["test_name"]) + for resource_tuple in resource_tuples: + resource, usage_metric = resource_tuple + if resource not in overheads: + overheads[resource] = [resource] + base_values[resource] = 0 + + if test["resource_aggregates"] != "n/a": + overheads[resource].append("---") + base_values[resource] += test["resource_aggregates"]["ALL"][usage_metric]["SUM"] + else: + overheads[resource].append("n/a") + for resource in base_values: + base_values[resource] = base_values[resource] / num_base_experiments + + num_columns += num_base_experiments + + for test in tests[num_base_experiments:]: + headers.append(test["test_name"]) + for resource_tuple in resource_tuples: + resource, usage_metric = resource_tuple + + if resource not in overheads: + overheads[resource] = [resource] + + if test["resource_aggregates"] != "n/a": + overhead = test["resource_aggregates"]["ALL"][usage_metric]["SUM"] / base_values[resource] + resource_overhead = str(int((overhead - 1) * 100)) + "%" + else: + resource_overhead = "n/a" + + overheads[resource].append(resource_overhead) + + num_columns += 1 + remaining_data = True + if num_columns >= max_columns: + flush_table(overheads.values(), headers, table_caption) + table_caption = None + overheads, headers, num_columns, remaining_data = dict(), ["resource"], 0, False + if remaining_data: + flush_table(overheads.values(), headers, table_caption) + + plot_tests_resource_usage(tests) + + # PRINT TEST RESOURCE UTILIZATION + def print_tests_resource_utilization(self, tests): + max_columns = self.cfg.MAX_COLUMNS["print_tests_resource_utilization_report"] + table_caption = "TESTs resource utilization" + + headers, rows, num_columns, remaining_data = ["resource"], dict(), 0, False + + for test in tests: + headers.append(test["test_name"]) + + for resource_tuple in self.cfg.RESOURCE_UTILIZATION_TUPLES: + resource, current, usage = resource_tuple + if resource not in rows: + rows[resource] = [resource] + if test["resource_aggregates"] == "n/a": + rows[resource].append("n/a") + else: + try: + available = test["resource_aggregates"]["ALL"][current]["SUM"] + used = test["resource_aggregates"]["ALL"][usage]["SUM"] + if available <= 0: + raise KeyError + else: + rows[resource].append(str(int(100 * used / available) - 1) + '%') + except KeyError: + eprint("Resource utilization for '{0}' skipped as no value for applied resource limits are " + "present and thus not utilization ratio can be computed".format(resource)) + continue + + num_columns += 1 + remaining_data = True + if num_columns >= max_columns: + flush_table(rows.values(), headers, table_caption) + table_caption = None + headers, rows, num_columns, remaining_data = ["resource"], dict(), 0, False + + if remaining_data: + flush_table(rows.values(), headers) + + # PRINT TEST RESOURCE MISSING DATA + def report_resources_missing_data(self, tests): + for test in tests: + if "end_time" not in test or "start_time" not in test: + return + + structures_list = self.cfg.NODES_LIST + misses = dict() + for metric in self.cfg.METRICS_TO_CHECK_FOR_MISSING_DATA: + metric_name = metric[0] + for structure in structures_list: + if metric_name in test["resources"][structure]: + timeseries = test["resources"][structure][metric_name] + else: + timeseries = None + if bool(timeseries): + structure_misses_list = self.bdwatchdog_handler.perform_check_for_missing_metric_info( + timeseries) + if not structure_misses_list: + continue + else: 
+ # No timeseries were retrieved, so it is a 100% lost + structure_misses_list = [{"time": 0, "diff_time": test["duration"]}] + + if metric_name not in misses: + misses[metric_name] = dict() + misses[metric_name][structure] = structure_misses_list + + if misses: + print("\\textbf{TEST:}" + " {0}".format(test["test_name"])) + + aggregated_misses = dict() + for metric in misses: + aggregated_misses[metric] = dict() + for structure in misses[metric]: + aggregated_misses[metric][structure] = sum( + miss['diff_time'] for miss in misses[metric][structure]) + + for metric in aggregated_misses: + latex_print("For metric: {0}".format(metric)) + total_missed_time = 0 + for structure in aggregated_misses[metric]: + structure_missed_time = aggregated_misses[metric][structure] + + latex_print( + "Silence of {0} seconds at node {1} accounting for a total of {2:.2f}\%".format( + structure_missed_time, structure, + float(100 * structure_missed_time / test["duration"]))) + total_missed_time += structure_missed_time + + print_latex_stress( + "Silence of {0} seconds at for ALL nodes accounting for a total of {1:.2f}\%".format( + total_missed_time, + float(100 * total_missed_time / (len(structures_list) * test["duration"])))) + latex_print(" ") + + def print_tests_resource_usage(self, tests): + table_caption = "TESTs total resource usages" + max_columns = self.cfg.MAX_COLUMNS["print_tests_by_resource_report"] + headers, rows, num_columns, remaining_data = ["resource", "aggregation"], dict(), 0, False + for test in tests: + headers.append(test["test_name"]) + metrics = list() + for t in self.cfg.RESOURCE_UTILIZATION_TUPLES: + metrics.append(t[1]) + metrics.append(t[2]) + for resource in metrics: + if resource not in rows: + rows[resource] = dict() + + for agg in ["SUM", "AVG"]: + if agg not in rows[resource]: + rows[resource][agg] = [translate_metric(resource), agg] + + if test["resource_aggregates"] == "n/a": + rows[resource][agg].append("n/a") + else: + try: + rows[resource][agg].append( + format_metric(test["resource_aggregates"]["ALL"][resource][agg], resource, agg)) + except KeyError: + rows[resource][agg].append("n/a") + + num_columns += 1 + remaining_data = True + if num_columns >= max_columns: + self.flush_rows_with_aggregations(rows, headers, table_caption) + table_caption = None + headers, rows, num_columns, remaining_data = ["resource", "aggregation"], dict(), 0, False + + if remaining_data: + self.flush_rows_with_aggregations(rows, headers, table_caption) + + def print_test_report(self, tests, print_node_info): + # PRINT BASIC INFO ABOUT THE TEST + for test in tests: + print_basic_doc_info(test) + + # PRINT SPECIFIC RESOURCE INFO FOR EVERY NODE (OPTIONAL) AND FOR THE AGGREGATION + if print_node_info: + structures_list = list() + for node in self.cfg.NODES_LIST: + structures_list.append(node) + self.print_test_resources(test, structures_list) + print("") + + structures_list = ["ALL"] + self.cfg.APPS_LIST + self.print_test_resources(test, structures_list) + print("") + + def print_tests_times(self, tests): + max_columns = self.cfg.MAX_COLUMNS["print_summarized_tests_info"] + table_caption = "TESTs durations and time benchmarking " + + headers, durations_seconds, durations_minutes, num_columns, remaining_data = \ + ["time"], ["seconds"], ["minutes"], 0, False + + for test in tests: + headers.append(test["test_name"]) + seconds, minutes, overhead = "n/a", "n/a", "n/a" + if test["duration"] != "n/a": + seconds = test["duration"] + minutes = "{:.2f}".format((test["duration"]) / 60) + + 
durations_seconds.append(seconds) + durations_minutes.append(minutes) + + num_columns += 1 + remaining_data = True + if num_columns >= max_columns: + flush_table([durations_seconds, durations_minutes], headers, table_caption) + table_caption = None + headers, durations_seconds, durations_minutes, num_columns, remaining_data = \ + ["time"], ["seconds"], ["minutes"], 0, False + + if remaining_data: + flush_table([durations_seconds, durations_minutes], headers, table_caption) + + def print_summarized_tests_info(self, tests, num_base_experiments, print_with_stepping=True): + max_columns = self.cfg.MAX_COLUMNS["print_summarized_tests_info"] + table_caption = "TESTs durations and time benchmarking (over the first {0} experiments)".format( + num_base_experiments) + + headers, overheads, durations_seconds, durations_minutes, num_columns, remaining_data = \ + ["time"], ["overhead"], ["seconds"], ["minutes"], 0, False + basetime = 0 + + if num_base_experiments == 0: + basetime = 1 + else: + for test in tests[:num_base_experiments]: + headers.append(test["test_name"]) + basetime += test["duration"] + overheads.append("---") + durations_seconds.append(test["duration"]) + durations_minutes.append("{:.2f}".format((test["duration"]) / 60)) + + num_columns += 1 + remaining_data = True + if num_columns >= max_columns: + flush_table([durations_seconds, durations_minutes, overheads], headers, table_caption) + table_caption = None + headers, overheads, durations_seconds, durations_minutes, num_columns, remaining_data = \ + ["time"], ["overhead"], ["seconds"], ["minutes"], 0, False + + basetime = basetime / num_base_experiments + + for test in tests[num_base_experiments:]: + headers.append(test["test_name"]) + seconds, minutes, overhead = "n/a", "n/a", "n/a" + if test["duration"] != "n/a": + seconds = test["duration"] + minutes = "{:.2f}".format((test["duration"]) / 60) + overhead = test["duration"] / basetime + overhead = str(int((overhead - 1) * 100)) + "%" + + durations_seconds.append(seconds) + durations_minutes.append(minutes) + overheads.append(overhead) + + num_columns += 1 + remaining_data = True + if num_columns >= max_columns: + flush_table([durations_seconds, durations_minutes, overheads], headers, table_caption) + table_caption = None + headers, overheads, durations_seconds, durations_minutes, num_columns, remaining_data = \ + ["time"], ["overhead"], ["seconds"], ["minutes"], 0, False + + if remaining_data: + flush_table([durations_seconds, durations_minutes, overheads], headers, table_caption) + + plot_tests_times(tests) diff --git a/src/reporting/__init__.py b/src/reporting/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/src/reporting/config.py b/src/reporting/config.py new file mode 100755 index 0000000..2e1e00b --- /dev/null +++ b/src/reporting/config.py @@ -0,0 +1,406 @@ +# Copyright (c) 2019 Universidade da Coruña +# Authors: +# - Jonatan Enes [main](jonatan.enes@udc.es, jonatan.enes.alvarez@gmail.com) +# - Roberto R. Expósito +# - Juan Touriño +# +# This file is part of the BDWatchdog framework, from +# now on referred to as BDWatchdog. +# +# BDWatchdog is free software: you can redistribute it +# and/or modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, either version 3 +# of the License, or (at your option) any later version. +# +# BDWatchdog is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with BDWatchdog. If not, see . + + +from __future__ import print_function + +import configparser +import os +import sys + + +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + + +def get_numeric_value(d, key, default, numeric_type): + try: + return numeric_type(d[key]) + except KeyError: + eprint("Invalid configuration for {0}, using default value '{1}'".format(key, default)) + return default + + +def get_float_value(d, key, default): + return get_numeric_value(d, key, default, float) + + +def get_int_value(d, key, default): + return get_numeric_value(d, key, default, int) + + +class ConfigParams: + base_path = os.path.dirname(os.path.abspath(__file__)) + config_path = "../../conf/config.ini" + config_keys = [] + default_config_values = {} + + +class DatabaseConfig: + config = None + + def __init__(self, params): + config_dict = {} + config = configparser.ConfigParser() + config_file_path = os.path.join(params.base_path, params.config_path) + try: + config.read(config_file_path) + except (IOError, FileNotFoundError): + print("Config file does not exist on {0} or is not accessible".format(config_file_path)) + + for key in params.config_keys: + try: + config_dict[key] = config['DEFAULT'][key] + except KeyError: + config_dict[key] = params.default_config_values[key] + self.config = config_dict + + def get_config_as_dict(self): + return self.config + + +class OpenTSDBConfig(DatabaseConfig): + def __init__(self): + params = ConfigParams() + params.config_path = "../../conf/timeseries_config.ini" + params.config_keys = [ + "OPENTSDB_IP", + "OPENTSDB_PORT", + "OPENTSDB_SUBDIR" + ] + params.default_config_values = { + "OPENTSDB_IP": "opentsdb", + "OPENTSDB_PORT": 4242, + "OPENTSDB_SUBDIR": "" + } + DatabaseConfig.__init__(self, params) + + def getIp(self): + return self.config["OPENTSDB_IP"] + + def getPort(self): + return self.config["OPENTSDB_PORT"] + + def getSubdir(self): + return self.config["OPENTSDB_SUBDIR"] + + +class MongoDBConfig(DatabaseConfig): + + def __init__(self): + params = ConfigParams() + params.config_path = "../../conf/timestamping_config.ini" + params.config_keys = [ + "TESTS_POST_ENDPOINT", + "EXPERIMENTS_POST_ENDPOINT", + "MAX_CONNECTION_TRIES", + "MONGODB_IP", + "MONGODB_PORT", + "MONGODB_USER" + ] + params.default_config_values = { + "TESTS_POST_ENDPOINT": "tests", + "EXPERIMENTS_POST_ENDPOINT": "experiments", + "MAX_CONNECTION_TRIES": 3, + "MONGODB_IP": "times", + "MONGODB_PORT": 8000, + "MONGODB_USER": "root" + } + DatabaseConfig.__init__(self, params) + + def get_username(self): + return self.config["MONGODB_USER"] + + +class ReporterConfig: + __base_path = os.path.dirname(os.path.abspath(__file__)) + __config_path = "../../conf/report_generator_config.ini" + __config_keys = [ + "MAX_DIFF_TIME", + "PRINT_MISSING_INFO_REPORT", + "PRINT_NODE_INFO", + "GENERATE_APP_PLOTS", + "GENERATE_NODES_PLOTS", + "GENERATE_EXPERIMENT_PLOT", + "GENERATE_USER_PLOTS", + "PLOTTING_FORMATS", + "NODES_LIST", + "APPS_LIST", + "USERS_LIST", + "NUM_BASE_EXPERIMENTS", + "TEST_TYPE_STEPPING", + "PRINT_TEST_BASIC_INFORMATION", + "STATIC_LIMITS", + "Y_AMPLIFICATION_FACTOR", + "XLIM", + "YLIM", + "XTICKS_STEP", + "REPORTED_RESOURCES", + "EXPERIMENT_TYPE", + "PRINT_ENERGY_MAX", + "DOWNSAMPLE" + ] + __default_environment_values = { + "NUM_BASE_EXPERIMENTS": 3, + "MAX_DIFF_TIME": 10, + "PRINT_MISSING_INFO_REPORT": "true", + "PRINT_NODE_INFO": 
"true", + "GENERATE_APP_PLOTS": "true", + "GENERATE_NODES_PLOTS": "true", + "GENERATE_EXPERIMENT_PLOT": "false", + "GENERATE_USER_PLOTS": "false", + "PLOTTING_FORMATS": "svg", + "TEST_TYPE_STEPPING": 3, + "PRINT_TEST_BASIC_INFORMATION": "false", + "STATIC_LIMITS": "true", + "NODES_LIST": "node1,node2,node3,node4,node5,node6,node7,node8,node9", + "USERS_LIST": "user0", + "APPS_LIST": "app1", + "Y_AMPLIFICATION_FACTOR": 1.2, + "XLIM": 2000, + "YLIM": "cpu:default:10000,energy:default:2000", + "XTICKS_STEP": 100, + "REPORTED_RESOURCES": "cpu,mem", + "EXPERIMENT_TYPE": "serverless", + "PRINT_ENERGY_MAX": "true", + "DOWNSAMPLE": 5 + } + + __ALLOWED_EXPERIMENT_TYPES = ["serverless", "untreated", "energy"] + __DEFAULT_EXPERIMENT_TYPE = "serverless" + + def read_config(self): + config_dict = {} + config = configparser.ConfigParser() + config_file_path = os.path.join(self.__base_path, self.__config_path) + + try: + config.read(config_file_path) + except (IOError, FileNotFoundError): + print('Config file does not exist or is not accessible') + + for key in self.__config_keys: + try: + config_dict[key] = config['DEFAULT'][key] + except KeyError: + pass # Key is not configured, leave it + return config_dict + + def create_environment(self): + custom_environment = os.environ.copy() + config_dict = self.read_config() + for key in self.__config_keys: + if key in config_dict.keys(): + custom_environment[key] = config_dict[key] + else: + custom_environment[key] = self.__default_environment_values[key] + return custom_environment + + def __init__(self): + ENV = self.create_environment() + + self.REPORTED_RESOURCES = ENV["REPORTED_RESOURCES"].rstrip('"').lstrip('"').split(",") + self.MAX_DIFF_TIME = get_int_value(ENV, "MAX_DIFF_TIME", self.__default_environment_values["MAX_DIFF_TIME"]) + + self.EXPERIMENT_TYPE = ENV["EXPERIMENT_TYPE"] + if self.EXPERIMENT_TYPE not in self.__ALLOWED_EXPERIMENT_TYPES: + self.EXPERIMENT_TYPE = self.__DEFAULT_EXPERIMENT_TYPE + + self.BDWATCHDOG_USER_METRICS = list() + if "cpu" in self.REPORTED_RESOURCES: + self.BDWATCHDOG_USER_METRICS.append(('user.cpu.current', 'user')) + self.BDWATCHDOG_USER_METRICS.append(('user.cpu.usage', 'user')) + if "energy" in self.REPORTED_RESOURCES: + self.BDWATCHDOG_USER_METRICS.append(('user.energy.max', 'user')) + self.BDWATCHDOG_USER_METRICS.append(('user.energy.used', 'user')) + + self.BDWATCHDOG_APP_METRICS = list() + if "cpu" in self.REPORTED_RESOURCES: + self.BDWATCHDOG_APP_METRICS.append(('structure.cpu.current', 'structure')) + self.BDWATCHDOG_APP_METRICS.append(('structure.cpu.usage', 'structure')) + if "mem" in self.REPORTED_RESOURCES: + self.BDWATCHDOG_APP_METRICS.append(('structure.mem.current', 'structure')) + self.BDWATCHDOG_APP_METRICS.append(('structure.mem.usage', 'structure')) + if "disk" in self.REPORTED_RESOURCES: + self.BDWATCHDOG_APP_METRICS.append(('structure.disk.current', 'structure')) + self.BDWATCHDOG_APP_METRICS.append(('structure.disk.usage', 'structure')) + if "net" in self.REPORTED_RESOURCES: + self.BDWATCHDOG_APP_METRICS.append(('structure.net.current', 'structure')) + self.BDWATCHDOG_APP_METRICS.append(('structure.net.usage', 'structure')) + if "energy" in self.REPORTED_RESOURCES: + self.BDWATCHDOG_APP_METRICS.append(('structure.energy.max', 'structure')) + self.BDWATCHDOG_APP_METRICS.append(('structure.energy.usage', 'structure')) + + self.BDWATCHDOG_NODE_METRICS = list() + if "cpu" in self.REPORTED_RESOURCES: + self.BDWATCHDOG_NODE_METRICS += [('structure.cpu.current', 'structure'), ('proc.cpu.user', 'host'), + 
('proc.cpu.kernel', 'host'), ('limit.cpu.upper', 'structure'), + ('limit.cpu.lower', 'structure')] + if "mem" in self.REPORTED_RESOURCES: + self.BDWATCHDOG_NODE_METRICS += [('structure.mem.current', 'structure'), ('proc.mem.resident', 'host'), + ('proc.mem.virtual', 'host'), ('limit.mem.upper', 'structure'), + ('limit.mem.lower', 'structure')] + if "disk" in self.REPORTED_RESOURCES: + self.BDWATCHDOG_NODE_METRICS += [('structure.disk.current', 'structure'), ('proc.disk.reads.mb', 'host'), + ('proc.disk.writes.mb', 'host'), ('limit.disk.upper', 'structure'), + ('limit.disk.lower', 'structure')] + if "net" in self.REPORTED_RESOURCES: + self.BDWATCHDOG_NODE_METRICS += [('structure.net.current', 'structure'), ('proc.net.tcp.out.mb', 'host'), + ('limit.net.upper', 'structure'), ('limit.net.lower', 'structure'), + ('proc.net.tcp.in.mb', 'host'), ('structure.energy.usage', 'structure')] + if "energy" in self.REPORTED_RESOURCES: + self.BDWATCHDOG_NODE_METRICS += [('sys.cpu.energy', 'host')] + + self.PRINT_ENERGY_MAX = ENV["PRINT_ENERGY_MAX"] == "true" + self.PRINTED_METRICS = list() + if "cpu" in self.REPORTED_RESOURCES: + self.PRINTED_METRICS += ['structure.cpu.current', 'structure.cpu.usage'] + self.PRINTED_METRICS += ['proc.cpu.user', 'proc.cpu.kernel'] + + if "mem" in self.REPORTED_RESOURCES: + self.PRINTED_METRICS += ['structure.mem.current', 'structure.mem.usage'] + self.PRINTED_METRICS += ['proc.mem.resident', 'proc.mem.virtual'] + + if "disk" in self.REPORTED_RESOURCES: + self.PRINTED_METRICS += ['structure.disk.current', 'structure.disk.usage'] + self.PRINTED_METRICS += ['proc.disk.reads.mb', 'proc.disk.writes.mb'] + + if "net" in self.REPORTED_RESOURCES: + self.PRINTED_METRICS += ['structure.net.current', 'structure.net.usage'] + self.PRINTED_METRICS += ['proc.net.tcp.out.mb', 'proc.net.tcp.in.mb'] + + if "energy" in self.REPORTED_RESOURCES: + self.PRINTED_METRICS += ['structure.energy.usage'] + self.PRINTED_METRICS += ['structure.energy.max'] + + self.MAX_COLUMNS = {"print_test_resources": 5, + "print_summarized_tests_info": 8, + "print_tests_resource_utilization_report": 8, + "print_tests_resource_overhead_report": 8, + "print_tests_by_resource_report": 5, + "print_tests_resource_hysteresis_report": 8, + "print_tests_resource_overhead_report_with_stepping": 6, + "print_tests_resource_utilization_with_stepping": 6} + + self.STATIC_LIMITS = ENV["STATIC_LIMITS"] == "true" + + self.Y_AMPLIFICATION_FACTOR = get_float_value(ENV, "Y_AMPLIFICATION_FACTOR", + self.__default_environment_values["Y_AMPLIFICATION_FACTOR"]) + + # self.XLIM = get_int_value(ENV, "XLIM", self.__default_environment_values["XLIM"]) + + self.XLIM = {"default": 1000} + for pair in ENV["XLIM"].rstrip('"').lstrip('"').split(","): + structure_name, limit = pair.split(":") + try: + self.XLIM[structure_name] = int(limit) + except ValueError: + pass + + self.YLIM = dict() + for pair in ENV["YLIM"].rstrip('"').lstrip('"').split(","): + resource, structure_name, limit = pair.split(":") + if resource in ["cpu", "energy"]: + try: + if structure_name not in self.YLIM: + self.YLIM[structure_name] = dict() + self.YLIM[structure_name][resource] = int(limit) + except ValueError: + pass + + self.XTICKS_STEP = get_int_value(ENV, "XTICKS_STEP", self.__default_environment_values["XTICKS_STEP"]) + + self.PRINT_MISSING_INFO_REPORT = ENV["PRINT_MISSING_INFO_REPORT"] == "true" + self.PRINT_NODE_INFO = ENV["PRINT_NODE_INFO"] == "true" + self.GENERATE_APP_PLOTS = ENV["GENERATE_APP_PLOTS"] == "true" + self.GENERATE_NODES_PLOTS = 
ENV["GENERATE_NODES_PLOTS"] == "true" + self.GENERATE_EXPERIMENT_PLOT = ENV["GENERATE_EXPERIMENT_PLOT"] == "true" + self.GENERATE_USER_PLOTS = ENV["GENERATE_USER_PLOTS"] == "true" + + self.PLOTTING_FORMATS = list() + plotting_formats = ENV["PLOTTING_FORMATS"].rstrip('"').lstrip('"').split(",") + if "png" in plotting_formats: + self.PLOTTING_FORMATS.append("png") + if "svg" in plotting_formats: + self.PLOTTING_FORMATS.append("svg") + + self.NUM_BASE_EXPERIMENTS = get_int_value(ENV, "NUM_BASE_EXPERIMENTS", + self.__default_environment_values["NUM_BASE_EXPERIMENTS"]) + + self.TEST_TYPE_STEPPING = get_int_value(ENV, "TEST_TYPE_STEPPING", + self.__default_environment_values["TEST_TYPE_STEPPING"]) + + # self.bdwatchdog_handler = bdwatchdog.BDWatchdog() + + self.DOWNSAMPLE = get_int_value(ENV, "DOWNSAMPLE", self.__default_environment_values["DOWNSAMPLE"]) + + self.RESOURCE_UTILIZATION_TUPLES = list() + if "cpu" in self.REPORTED_RESOURCES: + self.RESOURCE_UTILIZATION_TUPLES.append(("cpu", "structure.cpu.current", "structure.cpu.usage")) + + if "mem" in self.REPORTED_RESOURCES: + self.RESOURCE_UTILIZATION_TUPLES.append(("mem", "structure.mem.current", "structure.mem.usage")) + + if "energy" in self.REPORTED_RESOURCES: + self.RESOURCE_UTILIZATION_TUPLES.append(("energy", "structure.energy.max", "structure.energy.usage")) + + self.USAGE_METRICS_SOURCE = list() + if "cpu" in self.REPORTED_RESOURCES: + self.USAGE_METRICS_SOURCE.append(("structure.cpu.usage", ['proc.cpu.user', 'proc.cpu.kernel'])) + if "mem" in self.REPORTED_RESOURCES: + self.USAGE_METRICS_SOURCE.append(("structure.mem.usage", ['proc.mem.resident'])) + if "disk" in self.REPORTED_RESOURCES: + self.USAGE_METRICS_SOURCE.append(("structure.disk.usage", ['proc.disk.writes.mb', 'proc.disk.reads.mb'])) + if "net" in self.REPORTED_RESOURCES: + self.USAGE_METRICS_SOURCE.append(("structure.net.usage", ['proc.net.tcp.in.mb', 'proc.net.tcp.out.mb'])) + if "energy" in self.REPORTED_RESOURCES: + self.USAGE_METRICS_SOURCE.append(("structure.energy.usage", ['sys.cpu.energy'])) + + self.METRICS_TO_CHECK_FOR_MISSING_DATA = list() + if "cpu" in self.REPORTED_RESOURCES: + self.METRICS_TO_CHECK_FOR_MISSING_DATA += [('structure.cpu.current', 'structure'), + ('proc.cpu.user', 'host'), + ('proc.cpu.kernel', 'host')] + if "mem" in self.REPORTED_RESOURCES: + self.METRICS_TO_CHECK_FOR_MISSING_DATA += [('structure.mem.current', 'structure'), + ('proc.mem.resident', 'host')] + + if "energy" in self.REPORTED_RESOURCES: + self.METRICS_TO_CHECK_FOR_MISSING_DATA += [('structure.energy.usage', 'structure')] + + self.METRICS_FOR_OVERHEAD_REPORT = list() + if "cpu" in self.REPORTED_RESOURCES: + self.METRICS_FOR_OVERHEAD_REPORT += [("cpu used", "structure.cpu.usage"), + ("cpu allocated", "structure.cpu.current")] + if "mem" in self.REPORTED_RESOURCES: + self.METRICS_FOR_OVERHEAD_REPORT += [("mem used", "structure.mem.usage"), + ("mem allocated", "structure.mem.current")] + if "energy" in self.REPORTED_RESOURCES: + self.METRICS_FOR_OVERHEAD_REPORT += [("energy allowed", "structure.energy.max"), + ("energy used", "structure.energy.usage")] + + self.PRINT_TEST_BASIC_INFORMATION = ENV["PRINT_TEST_BASIC_INFORMATION"] == "true" + self.NODES_LIST = ENV["NODES_LIST"].rstrip('"').lstrip('"').split(",") + + self.APPS_LIST = ENV["APPS_LIST"].rstrip('"').lstrip('"').split(",") + + self.USERS_LIST = ENV["USERS_LIST"].rstrip('"').lstrip('"').split(",") diff --git a/src/reporting/latex_output.py b/src/reporting/latex_output.py new file mode 100755 index 0000000..7ff3043 --- /dev/null +++ 
b/src/reporting/latex_output.py @@ -0,0 +1,49 @@ +# Copyright (c) 2019 Universidade da Coruña +# Authors: +# - Jonatan Enes [main](jonatan.enes@udc.es, jonatan.enes.alvarez@gmail.com) +# - Roberto R. Expósito +# - Juan Touriño +# +# This file is part of the BDWatchdog framework, from +# now on referred to as BDWatchdog. +# +# BDWatchdog is free software: you can redistribute it +# and/or modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, either version 3 +# of the License, or (at your option) any later version. +# +# BDWatchdog is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with BDWatchdog. If not, see . + + +def print_latex_section(section_name, section_label=None): + if not section_label: + section_label = section_name.replace(" ", "_") + print("\\section{" + section_name + "}\label{" + section_label + "}") + print("") + + +# def print_latex_vertical_space(): +# print("\\vspace{-0.3cm}") +# print("") + + +def print_latex_stress(s): + # Print the \newline string so that it is detected as a Latex newline + # print(str + "\\newline") + # print two additional spaces so that it is detected by markdown and pandoc as a newline + print("\\textbf{" + s + "}" + " ") + print("") + + +def latex_print(s): + # Print the \newline string so that it is detected as a Latex newline + # print(str + "\\newline") + # print two additional spaces so that it is detected by markdown and pandoc as a newline + print(s + " ") + print("") diff --git a/src/reporting/utils.py b/src/reporting/utils.py new file mode 100755 index 0000000..f922f1a --- /dev/null +++ b/src/reporting/utils.py @@ -0,0 +1,335 @@ +# Copyright (c) 2019 Universidade da Coruña +# Authors: +# - Jonatan Enes [main](jonatan.enes@udc.es, jonatan.enes.alvarez@gmail.com) +# - Roberto R. Expósito +# - Juan Touriño +# +# This file is part of the BDWatchdog framework, from +# now on referred to as BDWatchdog. +# +# BDWatchdog is free software: you can redistribute it +# and/or modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, either version 3 +# of the License, or (at your option) any later version. +# +# BDWatchdog is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with BDWatchdog. If not, see . 
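Note: the helpers just added in src/reporting/latex_output.py all write LaTeX-flavoured markdown to stdout and rely on a trailing double space plus an empty print so that markdown/pandoc treats each call as its own paragraph. A minimal usage sketch follows; the section name and strings are invented for illustration, and it assumes PYTHONPATH has been set with set_pythonpath.sh so that the src package is importable (the real call sites are in the reporting code below, e.g. print_basic_doc_info).

# Illustrative only: combining the latex_output helpers when emitting a report
# section to stdout (the output is later converted with pandoc).
from src.reporting.latex_output import print_latex_section, print_latex_stress, latex_print

print_latex_section("Tests", section_label="tests_results")   # prints \section{Tests}\label{tests_results}
print_latex_stress("TEST: terasort_0")                        # bold line, ends with two spaces
latex_print("DURATION: 300 seconds (about 5.00 minutes)")     # plain paragraph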
+ + +from __future__ import print_function + +import sys +import time +from tabulate import tabulate + +from src.opentsdb import bdwatchdog +from src.reporting.config import OpenTSDBConfig +from src.reporting.latex_output import latex_print + + +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + + +# initialize the OpenTSDB handler +bdwatchdog_handler = bdwatchdog.BDWatchdog(OpenTSDBConfig()) + + +# Generate the resource information of both tests and experiments +def generate_resources_timeseries(document, cfg): + # Check that the needed start and end time are present, otherwise abort + if "end_time" not in document or "start_time" not in document: + document["resource_aggregates"] = "n/a" + return document + + # Initialize variables + document["resource_aggregates"], document["resources"], document["users"] = dict(), dict(), dict() + start, end = document["start_time"], document["end_time"] + + for user in cfg.USERS_LIST: + document["users"][user] = \ + bdwatchdog_handler.get_structure_timeseries(user, start, end, cfg.BDWATCHDOG_USER_METRICS, + downsample=cfg.DOWNSAMPLE) + # TODO rename this function or create a get_user_timeseries + + # Retrieve the timeseries from OpenTSDB and perform the per-structure aggregations + # Slow loop due to network call + for node_name in cfg.NODES_LIST: + document["resources"][node_name] = \ + bdwatchdog_handler.get_structure_timeseries(node_name, start, end, cfg.BDWATCHDOG_NODE_METRICS, + downsample=cfg.DOWNSAMPLE) + + metrics_to_agregate = document["resources"][node_name] + document["resource_aggregates"][node_name] = \ + bdwatchdog_handler.perform_structure_metrics_aggregations(start, end, metrics_to_agregate) + + # Generate the per-node time series 'usage' metrics (e.g., structure.cpu.usage) + for node_name in cfg.NODES_LIST: + for agg_metric in cfg.USAGE_METRICS_SOURCE: + agg_metric_name, metric_list = agg_metric + metrics_to_agregate = document["resources"][node_name] + + # Initialize + if agg_metric_name not in metrics_to_agregate: + metrics_to_agregate[agg_metric_name] = dict() + + # Get the first metric as the time reference, considering that all metrics should have + # the same timestamps + first_metric = metrics_to_agregate[metric_list[0]] + for time_point in first_metric: + # Iterate through the metrics + for metric in metric_list: + # Timestamp from the first 'reference' metric is not present in other metric, + # this may be due to the head and tail data points of the time series + if time_point not in metrics_to_agregate[metric]: + continue + # Initialize + if time_point not in metrics_to_agregate[agg_metric_name]: + metrics_to_agregate[agg_metric_name][time_point] = 0 + + # Sum + metrics_to_agregate[agg_metric_name][time_point] += \ + metrics_to_agregate[metric][time_point] + + # Generate the per-node aggregated 'usage' metrics (e.g., structure.cpu.usage) + for node_name in cfg.NODES_LIST: + for agg_metric in cfg.USAGE_METRICS_SOURCE: + agg_metric_name, metrics_to_aggregate = agg_metric + aggregates = document["resource_aggregates"][node_name] + + # Initialize + if agg_metric_name not in aggregates: + aggregates[agg_metric_name] = {"SUM": 0, "AVG": 0} + + # Add up to create the SUM + for metric in metrics_to_aggregate: + aggregates[agg_metric_name]["SUM"] += aggregates[metric]["SUM"] + + # Create the AVG from the SUM + aggregates[agg_metric_name]["AVG"] = aggregates[agg_metric_name]["SUM"] / document["duration"] + + # Generate the ALL pseudo-metrics for the overall application (all the container nodes) + 
document["resources"]["ALL"] = dict() + for node_name in cfg.NODES_LIST: + for metric in document["resources"][node_name]: + if metric not in document["resources"]["ALL"]: + document["resources"]["ALL"][metric] = document["resources"][node_name][metric] + continue + + for time_point in document["resources"][node_name][metric]: + try: + document["resources"]["ALL"][metric][time_point] += \ + document["resources"][node_name][metric][time_point] + except KeyError: + pass + + # Generate the aggregated ALL pseudo-metrics for the overall application (all the container nodes) + document["resource_aggregates"]["ALL"] = dict() + for node_name in cfg.NODES_LIST: + for metric in document["resource_aggregates"][node_name]: + # Initialize + if metric not in document["resource_aggregates"]["ALL"]: + document["resource_aggregates"]["ALL"][metric] = dict() + + metric_global_aggregates = document["resource_aggregates"]["ALL"][metric] + node_agg_metric = document["resource_aggregates"][node_name][metric] + + for aggregation in node_agg_metric: + # Initialize + if aggregation not in metric_global_aggregates: + metric_global_aggregates[aggregation] = 0 + + # Add up + metric_global_aggregates[aggregation] += node_agg_metric[aggregation] + + for app in cfg.APPS_LIST: + document["resources"][app] = \ + bdwatchdog_handler.get_structure_timeseries(app, start, end, cfg.BDWATCHDOG_APP_METRICS, + downsample=cfg.DOWNSAMPLE) + + document["resource_aggregates"][app] = \ + bdwatchdog_handler.perform_structure_metrics_aggregations(start, end, document["resources"][app]) + + # This metric is manually added because container structures do not have it, only application structures + if "energy" in cfg.REPORTED_RESOURCES: + document["resource_aggregates"]["ALL"]["structure.energy.max"] = {"SUM": 0, "AVG": 0} + document["resources"]["ALL"]["structure.energy.max"] = {} + for app in cfg.APPS_LIST: + for time_point in document["resources"][app]["structure.energy.max"]: + try: + document["resources"]["ALL"]["structure.energy.max"][time_point] += \ + document["resources"][app]["structure.energy.max"][time_point] + except KeyError: + document["resources"]["ALL"]["structure.energy.max"][time_point] = \ + document["resources"][app]["structure.energy.max"][time_point] + + document["resource_aggregates"]["ALL"]["structure.energy.max"]["SUM"] += \ + document["resource_aggregates"][app]["structure.energy.max"]["SUM"] + document["resource_aggregates"]["ALL"]["structure.energy.max"]["AVG"] += \ + document["resource_aggregates"][app]["structure.energy.max"]["AVG"] + + return document + + +def generate_duration(document): + document["duration"] = "n/a" + if "end_time" in document and "start_time" in document: + document["duration"] = document["end_time"] - document["start_time"] + return document + + +# PRINT EXPERIMENT OR TEST DOCUMENT INFORMATION +def print_basic_doc_info(doc): + start_time_string, end_time_string, duration, duration_minutes = get_times_from_doc(doc) + if "test_name" in doc: + latex_print("\\textbf{TEST:}" + " {0}".format(doc["test_name"])) + else: + latex_print("\\textbf{EXPERIMENT:}" + " {0}".format(doc["experiment_id"])) + latex_print("\\textbf{USER:}" + "{0}".format(doc["username"])) + + latex_print("\\textbf{START TIME:}" + " {0}".format(start_time_string)) + latex_print("\\textbf{END TIME:}" + " {0}".format(end_time_string)) + latex_print("\\textbf{DURATION:}" + " {0} seconds (about {1} minutes)".format(duration, duration_minutes) + " ") + + +def flush_table(table, header, table_caption=None): + # 
print_latex_vertical_space() + print(tabulate(table, header)) + print("") + if table_caption: + latex_print("Table: " + table_caption) + + +def format_metric(value, label, aggregation): + if aggregation == "AVG": + number_format = "{:.2f}" + else: + number_format = "{:.0f}" + + if label.startswith("structure.cpu") or label.startswith("proc.cpu"): + formatted_metric = "{0} vcore-s".format(number_format.format(value / 100)) + elif label.startswith("structure.mem") or label.startswith("proc.mem"): + formatted_metric = "{0} GB-s".format(number_format.format(value / 1024)) + elif label.startswith("structure.disk") or label.startswith("proc.disk"): + formatted_metric = "{0} GB".format(number_format.format(value / 1024)) + elif label.startswith("structure.net") or label.startswith("proc.net"): + formatted_metric = "{0} Gbit".format(number_format.format(value / 1024)) + elif label.startswith("structure.energy"): + if value >= 10000: + value = value / 1000 + formatted_metric = "{0} KJoule".format(number_format.format(value)) + else: + formatted_metric = value + + if aggregation == "AVG": + formatted_metric += "/s" + return formatted_metric + + +def some_test_has_missing_aggregate_information(tests): + for test in tests: + if test["resource_aggregates"] == "n/a": + return True + return False + + +def get_test_type(test_name, step): + return "serverless" + + +def translate_benchmark(benchmark): + if benchmark == "pagerank": + return "PageRank" + elif benchmark == "terasort": + return "TeraSort" + elif benchmark == "fixwindow": + return "FixWindow" + else: + return benchmark + + +def translate_metric(metric): + translated_metric = list() + metric_fields = metric.split(".") + + metric_type = metric_fields[0] + resource = metric_fields[1] + measure_kind = metric_fields[2] + + if metric_type == "user": + if measure_kind == "used": + # translated_metric.append("{0} used".format(resource)) + translated_metric.append("Used".format(resource)) + elif measure_kind == "current": + # translated_metric.append("{0} allocated".format(resource)) + translated_metric.append("Allocated".format(resource)) + elif measure_kind == "max": + # TODO Hotfix + if metric == "user.energy.max": + translated_metric.append("Power budget".format(resource)) + else: + # translated_metric.append("{0} reserved".format(resource)) + translated_metric.append("Reserved".format(resource)) + else: + translated_metric.append(measure_kind) + elif metric_type == "structure": + if measure_kind == "usage": + # translated_metric.append("{0} used".format(resource)) + translated_metric.append("Used".format(resource)) + elif measure_kind == "current": + # translated_metric.append("{0} allocated".format(resource)) + translated_metric.append("Allocated".format(resource)) + elif measure_kind == "max": + # TODO Hotfix + if metric == "structure.energy.max": + translated_metric.append("Power budget".format(resource)) + else: + # translated_metric.append("{0} reserved".format(resource)) + translated_metric.append("Reserved".format(resource)) + else: + translated_metric.append(measure_kind) + + elif metric_type == "limit": + if measure_kind == "upper": + translated_metric.append("upper") + elif measure_kind == "lower": + translated_metric.append("lower") + else: + translated_metric.append(measure_kind) + translated_metric.append("limit") + + elif metric_type == "proc": + translated_metric.append(" ".join(metric_fields[2:])) + + return " ".join(translated_metric).capitalize() + + +def get_times_from_doc(doc): + start_time_string, end_time_string, duration, 
duration_minutes = "n/a", "n/a", "n/a", "n/a" + + if "start_time" in doc: + start_time_string = time.strftime("%D %H:%M:%S", time.localtime(doc["start_time"])) + + if "end_time" in doc: + end_time_string = time.strftime("%D %H:%M:%S", time.localtime(doc["end_time"])) + + if "end_time" in doc and "start_time" in doc: + duration = doc["duration"] + duration_minutes = "{:.2f}".format(duration / 60) + + return start_time_string, end_time_string, duration, duration_minutes + + +def split_tests_by_test_type(tests): + benchmarks = dict() + for test in tests: + test_benchmark = test["test_name"].split("_")[0] + if test_benchmark not in benchmarks: + benchmarks[test_benchmark] = list() + benchmarks[test_benchmark].append(test) + return benchmarks diff --git a/templates/simple_report.template b/templates/simple_report.template new file mode 100755 index 0000000..984ed9c --- /dev/null +++ b/templates/simple_report.template @@ -0,0 +1,360 @@ +%!TEX TS-program = xelatex +%\documentclass[12pt]{scrartcl} + + +% The declaration of the document class: + +% The second line here, i.e. +% \documentclass[12pt]{scrartcl} +% is a standard LaTeX document class declaration: +% we say what kind of document we are making in curly brackets, +% and specify any options in square brackets. + +% (The previous line is a pseudo-comment, declaring that we will +% use the special XeTeX machinery for its more extensive font list +% and its use of unicode; +% in general, LaTeX 'comments' like this one +% begin with % and end with a linebreak.) + +% Note that there we have nothing in the nature of a template; +% it's just a standard bit of LaTeX pandoc will copy unaltered into the +% LaTeX file it is writing. But suppose you wrote something +% more akin to the corresponding line in Pandoc's default +% latex.template file, say: + +\documentclass$if(fontsize)$[$fontsize$]$endif${scrartcl} + +% then you would have invented a 'variable', fontsize, +% and could write things like + +% `markdown2pdf my.txt --xetex --variable=fontsize:12pt -o my.pdf` or +% `pandoc -r markdown -w html my.txt -s --xetex --variable=fontsize:24pt -o my.tex`. + +% If we specified --variable-fontsize:12, then template substitution +% would yield a LaTeX document beginning +% \documentclass[12pt]{scrarcl} +% which is just what we said anyway. +% But we could also specify a different fontsize. + +% I don't use this `--variable=....`functionality myself; +% I have a couple of basic templates I call with +% `--template=whatever.template` which I can also +% easily inspect to adjust things like font size as I please. + +% While we are discussing the declaration of the document class... +% here's an alternative command for two column landscape, +% not bad for some purposes. (If you strike the word 'landscape' +% you will have two narrow newspaperlike +% columns; scientists like that, because irrationality must +% show itself somewhere): +%\documentclass[12pt,twocolumn,landscape]{scrartcl} +% Columns are too close together in LaTeX so we add this +% `columnsep` command: +%\setlength{\columnsep}{.5in} + + +% I use the special 'komascript' article class "scrartcl" +% reasons I can't entirely remember; I'm not sure it's that great. +% One reason is the unimportant one that, like many classes, +% it allows very big fonts which are convenient for booklet printing +% in the idiotic American way by shrinking letterpaper pages. 
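Before going further into the LaTeX template, a brief sketch of the core aggregation step that generate_resources_timeseries (added above in src/reporting/utils.py) applies to the OpenTSDB data: the source metrics listed in cfg.USAGE_METRICS_SOURCE (for example proc.cpu.user plus proc.cpu.kernel) are summed point by point into a derived metric such as structure.cpu.usage. The timestamps and values below are invented; the real code operates on the document["resources"] dictionaries fetched from OpenTSDB.

# Toy illustration of the per-timestamp aggregation in generate_resources_timeseries.
node_metrics = {
    "proc.cpu.user":   {1583480000: 120.0, 1583480005: 130.0, 1583480010: 110.0},
    "proc.cpu.kernel": {1583480000:  10.0, 1583480005:  12.0},  # tail point missing
}

agg_name, sources = "structure.cpu.usage", ["proc.cpu.user", "proc.cpu.kernel"]
node_metrics[agg_name] = {}

# The first source metric acts as the time reference; timestamps missing from the
# other sources (head/tail data points) are simply skipped, as in the original code.
for ts in node_metrics[sources[0]]:
    for metric in sources:
        if ts not in node_metrics[metric]:
            continue
        node_metrics[agg_name][ts] = node_metrics[agg_name].get(ts, 0) + node_metrics[metric][ts]

print(node_metrics[agg_name])
# {1583480000: 130.0, 1583480005: 142.0, 1583480010: 110.0}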
+ +% the standard minimal LaTeX 'article' class declaration would be something like: + +% \documentclass[12pt]{article} + +% or for big type: + +% \documentclass[24pt]{extarticle} + +% but these restrict you to old-fashioned LaTeX materials. +% Note that Kieran Healy uses the swank 'Memoir' class, +% \documentclass[11pt,article,oneside]{memoir} +% which might be worth a look. + +% Enough about the document class. + +\usepackage{longtable} +\usepackage{booktabs} + +% -- We are in swanky unicode, XeTeX land, and must now import these packages: +\usepackage{fontspec,xltxtra,xunicode} +% fontspec means we can specify pretty much any font. +% Because we are using XeTeX material, +% this template needs to be called with the `--xetex` flag. + + +% Symbols: +% Pandoc imports the extensive `amsmath` collection of symbols +% for typesetting ordinary math. +\usepackage{amsmath} +% if you use exotic symbols you need to import specific packages, eg. for +% electrical engineering diagrams, musical notation, exotic currency symbols, +% the unspeakable rites of freemasonry etc. + + +% `babel`: +% The `babel` package, among other things, lets you determine what +% language you are using in a given stretch of text, so that typesetting +% will go well. Here we specify that mostly, we are speaking English: +\usepackage[english]{babel} + + +% Margins, etc: +% the `geometry` package makes for convenient adjusting of margins, which is what +% you asked about. Of course it can do much more, even make coffee for you: +%\usepackage{geometry} +\usepackage[left=0.5cm, right=0.5cm, top=0.5cm, bottom=0.5cm]{geometry} +%\geometry{verbose,letterpaper,tmargin=3cm,bmargin=3cm,lmargin=3cm,rmargin=2cm} +% so if you just keep a copy of this template in the directory you are working in, you +% can adjust the margins by going into this file and messing with the margins. +% the syntax is very unforgiving, but permits 3cm and 2.5in and some other things. + + +% Font: +% Here I set my main font, which is an Apple Corporation Exclusive, golly. + +% \setmainfont{Hoefler Text} +% \setromanfont[Mapping=tex-text,Contextuals={NoWordInitial,NoWordFinal,NoLineInitial,NoLineFinal},Ligatures={NoCommon}]{Hoefler Text} + +% Hoefler Text is okay, but note the long discussion of 'contextuals' which is necessary to cools off +% some of its show-offy properties. (You can make your essay look like the +% Declaration of Independence by specifying e.g. Ligatures={Rare} ) +% If you have a copy you might try it; as it is +% I will comment it out and supply something more certain to be around: + +%\setmainfont{Times Roman} +\setmainfont{DejaVuSans} + + +% Properly one should specify a sanserif font and a monospace font +% see e.g. the example of Kieran Healy: +% \setromanfont[Mapping=tex-text,Numbers=OldStyle]{Minion Pro} +% \setsansfont[Mapping=tex-text]{Minion Pro} +% \setmonofont[Mapping=tex-text,Scale=0.8]{Pragmata} + +% But I hate sanserif fonts, and anyway there are defaults. + + + +% Heading styles: +% These commands keep the koma system from making stupid sans serif section headings +\setkomafont{title}{\rmfamily\mdseries\upshape\normalsize} +\setkomafont{sectioning}{\rmfamily\mdseries\upshape\normalsize} +\setkomafont{descriptionlabel}{\rmfamily\mdseries\upshape\normalsize} + + + +% I'm puzzled why I have this foonote speciality, +% I wonder if it's part of my problem I've been having, but wont look +% into it now. +\usepackage[flushmargin]{footmisc} +% \usepackage[hang,flushmargin]{footmisc} + + +% So much for my personal template. 
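A practical note at this point: the template above is filled in by pandoc, not compiled directly. As a hedged sketch (the project's own invocation lives in scripts/generate_report.sh and may differ), a generated markdown report could be rendered against this template roughly as follows; report.md and report.pdf are placeholder names, and on pandoc 2.x the old --xetex switch mentioned in the comments is replaced by --pdf-engine=xelatex.

# Hypothetical invocation, not the project's actual script: render a generated
# markdown report to PDF through templates/simple_report.template.
import subprocess

subprocess.run([
    "pandoc", "report.md",
    "--template=templates/simple_report.template",
    "--pdf-engine=xelatex",        # older pandoc: --xetex, as in the comments above
    "--number-sections",
    "--variable=fontsize:12pt",
    "-o", "report.pdf",
], check=True)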
+ + +% Everything that follows is copied from the pandoc default template: +% I will interpolate a few comments, the comments that are in +% the default template will be marked % -- + +% Paragraph format: +% Pandoc prefers unindented paragraphs in the European style: +\setlength{\parindent}{0pt} +% ... with paragraph breaks marked by a slight lengthening of +% the space between paragraphs: +%\setlength{\parskip}{6pt plus 2pt minus 1pt} +\setlength{\parskip}{4pt plus 2pt minus 1pt} + +% Page format: + \pagestyle{plain} +% The default `plain` pagestyle just numbers the pages, +% whereas +%\pagestyle{empty} +% would give you no numbering. +% After one-million man-years of macro-composition, +% there are also fancy pagestyles with much wilder options +% for headers and footers, of course. + +% Footnotes +% if you have code in your footnotes, the million macro march +% kind of bumps into itself. +% Pandoc, having just rendered your text into LaTeX, +% knows whether the 'variable' `verbatim-in-note` is True, and +% If it is, it asks for a LaTeX package that solves the dilemma: +$if(verbatim-in-note)$ +\usepackage{fancyvrb} +$endif$ + +% Lists formatting: +% note sure what 'fancy enums' are; something to do with lists, +% as the further comment suggests: +$if(fancy-enums)$ +% -- Redefine labelwidth for lists; otherwise, the enumerate package will cause +% -- markers to extend beyond the left margin. +\makeatletter\AtBeginDocument{% + \renewcommand{\@listi} + {\setlength{\labelwidth}{4em}} +}\makeatother +\usepackage{enumerate} +$endif$ + + +% Table formatting: +% What if you make a table? -- Pandoc knows, of course, and +% then declares that its variable `table` is True and +% imports a table package suitable to its pleasantly simple tables. +% Needless to say infinitely complicated tables are possible in +% LaTeX with suitable packages. We are spared the temptation: + +$if(tables)$ +\usepackage{array} + +% Continuing on the topic of tables ... (we havent reached `endif`). +% The commented out line below is in the default pandoc latex.template. +% Some unpleasantness with table formatting must be corrected. + +% -- This is needed because raggedright in table elements redefines \\: +\newcommand{\PreserveBackslash}[1]{\let\temp=\\#1\let\\=\temp} +\let\PBS=\PreserveBackslash + +$endif$ + + +% Subscripts: +% Pandoc remembers whether you used subscripts, assigning True to +% its `subscript` variable +% It then needs to adopt a default with an incantation like this: +$if(subscript)$ +\newcommand{\textsubscr}[1]{\ensuremath{_{\scriptsize\textrm{#1}}}} +$endif$ + + +% Web-style links: + +% markdown inclines us to use links, since our texts can be made into html. +% Why not have clickable blue links even in +% learned, scientific, religious, juridical, poetical and other suchlike texts? +% Never mind that they have been proven to destroy the nervous system! + +% First, what about the fact that links like http://example.com are +% technically code and thus must not be broken across lines? +% [breaklinks=true] to the rescue! + +% Nowadays LaTeX can handle all of this with another half million macros: + +\usepackage[breaklinks=true]{hyperref} +\hypersetup{colorlinks,% +citecolor=blue,% +filecolor=blue,% +linkcolor=blue,% +urlcolor=blue} +$if(url)$ +\usepackage{url} +$endif$ + + + +% Images. +% In ye olde LaTeX one could only import a limited range of image +% types, e.g. the forgotten .eps files. Or else one simply drew the image with suitable +% commands and drawing packages. 
Today we want to import .jpg files we make with +% our smart phones or whatever: + +$if(graphics)$ +\usepackage{graphicx} +% -- We will generate all images so they have a width \maxwidth. This means +% -- that they will get their normal width if they fit onto the page, but +% -- are scaled down if they would overflow the margins. +\makeatletter +\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth +\else\Gin@nat@width\fi} +\makeatother +\let\Oldincludegraphics\includegraphics +\renewcommand{\includegraphics}[1]{\Oldincludegraphics[width=\maxwidth]{#1}} +$endif$ + + + +% Section numbering. +% Here again is a variable you can specify on the commandline +% `markdown2pdf my.txt --number-sections --xetex --template=/wherever/this/is -o my.pdf` +$if(numbersections)$ +$else$ +\setcounter{secnumdepth}{0} +$endif$ + +% Footnotes: +% Wait, didn't we already discuss the crisis of code in footnotes? +% Evidently the order of unfolding of macros required that +% we import a package to deal with them earlier +% and issue a command it defines now. (Or maybe that's not the reason; +% very often the order does matter as the insane system of macro expansion +% must take place by stages.) +$if(verbatim-in-note)$ +\VerbatimFootnotes % -- allows verbatim text in footnotes +$endif$ + +% Other stuff you specify on the command line: +% You can include stuff for the header from a file specified on the command line; +% I've never done this, but that stuff will go here: +$for(header-includes)$ +$header-includes$ +$endfor$ + +% Title, authors, date. +% If you specified title authors and date at the start of +% your pandoc-markdown file, pandoc knows the 'values' of the +% variables: title authors date and fills them in. + +$if(title)$ +\title{$title$} +$endif$ +\author{$for(author)$$author$$sep$\\$endfor$} +$if(date)$ +\date{$date$} +$endif$ + +% At last: +% The document itself!: + +% After filling in all these blanks above, or erasing them +% where they are not needed, Pandoc has finished writing the +% famous LaTeX *preamble* for your document. +% Now comes the all-important command \begin{document} +% which as you can see, will be paired with an \end{document} at the end. +% Pandoc knows whether you have a title, and has already +% specified what it is; if so, it demands that the title be rendered. +% Pandoc knows whether you want a table of contents, you +% specify this on the command line. +% Then, after fiddling with alignments, there comes the real +% business: pandoc slaps its rendering of your text in the place of +% the variable `body` +% It then concludes the document it has been writing. + +\begin{document} + + +$if(title)$ +\maketitle +$endif$ + +$if(toc)$ +\tableofcontents + +$endif$ + + +$if(alignment)$ +\begin{$alignment$} +$endif$ + +$body$ + +%$if(alignment)$ +\end{$alignment$} +$endif$ + + +\end{document}
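Finally, a closing note on the formatting helpers added earlier in src/reporting/utils.py: format_metric converts raw OpenTSDB aggregates into human-readable units (CPU shares divided by 100 into vcore-seconds, MB into GB, energy values of 10000 or more into kJoules, with a trailing /s appended for averages), and translate_metric maps metric names to the labels used in tables and plots. The calls below and their expected results are inferred from the code as written; note that importing the module also builds the BDWatchdog/OpenTSDB handler at import time, so a valid OpenTSDB configuration is assumed.

# Worked examples (values invented) for the formatting helpers in src/reporting/utils.py.
from src.reporting.utils import format_metric, translate_metric

print(format_metric(123456, "structure.cpu.usage", "SUM"))    # -> 1235 vcore-s
print(format_metric(150, "structure.cpu.usage", "AVG"))       # -> 1.50 vcore-s/s
print(format_metric(20480, "structure.mem.usage", "SUM"))     # -> 20 GB-s
print(format_metric(25000, "structure.energy.usage", "SUM"))  # -> 25 KJoule

print(translate_metric("structure.cpu.usage"))   # -> Used
print(translate_metric("structure.energy.max"))  # -> Power budget
print(translate_metric("proc.cpu.user"))         # -> User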