-
Notifications
You must be signed in to change notification settings - Fork 366
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
DP Auditorium divergence testers update
Privacy on Beam: * Bump golang.org/x/net from 0.22.0 to 0.23.0 DP Auditorium: * Unify RenyiPropertyTester under new divergence tester class * Add interface for divergence based testers * Add example of testing PipelineDP mean mechanism in IPython * Upgrade Histogram tester * Update dependencies DP Accounting: * Increment patch version of DP accounting library for PyPi release Change-Id: I3a513cf7d7c7e144b11c778f792f723dff53132f GitOrigin-RevId: 1833e65df37c76d756d3a62ed6ba99112c0a5dd0
- Loading branch information
Showing
23 changed files
with
997 additions
and
504 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
""" Version of the current release of DP Accounting """ | ||
0.4.3 | ||
0.4.4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
235 changes: 235 additions & 0 deletions
235
python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,235 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "I0Z7vNS_ybbU" | ||
}, | ||
"source": [ | ||
"This colab notebook uses DP-auditorium to test differentially private mechanisms computing aggregate statistics using PipelineDP." | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "wUtLsXpF9q4D" | ||
}, | ||
"source": [ | ||
"\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", | ||
" \u003ctd\u003e\n", | ||
" \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/google/differential-privacy/blob/main/python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", | ||
" \u003c/td\u003e\n", | ||
" \u003ctd\u003e\n", | ||
" \u003ca target=\"_blank\" href=\"https://github.com/google/differential-privacy/blob/main/python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n", | ||
" \u003c/td\u003e\n", | ||
"\u003c/table\u003e\n", | ||
"\n", | ||
"\u003cbr\u003e\n", | ||
"\u003cbr\u003e" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "WPLSKwjEHfXI" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"#@title Install and import dp_auditorium and all necessary libraries.\n", | ||
"!pip install google-vizier equinox pipeline_dp\n", | ||
"!git clone https://github.com/google/differential-privacy.git\n", | ||
"import sys\n", | ||
"sys.path.append('differential-privacy/python/dp_auditorium')\n", | ||
"\n", | ||
"from dp_auditorium import privacy_test_runner\n", | ||
"from dp_auditorium.generators import pipeline_dp_vizier_dataset_generator\n", | ||
"from dp_auditorium.configs import dataset_generator_config\n", | ||
"from dp_auditorium.configs import privacy_property\n", | ||
"from dp_auditorium.configs import privacy_test_runner_config\n", | ||
"from dp_auditorium.configs import property_tester_config\n", | ||
"from dp_auditorium.mechanisms.pipeline_dp import aggregation as pipeline_dp_mechanism\n", | ||
"from dp_auditorium.testers import hockey_stick_tester\n", | ||
"\n", | ||
"import pipeline_dp\n", | ||
"import tensorflow as tf\n", | ||
"tf.compat.v1.enable_eager_execution()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "dr5A5W7Aq2SO" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"#@title Example of testing PipelineDP mean mechanism\n", | ||
"import time\n", | ||
"\n", | ||
"def pipeline_dp_mean_mechanism_report(\n", | ||
" epsilon: float,\n", | ||
" delta: float,\n", | ||
" seed: int,\n", | ||
" max_number_partitions: int = 10,\n", | ||
") -\u003e privacy_test_runner_config.PrivacyTestRunnerResults:\n", | ||
" \"\"\"Runs the example code for a mean mechanism.\n", | ||
"\n", | ||
" Args:\n", | ||
" epsilon: standard approximate DP parameter.\n", | ||
" delta: standard approximate DP parameter.\n", | ||
" seed: seed to initialize the random number generator.\n", | ||
" max_number_partitions: maximum number of partitions which can be used by\n", | ||
" dataset generator.\n", | ||
"\n", | ||
" Returns:\n", | ||
" The result of the example code as PrivacyTestRunnerResults.\n", | ||
" \"\"\"\n", | ||
" tf.random.set_seed(seed)\n", | ||
"\n", | ||
" # Specify a config for computing with PipelineDP Mean aggregation, namely\n", | ||
" # computing mean aggregation per partition, i.e. in SQL terms DP version of\n", | ||
" # SELECT partition_key, mean(value)\n", | ||
" # GROUP BY partition_key\n", | ||
" # is computed.\n", | ||
" # See https://pipelinedp.io/key-definitions/ on more details of PipelineDP terminology.\n", | ||
" mech_config = pipeline_dp.AggregateParams(\n", | ||
" metrics=[pipeline_dp.Metrics.MEAN],\n", | ||
" # Laplace noise is used for ensuring DP\n", | ||
" noise_kind=pipeline_dp.NoiseKind.LAPLACE,\n", | ||
" # Set contribution bounds:\n", | ||
"\n", | ||
" # 1. If some privacy unit contributes to more than 1 partition, then\n", | ||
" # PipelineDP will choose randomly 1 partition, contributions to others\n", | ||
" # will be dropped.\n", | ||
" max_partitions_contributed=1,\n", | ||
" # 2. If some privacy unit contributes more than 1 time to some\n", | ||
" # partition, then PipelineDP will choose randomly 1 contribution; other\n", | ||
" # contributions will be dropped.\n", | ||
" max_contributions_per_partition=1,\n", | ||
"\n", | ||
" # 3. Each contribution will be clipped to [-1, 1].\n", | ||
" min_value=-1.0,\n", | ||
" max_value=1.0)\n", | ||
"\n", | ||
" # Initialize the mechanism.\n", | ||
" public_partitions = list(range(max_number_partitions))\n", | ||
" mechanism = pipeline_dp_mechanism.AggregationMechanism(mech_config,\n", | ||
" privacy_property.ApproximateDp(\n", | ||
" epsilon=epsilon,\n", | ||
" delta=delta,\n", | ||
" ), public_partitions)\n", | ||
"\n", | ||
" # Configuration for a Hockey-Stick property tester. Given arrays s1 and s2\n", | ||
" # with samples from two distributions it will estimate the hockey-stick\n", | ||
" # divergence between the underlying distributions. It checks if the estimated\n", | ||
" # divergence is bounded by delta.\n", | ||
" tester_config = property_tester_config.HockeyStickPropertyTesterConfig(\n", | ||
" training_config=hockey_stick_tester.make_default_hs_training_config(),\n", | ||
" approximate_dp=privacy_property.ApproximateDp(\n", | ||
" epsilon=epsilon,\n", | ||
" delta=delta,\n", | ||
" ),\n", | ||
" )\n", | ||
"\n", | ||
" # Initialize a classifier model for the Hockey-Stick property tester.\n", | ||
" # This classifier will learn to distinguish between samples of the mechanism\n", | ||
" # on adjacent datasets. Its accuracy level should be controlled by the privacy\n", | ||
" # guarantee.\n", | ||
" base_model = hockey_stick_tester.make_default_hs_base_model()\n", | ||
" # Initialize a property tester.\n", | ||
" property_tester = hockey_stick_tester.HockeyStickPropertyTester(\n", | ||
" config=tester_config,\n", | ||
" base_model=base_model,\n", | ||
" )\n", | ||
"\n", | ||
" # Configuration for dataset generator. It generates neighboring datasets under\n", | ||
" # the add/remove definition. Unique study name prevents using cached results\n", | ||
" # from previous runs.\n", | ||
" generator_config = dataset_generator_config.VizierDatasetGeneratorConfig(\n", | ||
" study_name=str(time.time()),\n", | ||
" study_owner=\"owner\",\n", | ||
" num_vizier_parameters=2,\n", | ||
" data_type=dataset_generator_config.DataType.DATA_TYPE_FLOAT,\n", | ||
" min_value=-1.0,\n", | ||
" max_value=1.0,\n", | ||
" search_algorithm=\"RANDOM_SEARCH\",\n", | ||
" metric_name=\"hockey_stick_divergence\",\n", | ||
" )\n", | ||
"\n", | ||
" # Dataset generator will generate datasets of not more than\n", | ||
" # max_number_partitions partitions and not more than 10 privacy units.\n", | ||
" # The same partitions are used as public_partitions and as partitions in\n", | ||
" # dataset. So the mechanism will not drop the partitions. We do not check\n", | ||
" # partition selection. We focus only on checking noise.\n", | ||
" pipeline_dp_generator_config = pipeline_dp_vizier_dataset_generator.PipelineDpDatasetGeneratorConfig(\n", | ||
" max_num_privacy_ids=10, max_num_partitions=max_number_partitions)\n", | ||
"\n", | ||
" # Initialize the dataset generator.\n", | ||
" dataset_generator = pipeline_dp_vizier_dataset_generator.PipelineDpDatasetGenerator(\n", | ||
" generator_config, pipeline_dp_generator_config)\n", | ||
"\n", | ||
" # Configuration for the test runner.\n", | ||
" # The test runner coordinates how the test is evaluated. It receives a\n", | ||
" # dataset generator, a property tester and a configuration (see base class for\n", | ||
" # details on these parameters), and runs privacy tests using the property\n", | ||
" # tester on datasets generated by the dataset generator.\n", | ||
" test_runner_config = privacy_test_runner_config.PrivacyTestRunnerConfig(\n", | ||
" property_tester=privacy_test_runner_config.PropertyTester.HOCKEY_STICK_TESTER,\n", | ||
" max_num_trials=10,\n", | ||
" failure_probability=0.05,\n", | ||
" num_samples=10_000,\n", | ||
" # Apply a hyperbolic tangent function to the output of the mechanism\n", | ||
" post_processing=privacy_test_runner_config.PostProcessing.TANH,\n", | ||
" )\n", | ||
" # Initialize the test runner.\n", | ||
" test_runner = privacy_test_runner.PrivacyTestRunner(\n", | ||
" config=test_runner_config,\n", | ||
" dataset_generator=dataset_generator,\n", | ||
" property_tester=property_tester,\n", | ||
" )\n", | ||
"\n", | ||
" return test_runner.test_privacy(mechanism, \"pipeline_dp-mean-mechanism\")\n", | ||
"\n", | ||
"\n", | ||
"EPSILON = 1.0\n", | ||
"DELTA = 1e-5\n", | ||
"SEED = 1\n", | ||
"\n", | ||
"# The results indicate whether a privacy violation was identified within the\n", | ||
"# designated number of trials defined in the configuration. In the absence of a\n", | ||
"# violation, a message is returned indicating that the limit of the number of\n", | ||
"# trials has been reached. For reference, all computed divergences across all\n", | ||
"# trials are also reported.\n", | ||
"results = pipeline_dp_mean_mechanism_report(EPSILON, DELTA, SEED)\n", | ||
"print(f\" \\nResults: \\n{results}\")\n", | ||
"if results.found_privacy_violation is not None:\n", | ||
" print(\"Privacy violations found!\")\n" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"colab": { | ||
"private_outputs": true, | ||
"provenance": [ | ||
{ | ||
"file_id": "1QyFD_doucyHewiRMtxGvFxNrFlgbCqQa", | ||
"timestamp": 1708693099970 | ||
}, | ||
{ | ||
"file_id": "1pBgTlH19OwJ3diUYf3m3QaZcVNQGeB8B", | ||
"timestamp": 1708692052606 | ||
} | ||
] | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"name": "python" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.