From b8df7c833408317c8a9dc41d0c41ea9c21a4c53e Mon Sep 17 00:00:00 2001
From: Thamindu Randil <45028707+ThaminduR@users.noreply.github.com>
Date: Tue, 1 Sep 2020 12:48:13 +0530
Subject: [PATCH] setup and readme updated

---
Review note: setuptools only recognizes the keyword `extras_require`
(singular). The misspelled `extras_requires` is reported as an unknown
distribution option, so the 'DPLib' extra would never be registered.

 README.md |  3 ++-
 setup.py  | 14 +++++++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 6edc06b..dbd3752 100644
--- a/README.md
+++ b/README.md
@@ -496,6 +496,7 @@ ________________________________________________________________________________
 
 * PySpark 2.4.5. You can easily install it with `pip install pyspark`
 * PyArrow `pip install pyarrow`
 * Pandas `pip intall pandas`
+* kmodes `pip install kmodes`
 
 ### Clustering Based K Anonymity
 
@@ -580,7 +581,7 @@ Recommended small number of l to minimum the data loss. Default value is l = 2.
     StructField("column4", StringType()),
 ])
 
-l_df = Anonymizer.l_diverse(k_df,schema, QI, l=2)
+l_df = Anonymizer.l_diverse(k_df,schema, QI,SA, l=2)
 l_df.show()
 ```
 
diff --git a/setup.py b/setup.py
index 1ff2014..7e285c0 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="spark_privacy_preserver", # Replace with your own username
-    version="0.2.1",
+    version="0.3.1",
     author="thamindu",
     author_email="thamindu.randil@gmail.com",
     description="Anonymizing Library for Apache Spark",
@@ -19,4 +19,16 @@
         "Operating System :: OS Independent",
     ],
     python_requires='>=3.6',
+    install_requires=[
+        'pandas>=1.1',
+        'pyspark==2.4.5',
+        'pyarrow==0.17.1',
+        'diffprivlib==0.2.1',
+        'tabulate==0.8.7',
+        'mypy>=0.770',
+        'kmodes'
+    ],
+    extras_require={
+        'DPLib': ['notebook']
+    }
 )
\ No newline at end of file