From d64e6f75607942020a25f62e9a91d09f30111bff Mon Sep 17 00:00:00 2001 From: ajinkyaghonge Date: Wed, 5 Apr 2023 16:35:16 -0700 Subject: [PATCH 1/4] Create pc_translator library. Differential Revision: D44617759 fbshipit-source-id: 3e57d6d307bc160a9b4e9626466eb3f6205750f6 --- fbpcs/pc_translator/PCTranslator.cpp | 39 +++++++++++++++++++++ fbpcs/pc_translator/PCTranslator.h | 52 ++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 fbpcs/pc_translator/PCTranslator.cpp create mode 100644 fbpcs/pc_translator/PCTranslator.h diff --git a/fbpcs/pc_translator/PCTranslator.cpp b/fbpcs/pc_translator/PCTranslator.cpp new file mode 100644 index 000000000..abc23e908 --- /dev/null +++ b/fbpcs/pc_translator/PCTranslator.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "fbpcs/pc_translator/PCTranslator.h" + +namespace pc_translator { + +std::string PCTranslator::encode(const std::string& /* inputDataset */) { + throw std::runtime_error("Unimplemented"); +} + +std::string PCTranslator::decode( + const std::string& /* aggregatedOutputDataset */) { + throw std::runtime_error("Unimplemented"); +} + +void PCTranslator::retrieveInstructionSets( + std::vector& /* instructionSetNames */) { + throw std::runtime_error("Unimplemented"); +} + +std::vector PCTranslator::retrieveInstructionSetNamesForRun( + const std::string& /* pcsFeatures */) { + throw std::runtime_error("Unimplemented"); +} + +void PCTranslator::transformDataset(const std::string& /* input */) { + throw std::runtime_error("Unimplemented"); +} + +void PCTranslator::parseInstructionSet( + const std::string& /* instructionSet */) { + throw std::runtime_error("Unimplemented"); +} +} // namespace pc_translator diff --git a/fbpcs/pc_translator/PCTranslator.h b/fbpcs/pc_translator/PCTranslator.h new file mode 100644 index 
000000000..0107e4025 --- /dev/null +++ b/fbpcs/pc_translator/PCTranslator.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace pc_translator { + +/* + * This class contains functions required for PC Translator during actual run + * i.e. retrieving the PC instruction sets, filtering the set per active GK for + * run, encoding and decoding the dataset files input as per the instruction + * set. + */ +class PCTranslator { + public: + explicit PCTranslator(const std::string& pcsFeatures) + : pcsfeatures_(pcsFeatures) {} + + /* + * Method to encode the configurable fields in input dataset as per the active + * pc instruction sets for the run. This method will output the path of + * transformed input dataset, which can be used in further PC run. + */ + std::string encode(const std::string& inputDataset); + + /* + * Method to decode final aggregated output with the encoded breakdown Ids as + * the keys. This method will decode the breakdown Ids to original group Id + * values and format the aggregated output as per the new keys. Output of this + * method would be the path of the decoded aggregated output. + */ + std::string decode(const std::string& aggregatedOutputDataset); + + private: + std::string pcsfeatures_; + void retrieveInstructionSets(std::vector& instructionSetNames); + std::vector retrieveInstructionSetNamesForRun( + const std::string& pcsfeatures); + void parseInstructionSet(const std::string& instructionSet); + void transformDataset(const std::string& input); +}; + +} // namespace pc_translator From 0027e537de25c04dfe7f76f4f903319e200f26a1 Mon Sep 17 00:00:00 2001 From: ajinkyaghonge Date: Wed, 5 Apr 2023 16:35:16 -0700 Subject: [PATCH 2/4] Add classes to parse PC Instruction set. 
Differential Revision: D44618035 fbshipit-source-id: da2145cb18893c45561fdb83c62a5c5bc271814e --- fbpcs/pc_translator/PCTranslator.cpp | 58 +++++++++++++++---- fbpcs/pc_translator/PCTranslator.h | 20 +++++-- .../input_processing/FilterConstraint.cpp | 33 +++++++++++ .../input_processing/FilterConstraint.h | 43 ++++++++++++++ .../input_processing/PCInstructionSet.cpp | 51 ++++++++++++++++ .../input_processing/PCInstructionSet.h | 47 +++++++++++++++ .../input_processing/TestPCInstructionSet.cpp | 44 ++++++++++++++ .../test_instruction_set.json | 45 ++++++++++++++ 8 files changed, 324 insertions(+), 17 deletions(-) create mode 100644 fbpcs/pc_translator/input_processing/FilterConstraint.cpp create mode 100644 fbpcs/pc_translator/input_processing/FilterConstraint.h create mode 100644 fbpcs/pc_translator/input_processing/PCInstructionSet.cpp create mode 100644 fbpcs/pc_translator/input_processing/PCInstructionSet.h create mode 100644 fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp create mode 100644 fbpcs/pc_translator/tests/input_processing/test_instruction_set.json diff --git a/fbpcs/pc_translator/PCTranslator.cpp b/fbpcs/pc_translator/PCTranslator.cpp index abc23e908..f5c640967 100644 --- a/fbpcs/pc_translator/PCTranslator.cpp +++ b/fbpcs/pc_translator/PCTranslator.cpp @@ -6,11 +6,22 @@ */ #include "fbpcs/pc_translator/PCTranslator.h" +#include "fbpcs/pc_translator/input_processing/PCInstructionSet.h" + +#include +#include +#include +#include "folly/String.h" namespace pc_translator { -std::string PCTranslator::encode(const std::string& /* inputDataset */) { - throw std::runtime_error("Unimplemented"); +std::string PCTranslator::encode(const std::string& inputDataset) { + auto validInstructionSetNames = + PCTranslator::retrieveInstructionSetNamesForRun(pcsFeatures_); + auto pcInstructionSets = + PCTranslator::retrieveInstructionSets(validInstructionSetNames); + PCTranslator::transformDataset(inputDataset, pcInstructionSets); + return ""; } 
std::string PCTranslator::decode( @@ -18,22 +29,47 @@ std::string PCTranslator::decode( throw std::runtime_error("Unimplemented"); } -void PCTranslator::retrieveInstructionSets( - std::vector& /* instructionSetNames */) { - throw std::runtime_error("Unimplemented"); +std::vector> +PCTranslator::retrieveInstructionSets( + std::vector& instructionSetNames) { + std::vector> pcInstructionSets; + for (auto instructionSetName : instructionSetNames) { + auto file_path = instructionSetBasePath + instructionSetName + ".json"; + auto contents = fbpcf::io::FileIOWrappers::readFile(file_path); + pcInstructionSets.push_back(PCTranslator::parseInstructionSet(contents)); + } + return pcInstructionSets; } std::vector PCTranslator::retrieveInstructionSetNamesForRun( - const std::string& /* pcsFeatures */) { - throw std::runtime_error("Unimplemented"); + const std::string& pcsFeatures) { + std::set enabledFeatureFlags; + folly::splitTo( + ',', + pcsFeatures, + std::inserter(enabledFeatureFlags, enabledFeatureFlags.begin()), + true); + + std::vector validPCInstructionSets; + std::copy_if( + enabledFeatureFlags.begin(), + enabledFeatureFlags.end(), + std::back_inserter(validPCInstructionSets), + [](const std::string& feature) { return feature.find("pc_instr") == 0; }); + + return validPCInstructionSets; } -void PCTranslator::transformDataset(const std::string& /* input */) { +void PCTranslator::transformDataset( + const std::string& /* inputData */, + const std::vector>& + /* pcInstructionSets */) { throw std::runtime_error("Unimplemented"); } -void PCTranslator::parseInstructionSet( - const std::string& /* instructionSet */) { - throw std::runtime_error("Unimplemented"); +std::shared_ptr PCTranslator::parseInstructionSet( + std::string& instructionSet) { + return std::make_shared(PCInstructionSet::fromDynamic( + folly::parseJson(std::move(instructionSet)))); } } // namespace pc_translator diff --git a/fbpcs/pc_translator/PCTranslator.h b/fbpcs/pc_translator/PCTranslator.h index 
0107e4025..efa0497c8 100644 --- a/fbpcs/pc_translator/PCTranslator.h +++ b/fbpcs/pc_translator/PCTranslator.h @@ -11,6 +11,7 @@ #include #include #include +#include "fbpcs/pc_translator/input_processing/PCInstructionSet.h" namespace pc_translator { @@ -23,7 +24,7 @@ namespace pc_translator { class PCTranslator { public: explicit PCTranslator(const std::string& pcsFeatures) - : pcsfeatures_(pcsFeatures) {} + : pcsFeatures_(pcsFeatures) {} /* * Method to encode the configurable fields in input dataset as per the active @@ -41,12 +42,19 @@ class PCTranslator { std::string decode(const std::string& aggregatedOutputDataset); private: - std::string pcsfeatures_; - void retrieveInstructionSets(std::vector& instructionSetNames); + std::string pcsFeatures_; + const std::string instructionSetBasePath = + "https://pc-translator.s3.us-west-2.amazonaws.com/"; + std::vector> retrieveInstructionSets( + std::vector& instructionSetNames); std::vector retrieveInstructionSetNamesForRun( - const std::string& pcsfeatures); - void parseInstructionSet(const std::string& instructionSet); - void transformDataset(const std::string& input); + const std::string& pcsFeatures); + std::shared_ptr parseInstructionSet( + std::string& instructionSet); + void transformDataset( + const std::string& input_data, + const std::vector>& + pcInstructionSets); }; } // namespace pc_translator diff --git a/fbpcs/pc_translator/input_processing/FilterConstraint.cpp b/fbpcs/pc_translator/input_processing/FilterConstraint.cpp new file mode 100644 index 000000000..6bc8a0dac --- /dev/null +++ b/fbpcs/pc_translator/input_processing/FilterConstraint.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include "fbpcs/pc_translator/input_processing/FilterConstraint.h" + +#include +#include +#include +#include + +namespace pc_translator { +FilterConstraint::FilterConstraint( + const std::string& name, + const std::string& type, + int value) + : name_(name), type_(type), value_(value) {} + +std::string FilterConstraint::getName() const { + return name_; +} + +std::string FilterConstraint::getType() const { + return type_; +} + +int FilterConstraint::getValue() const { + return value_; +} +} // namespace pc_translator diff --git a/fbpcs/pc_translator/input_processing/FilterConstraint.h b/fbpcs/pc_translator/input_processing/FilterConstraint.h new file mode 100644 index 000000000..00cbbc189 --- /dev/null +++ b/fbpcs/pc_translator/input_processing/FilterConstraint.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace pc_translator { + +/* + * Class to store each filter constraint include in the PC instruction set. + */ +class FilterConstraint { + public: + FilterConstraint(const std::string& name, const std::string& type, int value); + + /* + * Name of the filter constraint i.e. the field on which this filter is to be + * applied. + */ + std::string getName() const; + + /* + * Constraint type i.e. LT, LTE, EQ, NEQ etc. + */ + std::string getType() const; + + int getValue() const; + + private: + std::string name_; + std::string type_; + int value_; +}; + +} // namespace pc_translator diff --git a/fbpcs/pc_translator/input_processing/PCInstructionSet.cpp b/fbpcs/pc_translator/input_processing/PCInstructionSet.cpp new file mode 100644 index 000000000..bd1c995d8 --- /dev/null +++ b/fbpcs/pc_translator/input_processing/PCInstructionSet.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. 
+ * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "fbpcs/pc_translator/input_processing/PCInstructionSet.h" + +#include +#include +#include +#include +#include + +namespace pc_translator { + +const std::vector& PCInstructionSet::getGroupByIds() const { + return groupByIds; +} + +const std::vector& PCInstructionSet::getFilterConstraints() + const { + return filterConstraints; +} + +PCInstructionSet PCInstructionSet::fromDynamic(const folly::dynamic& obj) { + PCInstructionSet pcInstructionSet; + auto aggregationConfig = obj["aggregated_metrics"]; + auto groupByFields = aggregationConfig["group_by"]; + + for (auto groupByField : groupByFields) { + pcInstructionSet.groupByIds.push_back(groupByField.asString()); + } + + auto filterConstraintsFields = aggregationConfig["filter"]; + + for (auto& [key, constraints] : filterConstraintsFields.items()) { + std::string name = key.asString(); + for (auto constraint : constraints) { + auto constraintType = constraint["constraint_type"].asString(); + auto constraintValue = constraint["value"].asInt(); + FilterConstraint filterConstraint(name, constraintType, constraintValue); + pcInstructionSet.filterConstraints.push_back(filterConstraint); + } + } + + return pcInstructionSet; +} + +} // namespace pc_translator diff --git a/fbpcs/pc_translator/input_processing/PCInstructionSet.h b/fbpcs/pc_translator/input_processing/PCInstructionSet.h new file mode 100644 index 000000000..01e710f54 --- /dev/null +++ b/fbpcs/pc_translator/input_processing/PCInstructionSet.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include "fbpcs/pc_translator/input_processing/FilterConstraint.h" + +namespace pc_translator { + +/* + * Class to store PC Instruction set. This class contains a list of group Ids as + * well as list of filter constraints. + */ +class PCInstructionSet { + public: + /* + * Method to all group Ids from the PC instruction set. + */ + const std::vector& getGroupByIds() const; + + /* + * Method to get all filter constraints from PC instruction set. + */ + const std::vector& getFilterConstraints() const; + + /* + * Method to get parse and create PCInstructionSet instance. + */ + static PCInstructionSet fromDynamic(const folly::dynamic& obj); + + private: + std::vector groupByIds; + std::vector filterConstraints; + + void parseJson(const std::string& json); +}; + +} // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp b/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp new file mode 100644 index 000000000..32a38ff3a --- /dev/null +++ b/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include + +#include +#include +#include "../../../emp_games/common/TestUtil.h" +#include "fbpcs/pc_translator/input_processing/PCInstructionSet.h" +#include "folly/Random.h" + +namespace pc_translator { +class TestPCInstructionSet : public ::testing::Test { + public: + protected: + std::string testInstructionSetPath_; + + void SetUp() override { + std::string baseDir = + private_measurement::test_util::getBaseDirFromPath(__FILE__); + testInstructionSetPath_ = baseDir + "test_instruction_set.json"; + } +}; + +TEST_F(TestPCInstructionSet, TestStandardWorkflowTest) { + auto pcInstructionSet = std::make_shared( + PCInstructionSet::fromDynamic(folly::parseJson( + fbpcf::io::FileIOWrappers::readFile(testInstructionSetPath_)))); + auto groupByIds = pcInstructionSet->getGroupByIds(); + auto filterConstraints = pcInstructionSet->getFilterConstraints(); + EXPECT_EQ(groupByIds.size(), 2); + EXPECT_EQ(filterConstraints.size(), 4); + EXPECT_EQ(filterConstraints[0].getName(), "gender"); + EXPECT_EQ(filterConstraints[0].getType(), "EQ"); + EXPECT_EQ(filterConstraints[0].getValue(), 0); +} + +} // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/input_processing/test_instruction_set.json b/fbpcs/pc_translator/tests/input_processing/test_instruction_set.json new file mode 100644 index 000000000..7a0008b19 --- /dev/null +++ b/fbpcs/pc_translator/tests/input_processing/test_instruction_set.json @@ -0,0 +1,45 @@ +{ + "publisher_input": { + "num_impressions": "int", + "num_clicks": "int", + "total_spend": "int", + "opportunity_timstamp": "int", + "test_flag": "int", + "age": "int", + "gender": "Optional[int]", + "breakdown_id": "Optional[int]" + }, + "partner_input": { + "value": "int", + "event_timestamp": "int", + "partner_cohort_id": "Optional[int]" + }, + "aggregated_metrics": { + "filter": { + "age": [ + { + "constraint_type": "GTE", + "value": "25" + }, + { + "constraint_type": "LTE", + "value": "40" + } + ], + "gender": [ + { + 
"constraint_type": "EQ", + "value": "0" + }, + { + "constraint_type": "EQ", + "value": "1" + } + ] + }, + "group_by": [ + "age", + "gender" + ] + } +} From 92205558b93a30702ca27d976ca276702056d259 Mon Sep 17 00:00:00 2001 From: ajinkyaghonge Date: Wed, 5 Apr 2023 16:35:16 -0700 Subject: [PATCH 3/4] Called the OramEncoder library. Differential Revision: D44634384 fbshipit-source-id: 60b5ec02f4a94472bda4146e4222be1c5e222c31 --- fbpcs/pc_translator/PCTranslator.cpp | 70 ++++++++++++++++--- fbpcs/pc_translator/PCTranslator.h | 20 +++--- .../pc_translator/tests/TestPCTranslator.cpp | 36 ++++++++++ .../input_processing/TestPCInstructionSet.cpp | 5 +- ...son => pc_instr_test_instruction_set.json} | 4 -- .../tests/publisher_unittest.csv | 13 ++++ 6 files changed, 122 insertions(+), 26 deletions(-) create mode 100644 fbpcs/pc_translator/tests/TestPCTranslator.cpp rename fbpcs/pc_translator/tests/input_processing/{test_instruction_set.json => pc_instr_test_instruction_set.json} (89%) create mode 100644 fbpcs/pc_translator/tests/publisher_unittest.csv diff --git a/fbpcs/pc_translator/PCTranslator.cpp b/fbpcs/pc_translator/PCTranslator.cpp index f5c640967..ea27e69eb 100644 --- a/fbpcs/pc_translator/PCTranslator.cpp +++ b/fbpcs/pc_translator/PCTranslator.cpp @@ -10,7 +10,13 @@ #include #include +#include +#include +#include +#include #include +#include +#include "fbpcs/emp_games/common/Csv.h" #include "folly/String.h" namespace pc_translator { @@ -20,8 +26,12 @@ std::string PCTranslator::encode(const std::string& inputDataset) { PCTranslator::retrieveInstructionSetNamesForRun(pcsFeatures_); auto pcInstructionSets = PCTranslator::retrieveInstructionSets(validInstructionSetNames); - PCTranslator::transformDataset(inputDataset, pcInstructionSets); - return ""; + if (pcInstructionSets.empty()) { + // No instruction set found. return the input dataset path. 
+ return inputDataset; + } + return PCTranslator::transformDataset( + inputDataset, pcInstructionSets.front()); } std::string PCTranslator::decode( @@ -34,7 +44,13 @@ PCTranslator::retrieveInstructionSets( std::vector& instructionSetNames) { std::vector> pcInstructionSets; for (auto instructionSetName : instructionSetNames) { - auto file_path = instructionSetBasePath + instructionSetName + ".json"; + instructionSetName.erase( + remove(instructionSetName.begin(), instructionSetName.end(), '\''), + instructionSetName.end()); + instructionSetName.erase( + remove(instructionSetName.begin(), instructionSetName.end(), ' '), + instructionSetName.end()); + auto file_path = instructionSetBasePath_ + instructionSetName + ".json"; auto contents = fbpcf::io::FileIOWrappers::readFile(file_path); pcInstructionSets.push_back(PCTranslator::parseInstructionSet(contents)); } @@ -55,16 +71,52 @@ std::vector PCTranslator::retrieveInstructionSetNamesForRun( enabledFeatureFlags.begin(), enabledFeatureFlags.end(), std::back_inserter(validPCInstructionSets), - [](const std::string& feature) { return feature.find("pc_instr") == 0; }); + [](const std::string& feature) { + return feature.find("pc_instr") != std::string::npos; + }); return validPCInstructionSets; } -void PCTranslator::transformDataset( - const std::string& /* inputData */, - const std::vector>& - /* pcInstructionSets */) { - throw std::runtime_error("Unimplemented"); +std::string PCTranslator::transformDataset( + const std::string& inputData, + std::shared_ptr pcInstructionSet) { + // Parse the input CSV + auto lineNo = 0; + std::vector> inputColums; + private_measurement::csv::readCsv( + inputData, + [&](const std::vector& header, + const std::vector& parts) { + std::vector inputColumnPerRow; + for (std::vector::size_type i = 0; i < header.size(); + ++i) { + auto& column = header[i]; + auto value = std::atoi(parts[i].c_str()); + auto iter = std::find( + pcInstructionSet->getGroupByIds().begin(), + 
pcInstructionSet->getGroupByIds().end(), + column); + if (iter != pcInstructionSet->getGroupByIds().end()) { + inputColumnPerRow.push_back(value); + } + } + + inputColums.push_back(inputColumnPerRow); + lineNo++; + }); + + auto filters = std::make_unique< + std::vector>>(0); + std::unique_ptr encoder = + std::make_unique( + std::move(filters)); + + auto encodedIndexes = encoder->generateORAMIndexes(inputColums); + + // TODO : Append the enodedIndexes at the end of publisher output and return + // output path. + return ""; } std::shared_ptr PCTranslator::parseInstructionSet( diff --git a/fbpcs/pc_translator/PCTranslator.h b/fbpcs/pc_translator/PCTranslator.h index efa0497c8..3196d6304 100644 --- a/fbpcs/pc_translator/PCTranslator.h +++ b/fbpcs/pc_translator/PCTranslator.h @@ -26,11 +26,12 @@ class PCTranslator { explicit PCTranslator(const std::string& pcsFeatures) : pcsFeatures_(pcsFeatures) {} - /* - * Method to encode the configurable fields in input dataset as per the active - * pc instruction sets for the run. This method will output the path of - * transformed input dataset, which can be used in further PC run. 
- */ + explicit PCTranslator( + const std::string& pcsFeatures, + const std::string& instructionSetBasePath) + : pcsFeatures_(pcsFeatures), + instructionSetBasePath_(instructionSetBasePath) {} + std::string encode(const std::string& inputDataset); /* @@ -43,7 +44,7 @@ class PCTranslator { private: std::string pcsFeatures_; - const std::string instructionSetBasePath = + std::string instructionSetBasePath_ = "https://pc-translator.s3.us-west-2.amazonaws.com/"; std::vector> retrieveInstructionSets( std::vector& instructionSetNames); @@ -51,10 +52,9 @@ class PCTranslator { const std::string& pcsFeatures); std::shared_ptr parseInstructionSet( std::string& instructionSet); - void transformDataset( - const std::string& input_data, - const std::vector>& - pcInstructionSets); + std::string transformDataset( + const std::string& inputData, + std::shared_ptr pcInstructionSet); }; } // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/TestPCTranslator.cpp b/fbpcs/pc_translator/tests/TestPCTranslator.cpp new file mode 100644 index 000000000..f182efa9a --- /dev/null +++ b/fbpcs/pc_translator/tests/TestPCTranslator.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include "../../emp_games/common/TestUtil.h" +#include "fbpcs/pc_translator/PCTranslator.h" + +namespace pc_translator { +class TestPCTranslator : public ::testing::Test { + public: + protected: + std::string pcs_features_; + std::string test_instruction_set_base_path_; + std::string test_publisher_input_path_; + + void SetUp() override { + pcs_features_ = + "'num_mpc_container_mutation', 'private_lift_unified_data_process', 'pc_instr_test_instruction_set'"; + std::string baseDir = + private_measurement::test_util::getBaseDirFromPath(__FILE__); + test_instruction_set_base_path_ = baseDir + "input_processing/"; + test_publisher_input_path_ = baseDir + "publisher_unittest.csv"; + } +}; + +TEST_F(TestPCTranslator, TestEncode) { + auto pcTranslator = std::make_shared( + pcs_features_, test_instruction_set_base_path_); + auto outputPath = pcTranslator->encode(test_publisher_input_path_); + EXPECT_EQ(outputPath, ""); +} +} // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp b/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp index 32a38ff3a..d4ea3fd57 100644 --- a/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp +++ b/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp @@ -13,7 +13,6 @@ #include #include "../../../emp_games/common/TestUtil.h" #include "fbpcs/pc_translator/input_processing/PCInstructionSet.h" -#include "folly/Random.h" namespace pc_translator { class TestPCInstructionSet : public ::testing::Test { @@ -24,7 +23,7 @@ class TestPCInstructionSet : public ::testing::Test { void SetUp() override { std::string baseDir = private_measurement::test_util::getBaseDirFromPath(__FILE__); - testInstructionSetPath_ = baseDir + "test_instruction_set.json"; + testInstructionSetPath_ = baseDir + "pc_instr_test_instruction_set.json"; } }; @@ -35,7 +34,7 @@ TEST_F(TestPCInstructionSet, TestStandardWorkflowTest) { auto groupByIds = 
pcInstructionSet->getGroupByIds(); auto filterConstraints = pcInstructionSet->getFilterConstraints(); EXPECT_EQ(groupByIds.size(), 2); - EXPECT_EQ(filterConstraints.size(), 4); + EXPECT_EQ(filterConstraints.size(), 3); EXPECT_EQ(filterConstraints[0].getName(), "gender"); EXPECT_EQ(filterConstraints[0].getType(), "EQ"); EXPECT_EQ(filterConstraints[0].getValue(), 0); diff --git a/fbpcs/pc_translator/tests/input_processing/test_instruction_set.json b/fbpcs/pc_translator/tests/input_processing/pc_instr_test_instruction_set.json similarity index 89% rename from fbpcs/pc_translator/tests/input_processing/test_instruction_set.json rename to fbpcs/pc_translator/tests/input_processing/pc_instr_test_instruction_set.json index 7a0008b19..7fa183d2f 100644 --- a/fbpcs/pc_translator/tests/input_processing/test_instruction_set.json +++ b/fbpcs/pc_translator/tests/input_processing/pc_instr_test_instruction_set.json @@ -30,10 +30,6 @@ { "constraint_type": "EQ", "value": "0" - }, - { - "constraint_type": "EQ", - "value": "1" } ] }, diff --git a/fbpcs/pc_translator/tests/publisher_unittest.csv b/fbpcs/pc_translator/tests/publisher_unittest.csv new file mode 100644 index 000000000..691ecff30 --- /dev/null +++ b/fbpcs/pc_translator/tests/publisher_unittest.csv @@ -0,0 +1,13 @@ +id_,opportunity,test_flag,opportunity_timestamp, age, gender +cfcd208495d565ef66e7dff9f98764da,1,0,1600000430, 25, 0 +c4ca4238a0b923820dcc509a6f75849b,1,1,1600000401, 26, 1 +c81e728d9d4c2f636f067f89cc14862c,0,0,0, 44, 0 +eccbc87e4b5ce2fe28308fd9f2a7baf3,0,0,0, 23, 0 +a87ff679a2f3e71d9181a67b7542122c,0,0,0, 25, 0 +e4da3b7fbbce2345d7772b0674a318d5,1,1,1600000461, 24, 1 +1679091c5a880faf6fb5e6087eb1b2dc,1,0,1600000052, 25, 1 +8f14e45fceea167a5a36dedd4bea2543,1,0,1600000831, 26, 0 +c9f0f895fb98ab9159f51fd0297e236d,1,0,1600000530, 50, 0 +45c48cce2e2d7fbdea1afc51c7c6ad26,1,0,1600000972, 25, 1 +d3d9446802a44259755d38e6d163e820,0,0,0, 25, 0 +6512bd43d9caa6e02c990b0a82652dca,0,0,0, 25, 0 From 
43322ab0c01cb07597fbd0246b80ef99b46c1551 Mon Sep 17 00:00:00 2001 From: Ajinkya Ghonge Date: Wed, 5 Apr 2023 16:35:38 -0700 Subject: [PATCH 4/4] Add logic to write output with encoded Ids. (#2286) Summary: Pull Request resolved: https://github.com/facebookresearch/fbpcs/pull/2286 # Context As per the PC Translator design, we need a runtime library that will be called during the PC run. This library will be called at the beginning of the PC run to encode specified fields in the publisher-side input into encoded breakdown (aggregation) Ids based on the active PC instruction sets for the run. The library will filter the active PC instruction sets for the run by parsing the pcs_features, i.e. the gatekeepers for the particular run. # Product decisions In this stack we focus solely on the functionality required for private lift runs. We focus on the MVP implementation of the library and its integration with the fbpcf ORAM encoder library. # Stack 1. Create runtime pc_translator library. 2. Add logic to retrieve and parse PC instruction set, filtered based on the active gatekeepers for the run. 3. Integrate pc_translator library with fbpcf ORAM encoder. 4. Add logic to generate transformed publisher output with encoded breakdown ID and write the output. 5. Add support for filter constraints in pc_translator. # In this diff Add logic to generate transformed publisher output with encoded breakdown ID and write the output.
Differential Revision: D44645325 Privacy Context Container: L416713 fbshipit-source-id: e21b9fb2483eed0b133502a2ef3baa7ad7094a86 --- fbpcs/pc_translator/PCTranslator.cpp | 71 +++++++++++++++---- fbpcs/pc_translator/PCTranslator.h | 6 ++ .../pc_translator/tests/TestPCTranslator.cpp | 22 +++++- .../expected_transformed_publisher_input.csv | 13 ++++ 4 files changed, 95 insertions(+), 17 deletions(-) create mode 100644 fbpcs/pc_translator/tests/expected_transformed_publisher_input.csv diff --git a/fbpcs/pc_translator/PCTranslator.cpp b/fbpcs/pc_translator/PCTranslator.cpp index ea27e69eb..af03f281d 100644 --- a/fbpcs/pc_translator/PCTranslator.cpp +++ b/fbpcs/pc_translator/PCTranslator.cpp @@ -14,24 +14,27 @@ #include #include #include +#include +#include #include #include +#include #include "fbpcs/emp_games/common/Csv.h" #include "folly/String.h" namespace pc_translator { -std::string PCTranslator::encode(const std::string& inputDataset) { +std::string PCTranslator::encode(const std::string& inputDatasetPath) { auto validInstructionSetNames = PCTranslator::retrieveInstructionSetNamesForRun(pcsFeatures_); auto pcInstructionSets = PCTranslator::retrieveInstructionSets(validInstructionSetNames); if (pcInstructionSets.empty()) { // No instruction set found. return the input dataset path. 
- return inputDataset; + return inputDatasetPath; } return PCTranslator::transformDataset( - inputDataset, pcInstructionSets.front()); + inputDatasetPath, pcInstructionSets.front()); } std::string PCTranslator::decode( @@ -79,30 +82,43 @@ std::vector PCTranslator::retrieveInstructionSetNamesForRun( } std::string PCTranslator::transformDataset( - const std::string& inputData, + const std::string& inputDatasetPath, std::shared_ptr pcInstructionSet) { // Parse the input CSV auto lineNo = 0; std::vector> inputColums; + std::vector outputHeader; + std::vector> outputContent; private_measurement::csv::readCsv( - inputData, + inputDatasetPath, [&](const std::vector& header, const std::vector& parts) { std::vector inputColumnPerRow; + std::string column; + std::uint32_t value; + bool found = false; + std::vector outputContentPerRow; for (std::vector::size_type i = 0; i < header.size(); ++i) { - auto& column = header[i]; - auto value = std::atoi(parts[i].c_str()); - auto iter = std::find( - pcInstructionSet->getGroupByIds().begin(), - pcInstructionSet->getGroupByIds().end(), - column); - if (iter != pcInstructionSet->getGroupByIds().end()) { + column = header[i]; + value = std::atoi(parts[i].c_str()); + found = + (std::find( + pcInstructionSet->getGroupByIds().begin(), + pcInstructionSet->getGroupByIds().end(), + column) != pcInstructionSet->getGroupByIds().end()); + if (found) { inputColumnPerRow.push_back(value); + } else { + if (lineNo == 0) { + outputHeader.push_back(header[i]); + } + outputContentPerRow.push_back(parts[i]); } } inputColums.push_back(inputColumnPerRow); + outputContent.push_back(outputContentPerRow); lineNo++; }); @@ -114,9 +130,34 @@ std::string PCTranslator::transformDataset( auto encodedIndexes = encoder->generateORAMIndexes(inputColums); - // TODO : Append the enodedIndexes at the end of publisher output and return - // output path. 
- return ""; + auto dir = inputDatasetPath.substr(0, inputDatasetPath.rfind("/") + 1); + auto output_dataset_path = dir + "transformed_publisher_input.csv"; + + PCTranslator::putOutputData( + output_dataset_path, outputHeader, outputContent, encodedIndexes); + return output_dataset_path; +} + +void PCTranslator::putOutputData( + const std::string& output_dataset_path, + std::vector& outputHeader, + std::vector>& outputContent, + const std::vector& encodedIndexes) { + outputHeader.push_back("breakdown_id"); + + if (outputContent.size() != encodedIndexes.size()) { + throw std::runtime_error( + "Encoded index vector size should match the input vector size."); + } + + for (std::vector::size_type i = 0; i < encodedIndexes.size(); + ++i) { + auto indexVec = std::to_string(encodedIndexes[i]); + outputContent[i].push_back(indexVec); + } + + private_measurement::csv::writeCsv( + output_dataset_path, outputHeader, outputContent); } std::shared_ptr PCTranslator::parseInstructionSet( diff --git a/fbpcs/pc_translator/PCTranslator.h b/fbpcs/pc_translator/PCTranslator.h index 3196d6304..d0a978855 100644 --- a/fbpcs/pc_translator/PCTranslator.h +++ b/fbpcs/pc_translator/PCTranslator.h @@ -55,6 +55,12 @@ class PCTranslator { std::string transformDataset( const std::string& inputData, std::shared_ptr pcInstructionSet); + + void putOutputData( + const std::string& output_dataset_path, + std::vector& outputHeader, + std::vector>& outputContent, + const std::vector& encodedIndexes); }; } // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/TestPCTranslator.cpp b/fbpcs/pc_translator/tests/TestPCTranslator.cpp index f182efa9a..7febfea45 100644 --- a/fbpcs/pc_translator/tests/TestPCTranslator.cpp +++ b/fbpcs/pc_translator/tests/TestPCTranslator.cpp @@ -5,6 +5,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include "../../emp_games/common/TestUtil.h" #include "fbpcs/pc_translator/PCTranslator.h" @@ -16,6 +17,8 @@ class TestPCTranslator : public ::testing::Test { std::string pcs_features_; std::string test_instruction_set_base_path_; std::string test_publisher_input_path_; + std::string test_transformed_output_path_; + std::string expected_transformed_output_path_; void SetUp() override { pcs_features_ = @@ -23,7 +26,18 @@ class TestPCTranslator : public ::testing::Test { std::string baseDir = private_measurement::test_util::getBaseDirFromPath(__FILE__); test_instruction_set_base_path_ = baseDir + "input_processing/"; - test_publisher_input_path_ = baseDir + "publisher_unittest.csv"; + test_publisher_input_path_ = "/tmp/publisher_unittest.csv"; + test_transformed_output_path_ = "/tmp/transformed_publisher_input.csv"; + expected_transformed_output_path_ = + baseDir + "expected_transformed_publisher_input.csv"; + auto contents = + fbpcf::io::FileIOWrappers::readFile(baseDir + "publisher_unittest.csv"); + fbpcf::io::FileIOWrappers::writeFile(test_publisher_input_path_, contents); + } + + void TearDown() override { + std::remove(test_publisher_input_path_.c_str()); + std::remove(test_transformed_output_path_.c_str()); } }; @@ -31,6 +45,10 @@ TEST_F(TestPCTranslator, TestEncode) { auto pcTranslator = std::make_shared( pcs_features_, test_instruction_set_base_path_); auto outputPath = pcTranslator->encode(test_publisher_input_path_); - EXPECT_EQ(outputPath, ""); + auto contents = fbpcf::io::FileIOWrappers::readFile(outputPath); + auto expectedContents = + fbpcf::io::FileIOWrappers::readFile(expected_transformed_output_path_); + EXPECT_EQ(outputPath, test_transformed_output_path_); + EXPECT_EQ(contents, expectedContents); } } // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/expected_transformed_publisher_input.csv b/fbpcs/pc_translator/tests/expected_transformed_publisher_input.csv new file mode 100644 index 000000000..247407907 --- 
/dev/null +++ b/fbpcs/pc_translator/tests/expected_transformed_publisher_input.csv @@ -0,0 +1,13 @@ +id_,opportunity,test_flag,opportunity_timestamp,breakdown_id +cfcd208495d565ef66e7dff9f98764da,1,0,1600000430,0 +c4ca4238a0b923820dcc509a6f75849b,1,1,1600000401,1 +c81e728d9d4c2f636f067f89cc14862c,0,0,0,2 +eccbc87e4b5ce2fe28308fd9f2a7baf3,0,0,0,3 +a87ff679a2f3e71d9181a67b7542122c,0,0,0,0 +e4da3b7fbbce2345d7772b0674a318d5,1,1,1600000461,4 +1679091c5a880faf6fb5e6087eb1b2dc,1,0,1600000052,5 +8f14e45fceea167a5a36dedd4bea2543,1,0,1600000831,6 +c9f0f895fb98ab9159f51fd0297e236d,1,0,1600000530,7 +45c48cce2e2d7fbdea1afc51c7c6ad26,1,0,1600000972,5 +d3d9446802a44259755d38e6d163e820,0,0,0,0 +6512bd43d9caa6e02c990b0a82652dca,0,0,0,0