-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathMinHash.cpp
67 lines (62 loc) · 1.53 KB
/
MinHash.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#include <iostream>
#include <cstdio>
#include <cstring>
#include <sodium.h>
#include <set>
#include <map>
#include <hash_map>
#include <cstdlib>
using namespace std;
template<int, bool[]>
class HashMap
{
int key;
bool value[];
};
class MinHash
{
private:
int *hash;
int numHash;
public:
static int Hash(int x, int a, int b, int c)
{
int hashValue = (int)((a * (x >> 4) + b * x + c) & 131071);
return abs(hashValue);
}
MinHash(int numHash)
{
this->numHash = numHash;
hash = new int[numHash];
int a = rand() % 11 + 1;
int b = rand() % 11 + 1;
int c = rand() % 11 + 1;
for (int i = 0; i < numHash; i++)
{
int x = Hash(a * b * c, a, b, c);
hash[i] = x;
}
}
map<int, bool[]> buildBitMap(set<int> set1, set<int> set2)
{
map<int, bool[]> bitArray = new HashMap<int, bool[]>();
for(T t : set1){
bitArray.put(t, new boolean[]{true,false});
}
for(T t : set2){
if(bitArray.containsKey(t)){
// item is not present in set1
bitArray.put(t, new boolean[]{true,true});
}else if(!bitArray.containsKey(t)){
// item is not present in set1
bitArray.put(t, new boolean[]{false,true});
}
}
return bitArray;
}
double similarity(set<int> set1, set<int> set2)
{
int numSets = 2;
map<int, bool []> bitMap = buildBitMap(set1, set2);
}
};