This repository was archived by the owner on Feb 7, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLocality_Sensitive_Hashing.cpp
More file actions
83 lines (83 loc) · 2.93 KB
/
Locality_Sensitive_Hashing.cpp
File metadata and controls
83 lines (83 loc) · 2.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#include <iostream>
#include <vector>
#include <unordered_map>
#include <random>
#include <cmath>
class LSHHashFunction {
private:
std::vector<double> coefficients;
double bias;
public:
LSHHashFunction(int dimensions) : coefficients(dimensions), bias(0.0) {
std::random_device rd;
std::mt19937 gen(rd());
std::normal_distribution<double> distribution(0.0, 1.0);
for (int i = 0; i < dimensions; ++i) {
coefficients[i] = distribution(gen);
}
bias = distribution(gen);
}
int hash(const std::vector<double>& vector) const {
double dotProduct = 0.0;
for (std::size_t i = 0; i < coefficients.size(); ++i) {
dotProduct += coefficients[i] * vector[i];
}
return (dotProduct + bias >= 0) ? 1 : 0;
}
};
class LocalitySensitiveHashing {
private:
int numHashFunctions;
int numBuckets;
int dimensions;
std::vector<std::vector<int>> hashTables;
std::vector<LSHHashFunction> hashFunctions;
public:
LocalitySensitiveHashing(int numHashFunctions, int numBuckets, int dimensions)
: numHashFunctions(numHashFunctions), numBuckets(numBuckets), dimensions(dimensions),
hashTables(numHashFunctions, std::vector<int>(numBuckets)), hashFunctions(numHashFunctions, LSHHashFunction(dimensions)) {}
void insert(const std::vector<double>& vector) {
for (int i = 0; i < numHashFunctions; ++i) {
int hashValue = hashFunctions[i].hash(vector);
hashTables[i][hashValue]++;
}
}
std::vector<std::vector<double>> query(const std::vector<double>& queryVector) const {
std::vector<std::vector<double>> candidates;
for (int i = 0; i < numHashFunctions; ++i) {
int hashValue = hashFunctions[i].hash(queryVector);
for (std::size_t j = 0; j < hashTables[i].size(); ++j) {
if (hashTables[i][j] > 0 && j != hashValue) {
candidates.push_back(std::vector<double>(1, j));
}
}
}
return candidates;
}
};
int main() {
int numHashFunctions = 5;
int numBuckets = 10;
int dimensions = 20;
LocalitySensitiveHashing lsh(numHashFunctions, numBuckets, dimensions);
std::random_device rd;
std::mt19937 gen(rd());
std::normal_distribution<double> distribution(0.0, 1.0);
for (int i = 0; i < 100; ++i) {
std::vector<double> vector(dimensions);
for (int j = 0; j < dimensions; ++j) {
vector[j] = distribution(gen);
}
lsh.insert(vector);
}
std::vector<double> queryVector(dimensions);
for (int j = 0; j < dimensions; ++j) {
queryVector[j] = distribution(gen);
}
std::vector<std::vector<double>> candidates = lsh.query(queryVector);
std::cout << "Candidates for query vector:\n";
for (const auto& candidate : candidates) {
std::cout << "Bucket Index: " << candidate[0] << "\n";
}
return 0;
}