Skip to content

Instantly share code, notes, and snippets.

Last active April 8, 2020 06:38
Show Gist options
  • Save longlongh4/4d8c53fbbc4f47a99d060ee39c34bb99 to your computer and use it in GitHub Desktop.
Save longlongh4/4d8c53fbbc4f47a99d060ee39c34bb99 to your computer and use it in GitHub Desktop.
// A demo that uses FAISS to build an index over Hamming-distance-based fingerprints.
#include <iostream>
#include <vector>
#include <fstream>
#include <sstream>
#include <faiss/IndexBinaryIVF.h>
#include <faiss/AutoTune.h>
#include <faiss/IndexBinaryFlat.h>
using namespace std;
// Path of the text file that main() opens and reads line by line; every line
// is handed to the VideoHashes constructor and becomes one video.
const string hashPath = "/home/hailong/labs/ads/hash.txt";
// Dataset example: (the sample input lines are not included in this copy)
/// Fingerprints of a single video, stored in the byte layout that FAISS
/// binary indexes take as input.
class VideoHashes
{
public:
    /// Parses one line of the hash file.
    ///
    /// The leading key is skipped (see the constructor body), then a
    /// comma-separated list of unsigned 64-bit decimal hashes is read until
    /// the first token that does not parse as a number.
    ///
    /// @param line one raw line of the hash file
    /// @param id   numeric identifier stored as the index payload
    VideoHashes(const std::string &line, int64_t id);

    // Don't use the UUID here, because it is too big as a payload for indexing.
    int64_t videoID;
    // A uint64 has to be represented as a vector of uint8 to be inserted into
    // FAISS: every hash occupies 8 consecutive bytes, most significant first.
    std::vector<uint8_t> hashes;
};

VideoHashes::VideoHashes(const std::string &line, int64_t id)
    : videoID(id)
{
    std::istringstream iss(line);
    // Skip the key in front of the hash list: discards at most 38 characters
    // and stops early right after the first ':'.
    // NOTE(review): 38 presumably matches the key width of the data set - confirm.
    iss.ignore(38, ':');

    for (uint64_t hash; iss >> hash;)
    {
        // Zero hashes are skipped and never reach the index.
        if (hash != 0)
        {
            // Emit the 8 bytes from the most significant one down.
            for (int shift = 56; shift >= 0; shift -= 8)
            {
                hashes.push_back(static_cast<uint8_t>((hash >> shift) & 0xFF));
            }
        }
        // Step over the separator so the next extraction starts on a digit.
        if (iss.peek() == ',')
        {
            iss.ignore();
        }
    }
}
int main()
vector<VideoHashes> videoHashesArray;
ifstream input(hashPath);
int64_t videoIndex = 1;
for (string line; getline(input, line);)
videoHashesArray.push_back(VideoHashes(line, videoIndex++));
printf("parsed %lu videos\n", videoHashesArray.size());
vector<uint8_t> trainData;
for (VideoHashes video : videoHashesArray)
trainData.insert(end(trainData), begin(video.hashes), end(video.hashes));
// Dimension of the vectors
int d = 64;
// Initializing the quantizer.
faiss::IndexBinaryFlat quantizer(d);
// Number of clusters.
int nlist = 32;
// Initializing index.
faiss::IndexBinaryIVF index(&quantizer, d, nlist);
index.nprobe = 4; // Number of nearest clusters to be searched per query.
index.train(trainData.size() / 8,;
for (VideoHashes video : videoHashesArray)
vector<faiss::Index::idx_t> labels(video.hashes.size() / 8, video.videoID);
index.add_with_ids(video.hashes.size() / 8,,;
cout << "total fingerprints:" << index.ntotal << endl;
// How many neighbours to return for each fingerprint
int k = 10;
// how many fingerprints in the query(we can query with more than one fingerprint)
faiss::Index::idx_t n = videoHashesArray[0].hashes.size() / 8;
// use this vector to get the distance result
vector<int32_t> distance(k * n);
// use this vector to get the labels result
vector<faiss::Index::idx_t> labels(k * n);, videoHashesArray[0], k,,;
for (int i = 0; i < n; i++)
cout << "frame index:" << i << endl;
for (int j = 0; j < k; j++)
cout << "distance:" << distance[i * 8 + j] << ',' << "labels:" << labels[i * 8 + j] << endl;
return 0;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment