Created
July 17, 2011 13:45
-
-
Save markusl/1087604 to your computer and use it in GitHub Desktop.
Faster C++ class to map IP addresses to countries using database from http://software77.net/geo-ip/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <array>
#include <chrono>
#include <cstdint>
#include <ctime>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
/** Appends the pieces of `s` separated by `delim` to `elems`.
 *
 * Pieces are extracted with std::getline, so empty pieces between two
 * adjacent delimiters are kept while a trailing delimiter does not produce
 * a final empty piece. Existing contents of `elems` are left in place.
 *
 * @param s      text to cut into pieces
 * @param delim  separator character
 * @param elems  output vector that receives the pieces
 * @return reference to `elems`
 */
std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
    std::istringstream reader(s);
    for (std::string piece; std::getline(reader, piece, delim); ) {
        elems.push_back(piece);
    }
    return elems;
}
std::vector<std::string> split(const std::string &s, char delim) { | |
std::vector<std::string> elems; | |
return split(s, delim, elems); | |
} | |
/** Converts text to a value of type T using stream extraction.
 *
 * @tparam T  default-constructible type supporting `operator>>`
 * @param str text to parse
 * @return the extracted value; a value-initialized T (0 for arithmetic
 *         types) when `str` is empty or holds nothing extractable
 */
template <class T>
T stringTo(const std::string &str)
{
    // Value-initialize: when the stream is already at EOF (empty input) the
    // extraction never stores anything, which previously left `result`
    // indeterminate.
    T result{};
    std::stringstream ss(str);
    ss >> result;
    return result;
}
template <class Ty, class Container> | |
std::vector<Ty> convertContainerTo(const Container &source) | |
{ | |
std::vector<Ty> result; | |
std::for_each(source.begin(), source.end(), | |
[&result](const std::string &it){ result.push_back(stringTo<Ty>(it)); }); | |
return result; | |
} | |
/** Use the standard fixed-width type rather than declaring our own
 * `uint32_t`, which conflicts with <cstdint> on platforms where that type
 * is not `unsigned int`. The using-declaration keeps the unqualified name
 * available to the rest of the file. */
using std::uint32_t;
/** An IPv4 address packed into a 32-bit unsigned integer. */
typedef uint32_t IpAddress_t;
/** IP address mapping entry */ | |
class IpAddressMapping { | |
public: | |
std::string country; | |
IpAddress_t startAddress; | |
}; | |
/** Class for mapping IP addresses to countries using database | |
* from http://software77.net/geo-ip/ */ | |
class IpToCountry | |
{ | |
std::array<std::vector<IpAddressMapping>, 256> m_countryIpList; | |
static unsigned char GetIndexFromAddress(IpAddress_t address) | |
{ | |
return address >> (3*8); | |
} | |
IpAddressMapping GetCountryFromIndex(const IpAddress_t address, unsigned char index) const | |
{ | |
const auto &list = m_countryIpList[index]; | |
auto it = std::find_if(list.rbegin(), list.rend(), | |
[address](IpAddressMapping it) { return it.startAddress <= address; }); | |
if(it == list.rend()) | |
return GetCountryFromIndex(address, index-1); | |
return *it; | |
} | |
public: | |
/** Construct new IP-to-country mapper from the specified file. */ | |
IpToCountry(const std::string &FileName = "IpToCountry.csv") | |
{ | |
std::ifstream file(FileName); | |
while(file.good() && !file.eof()) | |
{ | |
std::string line; | |
std::getline(file, line); | |
if(line.find_first_of('#') == std::string::npos && line.length() > 0) | |
{ | |
IpAddressMapping mapping = ParseSingleLine(line); | |
m_countryIpList[GetIndexFromAddress(mapping.startAddress)].push_back(mapping); | |
} | |
} | |
} | |
/** Find the country for given IP address or throw std::exception. */ | |
IpAddressMapping GetCountry(const std::string &address) const | |
{ | |
IpAddress_t integerIp = IntegerFromIp(address); | |
return GetCountryFromIndex(integerIp, GetIndexFromAddress(integerIp)); | |
} | |
/** Convert a human-readable ipv4 address to integer */ | |
static IpAddress_t IntegerFromIp(const std::string &ip) | |
{ | |
auto tokens = split(ip, '.'); | |
auto integers = convertContainerTo<uint32_t>(tokens); | |
return (integers[0] << (3*8)) + | |
(integers[1] << (2*8)) + | |
(integers[2] << (1*8)) + | |
integers[3]; | |
} | |
private: | |
// File format: | |
// "1464729600","1464860671","ripencc","1117497600","DE","DEU","Germany" | |
static IpAddressMapping ParseSingleLine(const std::string &line) | |
{ | |
IpAddressMapping mapping; | |
auto tokens = split(line, ','); | |
mapping.country = tokens[6].substr(1, tokens[6].length()-2); | |
mapping.startAddress = stringTo<uint32_t>(tokens[0].substr(1, tokens[0].length()-2)); | |
return mapping; | |
} | |
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Example usage, measuring the speed difference between optimized and first implementation | |
/** Reads the IP addresses to look up from "ip_addresses.txt", one per line.
 *
 * Blank lines are skipped; in particular the file's final newline no longer
 * produces a trailing empty entry. A missing or unreadable file yields an
 * empty list.
 *
 * @return the non-empty lines of the file, in order
 */
std::vector<std::string> GetIps()
{
    std::vector<std::string> ips;
    std::ifstream file("ip_addresses.txt");
    // Loop on the extraction itself so a failed read is never recorded.
    for (std::string line; std::getline(file, line); )
    {
        if (!line.empty())
            ips.push_back(line);
    }
    return ips;
}
template <class T> void DoTimedRun(T &mapping, const char *description, const std::vector<std::string> &ips) | |
{ | |
time_t start, end; | |
std::time(&start); | |
std::vector<IpAddressMapping> mappings(ips.size()); | |
std::transform(ips.begin(), ips.end(), mappings.begin(), | |
[&mapping](const std::string &it) { | |
return mapping.GetCountry(it); | |
}); | |
std::time(&end); | |
std::cout << description << " took " << difftime(end, start) << " seconds" << std::endl; | |
} | |
void DoItFast(const std::vector<std::string> &ips) | |
{ | |
IpToCountry mapping("IpToCountry.csv"); | |
DoTimedRun(mapping, "Fast", ips); | |
} | |
void DoItSlow(const std::vector<std::string> &ips) | |
{ | |
// See https://gist.github.com/1086449 | |
IpToCountrySlow mapping("IpToCountry.csv"); | |
DoTimedRun(mapping, "Slow", ips); | |
} | |
// Example output with 100 000 ip addresses: | |
// Fast took 6 seconds | |
// Slow took 155 seconds | |
int main() | |
{ | |
std::vector<std::string> ips = GetIps(); | |
DoItFast(ips); | |
DoItSlow(ips); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment