Created
July 13, 2011 15:35
-
-
Save markusl/1080544 to your computer and use it in GitHub Desktop.
Fast F# API for IpToCountry.csv - Class for mapping IP addresses to countries in FSharp using GPL'd CSV database from http://software77.net/geo-ip/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module IpToCountry | |
open System | |
open System.IO | |
/// Country label attached to a block of IP addresses.
/// Note: the CSV parser in this file fills `code` from the seventh column of a
/// data row, which is the country name in the sample row shown in the parser.
type CountryCode =
    { code : string }

/// Associates a country with the first address of an IPv4 range.
/// startAddress is that first address as a 32-bit unsigned integer.
type IpAddressMapping =
    { cc : CountryCode
      startAddress : uint32 }
/// Stores IP address mappings in 256 buckets, indexed by the most significant
/// byte of each mapping's start address.
/// countryIpList = the IP address mappings to store in this instance
type IpAddressStore(countryIpList) =
    /// Right-shift that reduces a 32-bit address to its most significant byte
    let topByteShift = 3 * 8

    /// buckets.[b] holds every mapping whose start address has top byte b,
    /// ordered by descending start address
    let buckets =
        let arr = Array.create<IpAddressMapping list> 256 []
        for item in countryIpList do
            let index = int (item.startAddress >>> topByteShift)
            arr.[index] <- item :: arr.[index]
        // The lookup returns the FIRST entry whose start address is <= the
        // queried address, so each bucket must be ordered from highest to
        // lowest start address. Sorting explicitly removes the dependency on
        // the input being pre-sorted. List.sortWith is stable, so entries with
        // equal start addresses keep the order produced by the loop above.
        arr
        |> Array.map (List.sortWith (fun (a : IpAddressMapping) (b : IpAddressMapping) ->
            compare b.startAddress a.startAddress))

    /// Check if the given IP address mapping starts at or before the given
    /// integral ip. Only the start address is compared (no end address is
    /// stored), so an address lying after the end of a range still matches it.
    let entryContainsTheIp integerIp ipMapping =
        ipMapping.startAddress <= integerIp

    /// Find the IP address mapping for the given integral ip, i.e. the mapping
    /// with the highest start address that is <= integerIp.
    /// Raises IndexOutOfRangeException when no stored mapping starts at or
    /// before integerIp.
    member this.FindMapping(integerIp) =
        let rec findFrom index =
            if index < 0 then
                // Every bucket down to 0 has been searched without a match
                raise (IndexOutOfRangeException(sprintf "No IP address mapping starts at or before address %d" integerIp))
            else
                match buckets.[index] |> List.tryFind (entryContainsTheIp integerIp) with
                | Some mapping -> mapping
                // Nothing suitable in this bucket: fall back to the next lower
                // bucket, whose entries all start below the queried address
                | None -> findFrom (index - 1)
        findFrom (int (integerIp >>> topByteShift))
/// Maps IPv4 addresses to countries using the CSV database from
/// http://software77.net/geo-ip/
/// fileName = optional path of the CSV file; defaults to "IpToCountry.csv"
/// Construction reads and parses the whole file; a data line with fewer than
/// seven fields or a non-numeric start address raises FormatException.
type IpToCountry(?fileName) =
    let fileName = defaultArg fileName "IpToCountry.csv"

    // Keep only data lines: drop blank lines and '#' comment lines
    let ipToCountryLines =
        File.ReadAllLines(fileName)
        |> Seq.filter (fun line -> line.Trim() <> "" && not (line.StartsWith("#")))

    // File format:
    // "1464729600","1464860671","ripencc","1117497600","DE","DEU","Germany"
    let parseSingleLine (line : string) =
        let fields = line.Replace("\"", "").Split(',')
        if fields.Length < 7 then
            raise (FormatException(sprintf "Expected at least 7 comma-separated fields but found %d: %s" fields.Length line))
        let ipStart = Convert.ToUInt32(fields.[0])
        // The last column may itself contain commas, which the Split above
        // breaks apart; rejoin everything from the seventh field onward so the
        // value is not truncated at its first comma.
        let country = String.Join(",", fields, 6, fields.Length - 6)
        { cc = { code = country }; startAddress = ipStart }

    // Map to the format used by the store (evaluated lazily, consumed once
    // by the IpAddressStore constructor)
    let countryIpList = ipToCountryLines |> Seq.map parseSingleLine
    let store = IpAddressStore(countryIpList)

    /// Convert a human-readable dotted IPv4 address to its 32-bit integer form.
    /// Raises FormatException unless the address has exactly four parts;
    /// Byte.Parse raises FormatException or OverflowException for a part that
    /// is not a number in the range 0..255.
    let ipToInteger (ip : string) =
        let parts = ip.Split('.')
        // Any other part count would produce a silently wrong address
        if parts.Length <> 4 then
            raise (FormatException(sprintf "Expected 4 dot-separated parts in IPv4 address but found %d: %s" parts.Length ip))
        parts
        |> Array.map (fun part -> Byte.Parse(part))
        // First part ends up in the most significant byte
        |> Array.fold (fun acc octet -> (acc <<< 8) ||| uint32 octet) 0u

    /// Map an IPv4 address string to the country stored for it.
    /// Raises IndexOutOfRangeException when the store has no mapping for the
    /// address, and FormatException or OverflowException when the address
    /// string is malformed.
    member this.getCountry (ip) =
        let integerIp = ipToInteger ip
        store.FindMapping(integerIp).cc.code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Example of processing a large amount of data using parallel LINQ (PLINQ) queries
open System | |
open System.Linq | |
open System.IO | |
open IpToCountry | |
/// Times the enumeration of a sequence and reports the result on stdout.
/// f = the sequence to enumerate (it is fully materialised into a list)
/// s = label printed at the start of the report line
/// Prints "<s> completed for <item count> items in <seconds>s".
let measureFunc f s =
    let timer = System.Diagnostics.Stopwatch.StartNew()
    // Materialising the sequence is what actually runs any deferred work in it
    let items = List.ofSeq f
    printfn "%s completed for %d items in %fs" s (List.length items) timer.Elapsed.TotalSeconds
    timer.Stop()
// Use at most the first 10000 addresses. Seq.truncate, unlike Seq.take, does
// not throw when the file contains fewer lines than requested.
let ips = File.ReadAllLines(@"ip_addresses.txt") |> Seq.truncate 10000 |> Array.ofSeq

// Parallel query resolving every address with the bucketed IpToCountry class.
// The lookup object is constructed here; the query itself runs only when it is
// enumerated inside measureFunc.
let ipToCountryFast =
    let ipToCountry = IpToCountry()
    ips.AsParallel().Select(fun f -> (ipToCountry.getCountry f))

// Same query using IpToCountrySlow, which is defined outside this file
let ipToCountrySlow =
    let ipToCountry = IpToCountrySlow()
    ips.AsParallel().Select(fun f -> (ipToCountry.getCountry f))

measureFunc ipToCountryFast "IpToCountryFast"
measureFunc ipToCountrySlow "IpToCountrySlow"

// Output recorded by the original author:
// IpToCountryFast completed for 10000 items in 0.124317s
// IpToCountrySlow completed for 10000 items in 51.995045s
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System | |
open IpToCountry | |
// Basic usage example: resolve a handful of addresses and print one result per line
let testIps =
    [ "205.188.215.229"
      "74.55.102.12"
      "94.245.116.7"
      "158.127.18.60"
      "192.71.238.76"
      "194.106.111.42" ]

let ipToCountry = IpToCountrySlow()

// All lookups complete before the first line is printed
testIps
|> List.map (fun ip -> ipToCountry.getCountry ip)
|> List.iter (fun country -> printfn "%s" country)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
IpToCountrySlow can be found at https://gist.github.com/1078445