pboothe · March 29, 2019 22:32
diff --git a/walk.py b/walk.py
 # Two-letter continent code for each country id. record() indexes this table
 # and GeoIP_country_code with the same value, so the two lists must stay
 # aligned entry for entry; "--" marks ids without a continent.
 GeoIP_country_continent = [
    "--", "AS", "EU", "EU", "AS", "AS", "NA", "NA", "EU", "AS", "NA", "AF",
    "AN", "SA", "OC", "EU", "OC", "NA", "AS", "EU", "NA", "AS", "EU", "AF",
    "EU", "AS", "AF", "AF", "NA", "AS", "SA", "SA", "NA", "AS", "AN", "AF",
    "EU", "NA", "NA", "AS", "AF", "AF", "AF", "EU", "AF", "OC", "SA", "AF",
    "AS", "SA", "NA", "NA", "AF", "AS", "AS", "EU", "EU", "AF", "EU", "NA",
    "NA", "AF", "SA", "EU", "AF", "AF", "AF", "EU", "AF", "EU", "OC", "SA",
    "OC", "EU", "EU", "NA", "AF", "EU", "NA", "AS", "SA", "AF", "EU", "NA",
    "AF", "AF", "NA", "AF", "EU", "AN", "NA", "OC", "AF", "SA", "AS", "AN",
    "NA", "EU", "NA", "EU", "AS", "EU", "AS", "AS", "AS", "AS", "AS", "EU",
    "EU", "NA", "AS", "AS", "AF", "AS", "AS", "OC", "AF", "NA", "AS", "AS",
    "AS", "NA", "AS", "AS", "AS", "NA", "EU", "AS", "AF", "AF", "EU", "EU",
    "EU", "AF", "AF", "EU", "EU", "AF", "OC", "EU", "AF", "AS", "AS", "AS",
    "OC", "NA", "AF", "NA", "EU", "AF", "AS", "AF", "NA", "AS", "AF", "AF",
    "OC", "AF", "OC", "AF", "NA", "EU", "EU", "AS", "OC", "OC", "OC", "AS",
    "NA", "SA", "OC", "OC", "AS", "AS", "EU", "NA", "OC", "NA", "AS", "EU",
    "OC", "SA", "AS", "AF", "EU", "EU", "AF", "AS", "OC", "AF", "AF", "EU",
    "AS", "AF", "EU", "EU", "EU", "AF", "EU", "AF", "AF", "SA", "AF", "NA",
    "AS", "AF", "NA", "AF", "AN", "AF", "AS", "AS", "OC", "AS", "AF", "OC",
    "AS", "EU", "NA", "OC", "AS", "AF", "EU", "AF", "OC", "NA", "SA", "AS",
    "EU", "NA", "SA", "NA", "NA", "AS", "OC", "OC", "OC", "AS", "AF", "EU",
    "AF", "AF", "EU", "AF", "--", "--", "--", "EU", "EU", "EU", "EU", "NA",
    "NA", "NA", "AF", "--"]

 # Two-letter country code for each country id; record() looks the code up
 # with the first byte of a record. Entry order is significant and matches
 # GeoIP_country_continent. "--" is the placeholder at id 0.
 GeoIP_country_code = [
    "--", "AP", "EU", "AD", "AE", "AF", "AG", "AI", "AL", "AM", "CW", "AO",
    "AQ", "AR", "AS", "AT", "AU", "AW", "AZ", "BA", "BB", "BD", "BE", "BF",
    "BG", "BH", "BI", "BJ", "BM", "BN", "BO", "BR", "BS", "BT", "BV", "BW",
    "BY", "BZ", "CA", "CC", "CD", "CF", "CG", "CH", "CI", "CK", "CL", "CM",
    "CN", "CO", "CR", "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", "DM",
    "DO", "DZ", "EC", "EE", "EG", "EH", "ER", "ES", "ET", "FI", "FJ", "FK",
    "FM", "FO", "FR", "SX", "GA", "GB", "GD", "GE", "GF", "GH", "GI", "GL",
    "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", "GW", "GY", "HK", "HM",
    "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN", "IO", "IQ", "IR", "IS",
    "IT", "JM", "JO", "JP", "KE", "KG", "KH", "KI", "KM", "KN", "KP", "KR",
    "KW", "KY", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
    "LV", "LY", "MA", "MC", "MD", "MG", "MH", "MK", "ML", "MM", "MN", "MO",
    "MP", "MQ", "MR", "MS", "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
    "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", "NR", "NU", "NZ", "OM",
    "PA", "PE", "PF", "PG", "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
    "PW", "PY", "QA", "RE", "RO", "RU", "RW", "SA", "SB", "SC", "SD", "SE",
    "SG", "SH", "SI", "SJ", "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV",
    "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", "TK", "TM", "TN", "TO",
    "TL", "TR", "TT", "TV", "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
    "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", "WS", "YE", "YT", "RS",
    "ZA", "ZM", "ME", "ZW", "A1", "A2", "O1", "AX", "GG", "IM", "JE", "BL",
    "MF", "BQ", "SS", "O1"]


 def toint(b):
     """Decode the first three bytes of ``b`` as a little-endian unsigned int.

     ``b`` may be a byte string whose items are 1-character strings (what
     indexing a Python 2 ``str`` yields) or a ``bytes``/``bytearray`` whose
     items are already integers.

     Args:
         b: indexable sequence holding at least three bytes; extra trailing
             bytes are ignored.

     Returns:
         ``b[0] + (b[1] << 8) + (b[2] << 16)`` as an int.

     Raises:
         IndexError: if ``b`` holds fewer than three items.
     """
     v = 0
     for i in range(3):
         byte = b[i]
         if not isinstance(byte, int):
             # Indexing a str gives a 1-character string, not a number.
             byte = ord(byte)
         v += byte << (i * 8)
     return v


 # The database is binary data, so read it in binary mode (text mode can
 # translate or truncate bytes on some platforms) and close the handle as soon
 # as the contents are in memory.
 with open('20150108T080000Z-GeoLiteCity.dat', 'rb') as dat_file:
     cache = dat_file.read()
 # Size in bytes of one tree node: get_index() and walk() read two 3-byte
 # pointers (left, right) per node.
 record_pair_length = 6
 # TODO: look for 0xff, 0xff, 0xff in last 20 bytes.
 # TODO: read first byte after 3 x 0xff and verify it matches GEOIP_CITY_EDITION_REV*
 # TODO: read last three bytes as int.
 # Pointers below this value are tree nodes; hard-coded for this particular
 # file until the TODOs above are implemented.
 databaseSegments = 2586637
 gi_size = len(cache)
 buf = cache

 # 74.73.49.157
 ipnum = 1246310813
 # Overrides the sample address above; nothing in the visible code reads
 # ipnum (get_index() takes its own argument).
 ipnum = 1

 def get_index(ipnum):
    offset = 0
    for depth in range(31, -1, -1):
        byte_offset = record_pair_length * offset
        if (byte_offset > gi_size - record_pair_length):
            print 'byte_offset:', byte_offset, gi_size, record_pair_length
            break

        # print 'offset: ', byte_offset
        # print 'x: ', offset
        buf = cache[byte_offset:byte_offset+6]
        rhs = ((ord(buf[3 * 1 + 0]) << (0 * 8)) +
                 (ord(buf[3 * 1 + 1]) << (1 * 8)) +
                 (ord(buf[3 * 1 + 2]) << (2 * 8)))
        lhs = ((ord(buf[3 * 0 + 0]) << (0 * 8)) +
                 (ord(buf[3 * 0 + 1]) << (1 * 8)) +
                 (ord(buf[3 * 0 + 2]) << (2 * 8)))

        if (ipnum & (1 << depth)):
            # /* Take the right-hand branch */
            x = rhs
        else:
            # /* Take the left-hand branch */
            x = lhs

        print 'offset, x:', lhs, rhs, offset, x, 32 - depth
        if (x >= databaseSegments):
            # print 'bx:', x
            break
        offset = x

    return x, 32-depth, depth



 record_length = 3
 # Kept for any external users. record() no longer reads a fixed 50-byte
 # window, because the variable-length strings can make a record longer.
 full_record_length = 50

 def record(cache, x):
     """Decode the location record that tree pointer ``x`` refers to.

     The record starts at ``x + (2 * record_length - 1) * databaseSegments``
     in ``cache`` and is read as: one byte indexing the country tables, three
     NUL-terminated strings (region, city, postal code), then two 3-byte
     little-endian integers holding latitude and longitude, each scaled by
     10000 and offset by 180.

     Fields are parsed sequentially instead of splitting the record on NUL:
     a zero country byte or a zero byte inside the packed coordinates would
     otherwise shift or truncate the fields.

     Args:
         cache: the raw database contents (byte string).
         x: pointer taken from the search tree.

     Returns:
         dict with keys ``continent_code``, ``country_code``, ``region``,
         ``city``, ``zipcode``, ``latitude`` and ``longitude``.

     Raises:
         IndexError: if the record lies past the end of ``cache`` or the
             coordinates are truncated.
         ValueError: if one of the three string terminators is missing.
     """
     pos = x + (2 * record_length - 1) * databaseSegments
     head = cache[pos:pos + 1]
     if not head:
         raise IndexError('record pointer %d is past the end of the data' % pos)
     c = ord(head)
     pos += 1

     # region, city, postal code: each runs up to the next NUL byte.
     fields = []
     for _ in range(3):
         end = cache.index(b'\x00', pos)
         fields.append(cache[pos:end])
         pos = end + 1
     region, city, zipcode = fields

     lat = toint(cache[pos:pos + 3]) / 10000.0 - 180
     lon = toint(cache[pos + 3:pos + 6]) / 10000.0 - 180
     # NOTE(review): the list below looks like a checklist of output columns
     # the author still intends to produce - confirm.
     # network,
     # geoname_id
     # registered_country_geoname_id
     # represented_country_geoname_id
     # is_anonymous_proxy
     # is_satellite_provider
     # postal_code
     # latitude
     # longitude
     # accuracy_radius

     return {
         'continent_code': GeoIP_country_continent[c],
         'country_code': GeoIP_country_code[c],
         'region': region,
         'city': city,
         'zipcode': zipcode,
         'latitude': lat,
         'longitude': lon,
     }


 #i = 0
 #prev = []
 #while i < 0xffffffff:
 #    curr = get_index(i)
 #    if prev:
 #        if prev[0] != curr[0]:
 #            print 'prev', prev
 #            print 'curr', curr
 #            break
 #    i+=1
 #    prev = curr

 import ipaddr
 def walk(record_offset, block, block_size, cache):
  byte_offset = record_offset * record_pair_length
  lhs = ((ord(cache[byte_offset+0]) << (0 * 8)) +
           (ord(cache[byte_offset+1]) << (1 * 8)) +
           (ord(cache[byte_offset+2]) << (2 * 8)))
  rhs = ((ord(cache[byte_offset+3]) << (0 * 8)) +
         (ord(cache[byte_offset+4]) << (1 * 8)) +
         (ord(cache[byte_offset+5]) << (2 * 8)))
  if lhs > databaseSegments:
    print record(cache, lhs), str(ipaddr.IPAddress(block, version=4)) + "/" + str(block_size)
  else:
    walk(lhs, block, block_size+1, cache)

  block |= 1 << (32-block_size)
  if rhs > databaseSegments:
    print record(cache, rhs), str(ipaddr.IPAddress(block, version=4)) + "/" + str(block_size)
  else:
    walk(rhs, block, block_size+1, cache)


 walk(0, 1<<24, 0, cache)
	# Two-letter continent code for each country id. record() indexes this table
	# and GeoIP_country_code with the same value, so the two lists must stay
	# aligned entry for entry; "--" marks ids without a continent.
	GeoIP_country_continent = [
	"--", "AS", "EU", "EU", "AS", "AS", "NA", "NA", "EU", "AS", "NA", "AF",
	"AN", "SA", "OC", "EU", "OC", "NA", "AS", "EU", "NA", "AS", "EU", "AF",
	"EU", "AS", "AF", "AF", "NA", "AS", "SA", "SA", "NA", "AS", "AN", "AF",
	"EU", "NA", "NA", "AS", "AF", "AF", "AF", "EU", "AF", "OC", "SA", "AF",
	"AS", "SA", "NA", "NA", "AF", "AS", "AS", "EU", "EU", "AF", "EU", "NA",
	"NA", "AF", "SA", "EU", "AF", "AF", "AF", "EU", "AF", "EU", "OC", "SA",
	"OC", "EU", "EU", "NA", "AF", "EU", "NA", "AS", "SA", "AF", "EU", "NA",
	"AF", "AF", "NA", "AF", "EU", "AN", "NA", "OC", "AF", "SA", "AS", "AN",
	"NA", "EU", "NA", "EU", "AS", "EU", "AS", "AS", "AS", "AS", "AS", "EU",
	"EU", "NA", "AS", "AS", "AF", "AS", "AS", "OC", "AF", "NA", "AS", "AS",
	"AS", "NA", "AS", "AS", "AS", "NA", "EU", "AS", "AF", "AF", "EU", "EU",
	"EU", "AF", "AF", "EU", "EU", "AF", "OC", "EU", "AF", "AS", "AS", "AS",
	"OC", "NA", "AF", "NA", "EU", "AF", "AS", "AF", "NA", "AS", "AF", "AF",
	"OC", "AF", "OC", "AF", "NA", "EU", "EU", "AS", "OC", "OC", "OC", "AS",
	"NA", "SA", "OC", "OC", "AS", "AS", "EU", "NA", "OC", "NA", "AS", "EU",
	"OC", "SA", "AS", "AF", "EU", "EU", "AF", "AS", "OC", "AF", "AF", "EU",
	"AS", "AF", "EU", "EU", "EU", "AF", "EU", "AF", "AF", "SA", "AF", "NA",
	"AS", "AF", "NA", "AF", "AN", "AF", "AS", "AS", "OC", "AS", "AF", "OC",
	"AS", "EU", "NA", "OC", "AS", "AF", "EU", "AF", "OC", "NA", "SA", "AS",
	"EU", "NA", "SA", "NA", "NA", "AS", "OC", "OC", "OC", "AS", "AF", "EU",
	"AF", "AF", "EU", "AF", "--", "--", "--", "EU", "EU", "EU", "EU", "NA",
	"NA", "NA", "AF", "--"]

	# Two-letter country code for each country id; record() looks the code up
	# with the first byte of a record. Entry order is significant and matches
	# GeoIP_country_continent. "--" is the placeholder at id 0.
	GeoIP_country_code = [
	"--", "AP", "EU", "AD", "AE", "AF", "AG", "AI", "AL", "AM", "CW", "AO",
	"AQ", "AR", "AS", "AT", "AU", "AW", "AZ", "BA", "BB", "BD", "BE", "BF",
	"BG", "BH", "BI", "BJ", "BM", "BN", "BO", "BR", "BS", "BT", "BV", "BW",
	"BY", "BZ", "CA", "CC", "CD", "CF", "CG", "CH", "CI", "CK", "CL", "CM",
	"CN", "CO", "CR", "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", "DM",
	"DO", "DZ", "EC", "EE", "EG", "EH", "ER", "ES", "ET", "FI", "FJ", "FK",
	"FM", "FO", "FR", "SX", "GA", "GB", "GD", "GE", "GF", "GH", "GI", "GL",
	"GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", "GW", "GY", "HK", "HM",
	"HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN", "IO", "IQ", "IR", "IS",
	"IT", "JM", "JO", "JP", "KE", "KG", "KH", "KI", "KM", "KN", "KP", "KR",
	"KW", "KY", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
	"LV", "LY", "MA", "MC", "MD", "MG", "MH", "MK", "ML", "MM", "MN", "MO",
	"MP", "MQ", "MR", "MS", "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
	"NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", "NR", "NU", "NZ", "OM",
	"PA", "PE", "PF", "PG", "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
	"PW", "PY", "QA", "RE", "RO", "RU", "RW", "SA", "SB", "SC", "SD", "SE",
	"SG", "SH", "SI", "SJ", "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV",
	"SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", "TK", "TM", "TN", "TO",
	"TL", "TR", "TT", "TV", "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
	"VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", "WS", "YE", "YT", "RS",
	"ZA", "ZM", "ME", "ZW", "A1", "A2", "O1", "AX", "GG", "IM", "JE", "BL",
	"MF", "BQ", "SS", "O1"]


	def toint(b):
	    """Decode the first three bytes of ``b`` as a little-endian unsigned int.

	    ``b`` may be a byte string whose items are 1-character strings (what
	    indexing a Python 2 ``str`` yields) or a ``bytes``/``bytearray`` whose
	    items are already integers.

	    Args:
	        b: indexable sequence holding at least three bytes; extra trailing
	            bytes are ignored.

	    Returns:
	        ``b[0] + (b[1] << 8) + (b[2] << 16)`` as an int.

	    Raises:
	        IndexError: if ``b`` holds fewer than three items.
	    """
	    v = 0
	    for i in range(3):
	        byte = b[i]
	        if not isinstance(byte, int):
	            # Indexing a str gives a 1-character string, not a number.
	            byte = ord(byte)
	        v += byte << (i * 8)
	    return v


	# The database is binary data, so read it in binary mode (text mode can
	# translate or truncate bytes on some platforms) and close the handle as soon
	# as the contents are in memory.
	with open('20150108T080000Z-GeoLiteCity.dat', 'rb') as dat_file:
	    cache = dat_file.read()
	# Size in bytes of one tree node: get_index() and walk() read two 3-byte
	# pointers (left, right) per node.
	record_pair_length = 6
	# TODO: look for 0xff, 0xff, 0xff in last 20 bytes.
	# TODO: read first byte after 3 x 0xff and verify it matches GEOIP_CITY_EDITION_REV*
	# TODO: read last three bytes as int.
	# Pointers below this value are tree nodes; hard-coded for this particular
	# file until the TODOs above are implemented.
	databaseSegments = 2586637
	gi_size = len(cache)
	buf = cache

	# 74.73.49.157
	ipnum = 1246310813
	# Overrides the sample address above; nothing in the visible code reads
	# ipnum (get_index() takes its own argument).
	ipnum = 1

	def get_index(ipnum):
	offset = 0
	for depth in range(31, -1, -1):
	byte_offset = record_pair_length * offset
	if (byte_offset > gi_size - record_pair_length):
	print 'byte_offset:', byte_offset, gi_size, record_pair_length
	break

	# print 'offset: ', byte_offset
	# print 'x: ', offset
	buf = cache[byte_offset:byte_offset+6]
	rhs = ((ord(buf[3 * 1 + 0]) << (0 * 8)) +
	(ord(buf[3 * 1 + 1]) << (1 * 8)) +
	(ord(buf[3 * 1 + 2]) << (2 * 8)))
	lhs = ((ord(buf[3 * 0 + 0]) << (0 * 8)) +
	(ord(buf[3 * 0 + 1]) << (1 * 8)) +
	(ord(buf[3 * 0 + 2]) << (2 * 8)))

	if (ipnum & (1 << depth)):
	# /* Take the right-hand branch */
	x = rhs
	else:
	# /* Take the left-hand branch */
	x = lhs

	print 'offset, x:', lhs, rhs, offset, x, 32 - depth
	if (x >= databaseSegments):
	# print 'bx:', x
	break
	offset = x

	return x, 32-depth, depth



	record_length = 3
	# Kept for any external users. record() no longer reads a fixed 50-byte
	# window, because the variable-length strings can make a record longer.
	full_record_length = 50

	def record(cache, x):
	    """Decode the location record that tree pointer ``x`` refers to.

	    The record starts at ``x + (2 * record_length - 1) * databaseSegments``
	    in ``cache`` and is read as: one byte indexing the country tables, three
	    NUL-terminated strings (region, city, postal code), then two 3-byte
	    little-endian integers holding latitude and longitude, each scaled by
	    10000 and offset by 180.

	    Fields are parsed sequentially instead of splitting the record on NUL:
	    a zero country byte or a zero byte inside the packed coordinates would
	    otherwise shift or truncate the fields.

	    Args:
	        cache: the raw database contents (byte string).
	        x: pointer taken from the search tree.

	    Returns:
	        dict with keys ``continent_code``, ``country_code``, ``region``,
	        ``city``, ``zipcode``, ``latitude`` and ``longitude``.

	    Raises:
	        IndexError: if the record lies past the end of ``cache`` or the
	            coordinates are truncated.
	        ValueError: if one of the three string terminators is missing.
	    """
	    pos = x + (2 * record_length - 1) * databaseSegments
	    head = cache[pos:pos + 1]
	    if not head:
	        raise IndexError('record pointer %d is past the end of the data' % pos)
	    c = ord(head)
	    pos += 1

	    # region, city, postal code: each runs up to the next NUL byte.
	    fields = []
	    for _ in range(3):
	        end = cache.index(b'\x00', pos)
	        fields.append(cache[pos:end])
	        pos = end + 1
	    region, city, zipcode = fields

	    lat = toint(cache[pos:pos + 3]) / 10000.0 - 180
	    lon = toint(cache[pos + 3:pos + 6]) / 10000.0 - 180
	    # NOTE(review): the list below looks like a checklist of output columns
	    # the author still intends to produce - confirm.
	    # network,
	    # geoname_id
	    # registered_country_geoname_id
	    # represented_country_geoname_id
	    # is_anonymous_proxy
	    # is_satellite_provider
	    # postal_code
	    # latitude
	    # longitude
	    # accuracy_radius

	    return {
	        'continent_code': GeoIP_country_continent[c],
	        'country_code': GeoIP_country_code[c],
	        'region': region,
	        'city': city,
	        'zipcode': zipcode,
	        'latitude': lat,
	        'longitude': lon,
	    }


	#i = 0
	#prev = []
	#while i < 0xffffffff:
	# curr = get_index(i)
	# if prev:
	# if prev[0] != curr[0]:
	# print 'prev', prev
	# print 'curr', curr
	# break
	# i+=1
	# prev = curr

	import ipaddr
	def walk(record_offset, block, block_size, cache):
	byte_offset = record_offset * record_pair_length
	lhs = ((ord(cache[byte_offset+0]) << (0 * 8)) +
	(ord(cache[byte_offset+1]) << (1 * 8)) +
	(ord(cache[byte_offset+2]) << (2 * 8)))
	rhs = ((ord(cache[byte_offset+3]) << (0 * 8)) +
	(ord(cache[byte_offset+4]) << (1 * 8)) +
	(ord(cache[byte_offset+5]) << (2 * 8)))
	if lhs > databaseSegments:
	print record(cache, lhs), str(ipaddr.IPAddress(block, version=4)) + "/" + str(block_size)
	else:
	walk(lhs, block, block_size+1, cache)

	block \|= 1 << (32-block_size)
	if rhs > databaseSegments:
	print record(cache, rhs), str(ipaddr.IPAddress(block, version=4)) + "/" + str(block_size)
	else:
	walk(rhs, block, block_size+1, cache)


	walk(0, 1<<24, 0, cache)