Skip to content

Instantly share code, notes, and snippets.

@CharlyWargnier
Created February 8, 2020 17:30
Show Gist options
  • Save CharlyWargnier/4b89759637292c95cda3fbad79fdb80f to your computer and use it in GitHub Desktop.
Save CharlyWargnier/4b89759637292c95cda3fbad79fdb80f to your computer and use it in GitHub Desktop.
# Define the helper functions used to verify Googlebot IP addresses
def reverse_dns(ip_address):
    '''
    Look up the primary host name registered for an IP address.

    Performs a reverse DNS lookup through socket.gethostbyaddr and returns
    the first element of its result (the primary host name). Any exception
    raised by the lookup propagates to the caller.
    '''
    primary_name, _aliases, _addresses = socket.gethostbyaddr(ip_address)
    return primary_name
def forward_dns(reversed_dns):
    '''
    Resolve a host name to the first IPv4 address it answers with.

    reversed_dns is the host name to look up (normally the result of a
    reverse DNS lookup). Returns the address as a string. When the name
    cannot be resolved, or the argument is not a usable host name, prints
    'error' and returns False.
    '''
    try:
        # gethostbyname already returns a str, so no conversion is needed.
        return socket.gethostbyname(reversed_dns)
    except (OSError, TypeError, ValueError):
        # OSError covers socket.gaierror/herror (resolution failures);
        # TypeError/ValueError (incl. UnicodeError) cover arguments that are
        # not valid host names. Unrelated programming errors are no longer
        # hidden behind a silent False.
        print('error')
        return False
def ip_match(ip, true_ip):
    '''
    Tell whether the forward DNS lookup led back to the original address.

    ip is the address used for the reverse DNS lookup; true_ip is the
    address returned by the forward DNS lookup (or False when that lookup
    failed). Returns True when the two are equal, otherwise False.
    '''
    # Return the comparison directly instead of assigning to a local that
    # shadowed this function's own name.
    return ip == true_ip
def confirm_googlebot(host, ip_match):
    '''
    Decide whether an IP address from a log file is truly Googlebot.

    host is the host name obtained from the reverse DNS lookup; a
    non-string value such as False or None means no name is available.
    ip_match is the result of the ip_match() check: True when the forward
    lookup of host returned the original IP address.

    Returns True only when host lies under googlebot.com or google.com and
    ip_match equals True; otherwise returns False.
    '''
    if not isinstance(host, str):
        # No usable host name, so the address cannot be confirmed.
        return False

    # DNS names are case-insensitive and may carry a trailing root dot.
    normalized = host.rstrip('.').lower()

    # The leading dot in each suffix rejects look-alike domains such as
    # 'fakegooglebot.com'.
    in_google_domain = normalized.endswith(('.googlebot.com', '.google.com'))

    # Deliberately an equality test rather than truthiness: a truthy
    # non-boolean (e.g. the string 'No') must not count as a match.
    return bool(in_google_domain and ip_match == True)  # noqa: E712
def run(ip):
    '''
    Report whether an IP address verifies as Googlebot.

    Chains the checks defined in this file: reverse DNS lookup of ip,
    forward DNS lookup of the resulting host name, comparison of the two
    addresses, and the domain check in confirm_googlebot().

    Returns the result of confirm_googlebot(), or False when any step
    raises an error (for example when the reverse lookup fails).
    '''
    try:
        host = reverse_dns(ip)
        true_ip = forward_dns(host)
        is_match = ip_match(ip, true_ip)
        return confirm_googlebot(host, is_match)
    except Exception:
        # Best effort: an address that cannot be verified is reported as
        # not Googlebot. Catching Exception (not a bare except) lets
        # KeyboardInterrupt/SystemExit stop a long-running batch.
        return False
# Run the verification against every IP address in the dataframe's 'IP' column
# and store each result from run() in a new 'isRealGbot?' column.
# NOTE(review): df is not defined in this snippet; presumably a pandas
# DataFrame of log entries built earlier. Confirm before running.
df['isRealGbot?'] = df['IP'].apply(run)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment