Take a list of domains and export a CSV of their IP info: country, isp, org, as, asname, proxy, hosting
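The script expects an input file at ~/domains.csv with a header row, the user id in the first column, and the domain (optionally with a trailing path) in the second. Results go to ~/domains_results.csv, and the script resumes from that file if it already exists. A minimal sketch of the input layout, with purely illustrative header names and values (only the column order matters to the script):

user_id,domain
1001,example.com
1002,example.org/some/path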
import csv
import requests
import json
import time
import socket
import os

# Set up the URL for the IP-API.com batch API endpoint
api_url = "http://ip-api.com/batch"

# Set up the fields to request from the API
fields = "query,status,country,isp,org,as,asname,proxy,hosting"

# Set up the batch size and delay between requests
batch_size = 100
delay = 1.5  # in seconds

# Load domains that were already processed in a previous run so the
# script can resume without repeating API calls
completed_domains = []
results_path = os.path.expanduser('~/domains_results.csv')
if os.path.exists(results_path):
    with open(results_path) as csvfile:
        csv_reader = csv.reader(csvfile)
        # Skip the header row
        next(csv_reader)
        # Loop through each row in the results file
        for row in csv_reader:
            # Extract the domain name from the row
            domain = row[1]
            # Keep only the hostname part
            domain = domain.split('/')[0]
            completed_domains.append(domain)

# Open the input CSV file and read its contents
with open(os.path.expanduser('~/domains.csv')) as csvfile:
    csv_reader = csv.reader(csvfile)
    # Skip the header row
    next(csv_reader)

    # Initialize the batch counter and the list of domains for the current batch
    batch_count = 0
    domains = []
    info = dict()
    i = 0

    # Append to the results file when resuming, otherwise create it with headers
    if completed_domains:
        new_csv = csv.writer(open(results_path, 'a', newline=''))
    else:
        new_csv = csv.writer(open(results_path, 'w', newline=''))
        new_csv.writerow(['Userid', 'Domain', 'Status', 'Country', 'ISP', 'Org', 'AS', 'AS Name', 'Proxy', 'Hosting', 'IP'])

    # Loop through each row in the CSV file
    for row in csv_reader:
        # Extract the user id and domain name from the row
        user_id = row[0]
        domain = row[1]
        # Keep only the hostname part
        domain = domain.split('/')[0]

        # Skip domains that have already been processed
        if domain in completed_domains:
            continue

        # Look up the IP for the domain
        try:
            ip = socket.gethostbyname(domain)
        except socket.gaierror:
            print("DNS Error: " + domain)
            new_csv.writerow([user_id, domain, "DNS Error"])
            continue

        # Add the IP to the current batch and remember which user/domain it belongs to
        domains.append({"query": ip})
        info[i] = {'user_id': user_id, 'domain': domain}
        i += 1

        # Check if we've reached the batch size
        if len(domains) == batch_size:
            # POST the batch to the API as a JSON array in the request body
            try:
                response = requests.post(api_url, params={'fields': fields}, data=json.dumps(domains))
            except requests.RequestException as e:
                print("API Error: " + str(e))
                break

            # Check for HTTP errors
            if response.status_code != 200:
                print("Error: " + response.text)
                break

            # Parse the JSON response from the API
            data = json.loads(response.text)

            # Process the response for each domain in the batch
            for j in range(len(domains)):
                # Write the relevant fields from the response
                if data[j]['status'] == 'success':
                    print("Batch %d - %s" % (batch_count, info[j]['domain']))
                    new_csv.writerow([info[j]['user_id'], info[j]['domain'], data[j]['status'], data[j]['country'], data[j]['isp'], data[j]['org'], data[j]['as'], data[j]['asname'], data[j]['proxy'], data[j]['hosting'], data[j]['query']])
                else:
                    new_csv.writerow([info[j]['user_id'], info[j]['domain'], data[j]['status']])

            # Increment the batch counter and clear the batch for the next round
            batch_count += 1
            domains = []
            info = dict()
            i = 0

            # Wait for the specified delay before making the next request
            time.sleep(delay)

    # Process any remaining domains that didn't fill a full batch
    if domains:
        # POST the final partial batch to the API as a JSON array in the request body
        response = requests.post(api_url, params={'fields': fields}, data=json.dumps(domains))
        # Check for HTTP errors
        if response.status_code != 200:
            print("Error: " + response.text)
        else:
            # Parse the JSON response from the API
            data = json.loads(response.text)
            # Process the response for each remaining domain
            for j in range(len(domains)):
                if data[j]['status'] == 'success':
                    new_csv.writerow([info[j]['user_id'], info[j]['domain'], data[j]['status'], data[j]['country'], data[j]['isp'], data[j]['org'], data[j]['as'], data[j]['asname'], data[j]['proxy'], data[j]['hosting'], data[j]['query']])
                else:
                    new_csv.writerow([info[j]['user_id'], info[j]['domain'], data[j]['status']])
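To sanity-check connectivity and the field list before a long run, a minimal sketch of the same batch call with a single address (the IP below is just an illustrative public resolver, not taken from the script):

import json
import requests

# One-item batch against the same endpoint and fields the script uses
resp = requests.post(
    "http://ip-api.com/batch",
    params={"fields": "query,status,country,isp,org,as,asname,proxy,hosting"},
    data=json.dumps([{"query": "8.8.8.8"}]),
)
print(resp.status_code)  # expect 200
print(resp.json()[0])    # dict containing the requested fields for the query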