Skip to content

Instantly share code, notes, and snippets.

@InJeCTrL
Created April 20, 2020 04:41
Show Gist options
  • Select an option

  • Save InJeCTrL/47c640dd2c54acd909722ac2ab16c4f6 to your computer and use it in GitHub Desktop.

Select an option

Save InJeCTrL/47c640dd2c54acd909722ac2ab16c4f6 to your computer and use it in GitHub Desktop.
masscan_json_parser
import json
import time
import urllib.parse
import urllib.request

import numpy as np
import pandas as pd
def fetchURLresponse(URL, data=None, timeout=5, max_retries=None, retry_delay=1.0):
    """Fetch *URL* and return the response body decoded as UTF-8.

    The request is retried whenever opening, reading or decoding fails.

    Args:
        URL: Address to request.
        data: Optional request body (bytes); passed straight to
            ``urllib.request.Request``.
        timeout: Per-attempt timeout in seconds.
        max_retries: Number of retries allowed after the first failed
            attempt. ``None`` (the default) retries forever, which is what
            the function has always done.
        retry_delay: Seconds to wait between attempts, so a failing
            endpoint is not hammered in a tight loop.

    Returns:
        The decoded response text.

    Raises:
        Exception: The last error encountered, once ``max_retries`` is
            exhausted (never raised when ``max_retries`` is ``None``).
    """
    failures = 0
    while True:
        try:
            request = urllib.request.Request(URL, data=data)
            # The context manager closes the response even when read() or
            # decode() raises.
            with urllib.request.urlopen(request, timeout=timeout) as response:
                return response.read().decode('utf-8')
        except Exception:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C
            # (KeyboardInterrupt) and SystemExit can still stop the loop.
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            if retry_delay:
                time.sleep(retry_delay)
def getLocates(iplist):
    """Look up location data for a list of IPs via the ip-api.com batch endpoint.

    The list is serialised to JSON, POSTed through ``fetchURLresponse`` and
    the JSON reply is parsed and returned as-is. The query string requests
    the city, country, regionName, countryCode and query fields with
    ``lang=zh-CN``.
    """
    endpoint = "http://ip-api.com/batch?lang=zh-CN&fields=city,country,regionName,countryCode,query"
    payload = json.dumps(iplist, ensure_ascii = False)
    reply = fetchURLresponse(endpoint, payload.encode('utf-8'))
    return json.loads(reply)
# Output columns, in the order they are written to the CSV.
COLUMNS = ['ip', 'port', 'status', 'ttl', 'country', 'regionName', 'city']
# Location columns copied from each lookup result.
GEO_FIELDS = ('country', 'regionName', 'city')
# Number of IPs sent per lookup request (the value the script has always
# used; presumably the ip-api.com batch limit -- confirm against its docs).
BATCH_SIZE = 100


def parse_masscan_lines(lines):
    """Turn masscan JSON output lines into one row dict per (ip, port).

    Each record line is expected to be a JSON object with an ``ip`` and a
    ``ports`` list. Parsing stops at the ``{finished: 1}`` trailer line.
    Blank lines and lines holding only ``[``, ``]`` or ``,`` are skipped
    (NOTE(review): presumably what array-style masscan output contains --
    confirm against a real file).

    Args:
        lines: Iterable of text lines (e.g. an open file).

    Returns:
        A list of dicts with keys ``ip``, ``port``, ``status`` and ``ttl``.
        ``status``/``ttl`` are ``None`` when a port entry lacks them.
    """
    rows = []
    for raw in lines:
        line = raw.strip()
        if line == "{finished: 1}":
            break
        # Records are separated by commas that are not part of the object.
        line = line.strip(",").strip()
        if line in ("", "[", "]"):
            continue
        record = json.loads(line)
        for port in record['ports']:
            rows.append({'ip': record['ip'],
                         'port': port['port'],
                         'status': port.get('status'),
                         'ttl': port.get('ttl')})
    return rows


def iter_batches(items, size):
    """Yield consecutive, non-overlapping slices of *items* of at most *size*."""
    for start in range(0, len(items), size):
        yield items[start:start + size]


def apply_locations(data, locates):
    """Copy location fields from lookup results into *data*, matched on ip.

    Args:
        data: DataFrame with an ``ip`` column plus the ``GEO_FIELDS`` columns;
            modified in place.
        locates: Iterable of result dicts. Each is matched on its ``query``
            value; results missing a field leave that column untouched
            instead of raising ``KeyError``.
    """
    for locate in locates:
        mask = data['ip'] == locate.get('query')
        for field in GEO_FIELDS:
            if field in locate:
                data.loc[mask, field] = locate[field]


def main(in_path="test.json", out_path="parsed.csv"):
    """Parse masscan output, add location columns and write the CSV."""
    with open(in_path, "r", encoding="utf-8") as f:
        rows = parse_masscan_lines(f)
    # Build the frame once from all rows; object dtype keeps ports/ttls as
    # plain ints in the CSV even when some values are missing.
    data = pd.DataFrame(rows, columns=COLUMNS, dtype=object)
    iplist = data['ip'].drop_duplicates().tolist()
    # One request per batch of unique IPs.
    for batch in iter_batches(iplist, BATCH_SIZE):
        apply_locations(data, getLocates(batch))
    data.to_csv(
        out_path,
        index=False,
        encoding='gb2312'  # original author's note: "for next: utf-8"
    )
    print("Fin!")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment