Created
April 20, 2020 04:41
-
-
Save InJeCTrL/47c640dd2c54acd909722ac2ab16c4f6 to your computer and use it in GitHub Desktop.
masscan_json_parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import urllib.parse | |
| import urllib.request | |
| import pandas as pd | |
| import numpy as np | |
def fetchURLresponse(URL, data = None):
    """Fetch *URL* and return the response body decoded as UTF-8 text.

    Transient failures are retried indefinitely until a response body has
    been read; permanent problems are raised to the caller instead.

    Args:
        URL: Address to request.
        data: Optional request body as bytes, handed to
            ``urllib.request.Request`` unchanged.

    Returns:
        The response body as a ``str``.

    Raises:
        ValueError: If *URL* is malformed (raised when the request object is
            built, before any retry happens).
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # Function-scope import: only the retry filter below needs this module.
    import http.client

    # Built once, outside the retry loop, so a malformed URL fails immediately
    # instead of being retried forever.
    request = urllib.request.Request(URL, data = data)
    while True:
        try:
            # The context manager closes the response even if read() raises.
            with urllib.request.urlopen(request, timeout = 5) as response:
                raw = response.read()
        except (OSError, http.client.HTTPException):
            # URLError/HTTPError and socket timeouts are OSError subclasses.
            # KeyboardInterrupt and SystemExit are deliberately not caught,
            # so the loop can still be interrupted.
            continue
        # Decoded outside the try block: retrying cannot repair a body that
        # is not valid UTF-8.
        return raw.decode('utf-8')
def getLocates(iplist):
    """Look up location data for a list of IP addresses in one request.

    *iplist* is serialized to JSON, encoded as UTF-8 and passed as the
    request body to ``fetchURLresponse`` for the ip-api.com batch URL below.

    Returns:
        The response body parsed from JSON.
    """
    endpoint = "http://ip-api.com/batch?lang=zh-CN&fields=city,country,regionName,countryCode,query"
    payload = json.dumps(iplist, ensure_ascii = False)
    reply = fetchURLresponse(endpoint, payload.encode('utf-8'))
    return json.loads(reply)
# Script body: flatten the scan results in test.json into one row per
# (ip, port), attach location data from getLocates(), and write parsed.csv.

# Columns read from the scan file, followed by the columns filled in from
# the geolocation lookup. Together they define the CSV column order.
_SCAN_COLUMNS = ['ip', 'port', 'status', 'ttl']
_GEO_COLUMNS = ['country', 'regionName', 'city']
# Number of addresses sent per getLocates() call.
_BATCH_SIZE = 100

# --- 1. Parse the scan file -------------------------------------------------
# Rows are collected in a plain list and turned into a DataFrame once.
# DataFrame.append() was removed in pandas 2.0 and copied the whole frame on
# every call.
rows = []
with open("test.json", "r", encoding = "utf-8") as f:
    for line in f:
        line = line.strip()
        if line == "{finished: 1}":
            # End-of-scan marker; it is not valid JSON, so stop here.
            break
        # Each record is a JSON object that may carry a separating comma.
        record = line.strip(",")
        if record in ("", "[", "]"):
            # Blank lines and bare array punctuation hold no record.
            continue
        linejson = json.loads(record)
        for ports in linejson['ports']:
            rows.append({'ip': linejson['ip'],
                         'port': ports['port'],
                         'status': ports['status'],
                         'ttl': ports['ttl']})
# The geolocation columns do not exist in the rows yet; naming them here
# creates them empty and fixes the final column order.
data = pd.DataFrame(rows, columns = _SCAN_COLUMNS + _GEO_COLUMNS)

# --- 2. Look up each distinct address once, in batches ------------------------
iplist = data['ip'].drop_duplicates().tolist()
locations = {}
# Step by the batch size so every address is sent exactly once. Incrementing
# the loop variable inside a `for ... in range(n)` body does not advance the
# loop, which would produce one overlapping request per address.
for start in range(0, len(iplist), _BATCH_SIZE):
    for locate in getLocates(iplist[start:start + _BATCH_SIZE]):
        locations[locate['query']] = locate

# --- 3. Attach the location fields to every row -------------------------------
for column in _GEO_COLUMNS:
    # .get(): an entry that lacks a field leaves the cell empty instead of
    # raising KeyError. Addresses with no entry at all map to NaN.
    field_by_ip = {ip: entry.get(column) for ip, entry in locations.items()}
    data[column] = data['ip'].map(field_by_ip)

# --- 4. Write the result -------------------------------------------------------
data.to_csv(
    'parsed.csv',
    index = False,
    encoding = 'gb2312'  # original author's note: switch to utf-8 later
)
print("Fin!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment