@0xInfection
Last active February 29, 2020 09:05
Use this script to sort out URLs from the Wayback Machine. :) (A short sketch of the deduplication logic follows the script below.)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Written by: 0xInfection
# Context: https://twitter.com/har1sec/status/1183987309445820416
# Problem: https://gist.github.com/harisec/bff0ac7ad2e90da7e82297d945c63ef1

import optparse
import sys
from urllib.parse import urlparse

narr = []  # normalised forms (netloc + path + parameter names) of every input URL
fina = []  # final, deduplicated list of original URLs


def sameStrings(barr):
    '''Return the list with duplicates removed, preserving order.'''
    final_list = []
    for num in barr:
        if num not in final_list:
            final_list.append(num)
    return final_list


def parseUrl(url='http://yahoo.com/magazine/view.html?keyword=&byto=title'):
    '''Reduce a URL to netloc + path + parameter names, stripping the parameter values.'''
    o = urlparse(url)
    if not o.query:
        # No query string -- the netloc and path alone form the signature.
        return o.netloc + o.path + o.params
    s = '?'
    for index, param in enumerate(o.query.split('&')):
        if index != 0:
            s += '&' + param.split('=')[0] + '='
        else:
            s += param.split('=')[0] + '='
    return o.netloc + o.path + o.params + s


def main():
    lackofart = '''
        U R L   S O R T E R
    '''
    print(lackofart)
    parser = optparse.OptionParser(usage='python wsort.py -f url_list.txt')
    parser.add_option('-f', '--file', dest='file_loc',
                      help='Path of the text file containing the list of URLs.')
    parser.add_option('-l', '--list', dest='list', action='store_true', default=False,
                      help='List all URLs within the file supplied by the user.')
    parser.add_option('-o', '--output', dest='out', default=False,
                      help='Name of the file to write the output URLs to.')
    options, args = parser.parse_args()

    if not options.file_loc:
        print('[-] You must supply a file containing the list of URLs with `python wsort.py -f <filename>`.')
        sys.exit(1)

    arr = open(options.file_loc).read().splitlines()
    if options.list:
        print('[+] Input URLs:')
        for i in arr:
            print(i)

    # Normalise every URL to its parameter signature.
    for m in arr:
        narr.append(parseUrl(url=m))
    ss = sameStrings(narr)

    # Map each unique signature back to the first original URL that matches it.
    for i in ss:
        for j in range(len(arr)):
            if (i.split('=')[0] in arr[j] and
                    urlparse('http://' + i.split('=')[0]).netloc == urlparse(arr[j]).netloc):
                fina.append(arr[j])
                break

    print('[+] Sorted list:')
    for i in fina:
        print(i)

    if options.out:
        # If an output file was specified, write the deduplicated URLs to it.
        try:
            with open(options.out, 'x') as f:
                for i in fina:
                    f.write(i + '\n')
            print('[+] URLs written to %s' % options.out)
        except FileExistsError:
            print('[-] File already exists.')


if __name__ == '__main__':
    main()
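
For reference, here is a minimal sketch of the deduplication key the script builds: parseUrl() reduces each URL to its host, path and parameter names (with the values stripped), and sameStrings() keeps one entry per unique key. The example.com URLs and the signature() helper below are hypothetical and purely for illustration, not part of the gist itself.

# demo_signature.py -- standalone illustration, not part of wsort.py
from urllib.parse import urlparse

urls = [
    'http://example.com/view.php?id=1&cat=news',
    'http://example.com/view.php?id=42&cat=sports',  # same host/path/parameter names as above
    'http://example.com/item.php?id=7',
]

def signature(url):
    # Reproduce the key parseUrl() builds: netloc + path + parameter names, values stripped.
    o = urlparse(url)
    names = [part.split('=')[0] for part in o.query.split('&')]
    return o.netloc + o.path + '?' + '&'.join(n + '=' for n in names)

seen = set()
for u in urls:
    key = signature(u)
    if key not in seen:  # keep only the first URL per unique signature
        seen.add(key)
        print(u)

# Expected output:
#   http://example.com/view.php?id=1&cat=news
#   http://example.com/item.php?id=7

The script itself then maps each surviving signature back to the first matching line of the input file, so the output preserves the input order.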