sakethramanujam · October 22, 2019 09:14
diff --git a/netflix-log.py b/netflix-log.py
 import pandas as pd
 import urllib
 from bs4 import BeautifulSoup
 import argparse


 def _parse():
    """Parse the command-line options of the script.

    Both options are mandatory. Without ``-html`` there is nothing to read,
    and without ``-f/--filename`` the CSV would never be written to disk, so
    argparse rejects such invocations up front with a usage message.

    Returns:
        argparse.Namespace: ``html`` holds the input path and ``filename``
        holds the output path.

    Raises:
        SystemExit: Raised by argparse when a required option is missing or
        the arguments cannot be parsed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-html', type=str, required=True,
                        help='path to html file')
    parser.add_argument('-f', '--filename', type=str, required=True,
                        help='name to save the output')
    return parser.parse_args()


 def _generate(path: str, outfile: str):
    """Extract per-device access entries from a saved HTML page into a CSV.

    The page is searched for ``<ul class="structural retable">``. Each
    ``<li>`` inside it supplies a device name (its ``<h3>`` text) and its
    ``<div class="activityAccess">`` entries; every entry becomes one CSV
    row with the columns device, ip, location, date and time.

    Args:
        path: Filesystem path of the HTML file to read.
        outfile: Path of the CSV file to write (written without an index).

    Raises:
        ValueError: If the page contains no ``structural retable`` list.
    """
    # Open the file directly rather than through a 'file:///' URL: relative
    # paths keep working, no urllib submodule import is needed, and the
    # handle is always closed.
    with open(path, 'rb') as html_file:
        soup = BeautifulSoup(html_file.read(), 'html.parser')
    ul = soup.find('ul', attrs={'class': 'structural retable'})
    if ul is None:
        raise ValueError(
            "no <ul class='structural retable'> element found in %r" % (path,))
    data = []
    for li in ul.find_all('li'):
        device = li.find('h3').text
        for div in li.find_all('div', attrs={'class': 'activityAccess'}):
            # Text of the first nested <div>, split as "<location> - <ip>".
            location_ip = div.find('div').text.split(' - ')
            # activityDate text, split on ',' into a date part and a time part.
            last_used = div.find('div', attrs={'class': 'activityDate'}).text.split(',')
            data.append({
                'device': device,
                'ip': location_ip[1],
                'location': location_ip[0],
                # Keep only what follows the first ':' of the date part.
                'date': last_used[0].split(':')[1],
                'time': last_used[1],
            })
    # Naming the columns explicitly fixes their order and keeps the header
    # row even when no entries were found.
    df = pd.DataFrame(data, columns=['device', 'ip', 'location', 'date', 'time'])
    df.to_csv(outfile, index=False)


 def main():
    """Script entry point: parse the CLI options and run the conversion.

    The output file name is echoed to stdout before the CSV is generated.
    """
    cli = _parse()
    print(cli.filename)
    _generate(path=cli.html, outfile=cli.filename)


 # Run the conversion only when this file is executed as a script, so that
 # importing it has no side effects.
 if __name__ == "__main__":
    main()
	import pandas as pd
	import urllib
	from bs4 import BeautifulSoup
	import argparse


	def _parse():
	    """Parse the command-line options of the script.

	    Both options are mandatory. Without ``-html`` there is nothing to read,
	    and without ``-f/--filename`` the CSV would never be written to disk, so
	    argparse rejects such invocations up front with a usage message.

	    Returns:
	        argparse.Namespace: ``html`` holds the input path and ``filename``
	        holds the output path.

	    Raises:
	        SystemExit: Raised by argparse when a required option is missing or
	        the arguments cannot be parsed.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument('-html', type=str, required=True,
	                        help='path to html file')
	    parser.add_argument('-f', '--filename', type=str, required=True,
	                        help='name to save the output')
	    return parser.parse_args()


	def _generate(path: str, outfile: str):
	    """Extract per-device access entries from a saved HTML page into a CSV.

	    The page is searched for ``<ul class="structural retable">``. Each
	    ``<li>`` inside it supplies a device name (its ``<h3>`` text) and its
	    ``<div class="activityAccess">`` entries; every entry becomes one CSV
	    row with the columns device, ip, location, date and time.

	    Args:
	        path: Filesystem path of the HTML file to read.
	        outfile: Path of the CSV file to write (written without an index).

	    Raises:
	        ValueError: If the page contains no ``structural retable`` list.
	    """
	    # Open the file directly rather than through a 'file:///' URL: relative
	    # paths keep working, no urllib submodule import is needed, and the
	    # handle is always closed.
	    with open(path, 'rb') as html_file:
	        soup = BeautifulSoup(html_file.read(), 'html.parser')
	    ul = soup.find('ul', attrs={'class': 'structural retable'})
	    if ul is None:
	        raise ValueError(
	            "no <ul class='structural retable'> element found in %r" % (path,))
	    data = []
	    for li in ul.find_all('li'):
	        device = li.find('h3').text
	        for div in li.find_all('div', attrs={'class': 'activityAccess'}):
	            # Text of the first nested <div>, split as "<location> - <ip>".
	            location_ip = div.find('div').text.split(' - ')
	            # activityDate text, split on ',' into a date part and a time part.
	            last_used = div.find('div', attrs={'class': 'activityDate'}).text.split(',')
	            data.append({
	                'device': device,
	                'ip': location_ip[1],
	                'location': location_ip[0],
	                # Keep only what follows the first ':' of the date part.
	                'date': last_used[0].split(':')[1],
	                'time': last_used[1],
	            })
	    # Naming the columns explicitly fixes their order and keeps the header
	    # row even when no entries were found.
	    df = pd.DataFrame(data, columns=['device', 'ip', 'location', 'date', 'time'])
	    df.to_csv(outfile, index=False)


	def main():
	    """Script entry point: parse the CLI options and run the conversion.

	    The output file name is echoed to stdout before the CSV is generated.
	    """
	    args = _parse()
	    out_filename = args.filename
	    html_path = args.html
	    print(out_filename)
	    _generate(path=html_path, outfile=out_filename)


	# Run the conversion only when this file is executed as a script, so that
	# importing it has no side effects.
	if __name__ == '__main__':
	    main()