Skip to content

Instantly share code, notes, and snippets.

@sakethramanujam
Created December 7, 2019 06:40
Show Gist options
  • Save sakethramanujam/7a726e7f2e9ce0c031e56ce55320fd9b to your computer and use it in GitHub Desktop.
Save sakethramanujam/7a726e7f2e9ce0c031e56ce55320fd9b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
from bs4 import BeautifulSoup
import sys
import pandas as pd
import argparse
def args():
    """Parse the command-line options of the HTML-to-CSV converter.

    Both options are mandatory: when either is omitted argparse prints a
    usage message and exits, instead of letting ``None`` travel further
    into the script as a file path.

    Returns:
        argparse.Namespace: ``filename`` holds the path of the input file
        and ``output`` the path the CSV file is written to.

    Raises:
        SystemExit: if an option is missing or the arguments are invalid.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f', '--filename', help='path to input', type=str, required=True,
    )
    parser.add_argument(
        '-o', '--output', help='Path to store the file', type=str,
        required=True,
    )
    return parser.parse_args()
def _row_data(row):
    """Return the text of every ``<td>`` cell found in *row*.

    Args:
        row: an element exposing ``find_all`` (this script passes the
            ``<tr>`` tags produced by BeautifulSoup).

    Returns:
        list: one entry per ``<td>`` in the order ``find_all`` yields them.
        A cell whose text is empty is reported as ``'Live'``. A row without
        any ``<td>`` yields an empty list.
    """
    # NOTE(review): the 'Live' placeholder presumably stands for a cell that
    # contains only a non-text marker (e.g. an icon) -- confirm against the
    # scraped page.
    # ``find_all`` is the supported bs4 spelling; ``findAll`` is a deprecated
    # alias kept only for backwards compatibility.
    return [cell.text or 'Live' for cell in row.find_all('td')]
def _get_data(soup):
    """Collect the cell texts of every ``<tr>`` found in *soup*.

    Args:
        soup: a parsed document exposing ``find_all`` (a ``BeautifulSoup``
            object in this script).

    Returns:
        list: one list per ``<tr>``, in the order ``find_all`` yields them,
        each produced by ``_row_data``.
    """
    # ``find_all`` replaces the deprecated camelCase alias ``findAll``.
    return [_row_data(tr) for tr in soup.find_all('tr')]
def table_data(filename):
    """Parse the HTML file at *filename* and return its table rows.

    Args:
        filename: path of the HTML file to read.

    Returns:
        list: the per-row cell texts produced by ``_get_data``.

    Raises:
        OSError: if the file cannot be opened.
    """
    # BeautifulSoup is built while the file is still open; the with-block
    # then closes the handle even if parsing raises, so it is never leaked.
    with open(filename) as html_file:
        soup = BeautifulSoup(html_file, 'html.parser')
    return _get_data(soup)
def generate_csv(list_data, output_file):
    """Write the table rows in *list_data* to *output_file* as CSV.

    Args:
        list_data: iterable of rows, each a sequence of cell values.
        output_file: destination passed to ``DataFrame.to_csv``.

    Rows with fewer cells than there are columns are discarded, as are rows
    containing missing values. Dropping the short rows before the frame is
    built gives the same output as padding them and relying on ``dropna``,
    but it also works when *no* row is complete (for instance a page that
    contains only header rows), where the constructor would otherwise
    reject the data for having the wrong number of columns.

    Raises:
        ValueError: if a row has more cells than there are columns.
    """
    cols = ['Sno', 'Blood Bank', 'Category', 'Availability', 'Last Updated', 'Type']
    # Over-long rows are deliberately kept so pandas still reports them.
    complete_rows = [row for row in list_data if len(row) >= len(cols)]
    df = pd.DataFrame(data=complete_rows, columns=cols).dropna()
    df.to_csv(output_file, index=False)
    print(f'{output_file} Saved!')
def main():
    """Convert the HTML table named on the command line into a CSV file."""
    # Parse the command line once and reuse the namespace; calling args()
    # per option would run the argument parser a second time for nothing.
    options = args()
    table = table_data(filename=options.filename)
    generate_csv(list_data=table, output_file=options.output)
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment