revolutionisme · March 5, 2019 09:57
diff --git a/xml2csv.py b/xml2csv.py
 import pandas as pd
 import os
 from bs4 import BeautifulSoup
 import re
 import sys

 for root, dirs, files in os.walk('/toplevelfolderpath'):
    print(root, dirs, files)
    for file in files:
        if "xls" in file:
            try:
                tables = pd.read_html(os.path.join(root, file))
                html_soup = BeautifulSoup(open(os.path.join(root, file)), 'html.parser')
                text = html_soup.find(text=re.compile('Item To Search '))
                splitted_text = str(text).split(" - ")
                location = splitted_text[1].split("->")
                if len(location) == 2:
                    state = location[0]
                    district = location[1].replace(" \']", "")
                elif len(location) == 1:
                    state = location[0]
                    district = "Not available"
                else:
                    state = "Unnamed: 0"
                    district = "Unnamed: 0"
                for i, table in enumerate(tables):
                    table.rename(columns={'Unnamed: 0': state,
                                          'Unnamed: 1': district},
                                 inplace=True)
                    csv_file_name = file.replace(".xls", str(i)+".csv")
                    table.to_csv(os.path.join(root, csv_file_name), index=False)
            except ValueError as ve:
                print(f"File - {file} couldn't be processed because of {sys.exc_info()}")
            except Exception as ex:
                print(f"File - {file} couldn't be processed because of {sys.exc_info()}")
	import pandas as pd
	import os
	from bs4 import BeautifulSoup
	import re
	import sys

	for root, dirs, files in os.walk('/toplevelfolderpath'):
	print(root, dirs, files)
	for file in files:
	if "xls" in file:
	try:
	tables = pd.read_html(os.path.join(root, file))
	html_soup = BeautifulSoup(open(os.path.join(root, file)), 'html.parser')
	text = html_soup.find(text=re.compile('Item To Search '))
	splitted_text = str(text).split(" - ")
	location = splitted_text[1].split("->")
	if len(location) == 2:
	state = location[0]
	district = location[1].replace(" \']", "")
	elif len(location) == 1:
	state = location[0]
	district = "Not available"
	else:
	state = "Unnamed: 0"
	district = "Unnamed: 0"
	for i, table in enumerate(tables):
	table.rename(columns={'Unnamed: 0': state,
	'Unnamed: 1': district},
	inplace=True)
	csv_file_name = file.replace(".xls", str(i)+".csv")
	table.to_csv(os.path.join(root, csv_file_name), index=False)
	except ValueError as ve:
	print(f"File - {file} couldn't be processed because of {sys.exc_info()}")
	except Exception as ex:
	print(f"File - {file} couldn't be processed because of {sys.exc_info()}")