Skip to content

Instantly share code, notes, and snippets.

@kirankotari
Created May 3, 2020 02:04
Show Gist options
  • Save kirankotari/a12d917bc0f22a2b75a4ae1ab09bf2fc to your computer and use it in GitHub Desktop.
Save kirankotari/a12d917bc0f22a2b75a4ae1ab09bf2fc to your computer and use it in GitHub Desktop.
import json
import xml.etree.ElementTree as etree
class JSONDataExtractor:
    """Load a JSON file and expose or print its decoded content."""

    def __init__(self, filepath):
        """Read and decode the UTF-8 JSON document stored at *filepath*."""
        self.data = {}
        with open(filepath, 'r', encoding='utf-8') as handle:
            self.data = json.loads(handle.read())

    @property
    def parsed_data(self):
        """Return the decoded JSON content exactly as it was loaded."""
        return self.data

    def print_data(self):
        """Print every key/value pair of each document, one pair per line.

        Iterates ``self.data`` and calls ``.items()`` on each element, so the
        loaded JSON is expected to be a sequence of objects. A blank line is
        printed after each document.
        """
        print('\nJSON:')
        for document in self.data:
            for field, value in document.items():
                print(f'{field.capitalize()} is {value}')
            print('')
class XMLDataExtrator:
    """Parse an XML file and print the ``person`` records it contains."""

    def __init__(self, filepath):
        """Parse the XML document at *filepath* into an ElementTree."""
        self.tree = etree.parse(filepath)

    @property
    def parsed_data(self):
        """Return the parsed ``ElementTree`` object."""
        return self.tree

    def print_data(self):
        """Print first name, last name and phone numbers of every person.

        Every ``person`` element found anywhere in the tree is printed,
        followed by a blank line. A person without a ``phoneNumbers`` child
        is printed without phone lines instead of raising ``TypeError``.
        """
        print('\nXML:')
        for person in self.tree.findall('.//person'):
            print(f"FirstName: {person.find('firstName').text}")
            print(f"LastName: {person.find('lastName').text}")
            phone_numbers = person.find('phoneNumbers')
            # ``find`` returns None when the child is absent; compare with
            # ``is not None`` because an Element with no children is falsy.
            if phone_numbers is not None:
                for phone in phone_numbers:
                    print(f"PhoneNumber ({phone.attrib['type']}): {phone.text}")
            print('')
class DataExtractorFactory:
    """Factory whose "constructor" returns a data extractor, not a factory.

    ``DataExtractorFactory(filepath)`` evaluates to:

    * a ``JSONDataExtractor`` when *filepath* ends in ``.json``,
    * an ``XMLDataExtrator`` when *filepath* ends in ``.xml``,
    * ``None`` otherwise, after printing why the file is not supported.

    The extension is matched case-insensitively and must include the dot, so
    ``DATA.JSON`` is accepted while a name such as ``archivejson`` is not.
    """

    def __init__(self, filepath):
        """Store *filepath* as a string and build the matching extractor."""
        self.__obj = None
        self.__filepath = str(filepath)
        self.__extract_data_from()

    def __new__(cls, filepath):
        """Create a throwaway factory and return the extractor it built.

        ``__init__`` is invoked by hand here. The returned object is not an
        instance of ``cls``, so Python does not call ``__init__`` again.
        """
        instance = object.__new__(cls)
        instance.__init__(filepath)
        return instance.__obj

    def __extract_data_from(self):
        """Build the extractor, printing any ``ValueError`` instead of raising.

        The extractor is constructed inside the ``try``, so a ``ValueError``
        raised while loading the file (for example ``json.JSONDecodeError``)
        is reported the same way as an unsupported extension.
        """
        try:
            self.__obj = self.__data_extractor_factory()
        except ValueError as e:
            print(f'{e}')

    def __data_extractor_factory(self):
        """Return an extractor instance chosen by the file extension.

        Raises:
            ValueError: if the path ends in neither ``.json`` nor ``.xml``.
        """
        # Compare against a lower-cased copy so the check ignores case, but
        # hand the original path to the extractor unchanged.
        lowered = self.__filepath.lower()
        if lowered.endswith('.json'):
            extractor = JSONDataExtractor
        elif lowered.endswith('.xml'):
            extractor = XMLDataExtrator
        else:
            raise ValueError(f'Data extraction for the given file {self.__filepath} is not supported..!')
        return extractor(self.__filepath)
def main():
    """Demonstrate the factory on an unsupported file and on a JSON file.

    The ``.sq3`` path has no matching extractor, so the factory prints its
    own message and the ``None`` it returns is printed afterwards. The JSON
    path is then loaded and its content printed.
    """
    # pylint: disable=no-member
    print('SQLite')
    print(DataExtractorFactory(filepath='./person.sq3'))
    print('')
    DataExtractorFactory(filepath='./movies.json').print_data()


if __name__ == "__main__":
    main()
[
{"title":"After Dark in Central Park",
"year":1900,
"director":null, "cast":null, "genre":null},
{"title":"Boarding School Girls' Pajama Parade",
"year":1900,
"director":null, "cast":null, "genre":null},
{"title":"Buffalo Bill's Wild West Parade",
"year":1900,
"director":null, "cast":null, "genre":null},
{"title":"Caught",
"year":1900,
"director":null, "cast":null, "genre":null},
{"title":"Clowns Spinning Hats",
"year":1900,
"director":null, "cast":null, "genre":null},
{"title":"Capture of Boer Battery by British",
"year":1900,
"director":"James H. White", "cast":null, "genre":"Short documentary"},
{"title":"The Enchanted Drawing",
"year":1900,
"director":"J. Stuart Blackton", "cast":null,"genre":null},
{"title":"Family Troubles",
"year":1900,
"director":null, "cast":null, "genre":null},
{"title":"Feeding Sea Lions",
"year":1900,
"director":null, "cast":"Paul Boyton", "genre":null}
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment