Skip to content

Instantly share code, notes, and snippets.

@ckinsey
Created October 13, 2015 02:49
Show Gist options
  • Save ckinsey/762394f280dec30f012b to your computer and use it in GitHub Desktop.
Save ckinsey/762394f280dec30f012b to your computer and use it in GitHub Desktop.
An example implementation of an ETL registry
class ETLRegistry(object):
    """
    Registry of the relationships between Extract, Transform and Load classes.

    Each entry groups one extractor, one transformer and one loader. For every
    role the entry stores both the class name as a string (for passing through
    a message queue) and a reference to the class itself (for invocation
    inside the task).
    """

    # Roles stored in every entry, in the order get_entry() checks them.
    _ROLES = ('extractor', 'transformer', 'loader')

    def __init__(self):
        # Created per instance so that separate registries never share
        # entries through a mutable class attribute.
        self._registry = []

    def register(self, extractor, transformer, loader):
        """
        Append an entry linking the given extractor, transformer and loader.

        Every argument must expose ``__name__`` (as classes do); that name is
        the string later matched by ``get_entry``.
        """
        components = zip(self._ROLES, (extractor, transformer, loader))
        self._registry.append({
            role: {'name': component.__name__, 'class': component}
            for role, component in components
        })

    def get_entry(self, **kwargs):
        """
        Return the first registry entry matching the given ETL class name.

        Accepts the keyword arguments ``extractor``, ``transformer`` and
        ``loader``, each holding a class name string. They are checked in that
        order; a missing or falsy value is skipped, and a value that matches
        no entry falls through to the next role. Other keywords are ignored.

        Returns the entry dict (keys ``'extractor'``, ``'transformer'`` and
        ``'loader'``, each mapping to ``{'name': ..., 'class': ...}``), or
        ``None`` when nothing matches.
        """
        for role in self._ROLES:
            wanted = kwargs.get(role)
            if not wanted:
                continue
            for entry in self._registry:
                if entry[role]['name'] == wanted:
                    return entry
        return None
# Module-level registry instance; each register() call below links one
# extractor/transformer/loader triple.
etl_registry = ETLRegistry()
etl_registry.register(
    extractor=EmployeeExtractor,
    transformer=EmployeeTransformer,
    loader=EmployeeLoader,
)
etl_registry.register(
    extractor=SaleExtractor,
    transformer=SaleTransformer,
    loader=SaleLoader,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment