1.) Python for Scientific Computing App
2.) Splunk ML Toolkit >=1.0
[corr]
filename = corr.py
chunked = true
#!/usr/bin/env python
# Xander Johnson @metasyn
import sys

import exec_anaconda

# This call must stay ahead of the pandas import below (presumably it switches
# to the interpreter shipped with the Python for Scientific Computing app --
# confirm). On failure the error is reported through cexc.abort() and the
# script exits with status 1.
try:
    exec_anaconda.exec_anaconda()
except Exception as e:
    import cexc
    cexc.abort(e)
    sys.exit(1)

# Import this to get our "chunked" handler
from cexc import BaseChunkHandler
import cexc
import pandas as pd
# Correlation methods accepted by the ``method=<value>`` search argument.
_VALID_METHODS = ('pearson', 'spearman', 'kendall')


def _parse_method(raw_args):
    """Return the correlation method named by the first search argument.

    Args:
        raw_args: Sequence of raw argument strings. Only the first element
            is inspected.

    Returns:
        'pearson' when no arguments are given, otherwise the value taken
        from the ``method=<value>`` argument.

    Raises:
        ValueError: If the first argument does not mention ``method``, has
            no ``=<value>`` part, or names an unsupported method.
    """
    if len(raw_args) == 0:
        return 'pearson'

    first = raw_args[0]
    if 'method' not in first:
        raise ValueError('The only accepted option is method: pearson, spearman, kendall. e.g. method=spearman')

    pieces = first.split('=')
    if len(pieces) < 2:
        # No "=<value>" part at all.
        raise ValueError('The only accepted option is method: pearson, spearman, kendall. e.g. method=pearson')

    value = pieces[1]
    if value not in _VALID_METHODS:
        # Raised outside any try block so this message actually reaches the
        # caller instead of being replaced by a catch-all handler.
        raise ValueError('The only accepted values are: pearson, spearman, kendall. e.g. method=spearman')
    return value


class Handler(BaseChunkHandler):
    """Chunked search-command handler that emits a correlation matrix."""

    def handler(self, metadata, data):
        """Get correlation matrix from a pandas dataframe.

        Args:
            metadata: Chunk metadata dict; only its 'finished' flag is read.
            data: Chunk records, passed straight to ``pd.DataFrame``
                (presumably a list of per-event dicts -- confirm against
                BaseChunkHandler).

        Returns:
            ``{'finished': True}`` in preview mode; a
            ``({'finished': True}, records)`` tuple holding one record per
            correlated field when ``data`` is non-empty; otherwise ``{}`` if
            ``metadata`` is not marked finished, else
            ``({'finished': False}, data)``.

        Raises:
            ValueError: If the search arguments do not name a supported
                correlation method.
        """
        if self.getinfo.get('preview', False):
            # Function-scope import: the visible script defines no ``logger``,
            # so the stdlib logger is looked up here.
            import logging
            logging.getLogger(__name__).debug('Not running in preview')
            return {'finished': True}

        if len(data) != 0:
            # get them args
            raw_args = self.getinfo['searchinfo']['raw_args']
            method = _parse_method(raw_args)
            corr = pd.DataFrame(data, dtype='float').corr(method=method)

            # drop() returns a new frame, so the result must be kept. The
            # matrix is labelled by field name on both axes, so '__'-prefixed
            # internal fields are removed from rows and columns alike.
            internal = [col for col in corr.columns if col.startswith('__')]
            corr = corr.drop(internal, axis=0).drop(internal, axis=1)

            # Expose each row's field name as a regular output column.
            corr['(field)'] = corr.columns
            output = corr.to_dict('records')
            return ({'finished': True}, output)

        if not metadata.get('finished', False):
            return {}
        return ({'finished': False}, data)
# Run the handler only when this file is executed as a script.
if __name__ == "__main__":
    Handler().run()