Getting Started With Superset: Airbnb’s data exploration platform
These instructions are for Amazon Linux Version 2
sudo yum update -y
sudo yum install python3 -y
Comparison of data catalog tools (+ = supported, ? = unknown):

| Tool | Site | Pricing | Comment | Automated, intelligent population of the catalog | Search | Tagging and metadata freshness | Business glossary (business terms, definitions, data stewards) | Lineage from a report back to the underlying data source | Preview of sample data & profiling |
|---|---|---|---|---|---|---|---|---|---|
| Amundsen | https://eng.lyft.com/amundsen-lyfts-data-discovery-metadata-engine-62d27254fbb9 | open source | | + | + | + | + | | + |
| Marquez | https://marquezproject.github.io/marquez/ | open source | | + | + | + | | | |
| Metacat | https://www.dataone.org/software-tools/metacat | open source | for scientific purposes | ? | ? | ? | ? | ? | ? |
| Talend | https://www.talend.com/products/data-catalog/ | >$1000 user/month | | ? | ? | ? | ? | ? | ? |
| Waterlinedata | https://www.waterlinedata.com/ | ? | | + | + | + | | + | |
| Alation | https://www.alation.com/product/ | ? | | ? | ? | ? | ? | ? | ? |
| Ataccama | https://www.ataccama.com/product/metadata-management-and-data-catalog | ? | | + | | | + | + | |
| Collibra | https://www.collibra.com/data-catalog | ? | | ? | ? | ? | ? | ? | ? |
| WhereHows >> LinkedIn's DataHub | https://github.com/linkedin/WhereH | | | | | | | | |
```python
# importing libraries
import json
import re
import time
import datetime

import numpy as np
import pandas as pd
import requests
from furl import furl
from pandas.io.json import json_normalize  # deprecated; in pandas >= 1.0 use pandas.json_normalize
```
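This import set (requests, furl, json_normalize) is the usual combination for pulling JSON from a REST endpoint and flattening it into a DataFrame. Below is a minimal sketch of that pattern; the endpoint URL and query parameters are hypothetical.

```python
import pandas as pd
import requests
from furl import furl

# hypothetical endpoint and query parameters (illustration only)
url = furl("https://api.example.com/metrics")
url.args["from"] = "2020-01-01"
url.args["to"] = "2020-01-31"

resp = requests.get(url.url, timeout=30)
resp.raise_for_status()
records = resp.json()              # expected: a list of (possibly nested) dicts
df = pd.json_normalize(records)    # flatten nested JSON into flat columns
print(df.head())
```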
```python
df_ineff = save_results(df)
df_complete = save_results_complete(df)
df_complete.head()

# Estimating the improvement options - input oriented:
# multiply each input column by (1 - efficiency) to get the potential input reduction
heading = list(df_ineff.iloc[:, 1:6])
inter = []
for c in df_ineff.iloc[:, 1:6].columns:
    inter.append(df_ineff[c].multiply(1 - df_ineff.iloc[:, -1]))
df_improvement = round(pd.concat(inter, axis=1), 1)
```
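A quick sanity check of the arithmetic above with made-up numbers: a DMU with an efficiency score of 0.8 can, in the input-oriented reading, shrink each input by a factor of (1 - 0.8) = 0.2.

```python
import pandas as pd

# toy data: one DMU with two inputs and an efficiency score of 0.8 (made-up numbers)
toy = pd.DataFrame({"dmu": ["A"], "staff": [50.0], "budget": [200.0], "efficiency": [0.8]})

reduction = toy[["staff", "budget"]].multiply(1 - toy["efficiency"], axis=0)
print(round(reduction, 1))  # staff 10.0, budget 40.0 -> potential input reductions
```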
```python
def save_results(dataframe):
    """Merge the efficiency scores of the inefficient DMUs into the given dataframe."""
    # `not_efficient` is expected to exist in the enclosing scope: a dict or Series
    # mapping DMU name -> efficiency score (built from the DEA results, not shown here)
    df_results = pd.DataFrame([])
    df_results = df_results.append(not_efficient, ignore_index=True).T  # DataFrame.append is deprecated since pandas 1.4
    df_results = df_results.reset_index()
    df_results.columns = ['dmu', 'efficiency']
    dataframe = dataframe.merge(df_results)
    return dataframe
```
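`save_results_complete` is called above but never defined in this snippet. A possible sketch follows, under the assumption that a dict or Series covering every DMU is available (here called `all_efficiencies`, an assumed name); this is not the original implementation.

```python
def save_results_complete(dataframe, all_efficiencies=None):
    """Sketch: merge efficiency scores for *all* DMUs (efficient and inefficient).

    `all_efficiencies` is an assumed dict/Series of DMU name -> efficiency score.
    """
    scores = pd.Series(all_efficiencies, name="efficiency")
    df_results = scores.rename_axis("dmu").reset_index()
    return dataframe.merge(df_results, on="dmu", how="left")
```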
```python
# methods of the DEA class (the class definition itself is not included in this snippet)

def name_units(self, names):
    """
    Provide names for units for presentation purposes
    :param names: a list of names, equal in length to the number of units
    :return: nothing
    """
    assert(self.n == len(names))
    self.names = names

def fit(self):
    # body truncated in the source; delegating to the optimizer below is the
    # assumed minimal implementation
    self.__optimize()
```
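Since the constructor is not shown, the following usage is only a sketch under the assumption that the class is called `DEA` and takes an input matrix and an output matrix with one row per production unit.

```python
import numpy as np

# assumed constructor: DEA(inputs, outputs); shapes (n_units, n_inputs) and (n_units, n_outputs)
inputs = np.array([[20.0, 300.0],    # made-up inputs, e.g. staff and budget
                   [30.0, 200.0],
                   [40.0, 100.0]])
outputs = np.array([[100.0],         # made-up output, e.g. cases handled
                    [ 80.0],
                    [ 90.0]])

dea = DEA(inputs, outputs)
dea.name_units(["unit_A", "unit_B", "unit_C"])  # length must equal the number of units
dea.fit()                                       # runs the per-unit optimization
```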
```python
def __optimize(self):
    """
    Optimization of the DEA model
    Use: http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.optimize.linprog.html
    A = coefficients in the constraints
    b = rhs of constraints
    c = coefficients of the target function
    :return:
    """
    d0 = self.m + self.r + self.n  # decision variables: m input weights + r output weights + n lambdas
    # (the rest of the method is truncated in the source)
```
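The docstring points at `scipy.optimize.linprog`, but the body above is cut off. For reference, here is a hedged standalone sketch (not the original method) of the standard input-oriented CCR envelopment LP for a single unit, which is the form that maps most directly onto `linprog`: minimize theta subject to X·lambda <= theta·x0, Y·lambda >= y0 and lambda >= 0.

```python
import numpy as np
from scipy.optimize import linprog

def ccr_input_oriented(inputs, outputs, unit):
    """Sketch: input-oriented CCR envelopment LP for one unit.

    inputs: (n_units, m) array, outputs: (n_units, r) array.
    Decision variables: [theta, lambda_1, ..., lambda_n].
    """
    n, m = inputs.shape
    r = outputs.shape[1]
    c = np.r_[1.0, np.zeros(n)]                       # objective: minimize theta

    # inputs:  sum_j lambda_j * x_ij - theta * x_i,unit <= 0
    A_in = np.hstack([-inputs[unit].reshape(-1, 1), inputs.T])
    b_in = np.zeros(m)
    # outputs: -sum_j lambda_j * y_kj <= -y_k,unit   (i.e. Y.lambda >= y0)
    A_out = np.hstack([np.zeros((r, 1)), -outputs.T])
    b_out = -outputs[unit]

    res = linprog(c,
                  A_ub=np.vstack([A_in, A_out]),
                  b_ub=np.r_[b_in, b_out],
                  bounds=[(None, None)] + [(0, None)] * n)
    return res.fun  # theta = efficiency score of `unit`
```

Note that the class above optimizes over input weights, output weights and lambdas together (d0 = m + r + n variables), so its actual `__optimize` body likely differs from this LP; the sketch only makes the linprog reference concrete.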
```python
def __constraints(self, x, unit):
    """
    Constraints for optimization for one unit
    :param x: combined weights
    :param unit: which production unit to compute
    :return: array of constraints
    """
    in_w, out_w, lambdas = x[:self.m], x[self.m:(self.m + self.r)], x[(self.m + self.r):]  # unroll the weights
    constr = []  # init the constraint array
    # for each input, lambdas with inputs
    # (the rest of the method is truncated in the source)
```
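The body above breaks off right after the input-constraint comment. Below is an assumed continuation, not the original code, based on the standard DEA feasibility conditions written as "value >= 0" inequalities (the convention used, for example, by `scipy.optimize.fmin_slsqp`'s `f_ieqcons`): scaled inputs must cover the reference combination, the reference combination must reach the unit's outputs, and the lambdas must stay non-negative.

```python
    # assumed continuation of __constraints (inside the method body, not from the source)

    # for each input i: theta * x_i,unit - sum_j lambda_j * x_ij >= 0
    for i in range(self.m):
        theta = self.__target(x, unit)
        constr.append(theta * self.inputs[unit, i] - np.dot(self.inputs[:, i], lambdas))

    # for each output k: sum_j lambda_j * y_kj - y_k,unit >= 0
    for k in range(self.r):
        constr.append(np.dot(self.outputs[:, k], lambdas) - self.outputs[unit, k])

    # lambdas must stay non-negative
    for lam in lambdas:
        constr.append(lam)

    return np.array(constr)
```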
```python
def __target(self, x, unit):
    """
    Theta target function for one unit
    :param x: combined weights
    :param unit: which production unit to compute
    :return: theta
    """
    in_w, out_w, lambdas = x[:self.m], x[self.m:(self.m + self.r)], x[(self.m + self.r):]  # unroll the weights
    denominator = np.dot(self.inputs[unit], in_w)   # weighted sum of the unit's inputs
    numerator = np.dot(self.outputs[unit], out_w)   # weighted sum of the unit's outputs
    # the return statement is cut off in the source; theta is the output/input ratio,
    # so this is the assumed final line
    return numerator / denominator
```
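A quick numeric illustration of the theta ratio with made-up numbers: for a unit with inputs [20, 300], output [100], input weights [0.5, 0.01] and output weight [0.13], theta = (100·0.13) / (20·0.5 + 300·0.01) = 13 / 13 = 1.0.

```python
import numpy as np

inputs_unit  = np.array([20.0, 300.0])   # made-up inputs of one unit
outputs_unit = np.array([100.0])         # made-up output of the same unit
in_w  = np.array([0.5, 0.01])            # made-up input weights
out_w = np.array([0.13])                 # made-up output weight

theta = np.dot(outputs_unit, out_w) / np.dot(inputs_unit, in_w)
print(theta)  # 13.0 / 13.0 = 1.0
```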