Getting Started With Superset: Airbnb’s data exploration platform
These instructions are for Amazon Linux Version 2
sudo yum update -y
sudo yum install python3 -y
+--------+-------+-------+-------+-------+----+-------+-------+-------+--+
| Branch | x1    | x2    | x3    | x4    | x5 | y1    | y2    | y3    |  |
+--------+-------+-------+-------+-------+----+-------+-------+-------+--+
| A      | 86.13 | 16.24 | 48.21 | 49.69 | 9  | 54.53 | 58.98 | 38.16 |  |
| B      | 29.26 | 10.24 | 41.96 | 40.65 | 5  | 24.69 | 33.89 | 26.02 |  |
| C      | 43.12 | 11.31 | 38.19 | 35.03 | 9  | 36.41 | 40.62 | 28.51 |  |
+--------+-------+-------+-------+-------+----+-------+-------+-------+--+
class DEA(object): | |
random.seed(5) | |
def __init__(self, inputs, outputs): | |
""" | |
Initialize the DEA object with input data | |
n = number of entities (observations) | |
m = number of inputs (variables, features) | |
r = number of outputs | |
:param inputs: inputs, n x m numpy array | |
:param outputs: outputs, n x r numpy array |
def __efficiency(self, unit):
    """
    Efficiency function with already computed weights

    Reads ``self.inputs``, ``self.outputs``, ``self.input_w`` and
    ``self.output_w``. The weighted outputs are divided element-wise by the
    weighted inputs and the entry selected by ``unit`` is returned.

    :param unit: which unit to compute for (index into the ratio array)
    :return: efficiency
    """
    # compute efficiency: weighted outputs over weighted inputs
    denominator = np.dot(self.inputs, self.input_w)
    numerator = np.dot(self.outputs, self.output_w)
    return (numerator / denominator)[unit]
def __target(self, x, unit):
    """
    Theta target function for one unit

    ``x`` is the flat vector of combined weights: the first ``self.m``
    entries are the input weights, the next ``self.r`` entries are the
    output weights, and whatever follows is not used by this function.

    :param x: combined weights
    :param unit: which production unit to compute
    :return: theta
    """
    # unroll the weights (the trailing part of x is not needed here)
    in_w = x[:self.m]
    out_w = x[self.m:(self.m + self.r)]
    denominator = np.dot(self.inputs[unit], in_w)
    numerator = np.dot(self.outputs[unit], out_w)
    # NOTE(review): the excerpt stopped before a return statement although
    # the docstring promises theta; the ratio of the two values computed
    # above is returned -- confirm against the full source.
    return numerator / denominator
def __constraints(self, x, unit): | |
""" | |
Constraints for optimization for one unit | |
:param x: combined weights | |
:param unit: which production unit to compute | |
:return: array of constraints | |
""" | |
in_w, out_w, lambdas = x[:self.m], x[self.m:(self.m+self.r)], x[(self.m+self.r):] # unroll the weights | |
constr = [] # init the constraint array | |
# for each input, lambdas with inputs |
def __optimize(self): | |
""" | |
Optimization of the DEA model | |
Use: http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.optimize.linprog.html | |
A = coefficients in the constraints | |
b = rhs of constraints | |
c = coefficients of the target function | |
:return: | |
""" | |
d0 = self.m + self.r + self.n |
def name_units(self, names):
    """
    Provide names for units for presentation purposes

    Stores ``names`` on the instance as ``self.names`` after checking that
    there is exactly one name per unit (``self.n``).

    :param names: a list of names, equal in length to the number of units
    :return: nothing
    :raises AssertionError: if ``len(names)`` differs from ``self.n``
    """
    # Kept as an assert so the exception type seen by callers is unchanged;
    # the message makes a length mismatch easier to diagnose.
    assert self.n == len(names), (
        "expected %d names, got %d" % (self.n, len(names))
    )
    self.names = names
def fit(self): |
def save_results(dataframe, efficiencies=None):
    """
    Attach efficiency scores to ``dataframe``.

    The scores are turned into a two-column frame (``'dmu'``,
    ``'efficiency'``) and merged into ``dataframe`` with pandas' default
    merge, i.e. an inner join on the columns both frames share.

    :param dataframe: frame to extend; presumably has a ``'dmu'`` column
        to join on -- verify against the caller
    :param efficiencies: mapping of unit label -> efficiency score; when
        omitted, the module-level ``not_efficient`` is used, as before
    :return: the merged dataframe
    """
    if efficiencies is None:
        # original behaviour: read the scores from the module-level global
        efficiencies = not_efficient
    # DataFrame.append was removed in pandas 2.0; building a one-row frame
    # from the mapping and transposing gives the same label/score layout.
    # NOTE(review): assumes the scores are a single dict-like record.
    df_results = pd.DataFrame([efficiencies]).T.reset_index()
    df_results.columns = ['dmu', 'efficiency']
    return dataframe.merge(df_results)
df_ineff = save_results(df)
df_complete = save_results_complete(df)
df_complete.head()

# Estimating the improvement options - input oriented
# Columns 1..5 are presumably the inputs x1..x5 and the last column the
# efficiency score added by save_results -- verify against the data.
heading = list(df_ineff.iloc[:, 1:6])
# Share of each input that could be saved: (1 - efficiency), computed once
# instead of on every loop iteration.
inefficiency = 1 - df_ineff.iloc[:, -1]
inter = [df_ineff[c].multiply(inefficiency) for c in df_ineff.iloc[:, 1:6].columns]
df_improvement = round(pd.concat(inter, axis=1), 1)
These instructions are for Amazon Linux Version 2
sudo yum update -y
sudo yum install python3 -y