Skip to content

Instantly share code, notes, and snippets.

View abhijeet-talaulikar's full-sized avatar

Abhijeet Talaulikar abhijeet-talaulikar

View GitHub Profile
def expected_steps(df):
Q = df.drop(
['Null', 'Activation'], axis=1).drop(['Null', 'Activation'], axis=0)
I = np.identity(Q.shape[1])
N = np.linalg.inv(I - Q.to_numpy())
t = np.sum(N, axis=1)
import numpy as np
import pandas as pd
import openai
# Enter your own OpenAI API key here
openai.api_key = ""
# Load data
data = pd.read_csv("complaints.csv")
import matplotlib.pyplot as plt
import matplotlib
# Stack the per-row embeddings into a single 2-D ndarray before projecting.
# Recent scikit-learn releases read `X.shape` while validating `perplexity`
# inside TSNE.fit_transform, so a plain list of lists raises AttributeError;
# an ndarray is accepted by every version.
# (assumes every 'ada_embedding' entry is a vector of the same length)
matrix = np.array(clean_data['ada_embedding'].to_list())
# Create a t-SNE model and transform the data
tsne = TSNE(n_components=2, perplexity=15, random_state=42, init='random', learning_rate=200)
vis_dims = tsne.fit_transform(matrix)
x = [x for x,y in vis_dims]
from sklearn.manifold import TSNE
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import GridSearchCV
matrix = np.array(review_data['ada_embedding'].to_list())
# Grid search to find best n_components - number of clusters
components, aic, bic = [], [], []
for i in range(3,11):
import plotly.express as px
# Best model
# Seed the fit: GaussianMixture's initialisation depends on its random state,
# so without a fixed random_state the numbering of the components can change
# from run to run and names assigned to component indices by hand would no
# longer refer to the same clusters. Re-check the names after the first
# seeded run.
gmm = GaussianMixture(n_components=7, random_state=42)
gmm.fit(matrix)
# Get aspect labels and give names
labels = gmm.predict(matrix)
aspect_labels = {
# Get inclusion probabilities
probabilities = gmm.predict_proba(matrix)
proba_df = pd.DataFrame(probabilities, columns = aspect_labels.values())
# Get dominant aspects
fig = px.line_polar(
pd.DataFrame({
"aspect": proba_df.idxmax(axis=1),
"max_proba": proba_df.lookup(proba_df.index, proba_df.idxmax(axis=1))
}).groupby("aspect").sum().reset_index(),
!git clone https://github.com/tctianchi/pyvenn.git
%matplotlib inline
from pyvenn import venn
# Get aspect labels: the five aspects with the largest summed dominant
# probability, ordered from largest to smallest.
# DataFrame.lookup was deprecated in pandas 1.2 and removed in pandas 2.0.
# The value found at each row's idxmax column is simply that row's maximum,
# so max(axis=1) yields the same numbers on every pandas version.
aspects = (
    pd.DataFrame({
        "aspect": proba_df.idxmax(axis=1),
        "max_proba": proba_df.max(axis=1).to_numpy(),
    })
    .groupby("aspect")
    .sum()
    .reset_index()
    .sort_values("max_proba", ascending=False)
    .head(5)
    .aspect.to_list()
)