Abhijeet Talaulikar (abhijeet-talaulikar)
import streamlit as st

### Setup the page configuration ###
st.set_page_config(layout="wide", page_title="Concall Transcripts with GPT")
st.title('Concall Transcripts with GPT')
openai_api_key = st.sidebar.text_input('OpenAI API Key', value='')

# Keep the default GPT response across Streamlit reruns
if "response_default" not in st.session_state:
    st.session_state['response_default'] = None
### Create UI elements ###
if "disabled" not in st.session_state:
    st.session_state['disabled'] = False
import openai
from bertopic import BERTopic
from bertopic.representation import OpenAI

# Add GPT 3.5 representation model into BERTopic
openai.api_key = "sk-xxx"
representation_model = OpenAI(model="gpt-3.5-turbo", chat=True)

topic_model = BERTopic(
    verbose=True,
    hdbscan_model=hdbscan_model,
    representation_model=representation_model,
    nr_topics=10,
    umap_model=umap_model
)
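# --- Hedged sketch (not in the original gist): the visualizations below assume
# a fitted model, so a fit step like this is implied. It uses the FinBERT
# embeddings and the `headline_texts` list built in the preprocessing snippets.
topics, probs = topic_model.fit_transform(headline_texts, embeddings=embeddings)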
# Intertopic distance map
topic_model.visualize_topics()
# Topic similarities
topic_model.visualize_heatmap()
# Leading keywords in topics
topic_model.visualize_barchart()
# Term score trends
topic_model.visualize_term_rank()
from bertopic import BERTopic
from umap import UMAP
from hdbscan import HDBSCAN
# Use Hierarchical DBSCAN as clustering model
hdbscan_model = HDBSCAN(min_cluster_size=15, metric='euclidean', cluster_selection_method='eom', prediction_data=True)
# Use UMAP for dimensionality reduction
umap_model = UMAP(n_neighbors=15, n_components=5, min_dist=0.0, metric='cosine', random_state=42)
import numpy as np
from finbert_embedding.embedding import FinbertEmbedding

# Convert text to vectors using pretrained finbert embeddings
finbert = FinbertEmbedding()
embeddings = np.array([finbert.sentence_vector(i).numpy() for i in headline_texts])
import numpy as np
import pandas as pd
import swifter
import cleantext

# Load finance headlines from the dataset
data = pd.read_csv("raw_partner_headlines.csv", usecols=["headline"])

# Perform basic preprocessing steps using cleantext
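# --- Hedged sketch (not in the original gist): the cleantext call following the
# comment above was cut off. A minimal stand-in cleaning step using plain string
# ops, applied via swifter as the imports suggest; `headline_texts` is the list
# the FinBERT embedding snippet expects.
data["headline"] = data["headline"].swifter.apply(
    lambda x: " ".join(str(x).lower().split())
)
headline_texts = data["headline"].tolist()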
import numpy as np
import pandas as pd
import csv
import random
import string
### Create full dataset of 5 million credit card customers ###
def generate_cust_id():
    # Random 7-character alphanumeric customer ID
    return ''.join(random.choices(string.ascii_uppercase + string.digits, k=7))
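# --- Hedged sketch (not in the original gist): the code that actually builds
# the 5-million-customer dataset was cut off. An illustrative assembly step;
# the column names and value ranges are assumptions, not the author's schema.
n_customers = 5_000_000
customers = pd.DataFrame({
    "cust_id": [generate_cust_id() for _ in range(n_customers)],
    "credit_limit": np.random.choice([1000, 2500, 5000, 10000], size=n_customers),
    "tenure_months": np.random.randint(1, 120, size=n_customers),
})
customers.to_csv("credit_card_customers.csv", index=False, quoting=csv.QUOTE_MINIMAL)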
mmm, trace = BayesianMMM("YoY")
paid_search_df = pd.DataFrame({
'year': np.array(["Year before that"]*52 + ["Last Year"]*52),
'coefficient': trace['posterior']['coefficient_PAID_SEARCH'].mean(axis=(0,1))
})
display(paid_search_df.groupby('year').mean())
paid_search_df.groupby('year').mean().plot.bar();
mmm, trace = BayesianMMM("Q")
paid_search_df = pd.DataFrame({
'quarter': "Quarter "+pd.PeriodIndex(dates, freq='Q').astype(str).str[-1].astype(str).values,
'coefficient': trace['posterior']['coefficient_PAID_SEARCH'].mean(axis=(0,1))
})
paid_search_df.groupby('quarter').mean().plot.bar();
plt.title("Paid Search Effectiveness over Quarters")
plt.ylim(bottom=0.12)
mmm, trace = BayesianMMM("W")
import matplotlib.pyplot as plt
plt.figure(figsize=(15,5))
for channel in ['CTV', 'DIRECT_MAIL', 'EMAIL', 'TV']:
plt.plot(
trace.posterior[f'coefficient_{channel}'].values.mean(axis=(0,1)),
linewidth=2,