This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Setup the page configuration ###
st.set_page_config(layout="wide", page_title="Concall Transcripts with GPT")
st.title('Concall Transcripts with GPT')

# Collect the OpenAI key from the sidebar. type='password' masks the value on
# screen so the secret is not shown in clear text; the widget still returns
# the entered string.
openai_api_key = st.sidebar.text_input('OpenAI API Key', value='', type='password')

# Seed the session-state slot on first run only, so a value stored by an
# earlier rerun is not overwritten.
if "response_default" not in st.session_state:
    st.session_state['response_default'] = None
### Create UI elements ### | |
if "disabled" not in st.session_state: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add GPT 3.5 representation model into BERTopic
import os

# Prefer the key from the OPENAI_API_KEY environment variable so a real secret
# never has to be pasted into the source; "sk-xxx" is only a placeholder.
openai.api_key = os.environ.get("OPENAI_API_KEY", "sk-xxx")
representation_model = OpenAI(model="gpt-3.5-turbo", chat=True)
topic_model = BERTopic( | |
verbose=True, | |
hdbscan_model=hdbscan_model, | |
representation_model=representation_model, | |
nr_topics=10, | |
umap_model=umap_model |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): none of the return values below are captured; presumably these
# are figures that a notebook renders inline - confirm how they are displayed.

# Intertopic distance map
topic_model.visualize_topics()

# Topic similarities
topic_model.visualize_heatmap()

# Leading keywords in topics
topic_model.visualize_barchart()
# Term score trends |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bertopic import BERTopic
from umap import UMAP
from hdbscan import HDBSCAN

# Use Hierarchical DBSCAN as clustering model
hdbscan_model = HDBSCAN(
    min_cluster_size=15,
    metric='euclidean',
    cluster_selection_method='eom',
    prediction_data=True,
)

# Use UMAP for dimensionality reduction.
# random_state is pinned to 42 here.
umap_model = UMAP(
    n_neighbors=15,
    n_components=5,
    min_dist=0.0,
    metric='cosine',
    random_state=42,
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from finbert_embedding.embedding import FinbertEmbedding

# Convert text to vectors using pretrained finbert embeddings.
# Each headline is embedded on its own and the per-headline vectors are
# collected into a single NumPy array.
finbert = FinbertEmbedding()
embeddings = np.array(
    [finbert.sentence_vector(text).numpy() for text in headline_texts]
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np
import swifter
import cleantext

# Load finance headlines from the dataset.
# Only the "headline" column is read from the CSV.
data = pd.read_csv("raw_partner_headlines.csv", usecols=["headline"])
# Perform basic preprocessing steps using cleantext |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import csv | |
import random | |
import string | |
### Create full dataset of 5 million credit card customers ### | |
def generate_cust_id():
    """Return a random 7-character customer ID.

    The ID is built from uppercase ASCII letters and digits, each character
    drawn independently with ``random.choices``. No uniqueness check is
    performed here, so callers that need distinct IDs must de-duplicate.
    """
    return ''.join(random.choices(string.ascii_uppercase + string.digits, k=7))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
mmm, trace = BayesianMMM("YoY")

# Label the first 52 rows "Year before that" and the next 52 "Last Year", and
# pair each row with the PAID_SEARCH coefficient averaged over the first two
# axes of the posterior (presumably chain and draw - confirm).
paid_search_df = pd.DataFrame({
    'year': np.array(["Year before that"]*52 + ["Last Year"]*52),
    'coefficient': trace['posterior']['coefficient_PAID_SEARCH'].mean(axis=(0,1))
})

# Compute the per-year mean once and reuse it for both the table and the plot.
yearly_mean = paid_search_df.groupby('year').mean()
display(yearly_mean)
yearly_mean.plot.bar();
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
mmm, trace = BayesianMMM("Q")

# Build a "Quarter N" label per row from the last character of each quarterly
# period string, and pair it with the PAID_SEARCH coefficient averaged over
# the first two axes of the posterior (presumably chain and draw - confirm).
quarter_digit = pd.PeriodIndex(dates, freq='Q').astype(str).str[-1]
paid_search_df = pd.DataFrame({
    'quarter': "Quarter " + quarter_digit.values,
    'coefficient': trace['posterior']['coefficient_PAID_SEARCH'].mean(axis=(0,1))
})

paid_search_df.groupby('quarter').mean().plot.bar();
plt.title("Paid Search Effectiveness over Quarters")
# The y-axis starts at 0.12 rather than 0.
plt.ylim(bottom=0.12)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt

mmm, trace = BayesianMMM("W")

# Open a 15x5 figure for the plotting code that follows.
plt.figure(figsize=(15,5))
for channel in ['CTV', 'DIRECT_MAIL', 'EMAIL', 'TV']: | |
plt.plot( | |
trace.posterior[f'coefficient_{channel}'].values.mean(axis=(0,1)), | |
linewidth=2, |