This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def dateconv(date, lockdown_start="2020-03-22"):
    """Label a date as falling before or during the lockdown.

    Args:
        date: Value to classify. It is compared to ``lockdown_start`` with
            ``<``; for strings that comparison is lexicographic, so both
            values should be zero-padded ISO ``YYYY-MM-DD`` strings.
        lockdown_start: First day counted as lockdown. Defaults to
            ``"2020-03-22"``, the cutoff that used to be hard-coded here.

    Returns:
        ``"Pre-Lockdown"`` when ``date`` sorts before ``lockdown_start``,
        otherwise ``"During-Lockdown"`` (the cutoff day itself included).
    """
    if date < lockdown_start:
        return "Pre-Lockdown"
    return "During-Lockdown"
# Tag every row with the lockdown phase returned by dateconv for its "date".
df_india["Lockdown"] = df_india["date"].apply(dateconv)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def dayofweek(i):
    """Return the English weekday name for a Monday-first weekday number.

    Args:
        i: Integer position in the Monday-first list of day names
            (0 -> "Monday", 6 -> "Sunday"). Negative values index from the
            end, as with any Python sequence.

    Returns:
        The weekday name as a string.

    Raises:
        IndexError: If ``i`` is outside the range -7..6.
    """
    day_names = (
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    )
    return day_names[i]
# Build a per-message frame labelled with the weekday name, then total the
# rows for each weekday.
day_df = pd.DataFrame(messages_df["Message"])
# .dt.weekday yields Monday-first integers, which dayofweek turns into names.
day_df["day_of_date"] = messages_df["Date"].dt.weekday.apply(dayofweek)
# One row per message, so summing this column gives a message count.
day_df["messagecount"] = 1
# NOTE(review): sum() is applied to every non-key column, so "Message" is
# aggregated as well as "messagecount" - confirm that is intended.
day = day_df.groupby("day_of_date").sum().reset_index()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Start from the library's stop-word set and add extra short tokens that
# should not appear in the cloud (presumably filler words specific to this
# chat - confirm with the data owner).
stopwords = set(STOPWORDS)
stopwords.update(["ra", "ga", "na", "ani", "em", "ki", "ah", "ha", "la", "eh", "ne", "le"])

# Generate a word cloud image from `text`, which must already be defined.
wordcloud = WordCloud(stopwords=stopwords, background_color="white").generate(text)

# Display the generated image the matplotlib way, without axes.
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Concatenate every message into one space-separated string.
text = " ".join(messages_df.Message)
# Count whitespace-separated tokens; len(text) alone is a character count.
print("There are {} words in all the messages.".format(len(text.split())))
# OUTPUT -
# There are <word count> words in all the messages.
# NOTE(review): the previously recorded figure, 687467, was produced by
# len(text) and is therefore a character count, not a word count.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Creates a list of unique Authors - ['Manikanta', 'Teja Kura', .........]
l = messages_df.Author.unique()
for author in l:
    # Keep only the rows written by this author.
    dummy_df = messages_df[messages_df["Author"] == author]
    # Flatten the per-message emoji collections into one list.
    total_emojis_list = [a for b in dummy_df.emoji for a in b]
    # (emoji, count) pairs, most frequent first; ties keep first-seen order.
    emoji_dict = Counter(total_emojis_list).most_common()
    print("Emoji Distribution for", author)
    author_emoji_df = pd.DataFrame(emoji_dict, columns=["emoji", "count"])
    # NOTE(review): the figure is built but not displayed in the visible
    # code - confirm whether rendering is meant to follow.
    fig = px.pie(author_emoji_df, values="count", names="emoji")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import plotly.express as px

# Pie chart of emoji usage, one slice per row of emoji_df.
fig = px.pie(emoji_df, values="count", names="emoji", title="Emoji Distribution")
# Draw each slice's percentage and label inside the slice.
fig.update_traces(textposition="inside", textinfo="percent+label")
fig.show()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Flatten the per-message emoji collections into one list.
total_emojis_list = [a for b in messages_df.emoji for a in b]
# (emoji, count) pairs, most frequent first; ties keep first-seen order.
emoji_dict = Counter(total_emojis_list).most_common()
emoji_df = pd.DataFrame(emoji_dict, columns=["emoji", "count"])
# Bare expression: presumably left so a notebook cell displays the frame.
emoji_df
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Distinct emojis across all messages (a set drops the duplicates).
total_emojis_list = list({a for b in messages_df.emoji for a in b})
total_emojis = len(total_emojis_list)
print(total_emojis)
# Output:
# 185
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Creates a list of unique Authors - ['Manikanta', 'Teja Kura', .........]
l = messages_df.Author.unique()
for author in l:
    # Filtering out messages of particular user
    req_df = messages_df[messages_df["Author"] == author]
    # req_df will contain messages of only one particular user
    print(f"Stats of {author} -")
    # shape[0] is the number of rows, i.e. the number of messages sent
    print("Messages Sent", req_df.shape[0])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Characters per message.
messages_df["Letter_Count"] = messages_df["Message"].apply(len)
# Words per message. split() with no argument splits on any run of
# whitespace and drops empty tokens, so repeated spaces are not counted as
# words and an empty message yields 0.
messages_df["Word_Count"] = messages_df["Message"].apply(lambda s: len(s.split()))