Skip to content

Instantly share code, notes, and snippets.

View kurasaiteja's full-sized avatar

Saiteja Kura kurasaiteja

View GitHub Profile
def dateconv(date, lockdown_start="2020-03-22"):
    """Label a date as falling before or during the lockdown.

    Args:
        date: The date to classify. It is compared with ``lockdown_start``
            using ``<``, so it must be orderable against that value. For
            strings this means ISO ``YYYY-MM-DD`` text, which sorts in
            chronological order.
        lockdown_start: First day that counts as lockdown. Defaults to
            ``"2020-03-22"``, the cutoff this function previously hard-coded.

    Returns:
        ``"Pre-Lockdown"`` when ``date`` is strictly before
        ``lockdown_start``, otherwise ``"During-Lockdown"``.
    """
    return "Pre-Lockdown" if date < lockdown_start else "During-Lockdown"
# Classify every value in the "date" column with dateconv and store the
# resulting label in a new "Lockdown" column.
lockdown_labels = df_india["date"].apply(dateconv)
df_india["Lockdown"] = lockdown_labels
def dayofweek(i):
    """Return the English weekday name for a Monday-based day index.

    Args:
        i: Integer position in the week, where 0 is Monday and 6 is Sunday.
            Negative values index from the end, as with any Python sequence.

    Returns:
        The weekday name as a string.

    Raises:
        IndexError: If ``i`` falls outside the range -7..6.
    """
    weekday_names = (
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    )
    return weekday_names[i]
# Count how many messages were sent on each day of the week.
day_df = pd.DataFrame(messages_df["Message"])
# Series.dt.weekday numbers days from Monday=0, which is the indexing
# dayofweek() uses to look up the weekday name.
day_df["day_of_date"] = messages_df["Date"].dt.weekday.apply(dayofweek)
# One per row, so the per-group sum of this column is the message count.
day_df["messagecount"] = 1
# Sum only the counter column. A bare .sum() would also aggregate the text in
# "Message", and whether that column is dropped or concatenated depends on
# the pandas version (see the `numeric_only` parameter of GroupBy.sum).
# as_index=False keeps "day_of_date" as a regular column in the result.
day = day_df.groupby("day_of_date", as_index=False)["messagecount"].sum()
# Stopword set: everything in STOPWORDS plus a handful of extra short tokens
# that should not appear in the cloud.
stopwords = set(STOPWORDS) | {
    "ra", "ga", "na", "ani", "em", "ki",
    "ah", "ha", "la", "eh", "ne", "le",
}

# NOTE(review): `text` is assigned further down in this file; it must already
# exist when this snippet runs.
# Build the word cloud image from the combined message text.
wordcloud = WordCloud(
    background_color="white",
    stopwords=stopwords,
).generate(text)

# Render the generated image with matplotlib, hiding the axes.
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
# Concatenate every message into one space-separated string.
text = " ".join(messages_df.Message)
# Count whitespace-separated tokens so the number matches the "words" wording
# of the message; len(text) would report the number of characters instead.
print("There are {} words in all the messages.".format(len(text.split())))
# OUTPUT of the earlier len(text) version (a character count, not a word count) -
# There are 687467 words in all the messages.
# Creates a list of unique Authors - ['Manikanta', 'Teja Kura', .........]
l = messages_df.Author.unique()
for author in l:
    # Keep only the rows written by this author.
    dummy_df = messages_df[messages_df['Author'] == author]
    # Each entry of the `emoji` column is iterated as a collection of emojis;
    # flatten them into one list for this author.
    total_emojis_list = [symbol for row in dummy_df.emoji for symbol in row]
    # most_common() returns (emoji, count) pairs ordered from most to least
    # frequent, the same ordering as sorting the counts in descending order.
    emoji_dict = Counter(total_emojis_list).most_common()
    print('Emoji Distribution for', author)
    author_emoji_df = pd.DataFrame(emoji_dict, columns=['emoji', 'count'])
    fig = px.pie(author_emoji_df, values='count', names='emoji')
    # Label the slices and display the chart; without these calls the figure
    # is rebuilt on every iteration and then discarded unseen.
    fig.update_traces(textposition='inside', textinfo='percent+label')
    fig.show()
import plotly.express as px
# Pie chart of emoji usage: one slice per row of emoji_df, sized by its
# 'count' value and named by its 'emoji' value.
# NOTE(review): emoji_df is built further down in this file; it must already
# exist when this snippet runs.
fig = px.pie(
    emoji_df,
    names='emoji',
    values='count',
    title='Emoji Distribution',
)
# Put the percentage and the label inside each slice.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.show()
# Flatten the per-message emoji collections into one list covering all messages.
total_emojis_list = [symbol for row in messages_df.emoji for symbol in row]
# (emoji, count) pairs ordered from most to least frequent.
emoji_dict = Counter(total_emojis_list).most_common()
# Tabulate the pairs as a two-column frame.
emoji_df = pd.DataFrame(emoji_dict, columns=['emoji', 'count'])
# Bare expression: shows the frame when run as the last line of a notebook cell.
emoji_df
# Collect every emoji from every message into a set to drop duplicates,
# then keep the distinct emojis as a list.
total_emojis_list = list(
    {symbol for row in messages_df.emoji for symbol in row}
)
# Number of distinct emojis used across all messages.
total_emojis = len(total_emojis_list)
print(total_emojis)
# Output:
# 185
# Creates a list of unique Authors - ['Manikanta', 'Teja Kura', .........]
l = messages_df.Author.unique()
for author in l:
    # Filter out the messages of this particular user; req_df contains the
    # rows of only that one author.
    req_df = messages_df[messages_df["Author"] == author]
    print(f'Stats of {author} -')
    # The number of rows is the number of messages sent by this author.
    print('Messages Sent', len(req_df))
# Number of characters in each message.
messages_df['Letter_Count'] = messages_df['Message'].apply(len)
# Number of whitespace-separated words in each message. split() with no
# argument collapses runs of whitespace and also splits on newlines and tabs;
# split(' ') would count an empty string for every extra space and report
# 1 word for an empty message.
messages_df['Word_Count'] = messages_df['Message'].apply(lambda s: len(s.split()))