Skip to content

Instantly share code, notes, and snippets.

View Rafastoievsky's full-sized avatar

Luis Rafael Arce Rafastoievsky

View GitHub Profile
@Rafastoievsky
Rafastoievsky / authorcommondwords.py
Created November 2, 2020 03:57
WhatsApp group chat analysis: common words by author
# Number of top-ranked rows kept for each author (despite the name, the
# value in use is 5).
TopTen = 5

# 'Message' values keyed by author, with missing values discarded.
# Presumably each row of commond_words holds a single word -- confirm upstream.
_words_by_author = commond_words.set_index('Author')['Message'].dropna()

# Number of occurrences of every distinct value, computed per author.
_word_counts = _words_by_author.groupby(level=0).value_counts()

# First TopTen rows of each author's group.
_top_words = _word_counts.groupby(level=0).head(TopTen)

# Flatten into a frame with the columns Author, words and count.
author_commond_words = (
    _top_words
    .rename_axis(('Author', 'words'))
    .reset_index(name='count')
)
@Rafastoievsky
Rafastoievsky / commondwords.py
Created November 2, 2020 03:53
WhatsApp group chat analysis: getting common words
# Work on a separate copy of the two relevant columns so that later
# assignments to commond_words do not write into chat.
commond_words = chat[['Author', 'Message']].copy()

# Imported under an alias: the plain name `stopwords` is bound to a list
# below, and the corpus reader should not be clobbered by that assignment.
from nltk.corpus import stopwords as _stopwords_corpus

# Spanish stop words provided by NLTK.
STOPWORDS = _stopwords_corpus.words('spanish')

# Extra tokens to discard on top of the NLTK list.
# Presumably the pieces of WhatsApp's "<Multimedia omitido>" placeholder plus
# common chat shorthand -- confirm against the exported chat.
extra = ["<multimedia", "omitido>", "k", "d", "si", "multimedia", "omitido"]

# Final stop-word list: a new list, so STOPWORDS and extra stay untouched.
stopwords = [*STOPWORDS, *extra]
commond_words["Message"] = (commond_words["Message"]
.str.lower()
@Rafastoievsky
Rafastoievsky / dayweekmessegechart.py
Created November 2, 2020 03:31
WhatsApp group chat analysis: a chart of messages by day of the week
# 'Message' column re-indexed by the 'weekday' column of chat.
_by_weekday = chat.set_index('weekday')['Message']

# Tally each distinct message within a weekday, then add those tallies back
# together so that every weekday ends up with a single total.
_per_message = _by_weekday.groupby(level=0).value_counts()
_per_weekday = _per_message.groupby(level=0).sum()
weekday_grouped_msg = _per_weekday.reset_index(name='count')

# Bare expression kept from the original; presumably there to show the frame
# in an interactive session.
weekday_grouped_msg

# Closed polar line chart with one spoke per weekday and the total as radius.
# NOTE(review): px is presumably plotly.express -- confirm against the imports.
fig = px.line_polar(
    weekday_grouped_msg,
    r='count',
    theta='weekday',
    line_close=True,
)
# Fill the area enclosed by the trace.
fig.update_traces(fill='toself')
@Rafastoievsky
Rafastoievsky / messageemojis.py
Created November 2, 2020 03:22
WhatsApp group chat analysis: create a new emoji column from messages
def _emoji_table():
    """Return the emoji lookup table exposed by the installed ``emoji`` package.

    The attribute holding the table differs between releases of the package
    (see the package changelog): newer releases expose ``EMOJI_DATA``, while
    older ones expose ``UNICODE_EMOJI``, either flat or nested under a
    language code such as ``"en"``.
    """
    table = getattr(emoji, "EMOJI_DATA", None)
    if table is not None:
        return table
    legacy = emoji.UNICODE_EMOJI
    # A flat legacy table has no "en" key, so it is returned unchanged.
    return legacy.get("en", legacy)


def split_count(text):
    """Return the emoji found in *text*, in order of appearance.

    The text is split into extended grapheme clusters (``\\X``) so that
    multi-code-point sequences stay together; a cluster is kept when at
    least one of its characters is a key of the emoji table.

    Args:
        text: The message to scan.

    Returns:
        A list of grapheme clusters containing an emoji character. Repeated
        emoji appear once per occurrence. Non-string input (for example a
        missing value such as NaN) yields an empty list.
    """
    # Missing messages arrive as non-strings; there is nothing to scan.
    if not isinstance(text, str):
        return []
    # Resolve the table once per call instead of once per character.
    known = _emoji_table()
    return [
        cluster
        for cluster in regex.findall(r'\X', text)
        if any(ch in known for ch in cluster)
    ]
# Run split_count over every value of the 'Message' column and store the
# per-message results in a new 'emoji' column.
_emoji_per_message = chat["Message"].apply(split_count)
chat["emoji"] = _emoji_per_message
@Rafastoievsky
Rafastoievsky / cleaningdatafunctions.py
Created November 2, 2020 02:45
WhatsApp group chat analysis: data-cleaning functions
# Header that opens a new chat line: "N/N/N, H:MM AM -", where each numeric
# field is one or two digits and AM/PM may be in either case.
# The \S after the minute digits demands one more non-whitespace character,
# so at least two characters must follow the colon ("10:30" matches,
# "10:5" does not).
# Written as a raw string so the backslashes reach the regex engine without
# invalid-escape warnings; the pattern text itself is unchanged.
_DATE_TIME_PREFIX = re.compile(
    r'^\d{1,2}/\d{1,2}/\d{1,2}, \d{1,2}:\d{1,2}\S [AaPp][Mm] -'
)


def startsWithDateAndTime(s):
    """Tell whether *s* begins with a date-and-time header.

    Args:
        s: The line of text to test.

    Returns:
        True when the start of *s* matches the header pattern, otherwise
        False.
    """
    return bool(_DATE_TIME_PREFIX.match(s))
def FindAuthor(s):
patterns = [
'([\w]+):', # Nombre