Skip to content

Instantly share code, notes, and snippets.

@Rafastoievsky
Created November 2, 2020 02:45
Show Gist options
  • Save Rafastoievsky/4055397ecb078abdbc6b7902a76415e2 to your computer and use it in GitHub Desktop.
Save Rafastoievsky/4055397ecb078abdbc6b7902a76415e2 to your computer and use it in GitHub Desktop.
WhatsApp Group chat analysis: cleaning data functions
def startsWithDateAndTime(s: str) -> bool:
    """Return True if *s* begins with a chat-export date/time stamp.

    The accepted prefix is three slash-separated groups of one or two
    digits, a comma and space, a 12-hour clock time, a space, an AM/PM
    marker in any letter case, and finally `` -``; for example
    ``"11/2/20, 2:45 AM -"``.

    Four-digit years and 24-hour times (no AM/PM marker) are not matched.
    """
    # Raw string: `\d` and `\S` are regex escapes, not string escapes, and
    # a non-raw literal triggers an invalid-escape warning on modern Python.
    # The `\S` after the minute digits is kept from the original pattern so
    # that exactly the same inputs match (for "2:45" the regex backtracks so
    # `\d{1,2}` takes "4" and `\S` takes "5").
    pattern = r'^\d{1,2}/\d{1,2}/\d{1,2}, \d{1,2}:\d{1,2}\S [AaPp][Mm] -'
    return re.match(pattern, s) is not None
def FindAuthor(s: str) -> bool:
    """Return True if *s* starts with an author-name prefix ending in ``:``.

    Recognised prefixes are one, two or three whitespace-separated words
    followed by a colon, or a single word followed by one or more
    characters in the range U+263A..U+1F999 (which covers many emoji) and
    then a colon. Anything else, including names of four or more words,
    yields False.
    """
    # Raw strings keep `\w` / `\s` as regex escapes instead of invalid
    # string escapes; the `re` module itself interprets `\uXXXX` and
    # `\UXXXXXXXX` inside str patterns, so the emoji range is unchanged.
    patterns = [
        r'\w+:',                        # first name
        r'\w+\s+\w+:',                  # first name + last name
        r'\w+\s+\w+\s+\w+:',            # first name + middle name + last name
        r'\w+[\u263a-\U0001f999]+:',    # name followed by emoji
    ]
    # Group the alternation so the `^` anchor visibly applies to every
    # alternative, not just the first one.
    pattern = '^(?:' + '|'.join(patterns) + ')'
    return re.match(pattern, s) is not None
def getDataPoint(line: str):
    """Split one chat-export line into ``(dateTime, author, message)``.

    Everything before the first ``' - '`` is returned as ``dateTime``; the
    remainder is the message. If ``FindAuthor`` recognises an author prefix
    on the message, the text before the first ``': '`` becomes ``author``
    and the rest becomes ``message``; otherwise ``author`` is ``None`` and
    the message is returned whole.

    If *line* contains no ``' - '``, ``dateTime`` is the whole line and
    ``message`` is the empty string.
    """
    # Split on the FIRST ' - ' only. Splitting on every occurrence and
    # re-joining with spaces silently dropped dashes from the message body.
    dateTime, _, message = line.partition(' - ')

    author = None
    if FindAuthor(message):
        # Likewise, only the first ': ' separates author from text, so any
        # later ': ' inside the message is preserved verbatim.
        author, _, message = message.partition(': ')
    return dateTime, author, message
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment