Created
October 24, 2014 00:33
-
-
Save samliu/31f6764f80e370511f60 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# textanalysis.py
#
# Analyzes an iPhone text message CSV dump produced by iBackup Viewer
# (http://www.imactools.com/iphonebackupviewer/).
#
# Requirements:
#   - WordCloud (http://github.com/amueller/word_cloud)
#
# How to use:
#   1. Generate a CSV dump via iBackup Viewer and point to it in __main__.
#      1a. Delete the first line of the CSV dump; it is a header.
#   2. Change the numbers param in __main__ to the 2 phone numbers you're
#      interested in (just 1 is okay too), and set the path of the CSV
#      relative to this program.
#   3. Choose a TrueType font on your system to use with the word cloud
#      generator and point to it with the font variable in __main__.
#
# OR write your own program that sets these params for the local system.
# This is just a couple-hour hack for fun. DWTFYW license.
#
# Original Author: Sam Liu <[email protected]>
from wordcloud import WordCloud, STOPWORDS | |
import datetime | |
import os | |
import re | |
class WCGenerator(object):
    """Word cloud generator from text messages between two people.

    Reads a comma-separated dump of text messages, concatenates the message
    bodies per phone number, and renders one PNG word cloud per number.

    Each input line is split on ',' and interpreted as:
      * field 0    -- the phone number of the message
      * fields 1-3 -- the pieces of a quoted date that itself contains
                      commas; joined and unquoted they must look like
                      'Jul 4 2014 10:30 PM'
      * field 5    -- the message body

    Args:
        input_file: Path of the CSV dump to read.
        words_to_remove: Optional iterable of words to strip from every
            message (case-insensitive). Entries are joined into a regular
            expression without escaping, so regex syntax in them is live.
        font: Path of a TrueType font on your system, handed to WordCloud.
        numbers: Dict mapping a phone number string (e.g. '+16505551234') to
            a dict holding at least a 'text' string; message bodies are
            appended to that 'text' entry in place. ``None`` means no
            numbers are tracked.
    """

    def __init__(self, input_file='', words_to_remove=None,
                 font='',
                 numbers=None):
        # Note: Replace font parameter with the path of a font on your system.
        self.input_file = input_file
        self.words_to_remove = words_to_remove
        self.font = font
        # An empty dict keeps the membership tests below working when the
        # caller does not pass any numbers.
        self.numbers = {} if numbers is None else numbers

    @staticmethod
    def _parse_timestamp(datestring):
        """Parse a date such as 'Jul 4 2014 10:30 PM' into a datetime.

        The hour is parsed with %I because strptime only honours the AM/PM
        marker (%p) in combination with %I; with %H a PM time would come out
        twelve hours early. Strings whose hour is outside 1-12 fall back to
        the 24-hour format so they keep parsing instead of raising.

        Raises:
            ValueError: If the string matches neither format.
        """
        try:
            return datetime.datetime.strptime(datestring, '%b %d %Y %I:%M %p')
        except ValueError:
            return datetime.datetime.strptime(datestring, '%b %d %Y %H:%M %p')

    def _build_removal_regex(self):
        """Return the compiled pattern for words_to_remove, or None if unset."""
        if not self.words_to_remove:
            return None
        remove = '|'.join(self.words_to_remove)
        return re.compile(r'(' + remove + ')', flags=re.IGNORECASE)

    def _collect_messages(self):
        """Read input_file and append each message body to its number's text.

        Lines whose number is not a key of self.numbers are reported on
        stdout and otherwise ignored. Blank lines are skipped.
        """
        # Compiled once up front rather than once per message.
        removal_regex = self._build_removal_regex()
        with open(self.input_file) as f:
            print("Reading from " + self.input_file + "...")
            for line in f:
                if not line.strip():
                    continue
                values = line.split(',')
                number = values[0]
                datestring = ''.join(values[1:4])
                datestring = datestring.strip('"')
                timestamp = self._parse_timestamp(datestring)

                # You can look at a specific month or day or anything using
                # the timestamp.
                # Example:
                #   if timestamp.month != 7:
                #       continue

                # Stick all the text bodies into one string, removing unwanted
                # words from being put into the word cloud.
                if number in self.numbers:
                    # NOTE: because the line is split on every comma, only the
                    # part of the body before its first comma ends up here.
                    msg = values[5]
                    if removal_regex is not None:
                        msg = removal_regex.sub("", msg)
                    self.numbers[number]['text'] += msg
                else:
                    print("ERROR: " + number + " was not a valid number.")

    def _write_word_clouds(self):
        """Render one PNG per number that has text, next to this source file."""
        d = os.path.dirname(__file__)
        wc = WordCloud(font_path=self.font)
        for number in self.numbers:
            text = self.numbers[number]['text']
            if not text:
                continue
            wc.generate(text)
            # Remove the prepended '+' for the filename (only if present, so
            # a number without it does not lose its first digit).
            filename = number[1:] if number.startswith('+') else number
            filepath = os.path.join(d, filename + '.png')
            wc.to_file(filepath)
            print("Wrote file: " + filepath)

    def generate_word_clouds(self):
        """Collect the messages from input_file and write the word clouds.

        Make a word cloud from each number's accumulated text and save the
        image with a filename derived from the number.
        """
        self._collect_messages()
        self._write_word_clouds()
if __name__ == '__main__':
    # TODO(samcliu): Use argparse to take cmdline args.

    # CSV dump to analyze, relative to where this program is run.
    input_file = 'putyourfilename.csv'

    # Assuming you like DroidSansMono and have it installed...
    font = '/Users/youruser/Library/Fonts/DroidSansMono.ttf'

    # The phone numbers of interest; each starts with an empty text buffer
    # that the generator fills with that number's messages.
    contacts = (
        ('+16505551234', 'Name1'),
        ('+16505554321', 'Name2'),
    )
    numbers = {}
    for phone, label in contacts:
        numbers[phone] = {'name': label, 'text': ''}

    wc_generator = WCGenerator(
        input_file=input_file,
        font=font,
        numbers=numbers,
    )
    wc_generator.generate_word_clouds()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment