jsharkey13 · February 2, 2021 17:11 · jsharkey13 · Sep 9, 2018 · Seantimez · Sep 10, 2018
diff --git a/facebook_analysis_demo.py b/facebook_analysis_demo.py
 # Copyright (c) 2018 James Sharkey (https://github.com/jsharkey13/facebook_message_parser)
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
 #
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
 #
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 import datetime
 import matplotlib.pyplot as plt
 from matplotlib.dates import date2num, num2date
 from matplotlib import ticker
 import matplotlib
 import re

 # =============================================================================
 #                          Top N Most Messaged People                         #
 #                                                                             #
 # Public Functions:                                                           #
 #  - top_n_people(Chat, N, count_type, groups)                                #
 #                                                                             #
 # =============================================================================

 # The 'count_type' values that top_n_people() handles explicitly; any other
 # value ends up being counted in the same way as "total".
 _COUNT_TYPES = ["total", "to", "from", "allfrom", "words", "wordsfrom", "wordsto",
                "chars", "charsfrom", "charsto"]


 def _update_thread_dict(thread_dict, thread_name, num):
    """Add 'num' to the count stored for 'thread_name'.

       Several threads can share one name, so an existing entry is added to
       rather than overwritten."""
    if thread_name in thread_dict:
        thread_dict[thread_name] += num
    else:
        thread_dict[thread_name] = num


 def _count_words(text):
    """Return the number of runs of non-whitespace characters in 'text'."""
    return len(re.findall(r'\S+', text))


 def _thread_count(Chat, thread, count_type):
    """Return the number a single thread contributes under 'count_type'.

       Any 'count_type' which is not handled explicitly is counted as "total"."""
    owner = Chat._owner
    if count_type == "to":
        return len(thread.by(owner))
    if count_type == "from":
        return len(thread) - len(thread.by(owner))
    if count_type == "words":
        return sum(_count_words(m.text) for m in thread.messages)
    if count_type == "wordsfrom":
        return sum(_count_words(m.text) for m in thread.messages if not m.sent_by(owner))
    if count_type == "wordsto":
        return sum(_count_words(m.text) for m in thread.messages if m.sent_by(owner))
    if count_type == "chars":
        return sum(len(m) for m in thread.messages)
    if count_type == "charsfrom":
        return sum(len(m) for m in thread.messages if not m.sent_by(owner))
    if count_type == "charsto":
        return sum(len(m) for m in thread.messages if m.sent_by(owner))
    return len(thread)


 def top_n_people(Chat, N=-1, count_type="total", groups=False):
    """Return a list of the top N most messaged people.

       The list contains tuples of (name, count), sorted with the largest count
       first. A negative or zero value for N returns the full list, this is the
       default. Threads whose name contains ", " (more than one other person)
       are left out unless 'groups' is True. Threads sharing a name have their
       counts added together. The 'count_type' argument selects what is counted:
        - "total" - the default, also used for any unrecognised value. The total
          number of messages in each thread.
        - "to" - the number of messages in each thread sent by the current
          user: '_owner'.
        - "from" - the number of messages in each thread not sent by '_owner'.
        - "allfrom" - the number of messages from each individual person
          across all threads, using Chat.all_from(). Entries are people rather
          than threads, and '_owner' is not included.
        - "words", "wordsfrom", "wordsto" - as "total", "from" and "to", but
          counting whitespace-separated words in the message text.
        - "chars", "charsfrom", "charsto" - as "total", "from" and "to", but
          adding up len() of each message.

       The 'count_type' is compared by value, so it does not matter how the
       string was created."""
    thread_dict = {}
    if count_type == "allfrom":
        # Count all messages in all threads received from each person.
        all_people = Chat._all_people.copy()
        all_people.remove(Chat._owner)  # Remove _owner from the copy (but not the original!)
        for person in all_people:
            thread_dict[person] = len(Chat.all_from(person))
    else:
        for thread in Chat.threads:
            num = _thread_count(Chat, thread, count_type)
            _update_thread_dict(thread_dict, thread.people_str, num)
    # sorted() is stable, so entries with equal counts keep their original order:
    ranked = sorted(thread_dict.items(), key=lambda tup: tup[1], reverse=True)
    top_n = []
    for name, count in ranked:
        if N > 0 and len(top_n) >= N:
            break
        if groups or ", " not in name:
            top_n.append((name, count))
    return top_n


 # =============================================================================
 #                           Graphing Message Counts                           #
 #                                                                             #
 # Public Functions:                                                           #
 #  - use_facebook_colours()                                                   #
 #  - use_ios_colours()                                                        #
 #  - messages_time_graph(Chat, name, filename, no_gui)                        #
 #  - messages_date_graph(Chat, name, filename, start_date, end_date, no_gui)  #
 #  - messages_pie_chart(Chat, N, filename, count_type, groups,                #
 #                                                        no_gui, percentages) #
 #                                                                             #
 # =============================================================================

 # Some useful colours, each a tuple of three floats between 0 and 1:
 _FB_BLUE = (0.2314, 0.3490, 0.5961)
 _FB_GREY = (0.9294, 0.9294, 0.9294)
 _IOS_GREEN = (0.5451, 0.8235, 0.2824)
 _IOS_GREY = (0.8980, 0.8980, 0.9176)

 # The colours used by the code:
 _BG_COLOUR = (1.0, 1.0, 1.0)
 _TEXT_COLOUR = (0.0, 0.0, 0.0)
 # Histogram colours for the current user and for everybody else; both are
 # assigned by _change_graph_colours().
 _MY_COLOUR = None
 _OTHER_COLOUR = None


 def _change_matplotlib_colours(text_color=_TEXT_COLOUR, bg_colour=_BG_COLOUR):
    """Change matplotlib default colors for ALL graphs produced in current session.

        - 'text_color' sets the colour of all text, as well as axes edges, axis
          labels, grid lines, axis tick marks and the edge of saved figures.
        - 'bg_colour' changes the background of the figure, the axes and of
          saved figures.

       The arguments default to the module's '_TEXT_COLOUR' and '_BG_COLOUR'.
       The settings are applied through matplotlib.rc(), so they stay in force
       for every graph drawn afterwards."""
    matplotlib.rc('figure', facecolor=bg_colour)
    matplotlib.rc('savefig', facecolor=bg_colour, edgecolor=text_color)
    matplotlib.rc('axes', edgecolor=text_color, facecolor=bg_colour, labelcolor=text_color)
    matplotlib.rc('text', color=text_color)
    matplotlib.rc('grid', color=text_color)
    matplotlib.rc('xtick', color=text_color)
    matplotlib.rc('ytick', color=text_color)


 def _change_graph_colours(my_colour, other_colour):
    """Set the two histogram colours held in '_MY_COLOUR' and '_OTHER_COLOUR'.

       'my_colour' is stored for the current user's bars and 'other_colour' for
       everybody else's; histograms drawn afterwards read these module values."""
    global _MY_COLOUR, _OTHER_COLOUR
    _MY_COLOUR, _OTHER_COLOUR = my_colour, other_colour


 def use_facebook_colours():
    """Switch the graph colours to Facebook's: blue for self, grey for others."""
    _change_graph_colours(_FB_BLUE, _FB_GREY)


 def use_ios_colours():
    """Switch the graph colours to iOS's: green for self, grey for others."""
    _change_graph_colours(_IOS_GREEN, _IOS_GREY)


 # Run the colour change code on import of the module: the Facebook colours
 # become the default for histograms, and matplotlib's global rc settings are
 # changed as a side effect of the import.
 use_facebook_colours()
 _change_matplotlib_colours()


 # ====== Histogram of Time of Day:


 def _hour_list():
    """Return the 25 edges of 24 hourly bins, as fractions of a day from 0.0 to 1.0."""
    return [hour / 24.0 for hour in range(25)]


 def _dt_to_decimal_time(datetime):
    """Convert a datetime.datetime object into a fraction of a day float.

       The datetime is turned into matplotlib's day number with date2num(), and
       the whole-day part is then discarded to leave only the time of day.

       NOTE(review): which clock the result reflects for timezone-aware input
       depends on how date2num() treats it - confirm against the matplotlib
       version in use."""
    day_number = date2num(datetime)
    return day_number - int(day_number)


 def messages_time_graph(Chat, name=None, filename=None, no_gui=False):
    """Create a graph of the time of day of messages sent between users.

       Produces a stacked histogram, in hourly bins, of the times of messages
       sent to and received from another user. The method only works for
       individuals, not for threads between multiple friends.

       - 'Chat' should be the Chat object to analyse.
       - 'name' should be the name of the user, and so the Thread, to be graphed.
         If it is None or the name of the current user, ALL messages are graphed,
         split into those from the current user and those from everybody else.
       - If 'filename' is a string, the graph is saved to that file and is not
         displayed onscreen.
       - Without a 'filename' the graph is displayed onscreen, unless 'no_gui' is
         True, in which case the function runs but produces no output anywhere.
         Setting 'no_gui' also switches off matplotlib's interactive mode, and
         the figure is closed once it is finished with."""
    # Implement a default case:
    if name is None:
        name = Chat._owner
    all_threads = (name == Chat._owner)
    # Divide up into hourly bins, changing datetime objects to times in range [0,1):
    bins = _hour_list()
    if not all_threads:
        # Looking at a graph with one other user; get messages to and from:
        Thread = Chat[name]
        times_from = [_dt_to_decimal_time(message.date_time) for message in Thread.by(name)]
        times_to = [_dt_to_decimal_time(message.date_time) for message in Thread.by(Chat._owner)]
        label = [Chat._owner, name]
    else:
        # Looking at all messages; split them by author in a single pass:
        times_from = []
        times_to = []
        for message in Chat.all_messages():
            time_of_day = _dt_to_decimal_time(message.date_time)
            if message.author == Chat._owner:
                times_to.append(time_of_day)
            else:
                times_from.append(time_of_day)
        label = [Chat._owner, "Others"]
    # Create the figure, hiding the display if no_gui set:
    if no_gui:
        plt.ioff()
    figure = plt.figure(figsize=(18, 9), dpi=80)
    plt.hist([times_to, times_from], bins, histtype='bar', color=[_MY_COLOUR, _OTHER_COLOUR], label=label, stacked=True)
    # Title the graph correctly, and label axes:
    if all_threads:
        plt.suptitle("All Messages Sent", size=18)
    else:
        plt.suptitle("Messages with " + name, size=18)
    plt.xlabel("Time of Day", labelpad=20, size=15)
    plt.ylabel("Number of Messages", labelpad=20, size=15)
    # Move tick marks to centre of hourly bins by adding ~ half an hour (in days)
    axes = plt.gca()
    axes.set_xticks([b + 0.02 for b in bins])
    # Place tickmarks
    plt.xticks(rotation=0, ha='center')
    # Change the tick marks from useless fraction through day, to recognisable times:
    # To do this use strftime to convert times to string (which needs dates >= 1900),
    # so shift to 1900 (add 693596 days) and take off added half hour (minus 0.02)
    axes.xaxis.set_major_formatter(ticker.FuncFormatter(lambda numdate, _: num2date(numdate + 693596 - 0.02).strftime('%H:%M')))
    # Add some space at either end of the graph (axis in number of days, so +- 15 mins):
    plt.xlim([bins[0] - 0.01, bins[-1] + 0.01])
    # Place y gridlines beneath the plot:
    axes.yaxis.grid(True)
    axes.set_axisbelow(True)
    # Hide unnecessary borders and tickmarks:
    axes.spines['right'].set_visible(False)
    axes.spines['top'].set_visible(False)
    axes.yaxis.set_ticks_position('left')
    plt.tick_params(axis='x', which='both', bottom=False, top=False)
    # Add the legend at the top, underneath the title but outside the figure:
    plt.legend(frameon=False, bbox_to_anchor=(0.5, 1.05), loc=9, ncol=2, borderaxespad=0)
    # If given a filename, output to file; otherwise display, but only when a GUI is wanted:
    if isinstance(filename, str):
        plt.savefig(filename, bbox_inches='tight')
    elif not no_gui:
        plt.show()
    # Nothing will ever display a no_gui figure, so release it rather than let them pile up:
    if no_gui:
        plt.close(figure)


 # ====== Histogram of Date:


 def _month_list(d1, d2):
    """Return the first day of every month from d1's month to the month after d2's.

       The extra month on the end gives the upper limit of a histogram. Only
       the year and month of d1 and d2 are used; the returned datetimes carry
       no timezone. The list is empty when d1's month comes after that final
       month."""
    first = datetime.datetime(d1.year, d1.month, 1)
    # Step one month past d2, rolling December over into January of the next year:
    if d2.month == 12:
        last = datetime.datetime(d2.year + 1, 1, 1)
    else:
        last = datetime.datetime(d2.year, d2.month + 1, 1)
    # How many month-long steps separate the two; negative if they are out of order:
    span = (last.year - first.year) * 12 + (last.month - first.month)
    months = []
    for step in range(span + 1):
        years_on, month_index = divmod(first.month - 1 + step, 12)
        months.append(datetime.datetime(first.year + years_on, month_index + 1, 1))
    return months


 def messages_date_graph(Chat, name=None, filename=None, start_date=None, end_date=None, no_gui=False):
    """Create a graph of the number of messages sent between users.

       Produces a stacked histogram, in monthly bins, of messages sent to and
       received from another user. The method only works for individuals, not
       for threads between multiple friends.

       - 'Chat' should be the Chat object to analyse.
       - 'name' should be the name of the user, and so the Thread, to be graphed.
         If it is None or the name of the current user, ALL messages are graphed,
         split into those from the current user and those from everybody else.
       - If 'filename' is a string, the graph is saved to that file and is not
         displayed onscreen.
       - 'start_date' and 'end_date' can be used to narrow the range of dates
         covered; the default is the first message to the last, but specifying dates
         inside this range can be used to narrow down the region considered. If
         both are given in the wrong order they are swapped.
       - Without a 'filename' the graph is displayed onscreen, unless 'no_gui' is
         True, in which case the function runs but produces no output anywhere.
         Setting 'no_gui' also switches off matplotlib's interactive mode, and
         the figure is closed once it is finished with."""
    # Implement a default case:
    if name is None:
        name = Chat._owner
    all_threads = (name == Chat._owner)
    # Sanity check input dates, and fix if necessary (note MUST be one line to avoid reassignment before comparison):
    if ((start_date is not None) and (end_date is not None)):
        start_date, end_date = min(start_date, end_date), max(start_date, end_date)
    # Pick the messages to graph; 'timeline' is indexed below for its first and last message:
    if not all_threads:
        Thread = Chat[name]
        timeline = Thread
        dates_from = [date2num(message.date_time) for message in Thread.by(name)]
        dates_to = [date2num(message.date_time) for message in Thread.by(Chat._owner)]
        label = [Chat._owner, name]
    else:
        timeline = Chat.all_messages()
        dates_from = [date2num(message.date_time) for message in timeline if message.author != Chat._owner]
        dates_to = [date2num(message.date_time) for message in timeline if message.author == Chat._owner]
        label = [Chat._owner, "Others"]
    # If a start date given (which is after the messages start), use it:
    d_min = timeline[0].date_time
    if start_date is not None:
        d_min = max(Chat._date_parse(start_date), d_min)
    # If an end date given (which is before the messages end), use it:
    d_max = timeline[-1].date_time
    if end_date is not None:
        d_max = min(Chat._date_parse(end_date), d_max)
    # Divide up into month bins, changing datetime objects to number of days for plotting:
    bins = [date2num(b) for b in _month_list(d_min, d_max)]
    # Create the figure, hiding the display if no_gui set:
    if no_gui:
        plt.ioff()
    figure = plt.figure(figsize=(18, 9), dpi=80)
    plt.hist([dates_to, dates_from], bins, histtype='bar', color=[_MY_COLOUR, _OTHER_COLOUR], label=label, stacked=True)
    # Title the graph correctly, and label axes:
    if all_threads:
        plt.suptitle("All Messages Sent", size=18)
    else:
        plt.suptitle("Messages with " + name, size=18)
    plt.ylabel("Number of Messages", labelpad=20, size=15)
    # Put the tick marks at the rough centre of months by adding 15 days (~ 1/2 a month):
    axes = plt.gca()
    axes.set_xticks([b + 15 for b in bins])
    # The x labels are unreadable at an angle if there are too many of them, put them vertical if so:
    if len(bins) > 45:
        plt.xticks(rotation='vertical')
    else:
        plt.xticks(rotation=30, ha='right')
    # Change the tick marks from useless number of days, to recognisable dates:
    axes.xaxis.set_major_formatter(ticker.FuncFormatter(lambda numdate, _: num2date(numdate).strftime('%b %Y')))
    # Add some space at either end of the graph (axis in number of days, so -10 days and +5 days):
    plt.xlim([bins[0] - 10, bins[-1] + 5])
    # Place y gridlines beneath the plot:
    axes.yaxis.grid(True)
    axes.set_axisbelow(True)
    # Hide unnecessary borders and tickmarks:
    axes.spines['right'].set_visible(False)
    axes.spines['top'].set_visible(False)
    axes.yaxis.set_ticks_position('left')
    plt.tick_params(axis='x', which='both', bottom=False, top=False)
    # Add the legend at the top, underneath the title but outside the figure:
    plt.legend(frameon=False, bbox_to_anchor=(0.5, 1.05), loc=9, ncol=2, borderaxespad=0)
    # If given a filename, output to file; otherwise display, but only when a GUI is wanted:
    if isinstance(filename, str):
        plt.savefig(filename, bbox_inches='tight')
    elif not no_gui:
        plt.show()
    # Nothing will ever display a no_gui figure, so release it rather than let them pile up:
    if no_gui:
        plt.close(figure)


 # ====== Pie Chart of Totals:


 # Colours from http://www.mulinblog.com/a-color-palette-optimized-for-data-visualization/
 # messages_pie_chart() cycles through these, so they repeat after eight wedges.
 _COLOURS = ['#5DA5DA', '#FAA43A', '#60BD68', '#F17CB0', '#B2912F', '#B276B2', '#DECF3F', '#F15854']


 def _make_labels_wrap(labels):
    """Put each name of a long label onto its own line.

       Labels of more than 25 characters have every ", " replaced by a newline.
       The list is changed in place, and is also returned."""
    for position, text in enumerate(labels):
        if len(text) <= 25:
            continue
        labels[position] = text.replace(", ", "\n")
    return labels


 def messages_pie_chart(Chat, N=10, filename=None, count_type="total", groups=False,
                       no_gui=False, percentages=True):
    """Create a pie chart of the number of messages exchanged with friends.

       The graph shows the most messaged friends sorted using the top_n_people()
       code. The graph also shows percentage sizes of wedges, though this can be disabled.
        - 'Chat' should be the Chat object to analyse.
        - 'N' should be how many people to show explicitly; all others are grouped
          together in a final "Others" chunk, which is always present.
        - If 'filename' is a string, the graph is saved to that file and is not
          displayed onscreen.
        - The 'count_type' argument is passed to top_n_people() and also picks
          the title. A value without a title of its own gets the "total" title,
          which matches how top_n_people() counts such a value.
        - Setting 'groups' to True will include message threads with groups where
          appropriate.
        - Without a 'filename' the graph is displayed onscreen, unless 'no_gui' is
          True, in which case the function runs but produces no output anywhere.
          Setting 'no_gui' also switches off matplotlib's interactive mode, and
          the figure is closed once it is finished with.
        - The percentages on the graph can be removed by setting 'percentages' to
          False."""
    # The title of the graph depends on the count_type:
    _title_dict = {"total": "Total Lengths of Message Threads",
                   "allfrom": "Total Number of Messages Received",
                   "from": "Number of Messages Received from People in Personal Threads",
                   "to": "Number of Messages Sent to People in Personal Threads",
                   "words": "Total Word Counts of Message Threads",
                   "wordsfrom": "Word Count of All Messages Received from People in Personal Threads",
                   "wordsto": "Word Count of All Messages Sent to People in Personal Threads",
                   "chars": "Total Character Lengths of Message Threads",
                   "charsfrom": "Character Length of All Messages Received from People in Personal Threads",
                   "charsto": "Character Length of All Messages Sent to People in Personal Threads"}
    # The data to plot:
    thread_counts = top_n_people(Chat, count_type=count_type, groups=groups)
    # Set up useful lists and counts:
    names = []
    counts = []
    other_count = 0
    colours = []
    # Run through the data, adding it to the correct list. If not in N, add to Other:
    for n, (thread_name, thread_count) in enumerate(thread_counts):
        if n < N:
            names.append(thread_name)
            counts.append(thread_count)
            colours.append(_COLOURS[n % len(_COLOURS)])
        else:
            other_count += thread_count
    # Add an "Others" section in dark grey using the other_count:
    names.append("Others")
    counts.append(other_count)
    colours.append('#4D4D4D')
    # If long names, wrap them:
    _make_labels_wrap(names)
    # Create the figure, hiding the display if no_gui set:
    if no_gui:
        plt.ioff()
    figure = plt.figure(figsize=(18, 9), dpi=80)
    # We want the edges of the wedges in the chart to be white for aesthetics. This is a
    # global matplotlib default, so remember the old value and always put it back:
    previous_edgecolour = plt.rcParams['patch.edgecolor']
    plt.rcParams['patch.edgecolor'] = 'white'
    try:
        # Plot percentage counts on the figure:
        pct = '%1.1f%%' if percentages else None
        # Make the plot, starting at the top (90 degrees from horizontal) and percentages outside (pctdist > 1)
        plt.pie(counts, colors=colours, autopct=pct, pctdistance=1.1, startangle=90, counterclock=False)
        # Put the right title on the graph:
        plt.suptitle(_title_dict.get(count_type, _title_dict["total"]), size=18)
        # And make it circular:
        plt.axis('equal')
        # Add the legend:
        plt.legend(labels=names, frameon=False, labelspacing=1, loc="center", bbox_to_anchor=[0, 0.5])
        # If given a filename, output to file; otherwise display, but only when a GUI is wanted:
        if isinstance(filename, str):
            plt.savefig(filename, bbox_inches='tight')
        elif not no_gui:
            plt.show()
    finally:
        plt.rcParams['patch.edgecolor'] = previous_edgecolour
        # Nothing will ever display a no_gui figure, so release it rather than let them pile up:
        if no_gui:
            plt.close(figure)


 # =============================================================================
 #                           Word Frequency Analysis                           #
 #                                                                             #
 # Public Functions:                                                           #
 #  - top_word_use(Chat, name, from_me, ignore_single_words)                   #
 #                                                                             #
 # =============================================================================


 def _str_to_word_list(text):
    """Turn a string into a list of words, removing URLs and punctuation.

       - The function takes in a string and returns a list of strings.
       - The text is lower-cased, and the emoticons it recognises are turned
         into placeholder words (such as "happyfacesmiley") so that they survive
         the removal of punctuation.
       - Any non-ASCII character comes out as "?"."""
    # Some characters and strings need deleting from messages to separate them into proper words:
    _EXCLUDE = ["'s", "'ll", ".", ",", ":", ";", "!", "?", "*", '"', "-", "+", "^", "_", "~", "(", ")", "[", "]", "/", "\\", "@", "="]
    # Some things need removing, but not deleting as with _EXCLUDE. These are applied in
    # order: ":'(" has to be dealt with before apostrophes are stripped, otherwise it is
    # turned into ":(" first and is never seen as a crying face.
    _CHANGE = [(":'(", "cryingfacesmiley"), ("'", ""),
               (":p", "tongueoutsmiley"), (":-p", "tongueoutsmiley"),
               (":)", "happyfacesmiley"), (":-)", "happyfacesmiley"),
               (":/", "awkwardfacesmiley"), (":-/", "awkwardfacesmiley"),
               ("<3", "loveheartsmiley"),
               (":(", "sadfacesmiley"), (":-(", "sadfacesmiley"),
               (":d", "grinningfacesmiley"), (":-d", "grinningfacesmiley"),
               (";)", "winkfacesmiley"), (";-)", "winkfacesmiley"),
               (":o", "shockedfacesmiley")]
    # Remove URLs with a regular expression, else they mess up when removing punctuation:
    text = re.sub(r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*', '', text)
    # Remove the NEWLINE denoting string, and replace with a space before anything else:
    text = text.replace("<|NEWLINE|>", " ")
    text = text.lower()
    # Change and exclude things:
    for old, new in _CHANGE:
        text = text.replace(old, new)
    for ex in _EXCLUDE:
        text = text.replace(ex, " ")
    # Get rid of non-ASCII characters for simplicity. encode() gives bytes, so decode
    # straight back again: the words returned must be strings, not bytes.
    text = text.encode('ascii', 'replace').decode('ascii')
    # Return a list of words; split() copes with any amount of whitespace between them:
    return text.split()


 def _message_list_word_list(messages):
    """Return every word of every message in 'messages' as one flat list.

       Each message's 'text' is split up by _str_to_word_list(), and the words
       are kept in message order."""
    return [word for message in messages for word in _str_to_word_list(message.text)]


 def _word_list_to_freq(words, ignore_single_words=False):
    """Take a list of strings, and return a list of (word, word_use_count).

       - The returned list of pairs is sorted in descending order of count.
       - The emoticon placeholder words (such as "happyfacesmiley") are turned
         back into emoticons.
       - Passing 'ignore_single_words' will remove any words only used once."""
    # Imported here so that this function does not rely on the module's import block:
    from collections import Counter
    # The order of items in the CHANGE dictionary means changing back isn't quite so simple; just use a second dictionary:
    _CHANGE_BACK = {"tongueoutsmiley": ":P", "happyfacesmiley": ":)", "awkwardfacesmiley": ":/",
                    "loveheartsmiley": "<3", "sadfacesmiley": ":(", "cryingfacesmiley": ":'(",
                    "grinningfacesmiley": ":D", "winkfacesmiley": ";)", "shockedfacesmiley": ":o"}
    # Count every word in a single pass over the list (calling words.count() for each
    # word would walk the whole list again every time):
    freq = Counter(words)
    # Change the emoticons back to emoticons:
    for placeholder, emoticon in _CHANGE_BACK.items():
        if placeholder in freq:
            freq[emoticon] = freq.pop(placeholder)
    # Convert to a list and sort:
    freq = sorted(freq.items(), key=lambda tup: tup[1], reverse=True)
    # If only want words used more than once, remove those with count <= 1
    if ignore_single_words:
        freq = [f for f in freq if f[1] > 1]
    return freq


 def top_word_use(Chat, name, from_me=False, ignore_single_words=False):
    """Work out the words used most often in a friend's thread.

       The function returns a list of (word, word_use_count) tuples, most used
       first. It can be slow for long threads.

       - 'name' is a string of the name of the Thread to consider. If it is the
         name of the current user, every message from the current user in any
         thread is used instead, and 'from_me' makes no difference.
       - 'from_me' is a boolean flag: if True the messages sent by you in the
         thread are used, otherwise those sent by 'name', the default.
       - Setting 'ignore_single_words' to True removes words which are only used
         once, which reduces the length of the list returned."""
    if name == Chat._owner:
        messages = Chat.all_from(Chat._owner)
    else:
        author = Chat._owner if from_me else name
        messages = Chat[name].by(author)
    return _word_list_to_freq(_message_list_word_list(messages), ignore_single_words)
diff --git a/facebook_parser_demo.py b/facebook_parser_demo.py
 # Copyright (c) 2018 James Sharkey (https://github.com/jsharkey13/facebook_message_parser)
 # Copyright (c) 2015 Chris Copley (https://github.com/CopOnTheRun/FB-Message-Parser)
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
 #
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
 #
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.


 import csv
 import datetime
 import json
 import zipfile
 from io import StringIO

 import pytz
 import tzlocal
 import dateutil.tz

 import facebook_analysis_demo

 # User Configuration:
 # NOTE(review): presumably the path to the downloaded Facebook data archive - confirm
 # against the code which opens it.
 FACEBOOK_ZIPFILE = "facebook-username.zip"
 # Timezone name handed to dateutil.tz.gettz() below to build 'local_timezone'.
 LOCAL_TIMEZONE = "Europe/London"
 # NOTE(review): presumably the account owner's name as it appears in the messages - confirm.
 MY_FACEBOOK_NAME = "My Name Here"
 # NOTE(review): looks like a thread ID -> replacement name map for unnamed users - confirm.
 UNKNOWN_USER_MAP = {
    "DemoThreadId": "Demo Replacement Name"
 }

 # Useful constants:
 MESSAGE_TYPE = "Facebook"
 ZIP_MESSAGE_DIR = "messages/"
 THREAD_ID_TEMPLATE = "FACEBOOK_{0:s}"
 DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
 FACEBOOK_USER = "Facebook User"
 # The tzinfo for LOCAL_TIMEZONE; Message.__init__ converts each timestamp to it.
 local_timezone = dateutil.tz.gettz(LOCAL_TIMEZONE)


 # Some old classes that will be useful for analysis code:
 class Message(object):
    """An object to encapsulate a Message.

        - Contains a string of the author's name, the timestamp, number in the thread
          and the body of the message.
        - When initialising, 'thread_id' should be the containing Thread's ID,
          'author' should be string containing the message sender's name, 'date_time'
          should be a datetime.datetime object, 'text' should be the content of
          the message (or None) and 'number' should be the number of the message
          in the thread.
        - 'attachments' may be a list or set of attachments, a single attachment
          or None; it is always stored as a collection."""

    @staticmethod
    def _date_parse(date):
        """Allow dates to be entered as integer tuples (YYYY, MM, DD[, HH, MM, SS]).

           Removes the need to supply datetime objects, but still allows dates
           to be entered as datetime.datetime objects. The Year, Month and
           Day are compulsory, the Hours and Minutes optional. May cause exceptions
           if poorly formatted tuples are used.

           Returns a datetime.datetime converted to UTC; tuples are interpreted
           in the timezone reported by tzlocal.get_localzone()."""
        if isinstance(date, datetime.datetime):
            return date.astimezone(pytz.utc)
        else:
            local_tz = tzlocal.get_localzone()
            return local_tz.localize(datetime.datetime(*date)).astimezone(pytz.utc)

    @staticmethod
    def _csv_header_row():
        """Return the list of column names used as the CSV header row."""
        return ["timestamp", "thread_id", "message_number", "author", "message"]

    def __init__(self, *, source, thread_id, author, date_time, text, number, attachments=None):
        self.source = source
        self.thread_id = thread_id
        self.author = author
        self.date_time = date_time.astimezone(local_timezone)
        self.text = text
        if isinstance(attachments, (list, set)):
            self.attachments = attachments
        else:
            # A single attachment is wrapped in a list; None means "no attachments".
            self.attachments = [attachments] if attachments is not None else []
        self._num = number

    def __repr__(self):
        """Set Python's representation of the Message object."""
        return "<MESSAGE: THREAD='{}' NUMBER='{}' TIMESTAMP='{}' AUTHOR='{}' MESSAGE='{}'>". \
            format(self.thread_id, self._num, self.date_time, self.author, self.as_text())

    def __str__(self):
        """Return a string form of a Message."""
        return repr(self)

    def __lt__(self, message):
        """Allow sorting of messages by implementing the less than operator.

           Sorting is by date, unless two messages were sent at the same time,
           in which case message number is used to resolve conflicts. This number
           ordering holds fine for messages in single threads, but offers no real
           objective order outside a thread.

           Returns NotImplemented when compared with something that is not a Message."""
        if not isinstance(message, Message):
            return NotImplemented
        if self.date_time == message.date_time:
            return self._num < message._num
        else:
            return self.sent_before(message.date_time)

    def __gt__(self, message):
        """Allow sorting of messages by implementing the greater than operator.

           Sorting is by date, unless two messages were sent at the same time,
           in which case message number is used to resolve conflicts. This number
           ordering holds fine for messages in single threads, but offers no real
           objective order outside a thread.

           Returns NotImplemented when compared with something that is not a Message."""
        if not isinstance(message, Message):
            return NotImplemented
        if self.date_time == message.date_time:
            return self._num > message._num
        else:
            return self.sent_after(message.date_time)

    def __eq__(self, message):
        """Messages are equal if their number, date, author and text are the same.

           Returns NotImplemented for objects that are not Messages, so that
           comparisons such as 'message == None' evaluate to False rather than
           raising an AttributeError."""
        if not isinstance(message, Message):
            return NotImplemented
        equal = (self._num == message._num) and (self.author == message.author)
        equal = equal and (self.date_time == message.date_time) and (self.text == message.text)
        return equal

    def __len__(self):
        """Return the number of characters in the message body (0 if there is no text)."""
        if self.text is not None:
            return len(self.text)
        return 0

    def is_empty_message(self):
        """Return True if the message contains no text and no attachments."""
        return ((self.text is None) or (self.text == "")) and len(self.attachments) == 0

    def as_text(self):
        """Return a text representation of the message, including any attachments.

           Newlines in the text are escaped to the two characters '\\n', and each
           attachment is appended after an escaped newline and ' - '."""
        text_form = ""
        if self.text is not None:
            text_form += self.text.replace("\n", "\\n")
        for r in self.attachments:
            text_form += "\\n - {0}".format(r)
        return text_form

    def sent_by(self, name):
        """Return True if the message was sent by 'name'."""
        return self.author == name

    def sent_before(self, date):
        """Return True if the message was sent before the date specified.

           The 'date' can be a datetime.datetime object, or a three or five tuple
           (YYYY, MM, DD[, HH, MM])."""
        date = self._date_parse(date)
        return self.date_time < date

    def sent_after(self, date):
        """Return True if the message was sent after the date specified.

           The 'date' can be a datetime.datetime object, or a three or five tuple
           (YYYY, MM, DD[, HH, MM])."""
        date = self._date_parse(date)
        return self.date_time > date

    def sent_between(self, start, end=None):
        """Return True if the message was sent between the dates specified.

            - The 'start' and 'end' can be datetime.datetime objects, or
              a three or five tuple (YYYY, MM, DD[, HH, MM]). The start and end times
              are inclusive since this is simplest.
            - Not entering an 'end' date is interpreted as all messages sent on
              the day 'start'. Where a time is specified also, a 24 hour period
              beginning at 'start' is used."""
        start = self._date_parse(start)
        if end is not None:
            end = self._date_parse(end)
        else:
            end = start + datetime.timedelta(days=1)  # 1 day later than 'start'
        return start <= self.date_time <= end

    def contains(self, search_string, *, ignore_case=False):
        """Return True if 'search_string' is contained in the message text.

           A message without text (for example one holding only attachments)
           never contains the search string."""
        if self.text is None:
            # Attachment-only messages have no body to search.
            return False
        if ignore_case:
            return search_string.lower() in self.text.lower()
        else:
            return search_string in self.text

    def to_csv(self, filename=None, *, add_header=True, _file_handle=None):
        """Output the message as a comma-separated string

            - An optional header row is added by default, but can be turned off
              by setting 'add_header' to False.
            - Output is to file if 'filename' is provided, else the method returns
              a string of the message in CSV format.
            - An open file handle or object supporting the write(...) method can be
              provided as the '_file_handle' argument, but this is primarily internal.
              A handle supplied this way takes precedence over 'filename' and is
              left open for the caller to close.
            - Returns None when writing to a file or handle."""
        owns_handle = False
        in_memory = False
        if _file_handle is not None:
            file_handle = _file_handle
        elif filename is not None:
            file_handle = open(filename, mode='w', encoding='utf-8')
            owns_handle = True
        else:
            file_handle = StringIO()
            in_memory = True

        try:
            csv_writer = csv.writer(file_handle, quoting=csv.QUOTE_NONNUMERIC, lineterminator='\n')
            if add_header:
                csv_writer.writerow(self._csv_header_row())
            csv_writer.writerow([self.date_time, self.thread_id, self._num, self.author, self.as_text()])
            if in_memory:
                # Return a string:
                return file_handle.getvalue().strip("\r\n")
            return None
        finally:
            # Only close a file that was opened here, even if writing failed.
            if owns_handle:
                file_handle.close()


 class Thread(object):
    """An object to encapsulate a Message thread.

        - Contains a list of participants, a string form of the list and a list
          of messages in the thread as Message objects.
        - When initialising, 'people' should be a list of the names of the
          participants either comma-separated in a string or an actual list,
          and 'messages' should be a list of Message objects.
        - 'owner' is removed from the participant list if present, so that
          'people' only holds the other participants.
        - If 'clean' is True, messages without content are dropped and the
          remaining messages are renumbered from 1."""

    def __init__(self, *, _id, owner, people, messages, clean=True):
        if not all(isinstance(m, Message) for m in messages):
            raise TypeError("The message list must be Message objects!")
        self._id = str(_id)
        if isinstance(people, (list, set)):
            self.people = sorted(people)
        else:
            self.people = sorted(people.split(", "))
        # The owner may legitimately be absent (e.g. when rebuilding a thread
        # from an existing Thread.people list), so only remove it if listed.
        if owner in self.people:
            self.people.remove(owner)
        self.people_str = ", ".join(self.people)
        self._owner = owner
        self.messages = sorted(messages)
        if clean:
            self._clean_messages(renumber=True)

    def __getitem__(self, key):
        """Allow accessing Message objects in the messages list using Thread[n].

           Beware out by one errors! The message numbers start counting at 1,
           but the list they are stored in is indexed from 0.
            - This behaviour could be corrected by either subtracting one from
              the key (which causes issues when slicing), or by counting messages
              from 0."""
        return self.messages[key]

    def __contains__(self, item):
        """Allow checking membership using 'Message in Thread'."""
        if isinstance(item, Message):
            return item in self.messages
        else:
            return False

    def __repr__(self):
        """Set Python's representation of the Thread object."""
        return "<THREAD: ID='{}' PEOPLE='{}', MESSAGE_COUNT={}>".format(self._id, self.people_str, len(self.messages))

    def __str__(self):
        """Return a string summary of a Thread."""
        return repr(self)

    def __len__(self):
        """Return the total number of messages in the thread."""
        return len(self.messages)

    def _clean_messages(self, *, renumber=False):
        """Remove messages with no content from the thread.

           This is useful when empty messages are provided by an export
           and need to be removed systematically."""
        self.messages = [m for m in self.messages if not m.is_empty_message()]
        if renumber:
            self._renumber_messages()

    def _add_messages(self, new_messages):
        """Allow adding messages to an already created Thread object.

           This function is useful for merging duplicate threads together."""
        self.messages.extend(new_messages)
        self.messages = sorted(self.messages)

    def _renumber_messages(self):
        """Renumber all messages in the 'messages' list, starting from 1.

           Message objects are sorted after being added; but if messages are
           added using _add_messages() then the numbering may be incorrect. This
           function fixes that."""
        for i, message in enumerate(self.messages, start=1):
            message._num = i

    def rename_participant(self, old_name, new_name):
        """Change the name of a participant in the thread and all messages."""
        self.people = [p if not p == old_name else new_name for p in self.people]
        # Rebuild the string form from the list: a substring replace on the old
        # string would also alter other names containing 'old_name'.
        self.people_str = ", ".join(self.people)
        for m in self.messages:
            if m.author == old_name:
                m.author = new_name

    def merge(self, thread, *, renumber=True):
        """Merge another thread with the same participants into this one.

           This is a stateful operation and modifies the original thread,
           leaving the 'thread' argument untouched. It returns False if
           the merge did not occur due to differing participants, and True
           otherwise."""
        if not isinstance(thread, Thread) or set(self.people) != set(thread.people):
            return False

        self._add_messages(thread.messages)
        self._id = "{0:s}&{1:s}".format(self._id, thread._id)
        if renumber:
            self._renumber_messages()
        return True

    def by(self, name):
        """Return a date ordered list of all messages sent by 'name'.

           Returns a list of Message objects."""
        return [message for message in self.messages if message.sent_by(name)]

    def sent_before(self, date):
        """Return a date ordered list of all messages sent before specified date.

           The function returns a list of Message objects. The 'date' can be a
           datetime.datetime object, or a three or five tuple (YYYY, MM, DD[, HH, MM])."""
        return [message for message in self.messages if message.sent_before(date)]

    def sent_after(self, date):
        """Return a date ordered list of all messages sent after specified date.

           The list returned is a list of Message objects. The 'date' can be a
           datetime.datetime object, or a three or five tuple (YYYY, MM, DD[, HH, MM])."""
        return [message for message in self.messages if message.sent_after(date)]

    def sent_between(self, start, end=None):
        """Return a date ordered list of all messages sent between specified dates.

            - The list returned is a list of Message objects. The 'start' and 'end'
              can be datetime.datetime objects, or a three or five tuple
              (YYYY, MM, DD[, HH, MM]).
            - Not entering an 'end' date is interpreted as all messages sent on
              the day 'start'. Where a time is specified also, a 24 hour period
              beginning at 'start' is used."""
        return [message for message in self.messages if message.sent_between(start, end)]

    def search(self, string, *, ignore_case=False):
        """Return a date ordered list of messages in Thread containing 'string'.

           This function searches the current thread, and returns a list of Message
           objects. Messages without any text are never matched.
            - The function can be made case-insensitive by setting 'ignore_case'
              to True."""
        return sorted([message for message in self.messages
                       if message.text is not None and message.contains(string, ignore_case=ignore_case)])

    def on(self, date):
        """Return the Thread object as it would have been on 'date'.

           The Thread object returned is a new object containing the subset of the
           messages sent before 'date'. The Message objects themselves are shared
           with this thread, so they are neither filtered nor renumbered here.
           - 'date' can be a datetime.datetime object, or a three or five tuple
              (YYYY, MM, DD[, HH, MM])."""
        return Thread(_id=self._id, owner=self._owner, people=self.people,
                      messages=self.sent_before(date), clean=False)

    def to_csv(self, filename=None, *, add_header=True, _file_handle=None):
        """Output every message in the thread in CSV format.

            - A header row is written before the first message unless
              'add_header' is False.
            - Output is to file if 'filename' is provided (returning None), else the
              method returns a string, or None if the thread has no messages.
            - An open file handle can be provided as '_file_handle'; it takes
              precedence over 'filename' and is left open for the caller."""
        if (filename is not None) or (_file_handle is not None):
            # Either use existing file handle or open a new one:
            owns_handle = _file_handle is None
            file_handle = open(filename, mode='w', encoding='utf-8') if owns_handle else _file_handle
            try:
                # Output the messages to file:
                for i, message in enumerate(self.messages):
                    message.to_csv(add_header=(i == 0 and add_header), _file_handle=file_handle)
            finally:
                # If opened a file here, then close it (even when writing failed):
                if owns_handle:
                    file_handle.close()
            return None
        if len(self.messages) == 0:
            return None
        if len(self.messages) > 1000:
            print("There are more than 1000 messages, conversion to string may fail or crash Python!")
        # Joining all rows avoids the stray trailing newline a single-message
        # thread would otherwise produce.
        return "\n".join([message.to_csv(add_header=(i == 0 and add_header))
                          for i, message in enumerate(self.messages)])


 class Chat(object):
    """An object to encapsulate a group of Threads.

        - Contains a list of Thread objects, which can be accessed using item
          accessing Chat["Thread Name"] style.
        - When initialising, 'owner' should be the name of the user, and 'threads'
          should be a list of Thread objects.
        - Provides useful functions for accessing messages."""
    def __init__(self, *, owner, threads):
        if not all(isinstance(t, Thread) for t in threads):
            raise TypeError("The thread list must be Thread objects!")
        self._owner = owner
        self._all_people = {owner}
        self.threads = []
        self._thread_dict = {}
        for thread in threads:
            self._add_new_thread(thread)
        # Longest threads first.
        self.threads = sorted(self.threads, key=len, reverse=True)

    def __getitem__(self, key):
        """Allow accessing Thread objects in the list using Chat["Thread Name"].

           This method allows the threads list to be accessed using Chat["Thread Name"]
           or Chat[n] notation (slices of the thread list are accepted too).
           Raises TypeError for any other kind of key."""
        if isinstance(key, (int, slice)):
            return self.threads[key]
        elif isinstance(key, str):
            return self._thread_dict[key]
        raise TypeError("Chat indices must be integers, slices or thread names, not {0}".format(type(key).__name__))

    def __contains__(self, item):
        """Allow checking membership using '"Thread Name" in Chat' or 'Thread in Chat'."""
        if isinstance(item, Thread):
            return item in self.threads
        elif isinstance(item, str):
            return item in self._thread_dict
        else:
            return False

    def __repr__(self):
        """Set Python's representation of the Chat object."""
        return "<CHAT LOG: OWNER='{:s}' TOTAL_THREADS={:d} TOTAL_MESSAGES={:d}>".format(self._owner, len(self.threads), self.count_messages())

    def __len__(self):
        """Return the total number of threads.

           Allows the len() method to be called on a Chat object. This could be
           changed to be the total number of messages, currently available as
           Chat.count_messages()"""
        return len(self.threads)

    def _add_new_thread(self, thread):
        """Add a new thread to the chat object.

            The thread is appended to the thread list and registered under its
            participant string; its participants are added to the set of all
            people. No merging with an existing thread of the same name is done."""
        thread_key = thread.people_str
        # FIXME: Only one of the same named threads is accessible by name!
        self.threads.append(thread)
        self._thread_dict[thread_key] = thread
        self._all_people.update(thread.people)

    def merge(self, chat):
        """Merge another chat into this one by adding all of its threads.

           The chats must be owned by the same person to be merged. Returns
           True if the merge happened and False (after printing a message)
           if 'chat' is not a Chat or has a different owner."""
        if not isinstance(chat, Chat) or self._owner != chat._owner:
            print("Invalid chat object provided!")
            return False
        for thread in chat.threads:
            self._add_new_thread(thread)
        self.threads = sorted(self.threads, key=len, reverse=True)
        return True

    def rename_thread(self, old_name, new_name, *, rename_participant=True):
        """Rename a thread, and optionally rename the participant too.

           Raises KeyError if there is no thread called 'old_name'."""
        thread = self._thread_dict.pop(old_name)
        if rename_participant:
            thread.rename_participant(old_name, new_name)
        self._thread_dict[new_name] = thread

    def count_messages(self):
        """Count the total number messages.

           Since Thread objects can be extended dynamically, this may prove
           necessary."""
        return sum(len(thread) for thread in self.threads)

    def all_messages(self):
        """Return a date ordered list of all messages.

           The list is all messages contained in the Chat object, as a list of
           Message objects."""
        return sorted([message for thread in self.threads for message in thread.messages])

    def all_from(self, name):
        """Return a date ordered list of all messages sent by 'name'.

           The list returned is a list of Message objects. This is distinct from
           Thread.by(name) since all threads are searched by this method. For all
           messages in one thread from 'name', use Thread.by(name) on the correct Thread."""
        return sorted([message for thread in self.threads for message in thread.by(name)])

    def sent_before(self, date):
        """Return a date ordered list of all messages sent before specified date.

           The function returns a list of Message objects. The 'date' can be a
           datetime.datetime object, or a three or five tuple (YYYY, MM, DD[, HH, MM])."""
        return sorted([message for thread in self.threads for message in thread.sent_before(date)])

    def sent_after(self, date):
        """Return a date ordered list of all messages sent after specified date.

           The list returned is a list of Message objects. The 'date' can be a
           datetime.datetime object, or a three or five tuple (YYYY, MM, DD[, HH, MM])."""
        return sorted([message for thread in self.threads for message in thread.sent_after(date)])

    def sent_between(self, start, end=None):
        """Return a date ordered list of all messages sent between specified dates.

            - The list returned is a list of Message objects. The 'start' and 'end'
              can be datetime.datetime objects, or a three or five tuple
              (YYYY, MM, DD[, HH, MM]).
            - Not entering an 'end' date is interpreted as all messages sent on
              the day 'start'. Where a time is specified also, a 24 hour period
              beginning at 'start' is used."""
        return sorted([message for thread in self.threads for message in thread.sent_between(start, end)])

    def search(self, string, *, ignore_case=False):
        """Return a date ordered list of all messages containing 'string'.

           This function searches in all threads, and returns a list of Message
           objects.
            - The function can be made case-insensitive by setting 'ignore_case'
              to True."""
        return sorted([message for thread in self.threads for message in thread.search(string, ignore_case=ignore_case)])

    def on(self, date):
        """Return the Chat object as it would have been on 'date'.

           The Chat object returned is a new object containing the subset of the
           Threads which contain messages sent before 'date', where each of these
           Threads is a new Thread with only these messages in.
           - 'date' can be a datetime.datetime object, or a three or five tuple
              (YYYY, MM, DD[, HH, MM])."""
        # Build each snapshot once, then keep only those that have messages.
        snapshots = [t.on(date) for t in self.threads]
        threads_on = [snapshot for snapshot in snapshots if len(snapshot) > 0]
        return Chat(owner=self._owner, threads=threads_on)

    def to_csv(self, filename=None, *, add_header=True, _file_handle=None):
        """Output every message of every thread in CSV format.

            - A header row is written before the first message unless
              'add_header' is False.
            - Output is to file if 'filename' is provided (returning None), else the
              method returns a string, or None if there are no messages at all.
            - An open file handle can be provided as '_file_handle'; it takes
              precedence over 'filename' and is left open for the caller.
            - Threads without messages contribute nothing to the output."""
        # Skipping empty threads keeps the header on the first thread that
        # actually writes rows and avoids joining None values below.
        non_empty = [thread for thread in self.threads if len(thread) > 0]
        if (filename is not None) or (_file_handle is not None):
            # Not every writable object has a 'name' (e.g. io.StringIO).
            target = filename or getattr(_file_handle, "name", "<file handle>")
            print("Writing messages to CSV file '{0:s}'.".format(str(target)))
            # Either use existing file handle or open a new one:
            owns_handle = _file_handle is None
            file_handle = open(filename, mode='w', encoding='utf-8') if owns_handle else _file_handle
            try:
                # Output the messages to file:
                for i, thread in enumerate(non_empty):
                    thread.to_csv(add_header=(i == 0 and add_header), _file_handle=file_handle)
            finally:
                # If opened a file here, then close it (even when writing failed):
                if owns_handle:
                    file_handle.close()
            return None
        if not non_empty:
            return None
        if self.count_messages() > 1000:
            print("There are more than 1000 messages, conversion to string may fail or crash Python!")
        # Trailing newlines are trimmed from each thread so that joining never
        # produces blank rows.
        return "\n".join([thread.to_csv(add_header=(i == 0 and add_header)).rstrip("\n")
                          for i, thread in enumerate(non_empty)])


 # Some useful functions:
 def canonicalise_facebook_name(name):
    """Return 'name' lower-cased, with spaces turned into hyphens and prefixed by 'fb_'."""
    hyphenated = name.replace(" ", "-")
    return "fb_" + hyphenated.lower()


 def fix_participant(participant, thread_id, unknown_user_map):
    """Return a usable name for 'participant' within the thread 'thread_id'.

       - A non-empty name other than the FACEBOOK_USER placeholder is returned
         unchanged.
       - A missing/empty name, or the placeholder itself, is looked up in
         'unknown_user_map' under the thread ID with its "FACEBOOK_" prefix
         removed; the mapped name is returned if there is one.
       - Otherwise a warning is printed, the placeholder is recorded in
         'unknown_user_map' for that thread (so the warning is printed only
         once per thread) and the placeholder is returned."""
    fixed_participant = participant or FACEBOOK_USER
    if fixed_participant != FACEBOOK_USER:
        return fixed_participant

    thread_key = thread_id.replace("FACEBOOK_", "")
    if thread_key in unknown_user_map:
        return unknown_user_map[thread_key]

    # This user is still unknown, print a message suggesting adding an entry to the
    # map that fixes up unknown users. Of course, Facebook merge all deleted/removed
    # users together, so this may really be several people not just one . . .
    print(" - Unknown user in thread: '{0:s}'.".format(thread_id))
    print("       Add   \"{0:s}\": \"Real Name\"   to the UNKNOWN_USER_MAP to remove this warning.".format(thread_key))
    # Record in the map that was passed in (not the module-level one), so the
    # function honours its 'unknown_user_map' argument.
    unknown_user_map[thread_key] = fixed_participant
    return fixed_participant


 def fix_participants(participants, thread_id, unknown_user_map):
    """Apply fix_participant() to each name in 'participants' and return the results as a list."""
    fixed_names = []
    for name in participants:
        fixed_names.append(fix_participant(name, thread_id, unknown_user_map))
    return fixed_names


 # Open the zip file:
 print("Opening zipfile.")
 # The 'with' blocks guarantee the archive and each member are closed even if
 # processing a thread fails part-way through.
 with zipfile.ZipFile(FACEBOOK_ZIPFILE) as zip_archive:
    message_files = [f for f in zip_archive.namelist() if f.startswith(ZIP_MESSAGE_DIR) and f.endswith(".json")]

    # Process the messages:
    print("Processing message threads.")
    thread_list = []
    for message_file in message_files:
        with zip_archive.open(message_file) as json_file:
            raw_json = json.load(json_file)
        # Round-trip the parsed JSON through text: the characters are re-encoded
        # as Latin-1 bytes and those bytes decoded as UTF-8 before parsing again
        # (presumably to repair text the export stored byte-by-byte).
        message_json = json.loads(json.dumps(raw_json, ensure_ascii=False).encode("latin-1").decode("utf-8"))

        if not message_json.get("is_still_participant"):
            # Skip message threads where we have opted to leave them.
            continue

        thread_id = THREAD_ID_TEMPLATE.format(message_json.get("thread_path"))

        participants = [p["name"] for p in message_json.get("participants", [])]
        participants = fix_participants(participants, thread_id, UNKNOWN_USER_MAP)
        if MY_FACEBOOK_NAME not in participants:
            # Thread() removes the owner from the participant list, so make sure
            # the owner is listed rather than failing on this thread.
            participants.append(MY_FACEBOOK_NAME)

        message_list = []
        for n, message in enumerate(message_json.get("messages", [])[::-1]):  # Inline list reversal into ascending order!

            message_text = message.get("content")
            if message_text is None:
                # Skip blank messages.
                continue

            message_time = datetime.datetime.fromtimestamp(message["timestamp_ms"] / 1000, local_timezone).astimezone(pytz.utc)

            author_name = fix_participant(message.get("sender_name"), thread_id, UNKNOWN_USER_MAP)
            if author_name not in participants:
                # Because not listing people who left the conversation is so helpful!
                participants.append(author_name)

            message_list.append(Message(source=MESSAGE_TYPE, thread_id=thread_id, author=author_name, date_time=message_time, text=message_text, number=n))

        thread_list.append(Thread(_id=thread_id, owner=MY_FACEBOOK_NAME, people=participants, messages=message_list))

 chat = Chat(owner=MY_FACEBOOK_NAME, threads=thread_list)
 print("Created 'chat' object.")

 # Do some stuff with the "chat" object:
 print(chat)

 print(facebook_analysis_demo.top_n_people(chat, 15))

 facebook_analysis_demo.messages_date_graph(chat)