Last active
June 11, 2019 21:01
-
-
Save sushain97/0d3a235c6f80d1d74f80 to your computer and use it in GitHub Desktop.
Downloads, archives, analyzes and plots Facebook Messenger conversations (individual and group)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
__author__ = 'Sushain K. Cherivirala' | |
import argparse
import cmd
import collections
import contextlib
import copy
import datetime
import functools
import getpass
import glob
import gzip
import hashlib
import http.cookiejar
import importlib.util
import itertools
import json
import logging
import math
import operator
import os
import pathlib
import pprint
import re
import secrets
import shlex
import shutil
import statistics
import subprocess
import sys
import tempfile
import textwrap
import typing
import urllib
import urllib.parse

import lxml.html
import numpy  # statistics doesn't provide a weighted average function (yet)
# Postgres, while far less portable/lightweight, has far better support for JSON than SQLite
# and Facebook's thread info responses change far too quickly to keep up with. The --bare
# option was added in order to somewhat compensate for this (very) breaking change.
# Optional dependency: psycopg2. Without it only bare mode (no database) works;
# a namedtuple polyfill stands in so annotations like psycopg2.extensions.cursor
# still resolve at import time.
psycopg2_installed = importlib.util.find_spec('psycopg2')
if psycopg2_installed:
    import psycopg2
    import psycopg2.extensions
    import psycopg2.extras
else:
    logging.warning('Failed to import psycopg2, only bare mode supported (no database).')
    psycopg2_polyfill = collections.namedtuple('pyscopg2', ['extensions', 'extras'])
    psycopg2_extensions_polyfill = collections.namedtuple('psycopg2_extensions', ['cursor', 'connection'])
    psycopg2 = psycopg2_polyfill(extensions=psycopg2_extensions_polyfill(None, None), extras=None)
# Optional dependency: matplotlib, needed only for the plotting features.
if importlib.util.find_spec('matplotlib'):
    import matplotlib
    import matplotlib.pyplot as plot
else:
    logging.warning('Failed to import matplotlib, plotting will not be available.')
    matplotlib = plot = None
# Optional dependency: wordcloud, needed only for word-cloud rendering.
if importlib.util.find_spec('wordcloud'):
    import wordcloud
else:
    logging.warning('Failed to import wordcloud, word clouds will not be available.')
    wordcloud = None
# Optional dependency: selenium, needed only for browser-based authentication.
try:
    import selenium  # noqa: F401
    from selenium import webdriver
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.common.action_chains import ActionChains
except ImportError:
    logging.warning('Failed to import selenium, browser authentication will not be available.')
    webdriver = None
#############
# Constants #
#############
# Known Facebook user ids -> display names. Ids absent from this map get an
# anonymized 'Unknown_XXXX' label (see id_to_user below).
id_to_user_map = {
    '777993547': 'Jean',
    '1104767593': 'Annie',
    '1253817276': 'Alexander',
    '1311112684': 'Keerthana',
    '1333603699': 'Saloni',
    '1338262658': 'Sushain',
    '1412264090': 'Michelle H.',
    '1626215140': 'Maxine',
    '1694710481': 'Sameer',
    '1814644642': 'Devin',
    '1841753743': 'Christina',
    '100000241495175': 'Eric',
    '100000284933876': 'Ani',
    '100000534453859': 'Ashley',
    '100000986269083': 'Prachi',
    '100001184052364': 'Shreya',
    '100002398916527': 'Amisha',
    '100002421417870': 'Vijay',
    '100002475584038': 'Ben',
    '100002576434633': 'Snigdha',
    '100002628181062': 'Pallavi',
    '100002827417675': 'Rohan',
    '100002878482600': 'Tiffany',
    '100003127069904': 'Tiffany Do',
    '100003355055997': 'Karen',
    '100003971632140': 'Sara',
    '100004252695708': 'Michelle N.',
    '100004322110944': 'Rowena',
    '100004476751719': 'Benji',
    '100006790448156': 'Serena',
    '100009196845865': 'Brittany',
    '100012341320095': 'Spoorthi',
    '100012529272199': 'Nikki',
    '100025545846385': 'Brittany 2',
}
# Thread ids of the group conversations to archive/analyze.
groups = [
    494248544089735, 1513200892278424, 322604171221575, 1021123947914529,
    879550675408978, 940010492714431, 1700273163527834, 1097674336985252,
    888706481258668, 851545464945488, 1378878545487465,
]
# Regex patterns (applied case-insensitively) used by the profanity statistics.
profanity = [
    r'\bfuck+(?:ing|ed|er)?\b', r'\b(?:dip)?shit+(?:ty+)?\b', r'\bdamn(?:it+)?\b',
    r'\bgoddamn\b', r'\bdick\b', r'\bbullshit+\b', r'\bbastard\b', r'\bhell+\b',
    r'\bbitch(?:ass)?\b', r'\bass+\b', r'\ba(?:ss)?hole\b', r'\bmotherfuck+(?:ing|ed|er)\b',
]
# Desktop-browser User-Agent sent with messenger.com requests.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36'
###########
# Globals #
###########
# Shared authentication state; starts empty and is presumably filled in once a
# login succeeds (see get_new_messages, which re-logins when fields are unset).
Session = collections.namedtuple('Session', ['username', 'id', 'opener', 'dtsg'])
session = Session(None, None, None, None)
# Connection string for psycopg2.connect; presumably set from CLI configuration
# before any database work happens — None means no database configured.
postgres_connection_string = None
######## | |
# Code # | |
######## | |
def id_to_user(userid: typing.Union[str, int]) -> str:
    """Map a Facebook user id to a known display name.

    Ids missing from `id_to_user_map` get an anonymized 'Unknown_XXXX' label.
    The id may be an int or a string, optionally prefixed with 'fbid:'.
    """
    userid = str(userid).replace('fbid:', '')
    if userid in id_to_user_map:
        return id_to_user_map[userid]
    # BUG FIX: str.__hash__ is salted per process (PYTHONHASHSEED), so the old
    # hash()-based suffix produced a different pseudonym for the same user on
    # every run. An md5 digest keeps the label stable across runs.
    return 'Unknown_' + hashlib.md5(userid.encode()).hexdigest()[:4]
@functools.lru_cache(maxsize=None)
def user_to_id(user: str) -> typing.Optional[str]:
    """Reverse lookup: case-insensitive display name -> Facebook user id (or None)."""
    wanted = user.lower()
    name_to_id = {name.lower(): userid for userid, name in id_to_user_map.items()}
    return name_to_id.get(wanted)
def init_db() -> None:
    """Create the threads and messages tables plus the indexes the queries rely on.

    Connects using the module-level `postgres_connection_string`. The connection
    context manager commits on success and rolls back on error; `closing` then
    guarantees the connection itself is closed either way (the original leaked
    both the cursor and the connection if any statement failed).
    """
    with contextlib.closing(psycopg2.connect(postgres_connection_string)) as conn:
        with conn, conn.cursor() as cursor:
            cursor.execute("""
                CREATE TABLE threads (
                    id VARCHAR(100) PRIMARY KEY,
                    info JSONB
                )
            """)
            cursor.execute("""
                CREATE TABLE messages (
                    id VARCHAR(100) PRIMARY KEY,
                    thread_id VARCHAR(100) REFERENCES threads(id) NOT NULL,
                    info JSONB
                )
            """)
            # Expression indexes backing the ORDER BY timestamp and per-sender queries.
            cursor.execute("CREATE INDEX ON messages(((info->>'timestamp_precise')::TIMESTAMP))")
            cursor.execute("CREATE INDEX ON messages((info->'message_sender'->>'id'))")
def insert_messages(
    cursor: psycopg2.extensions.cursor,
    messages: typing.List[typing.Dict[str, typing.Any]],
    thread_info: typing.Dict[str, typing.Any],
) -> None:
    """Upsert the thread row described by `thread_info`, then bulk-upsert `messages`.

    Rows that already exist (matched on primary key) get their `info` JSON
    replaced. No commit happens here.
    """
    key = thread_info['thread_key']
    # Group threads carry `thread_fbid`; one-on-one threads use `other_user_id`.
    thread_id = int(key['thread_fbid'] or key['other_user_id'])
    cursor.execute(
        """
        INSERT INTO threads (id, info)
        VALUES (%s, %s)
        ON CONFLICT (id) DO UPDATE
        SET info = EXCLUDED.info
        """,
        (thread_id, json.dumps(thread_info)),
    )
    rows = [(message['message_id'], thread_id, json.dumps(message)) for message in messages]
    psycopg2.extras.execute_values(
        cursor,
        """
        INSERT INTO messages (id, thread_id, info)
        VALUES %s
        ON CONFLICT (id) DO UPDATE
        SET info = EXCLUDED.info
        """,
        rows,
    )
def dump_db(path: str, database_name: str) -> None:
    """Dump `database_name` with pg_dump and write it gzip-compressed to `path`.

    pg_dump's stderr (its --verbose progress) is relayed to the debug log.
    Raises subprocess.CalledProcessError if pg_dump exits nonzero — the original
    ignored the exit status and would silently compress a truncated dump.
    """
    logging.info('Starting database dump...')
    with tempfile.NamedTemporaryFile(mode='w+') as db:
        with subprocess.Popen(['pg_dump', database_name, '--verbose'], stdout=db, stderr=subprocess.PIPE, universal_newlines=True) as proc:
            for line in proc.stderr:
                logging.debug(line.rstrip())
        # BUG FIX: check the exit status instead of trusting whatever was written.
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode, proc.args)
        logging.info(f'Completed dumping the database ({os.path.getsize(db.name) / 1024 ** 2:.2f} MB).')
        with gzip.open(path, 'wb') as db_compressed:
            # Renamed from `db` — the original shadowed the tempfile handle here.
            with open(db.name, 'rb') as db_raw:
                db_compressed.writelines(db_raw)
        logging.info(f'Completed compressing the database ({os.path.getsize(path) / 1024 ** 2:.2f} MB).')
def load_db(path: str, database_name: str) -> None:
    """Restore a gzip-compressed SQL dump at `path` into `database_name` via psql.

    Runs the whole restore in a single transaction and stops on the first error.
    Raises subprocess.CalledProcessError if psql exits nonzero.
    """
    args = shlex.split(f'psql --set ON_ERROR_STOP=on --single-transaction {database_name}')
    # BUG FIX: the original passed gzip.open(path) inline and never closed it.
    with gzip.open(path) as dump:
        subprocess.run(args, stdin=dump, check=True)
def get_new_messages(
    conversation_id: typing.Union[str, int],
    group: bool = False,
    old_messages: typing.Optional[typing.List[typing.Dict[str, typing.Any]]] = None,
    limit: int = 2000,
) -> typing.Tuple[typing.Dict[str, typing.Any], typing.List[typing.Dict[str, typing.Any]]]:
    # Pages backwards through a thread via messenger.com's GraphQL batch endpoint
    # until a short batch signals the thread is exhausted, or we have paged past
    # the newest message already present in `old_messages`. Returns a tuple of
    # (thread info dict, new messages in oldest-first order).
    # NOTE(review): `group` is accepted but never used in this function — confirm
    # with callers whether it can be dropped.
    global session
    if not (session.opener and session.dtsg and session.id):
        session = login()  # login() is defined elsewhere in this file
    opener, dtsg = session.opener, session.dtsg
    logging.info('Fetching new messages.')
    if old_messages:
        # Timestamps are milliseconds since the epoch throughout.
        newest_message_date = datetime.datetime.fromtimestamp(old_messages[-1]['timestamp_precise'] / 1e3)
    start_time = datetime.datetime.now()
    new_messages: typing.List[typing.Dict[str, typing.Any]] = []
    info = None
    before = int(datetime.datetime.now().timestamp() * 1e3)
    failures = 0
    while True:
        # TODO: Parallelize downloads (multiple queries via graphql?)
        try:
            data = {
                'batch_name': 'MessengerGraphQLThreadFetcher',
                '__req': 'vw',
                '__rev': 4419169,
                '__pc': 'PHASED:messengerdotcom_pkg',
                '__user': session.id,
                'fb_dtsg': dtsg,
                'queries': json.dumps({
                    'o0': {
                        'doc_id': 2463930623632059,
                        'query_params': {
                            'id': str(conversation_id),
                            'message_limit': limit,
                            'load_messages': True,
                            'load_read_receipts': False,
                            'load_delivery_receipts': False,
                            'before': before,
                        },
                    },
                }),
            }
            with opener.open('https://www.messenger.com/api/graphqlbatch/', data=urllib.parse.urlencode(data).encode()) as response:
                # Only the first line of the batch response is the JSON payload.
                data = json.loads(response.read().decode().splitlines()[0])['o0']['data']['message_thread']
            data.pop('last_message', None)
            new_messages_batch = data.pop('messages')['nodes']
            info = data
            # Next page: everything strictly older than this batch's oldest message.
            before = int(new_messages_batch[0]['timestamp_precise']) - 1
        except Exception as err:
            # The endpoint intermittently fails on large batches: after three
            # consecutive failures halve the limit, giving up below a floor of 10.
            failures += 1
            logging.warning(f'Failed to fetch messages before {before} with limit {limit} (failure #{failures}): {err}.')
            if failures > 2:
                logging.info(f'Changing limit from {limit} to {limit // 2}.')
                limit //= 2
                failures = 0
                if limit < 10:
                    logging.error(f'Giving up after fetching {len(new_messages)} messages.')
                    raise err
            continue
        failures = 0
        # Batches arrive newest-page-first, so prepend to keep oldest-first order.
        new_messages = new_messages_batch + new_messages
        oldest_message_date = datetime.datetime.fromtimestamp(before / 1e3)
        if len(new_messages_batch) < limit or (old_messages and oldest_message_date < newest_message_date):
            logging.info(f'Completed fetching {len(new_messages)} messages in conversation {conversation_id}.')
            break
        else:
            logging.info(f'Fetched {limit} messages before {oldest_message_date}, fetched {len(new_messages)} messages so far.')
    if old_messages:
        # Drop the overlap with what we already had; only strictly newer remain.
        new_messages = list(filter(lambda x: datetime.datetime.fromtimestamp(int(x['timestamp_precise']) / 1e3) > newest_message_date, new_messages))
        logging.info(f'Added {len(new_messages)} messages to existing {len(old_messages)} messages '
                     f'for a total of {len(new_messages) + len(old_messages)}.')
    logging.info(f'The data retrieval took {datetime.datetime.now() - start_time} seconds.')
    return info, new_messages
def get_messages(
    cursor: psycopg2.extensions.cursor,
    conversation_id: str,
    query: typing.Optional[str] = None,
    regular_expression: bool = False,
    case_sensitive: bool = False,
) -> typing.List[typing.Dict[str, typing.Any]]:
    """Fetch a thread's messages ordered by timestamp, optionally filtered.

    Substring filtering (`query` without `regular_expression`) happens in SQL via
    LIKE; regex filtering fetches everything and filters in Python.
    Returns dicts with keys: id, author, timestamp (int ms), body, sticker.
    """
    # One parameterized template instead of three near-identical copies; {condition}
    # is filled with a trusted fragment, never user input (that goes through %s).
    base_query = """
        SELECT id, info->'message_sender'->>'id', info->>'timestamp_precise', info->'message'->>'text', info->'sticker'
        FROM messages
        WHERE thread_id = %s{condition}
        ORDER BY info->>'timestamp_precise'
    """
    if query and not regular_expression:
        if case_sensitive:
            # BUG FIX: the original query contained a stray 'AsyncIterator' token
            # before ORDER BY, which made this branch invalid SQL.
            cursor.execute(
                base_query.format(condition=" AND info->'message'->>'text' LIKE %s"),
                (conversation_id, f'%{query}%'),
            )
        else:
            cursor.execute(
                base_query.format(condition=" AND LOWER(info->'message'->>'text') LIKE %s"),
                (conversation_id, f'%{query.lower()}%'),
            )
    else:
        cursor.execute(base_query.format(condition=''), (conversation_id, ))
    messages = cursor.fetchall()
    if query and regular_expression:  # TODO: use Postgres' built-in regex handling
        regex = re.compile(query, flags=(0 if case_sensitive else re.IGNORECASE))
        # BUG FIX: the regex used to be applied to column 2 (the timestamp)
        # rather than column 3 (the message text).
        messages = list(filter(lambda x: x[3] and bool(regex.search(x[3])), messages))
    # TODO: consider psycopg2's DictCursor
    return [
        {
            'id': message_id,
            'author': author,
            'timestamp': int(timestamp),
            'body': body,
            'sticker': sticker,
        } for [message_id, author, timestamp, body, sticker] in messages
    ]
def all_days_span(oldest: datetime.datetime, newest: datetime.datetime) -> typing.List[datetime.date]:
    """Return every calendar date from oldest's date through newest's date, inclusive.

    Empty if newest falls on an earlier date than oldest.
    """
    first, last = oldest.date(), newest.date()
    one_day = datetime.timedelta(days=1)
    days = []
    current = first
    while current <= last:
        days.append(current)
        current += one_day
    return days
def termgraph(data: typing.Sequence[typing.Tuple[str, int]], buffer: int = 10) -> None:
    """Print a horizontal bar chart of (label, count) pairs sized to the terminal."""
    columns = shutil.get_terminal_size((80, 20)).columns
    # Labels get at most a fifth of the terminal; longer ones wrap onto their own line.
    longest_label = max(len(label) for label, _ in data)
    x_padding = min(longest_label, columns // 5)
    max_y = max(count for _, count in data)
    # Columns left for the bar after the label, the printed count, and some slack.
    width = columns - x_padding - len(str(max_y)) - buffer
    step = max_y / width
    for label, count in data:
        if len(label) <= x_padding:
            header = f'{label: >{x_padding}}:'
        else:
            header = f'{label}\n{" " * x_padding}:'
        bar = '▇' * int(count / step) if count >= step else '|'
        print(f'{header} {bar} {count}')
# @profile
# NOTE: at one point (circa Python 3.3) this function used to be very slow,
# as of Python 3.6 it is very fast. Perhaps due to a new native dict implementation?
# Regardless, the profiling was a poor man's optimization technique rather than
# using more proper data structures. Here be dragons!
def messages_stats(
    messages: typing.List[typing.Dict[str, typing.Any]],
    plot_message_count: bool = False,
    plot_cumulative_message_count: bool = False,
    word_clouds: bool = False,
    limit_plot_to_streak: bool = False,
) -> None:
    # Computes and prints a battery of statistics over normalized message dicts
    # (keys: 'author', 'timestamp' in ms, 'body', 'sticker') and optionally
    # renders matplotlib plots and word clouds. Purely presentational; returns None.
    # Assumes `messages` is ordered chronologically (streak/response-time logic
    # depends on it) — confirm against get_messages, which sorts by timestamp.
    start_time = datetime.datetime.now()
    oldest, newest = datetime.datetime.max, datetime.datetime.min
    # TODO: count more message types? reactions?
    message_counts: typing.DefaultDict[str, typing.Counter[str]] = \
        collections.defaultdict(lambda: collections.Counter({'sticker': 0, 'text': 0, 'other': 0}))
    message_contents: typing.DefaultDict[str, typing.List[str]] = collections.defaultdict(list)
    days_spoken = set()
    days_messages: typing.DefaultDict[datetime.date, typing.DefaultDict[str, int]] = collections.defaultdict(lambda: collections.defaultdict(int))
    sticker_counts: typing.DefaultDict[str, typing.Counter[str]] = collections.defaultdict(lambda: collections.Counter())
    response_times: typing.DefaultDict[str, typing.List[float]] = collections.defaultdict(list)
    message_streaks: typing.List[typing.Tuple[str, int]] = []
    users = set()
    last_message_user = None
    current_message_streak = 0
    last_timestamp = None
    # Single pass over all messages gathers every per-user/per-day aggregate at once.
    for message in messages:
        date = datetime.datetime.fromtimestamp(message['timestamp'] / 1e3)
        oldest = min(oldest, date)
        newest = max(newest, date)
        user = id_to_user(message['author'])
        days_messages[date.date()][user] += 1
        days_spoken.add(date.date())
        users.add(user)
        if last_message_user == user:
            # Same sender as the previous message: extend the current streak.
            current_message_streak += 1
            last_timestamp = date
        else:
            if last_message_user:
                message_streaks.append((last_message_user, current_message_streak))
            last_message_user = user
            current_message_streak = 1
            if last_timestamp:
                # Gap between the previous sender's last message and this reply.
                response_times[user].append((date - last_timestamp).total_seconds())
            last_timestamp = date
        text = message['body']
        message_counts[user]['all'] += 1
        if text and len(text):
            message_counts[user]['text'] += 1
            message_contents[user].append(text)
        else:
            if message['sticker']:
                message_counts[user]['sticker'] += 1
                sticker_counts[user][message['sticker']['url']] += 1
            else:
                message_counts[user]['other'] += 1
    print(f"Conversations amongst {' & '.join(users)} between {oldest} and {newest}:\n")
    # Per-user corpus of all text messages, joined for length/profanity analysis.
    message_content = {user: '\n'.join(content) for user, content in message_contents.items()}
    total_counts = collections.Counter({'sticker': 0, 'text': 0})
    for person, counts in message_counts.items():
        total_counts['sticker'] += counts['sticker']
        total_counts['text'] += counts['text']
        total_counts['all'] += counts['all']
        print(f"{person} sent {counts['all']:,} total messages, {counts['text']:,} text messages ({counts['text'] / counts['all']:.2%}) and "
              f"{counts['sticker']:,} stickers ({counts['sticker'] / counts['all']:.2%}). "
              f"On average, the text messages were {len(message_content[person]) / counts['text']:.2f} characters long which makes "
              f'for a total of {len(message_content[person]):,} characters.')
    # For each day, the user who sent the most messages that day.
    top_messages = {day: max(messages.items(), key=operator.itemgetter(1))[0] for day, messages in days_messages.items()}
    top_messages_counts = collections.Counter(top_messages.values()).most_common()
    if len(top_messages_counts) == 1:
        print(f'{top_messages_counts[0][0]} talked the most every day...')
    else:
        print(f'{top_messages_counts[0][0]} talks the most, with {top_messages_counts[0][1]} day(s) when they sent the most messages, '
              f'and {top_messages_counts[-1][0]} is the quiet one with {top_messages_counts[-1][1]} day(s).')
    print(f"\nSo, a total of {total_counts['all']:,} messages, {total_counts['text']:,} text messages ({total_counts['text'] / total_counts['all']:.2%}) "
          f"and {total_counts['sticker']} stickers ({total_counts['sticker'] / total_counts['all']:.2%}).")
    all_days = all_days_span(oldest, newest)
    print(f"That makes for an average of {total_counts['all'] / len(all_days):.2f} messages per day!")
    print(f'Over the span of {len(all_days)} day(s), {len(set(all_days) - days_spoken)} day(s) went without conversation '
          f'({(len(set(all_days) - days_spoken)) / len(all_days):.2%}).')
    print(f"So, if we take that into account, it makes for an average of {total_counts['all'] / len(days_spoken):.2f} messages on days with conversation!")
    # Per user: most common spelling of each matched profanity pattern -> count.
    profanity_counts: typing.DefaultDict[str, typing.Dict[str, int]] = collections.defaultdict(dict)
    for user in users:
        for word in profanity:
            matches = re.findall(word, message_content[user], flags=re.IGNORECASE)
            if matches:
                most_common = collections.Counter(map(str.lower, matches)).most_common(1)[0]
                profanity_counts[user][most_common[0]] = most_common[1]
    profanity_total_counts = dict(sorted(((word, sum(counts.values())) for word, counts in profanity_counts.items()),
                                         key=operator.itemgetter(1), reverse=True))
    profanity_total_counts_items = list(profanity_total_counts.items())
    print(f'\n{profanity_total_counts_items[0][0]} has the potty mouth with {profanity_total_counts_items[0][1]} profane word(s) said whereas '
          f"{', '.join(map(functools.partial('{0[0]} has said {0[1]} profane word(s)'.format), profanity_total_counts_items[1:]))}.")
    for user in sorted(users, key=lambda x: profanity_total_counts.get(x, 0)):
        user_profanity_counts = list(sorted(profanity_counts[user].items(), key=operator.itemgetter(1), reverse=True))
        if user_profanity_counts:
            print(f"{user}'s profanity of choice seems to be {repr(user_profanity_counts[0][0])} ({user_profanity_counts[0][1]} occurences), they're also "
                  f"fans of {(', '.join(f'{repr(word)} ({count})' for word, count in user_profanity_counts[1:])) or 'apparently not much else'}.")
        else:
            print(f"{user} hasn't been the slightest bit profane.")
    most_eventful_day = tuple(max(((day, functools.reduce(lambda s, a: s + a[1], messages.items(), 0)) for day, messages in days_messages.items()),
                                  key=operator.itemgetter(1)))
    print("\nJust in case you're curious, the most eventful day was {}, when {:,} messages were sent :D".format(*most_eventful_day))
    # Longest run of consecutive calendar days with at least one message.
    longest_seq: typing.List[datetime.date] = []
    current_seq: typing.List[datetime.date] = []
    for day in sorted(list(days_spoken)):
        if len(current_seq) > len(longest_seq):
            longest_seq = copy.copy(current_seq)
        if current_seq and current_seq[-1] + datetime.timedelta(days=1) == day:
            current_seq.append(day)
        else:
            current_seq = [day]
    if len(current_seq) > len(longest_seq):
        longest_seq = copy.copy(current_seq)
    print(f'The longest streak of days with at least one message lasted {len(longest_seq)} days, from {longest_seq[0]} to {longest_seq[-1]}!')
    if current_seq and datetime.datetime.now().date() - current_seq[-1] <= datetime.timedelta(days=1):
        print(f'On the other hand, the current streak is {len(current_seq)} days, from {current_seq[0]} to {current_seq[-1]}.')
    else:
        print(f"On the other hand, the current streak is 0 days, you haven't conversed since {current_seq[-1]} :(")
    print(f"\nNow, on to stickers. There were an average of {total_counts['sticker'] / len(days_spoken):.2f} stickers used on days with conversation!")
    for user in users:
        print(f"Out of {user}'s {message_counts[user]['sticker']:,} stickers, the five most used were: " +
              ', '.join(list(map(functools.partial('{0[0]} ({0[1]})'.format), sticker_counts[user].most_common(5)))))
    # streak length -> occurrence count, per user.
    message_streaks_per_user = {user: collections.Counter(messages for messages_user, messages in message_streaks if messages_user == user)
                                for user in users}
    if len(users) == 2 and len(message_streaks) > 1:
        print('\nSince there are only two people in this conversation, we can do some more calculations!')
        # The first two streaks necessarily belong to the two distinct users.
        [[user1, *_], [user2, *_], *_] = message_streaks
        sum1, num1, sum2, num2 = 0.0, 0, 0.0, 0
        last_message_streak: typing.Tuple[typing.Optional[str], int] = (None, 0)
        for message_streak in message_streaks:
            if last_message_streak[0] == user1 and message_streak[0] == user2:
                sum1 += message_streak[1] / last_message_streak[1]
                num1 += 1
            elif last_message_streak[0] == user2 and message_streak[0] == user1:
                sum2 += message_streak[1] / last_message_streak[1]
                num2 += 1
            last_message_streak = message_streak
        # Weighted average streak length per user (numpy: stdlib has no weighted mean).
        response_time_averages = [numpy.average(list(message_streaks_per_user[user].keys()),
                                                weights=list(message_streaks_per_user[user].values())) for user in users]
        print(f'{user1} sends {response_time_averages[0]:.2f} consecutive message on average and for each message, '
              f'{user2} responds with {sum1 / num1:.2f} messages on average.')
        print(f'On the other hand, {user2} sends {response_time_averages[1]:.2f} consecutive message on average and '
              f'for each message, {user1} responds with {sum2 / num2:.2f} messages on average.')
        print(f'When {user1} sends a message, {user2} tends to respond in {statistics.median(response_times[user2]):.1f} seconds (median response time).')
        print(f'On the other hand, when {user2} sends a message, {user1} tends to respond in {statistics.median(response_times[user1]):.1f} '
              f'seconds (median response time).')
    print(f'\nThe data compilation took {datetime.datetime.now() - start_time} seconds.')
    colors = ['b', 'r', 'g', 'c']
    if plot_message_count or plot_cumulative_message_count:
        days_messages_list = sorted(days_messages.items(), key=operator.itemgetter(0))
        fig = plot.figure()
        subplot_count = len(list(filter(operator.truth, [plot_message_count, plot_cumulative_message_count])))
        if plot_message_count:
            ax1 = fig.add_subplot(subplot_count, 1, 1)
            plot.xlabel('Date')
            plot.ylabel('Quantity')
            plot.title('Number of Messages')
            plots1 = []
        if plot_cumulative_message_count:
            ax2 = fig.add_subplot(subplot_count, 1, 2 if plot_message_count else 1)
            plot.xlabel('Date')
            plot.ylabel('Quantity')
            plot.title('Number of Messages over Time')
            plots2 = []
        for i, user in enumerate(users):
            # Fill in zero-message days so the lines are continuous over the span.
            user_messages = [(day, messages[user]) for day, messages in filter(lambda x: user in x[1], days_messages_list)]
            user_days = list(map(operator.itemgetter(0), user_messages))
            for day in filter(lambda x: x not in user_days, all_days):
                user_messages.append((day, 0))
            user_messages = sorted(user_messages, key=operator.itemgetter(0))
            if limit_plot_to_streak:
                user_messages = list(filter(lambda x: x[0] >= longest_seq[0] and x[0] <= longest_seq[-1], user_messages))
            if plot_message_count:
                plt, = ax1.plot(*zip(*user_messages), f'.{colors[i % len(colors)]}-', label=user)
                ax1.set_ylim(bottom=0)  # TODO: the top gets cut off sometimes
                plots1.append(plt)
            if plot_cumulative_message_count:
                cumulative_user_messages = list(itertools.accumulate(user_messages, func=lambda x, y: (y[0], x[1] + y[1])))
                plt, = ax2.plot(*zip(*cumulative_user_messages), f'.{colors[i % len(colors)]}-', label=user + ' (cumulative)')
                ax2.set_ylim(bottom=0)  # TODO: the top gets cut off sometimes
                plots2.append(plt)
        if plot_message_count:
            ax1.legend(handles=plots1)
        if plot_cumulative_message_count:
            ax2.legend(handles=plots2, loc='lower right')
        plot.show()
    if word_clouds:
        # Stopword lists accumulated over time; duplicates are harmless in a set.
        wordcloud.STOPWORDS.update(['T', 't', 'P', ':P', 'im', 'p', 'http', 'https', 'd', 'o'])
        wordcloud.STOPWORDS.update(['u', 'ur', 'i'])
        wordcloud.STOPWORDS.update(['T', 't', 'P', ':P', 'lol', 'LOL', 'yeah', 'okay', 'oh', 'im', 'p', 'http', 'https', 'd', 'o', 'want', 'go', 'png', 'skc'])  # noqa: E501
        wordcloud.STOPWORDS.update(['dont', 'hes', 'whens', 'weve', 'hed', 'theres', 'havent', 'theyll', 'whos', 'theyd', 'youve', 'well', 'theyve', 'wont', 'mustnt', 'isnt', 'ill', 'whys', 'youd', 'wasnt', 'shouldnt', 'youre', 'arent', 'id', 'werent', 'im', 'cant', 'hadnt', 'couldnt', 'doesnt', 'hows', 'its', 'wheres', 'ive', 'didnt', 'whats', 'heres', 'theyre', 'hasnt', 'wouldnt', 'wed', 'shant', 'lets', 'hell', 'shed', 'youll', 'were', 'shes', 'thats'])  # noqa: E501
        wordcloud.STOPWORDS.update(['think', 'make', 'one', 'wait', 'people', 'yea', 'ok', 'haha', 'guy', 'right', 'good', 'know', 'also', 'guys', 'mean', 'also'])  # noqa: E501
        fig = plot.figure()
        # Look for a nicer font (Futura) on Windows/macOS; otherwise use the default.
        if sys.platform.startswith('win') or sys.platform.startswith('cygwin'):
            font_dir = pathlib.Path(os.environ['WINDIR']) / 'Fonts'
        elif sys.platform.startswith('darwin'):
            font_dir = pathlib.Path('/Library/Fonts')
        else:
            font_dir = None
        font_path = None
        if font_dir:
            futura_file = next(font_dir.glob('Futura*'), None)  # TODO: replace with :=
            if futura_file:
                font_path = str(font_dir / futura_file)
        word_cloud_kwargs = {'background_color': 'white', 'max_words': 500, 'stopwords': wordcloud.STOPWORDS, 'font_path': font_path}
        if len(users) > 10:
            # Too many participants for per-user clouds: one for everyone, one for "me".
            all_content = '\n'.join(message_content.values())
            wc = wordcloud.WordCloud(width=1000, height=1500, **word_cloud_kwargs)
            wc.generate(all_content)
            f = fig.add_subplot(1, 2, 1)
            f.axes.get_xaxis().set_visible(False)
            f.axes.get_yaxis().set_visible(False)
            f.set_title('Everyone')
            plot.imshow(wc)
            if session.id:
                wc = wordcloud.WordCloud(width=1000, height=1500, **word_cloud_kwargs)
                wc.generate(message_content[id_to_user(session.id)])
                f = fig.add_subplot(1, 2, 2)
                f.axes.get_xaxis().set_visible(False)
                f.axes.get_yaxis().set_visible(False)
                f.set_title('Me')
                plot.imshow(wc)
        else:
            # One cloud per participant, laid out in a rough grid of up to 5 columns.
            rows = (len(users) // 5) + 1
            for i, user in enumerate(users):
                wc = wordcloud.WordCloud(width=1000, height=1000, **word_cloud_kwargs)
                wc.generate(message_content[user])
                f = fig.add_subplot(rows, math.ceil(len(users) / rows), i + 1)
                f.axes.get_xaxis().set_visible(False)
                f.axes.get_yaxis().set_visible(False)
                f.set_title(user)
                plot.imshow(wc)
        plot.axis('off')
        plot.show()
def all_messages_stats(cursor: psycopg2.extensions.cursor, plot_message_count: bool = False) -> None:
    # Prints corpus-wide statistics across every archived thread, then renders
    # termgraph charts per conversation and per author; optionally plots daily
    # message counts with matplotlib. Purely presentational; returns None.
    cursor.execute("SELECT thread_id, info->>'timestamp_precise', info->'message_sender'->>'id' FROM messages")
    messages = [{'id': thread_id, 'timestamp': timestamp, 'author': author} for [thread_id, timestamp, author] in cursor.fetchall()]
    oldest, newest = datetime.datetime.max, datetime.datetime.min
    day_message_user_counts: typing.DefaultDict[datetime.date, typing.DefaultDict[str, int]] = \
        collections.defaultdict(lambda: collections.defaultdict(int))
    for message in messages:
        # Timestamps are stored as millisecond strings in the JSON.
        date = datetime.datetime.fromtimestamp(int(message['timestamp']) / 1e3)
        oldest = min(oldest, date)
        newest = max(newest, date)
        user = id_to_user(message['author'])
        day_message_user_counts[date.date()][user] += 1
    days_message_counts = {day: sum(counts.values()) for day, counts in day_message_user_counts.items()}
    missed_days = set(all_days_span(oldest, newest)) - set(days_message_counts.keys())
    day_span_length = (newest - oldest).days
    print(f'You have a total of {len(messages)} messages spanning {oldest} through {newest} ({day_span_length} days)!')
    print(f'That makes an average of {len(messages) / day_span_length:.02f} messages per day unless you account for the {len(missed_days)} '
          f'days without conversation, which makes it {len(messages) / (day_span_length - len(missed_days)):.02f} per day.')
    last_missed_day = sorted(missed_days)[-1]
    most_eventful_day, most_eventful_day_num_messages = max(days_message_counts.items(), key=operator.itemgetter(1))
    print(f'The last day without messages was {last_missed_day}, {(datetime.datetime.now().date() - last_missed_day).days} days ago, and '
          f'the most eventful day was {most_eventful_day} with {most_eventful_day_num_messages} messages.')
    print('\nConversations:\n')
    cursor.execute("SELECT DISTINCT thread_id, info->'message_sender'->>'id' FROM messages")
    conversation_authors: typing.DefaultDict[str, typing.Set[str]] = collections.defaultdict(set)
    for [thread_id, author] in cursor.fetchall():
        conversation_authors[thread_id].add(author)
    # Fall back to the most prolific author overall when no session is logged in.
    user = id_to_user(session.id if session.id else collections.Counter(itertools.chain.from_iterable(conversation_authors.values())).most_common(1)[0][0])
    cursor.execute('SELECT thread_id, COUNT(*) FROM messages GROUP BY thread_id')
    message_nums = list(sorted(((', '.join(sorted(map(id_to_user, conversation_authors[thread_id]), key=lambda x: (thread_id != user, thread_id))), count)
                                for thread_id, count in cursor.fetchall()), key=operator.itemgetter(1)))
    termgraph(message_nums)
    print('\nMessage authors:\n')
    cursor.execute("SELECT info->'message_sender'->>'id', COUNT(*) FROM messages GROUP BY info->'message_sender'->>'id'")
    # Unknown (anonymized) authors are excluded from the per-author chart.
    message_nums = list(sorted(filter(lambda x: not x[0].startswith('Unknown_'), ((id_to_user(user), message) for user, message in cursor.fetchall())),
                               key=operator.itemgetter(1)))
    termgraph(message_nums)
    if plot_message_count:
        # Include zero-count days so the plot is continuous over the whole span.
        days_message_counts.update({day: 0 for day in missed_days})
        days_message_counts_list = sorted(days_message_counts.items(), key=operator.itemgetter(0))
        plt, = plot.plot(list(map(operator.itemgetter(0), days_message_counts_list)),
                         list(map(operator.itemgetter(1), days_message_counts_list)), '.b-', label='All')
        plot.xlabel('Date')
        plot.ylabel('Number of Messages')
        plot.title('Number of Messages over time')
        plot.legend(handles=[plt])
        plot.show()
def update_conversation(
    cursor: psycopg2.extensions.cursor,
    conversation_id: typing.Union[str, int],
    group: bool = False,
    limit: int = 500,
    save: bool = True,
) -> typing.Tuple[int, int]:
    """Fetch new messages for one conversation and optionally persist them.

    Returns a tuple of (total known message count, newly fetched message count).
    With no cursor (bare mode) nothing is read from or written to the database;
    results are pretty-printed to STDOUT instead.
    """
    suffix = '.' if group else f' ({id_to_user(conversation_id)}).'
    logging.info(f'Updating messages in conversation {conversation_id}' + suffix)
    existing_timestamps: typing.List[typing.Dict[str, int]] = []
    if cursor:
        logging.debug('Fetching existing messages in database.')
        cursor.execute("SELECT info->>'timestamp_precise' FROM messages "
                       "WHERE thread_id = %s ORDER BY info->>'timestamp_precise'", (str(conversation_id), ))
        existing_timestamps = [{'timestamp_precise': int(timestamp)} for [timestamp] in cursor.fetchall()]
        logging.info(f'{len(existing_timestamps)} messages currently saved in database.')
    if existing_timestamps:
        thread_info, new_messages = get_new_messages(conversation_id, old_messages=existing_timestamps, limit=limit, group=group)
    else:
        # No local history yet: fetch a much deeper initial backlog.
        thread_info, new_messages = get_new_messages(conversation_id, group=group, limit=limit * 10)
    if save:
        if new_messages:
            insert_messages(cursor, new_messages, thread_info)
            logging.info(f'Inserted {len(new_messages)} messages in database.')
        else:
            logging.info('No new messages to insert in database.')
    if cursor is None:
        # Bare mode: dump the fetched data to STDOUT instead of the database.
        pprint.pprint(
            {
                'thread_info': thread_info,
                'messages': new_messages,
            },
            width=shutil.get_terminal_size((80, 20)).columns,
            compact=True,
        )
    return len(new_messages) + len(existing_timestamps), len(new_messages)
def update_conversations(
    cursor: psycopg2.extensions.cursor,
    conversation_ids: typing.List[str],
    limit: int = 500,
    save: bool = True,
) -> typing.Tuple[int, int]:
    """Update each listed conversation, graph their sizes, and return totals.

    Returns (number of newly added messages, total messages across the list).
    """
    totals: typing.Dict[str, int] = {}
    added = 0
    for cid in conversation_ids:
        total, new = update_conversation(cursor, cid, limit=limit, save=save)
        totals[cid] = total
        added += new
    # Bar-graph the per-conversation totals, smallest first.
    graph_data = [(id_to_user(cid), count) for cid, count in sorted(totals.items(), key=operator.itemgetter(1))]
    termgraph(graph_data)
    return added, sum(count for _, count in graph_data)
def update_all_individual_conversations(cursor: psycopg2.extensions.cursor, limit: int = 500, save: bool = True) -> None:
    """Update every known individual (non-group) conversation and log the totals."""
    inserted, total = update_conversations(cursor, list(id_to_user_map.keys()), limit=limit, save=save)
    logging.info(f'Inserted {inserted} new messages in all individual conversations for a total of {total} messages.')
def print_messages(messages: typing.Sequence[typing.Union[str, typing.Dict[str, typing.Any]]], interactive: bool = False) -> None:
    """Pretty-print messages with author/timestamp headers, paginated to the terminal.

    Plain strings in `messages` (e.g. grep separators) are wrapped and printed as-is.
    """
    term = shutil.get_terminal_size((80, 20))
    # Right-justify all author names to the widest one for aligned columns.
    author_width = max((len(id_to_user(m['author'])) for m in messages if not isinstance(m, str)), default=0)

    def wrap(body: str, header: str) -> typing.List[str]:
        # First line carries the header; continuation lines are padded to align under it.
        pad = ' ' * len(header)
        head_wrapper = textwrap.TextWrapper(initial_indent=header, subsequent_indent=pad, width=term.columns)
        tail_wrapper = textwrap.TextWrapper(initial_indent=pad, subsequent_indent=pad, width=term.columns)
        body_lines = body.splitlines()
        wrapped = head_wrapper.wrap(body_lines[0])
        for line in body_lines[1:]:
            wrapped.extend(tail_wrapper.wrap(line))
        return wrapped

    lines_shown = 0
    for message in messages:
        if isinstance(message, str):
            rendered = wrap(message, '')
        else:
            if message['body']:
                text = message['body']
            elif message['sticker']:
                text = f"[STICKER: {message['sticker']['url']}]"
            else:
                # Fall back to the raw dict so nothing is silently dropped.
                text = str(message)
            # Timestamps are in milliseconds since the epoch.
            timestamp = datetime.datetime.fromtimestamp(message['timestamp'] / 1e3).strftime('%m-%d-%Y %H:%M:%S')
            author = id_to_user(message['author'])
            rendered = wrap(text, f'{timestamp} {author.rjust(author_width)}: ')
        if lines_shown + len(rendered) > (term.lines - 2):
            if input('\nPress enter to continue (q to quit)... ').lower() in {'q', 'quit', 'exit', 'no', 'n'}:
                return
            if not interactive:
                os.system('cls' if os.name == 'nt' else 'clear')
            lines_shown = 0
        print('\n'.join(rendered))
        lines_shown += len(rendered)
def search_conversation(
    cursor: psycopg2.extensions.cursor,
    conversation_id: str,
    query: str,
    latest_first: bool = False,
    author_filter: typing.Optional[typing.Union[str, int]] = None,
    regular_expression: bool = False,
    case_sensitive: bool = False,
    interactive: bool = False,
) -> None:
    """Search a conversation, show match/author histograms, then print the matches.

    `query` is treated as a literal string unless `regular_expression` is True.
    `author_filter` restricts the printed messages to a single author;
    `latest_first` reverses the display order.
    """
    messages = get_messages(cursor, conversation_id, query=query, regular_expression=regular_expression, case_sensitive=case_sensitive)
    print(f'{len(messages)} message results found for search query {repr(query)}.\n')
    if messages:
        # BUG FIX: the original compiled the raw query even for literal searches,
        # which raises re.error (e.g. a query containing '(' or '?') or miscounts
        # when the query contains regex metacharacters. Escape unless the caller
        # explicitly asked for a regular expression.
        pattern = query if regular_expression else re.escape(query)
        regex = re.compile(pattern, flags=(0 if case_sensitive else re.IGNORECASE))
        instance_counts = collections.Counter(itertools.chain.from_iterable(map(repr, regex.findall(msg['body'])) for msg in messages)).most_common()
        termgraph(list(instance_counts))
        print('\n')
        author_counts = collections.Counter(id_to_user(msg['author']) for msg in messages).items()
        termgraph(list(author_counts))
        if input('\nPress enter to continue (q to quit)... ').lower() in {'q', 'quit', 'exit', 'no', 'n'}:
            return
        if not interactive:
            os.system('cls' if os.name == 'nt' else 'clear')
        if latest_first:
            messages = list(reversed(messages))
        if author_filter:
            author_user = id_to_user(author_filter)
            messages = list(filter(lambda x: id_to_user(x['author']) == author_user, messages))
        print_messages(messages, interactive=interactive)
def grep_conversation(
    cursor: psycopg2.extensions.cursor,
    conversation_id: str,
    args: typing.List[str],
    latest_first: bool = False,
    interactive: bool = False,
) -> None:
    """Filter a conversation's message bodies through the system `grep` and print matches.

    `args` are passed straight to grep (e.g. ['-iE', 'hel+o']). Lines grep emits
    that aren't numbered matches (e.g. '--' context separators) are printed verbatim.
    """
    grep_output_re = re.compile(r'(?P<line_num>\d+)[:-](?P<message>.*)')
    messages = list(filter(operator.itemgetter('body'), get_messages(cursor, conversation_id)))
    # grep is line-oriented but messages may contain newlines: replace them with
    # a random sentinel before grepping and restore afterwards.
    newline_sentinel = secrets.token_hex()

    def process_grep_line(grep_output: str) -> typing.Union[str, typing.Dict[str, typing.Any]]:
        # Map a 'NN:matched text' grep line back onto its message dict (grep line
        # numbers are 1-based); pass anything else through unchanged.
        message_line_matches = grep_output_re.match(grep_output)
        if message_line_matches:
            line_num_string, message_body = message_line_matches.groups()
            message = messages[int(line_num_string) - 1]
            return dict(message, **{'body': message_body.replace(newline_sentinel, '\n')})
        return grep_output

    grep_input = '\n'.join(str(message['body']).replace('\n', newline_sentinel) for message in messages)
    proc = subprocess.run(['grep', '-n', '--color=always'] + args, universal_newlines=True, stdout=subprocess.PIPE, check=True, input=grep_input)
    filtered_messages = list(map(process_grep_line, proc.stdout.splitlines()))
    # BUG FIX: latest_first was accepted (and passed by main() and the Shell) but
    # never applied; honor it the same way search_conversation does.
    if latest_first:
        filtered_messages = list(reversed(filtered_messages))
    print_messages(filtered_messages, interactive=interactive)
def login(password: typing.Optional[str] = None) -> Session:
    """Log into Facebook with the session username/password and return a Session.

    Prompts interactively for the password when one isn't supplied. Exits the
    process with status 1 if the post-login page doesn't yield a DTSG token.
    """
    # TODO: determine whether this still works (and/or support 2FA)
    if not password:
        password = getpass.getpass(f'Password ({session.username}): ')
    cookie_jar = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookie_jar))
    opener.addheaders = [('User-agent', user_agent)]
    opener.open('https://m.facebook.com/login.php')  # prime initial cookies before grabbing the form
    login_page = lxml.html.parse(opener.open('https://m.facebook.com/login.php'))
    login_form = dict(login_page.forms[0].fields)
    login_form.update({
        'email': session.username,
        'pass': password,
    })
    # BUG FIX: an HTML form must be submitted as application/x-www-form-urlencoded
    # (urllib's default for bytes data); the original posted a JSON blob, which
    # the form endpoint does not parse as form fields.
    opener.open('https://m.facebook.com/login.php?refsrc=https%3A%2F%2Fm.facebook.com%2Flogin.php&lwv=100&refid=9',
                data=urllib.parse.urlencode(login_form).encode())
    dtsg = re.findall(r'\["DTSGInitialData",\[\],\{"token":"(.*?)"}', opener.open('https://www.facebook.com/').read().decode())
    if not dtsg:
        logging.critical('Login failed.')
        sys.exit(1)
    else:
        [dtsg] = dtsg
    logging.info(f'Login successful (dtsg: {dtsg}).\n')
    cookies = {cookie.name: cookie.value for cookie in cookie_jar}
    return Session(
        username=session.username,
        id=cookies['c_user'],
        opener=opener,
        dtsg=dtsg,
    )
def browser_login(username: str, timeout_minutes: int = 2) -> typing.Tuple[str, typing.Dict[str, str]]:
    """Log into messenger.com via a Selenium-driven Chrome and harvest credentials.

    Pre-fills the username, focuses the password field, then waits up to
    `timeout_minutes` for the user to complete the login (including any
    'checkpoint' 2FA flow). Returns the DTSG token and the session cookies.
    """
    with contextlib.closing(webdriver.Chrome()) as driver:
        driver.get('https://www.messenger.com/')
        action_chain = ActionChains(driver)
        action_chain.send_keys_to_element(driver.find_element_by_name('email'), username)
        action_chain.move_to_element(driver.find_element_by_name('pass')).click()
        action_chain.perform()
        # GENERALIZED: the wait was hard-coded to 2 minutes even though the CLI
        # defines --browser-timeout; it is now a parameter (default unchanged, so
        # existing callers behave identically).
        dtsg = WebDriverWait(driver, timeout_minutes * 60).until(
            lambda driver:
                driver and 'checkpoint' not in driver.current_url and
                re.findall(r'\["DTSGInitialData",\[\],\{"token":"(.*?)"}', driver.page_source),
        )[0]
        cookies = {cookie['name']: cookie['value'] for cookie in driver.get_cookies()}
        return dtsg, cookies
def valid_conversation(id_or_name: typing.Union[str, int]) -> str:
    """argparse type: resolve a Facebook ID or a known user name to a conversation ID."""
    as_string = str(id_or_name)
    # Accept anything that maps to a known user, or any known group ID.
    if not id_to_user(id_or_name).startswith('Unknown') or as_string in map(str, groups):
        return as_string.replace('fbid:', '')
    # Otherwise try treating the argument as a user name.
    resolved = user_to_id(as_string)
    if resolved is None:
        raise argparse.ArgumentTypeError(f'{id_or_name} is not a valid Facebook ID or recognized name')
    return resolved
def valid_path(path: str) -> str:
    """argparse type: accept `path` only if its directory is writable (for DB dumps)."""
    directory = os.path.dirname(path) or '.'
    if not os.access(directory, os.W_OK):
        raise argparse.ArgumentTypeError(f'{path} is not a valid path for dumping the message database')
    return path
def valid_cookies(cookie_string: str) -> typing.Dict[str, str]:
    """argparse type: parse a 'name=value; name2=value2' cookie header string.

    Raises argparse.ArgumentTypeError when the string cannot be parsed
    (e.g. a pair without an '=').
    """
    try:
        cookie_string = cookie_string.strip(';')
        # BUG FIX: split on the FIRST '=' only (maxsplit=1, not 2) so cookie
        # values that themselves contain '=' — common with base64 padding in
        # Facebook's 'xs' cookie — parse instead of erroring out.
        return dict(
            typing.cast(typing.Tuple[str, str], tuple(pair.strip().split('=', 1)))
            for pair in cookie_string.split(';')
        )
    except Exception:
        raise argparse.ArgumentTypeError(f'{cookie_string} is an invalid cookie string')
class Shell(cmd.Cmd):
    """Interactive REPL over the message archive: update, search, grep, dump/load, stats."""

    intro = 'Welcome to the shell. Type help or ? to list commands.\n'
    prompt = '\033[92m(shell)\033[0m '

    # TODO: swallow errors and don't die (functools.wrap?)

    def __init__(
        self,
        cursor: psycopg2.extensions.cursor,
        conn: psycopg2.extensions.connection,
        args: argparse.Namespace,
        save: bool,
    ) -> None:
        super().__init__()
        self.cursor = cursor
        self.conn = conn
        self.args = args  # the parsed CLI namespace; provides limits/flags below
        self.save = save  # commit after each command when True

    def do_update(self, arg: str) -> None:
        """Update conversation(s) (all without argument): update Joe"""
        group_limit = self.args.limit * self.args.group_limit_multiplier
        if arg:
            for conversation in map(valid_conversation, shlex.split(arg)):
                if conversation in groups:
                    update_conversation(self.cursor, conversation, group=True, limit=group_limit, save=self.save)
                else:
                    # BUG FIX: these two calls previously read the module-level
                    # `args` global (which only exists under the __main__ guard)
                    # instead of self.args.
                    update_conversation(self.cursor, conversation, limit=self.args.limit, save=self.save)
        else:
            update_all_individual_conversations(self.cursor, limit=self.args.limit, save=self.save)
            for group in groups:
                update_conversation(self.cursor, group, group=True, limit=group_limit, save=self.save)

    def complete_update(self, text: str, line: str, begidx: int, endidx: int) -> typing.List[str]:
        # Complete against known user names plus group IDs.
        return list(filter(lambda x: x.startswith(text), list(id_to_user_map.values()) + list(map(str, groups))))

    def do_search(self, arg: str) -> None:
        """Search a conversation: search Joe term"""
        user, query = shlex.split(arg)
        search_conversation(
            self.cursor,
            valid_conversation(user),
            query,
            latest_first=self.args.latest_first,
            author_filter=self.args.author,
            regular_expression=self.args.regular_expression,
            case_sensitive=self.args.case_sensitive,
            interactive=True,
        )

    def complete_search(self, text: str, line: str, begidx: int, endidx: int) -> typing.List[str]:
        # Only the first argument (the conversation) is completable.
        return self.complete_update(text, line, begidx, endidx) if line.count(' ') == 1 else []

    def do_grep(self, arg: str) -> None:
        """Grep a conversation: grep Joe -iE hel+o"""
        user, *grep_args = shlex.split(arg)
        grep_conversation(
            self.cursor,
            valid_conversation(user),
            grep_args,
            latest_first=self.args.latest_first,
            interactive=True,
        )

    def do_dump(self, arg: str) -> None:
        """Dump the database: dump"""
        dump_db(self.args.dump_db or arg, self.args.db_name)

    def do_load(self, arg: str) -> None:
        """Load compressed database: load filename"""
        load_db(self.args.load_db or arg, self.args.db_name)

    def complete_load(self, text: str, line: str, begidx: int, endidx: int) -> typing.List[str]:
        # Filename completion for the dump to load.
        return glob.glob(text + '*')

    def do_statistics(self, arg: str) -> None:
        """Show statistics for conversations (all without an argument): statistics Joe Jane"""
        conversations = shlex.split(arg)
        if len(conversations):
            for conversation in conversations:
                messages_stats(
                    get_messages(self.cursor, valid_conversation(conversation)),
                    plot_message_count=getattr(self.args, 'plot_message_count', None),
                    plot_cumulative_message_count=getattr(self.args, 'plot_cumulative_message_count', None),
                    word_clouds=getattr(self.args, 'word_clouds', None),
                    limit_plot_to_streak=getattr(self.args, 'limit_plot_to_streak', None),
                )
        else:
            all_messages_stats(self.cursor, plot_message_count=getattr(self.args, 'plot_message_count', None))

    def complete_statistics(self, text: str, line: str, begidx: int, endidx: int) -> typing.List[str]:
        return self.complete_update(text, line, begidx, endidx)

    def do_exit(self, arg: str) -> None:
        """Exits the shell: exit"""

    def postcmd(self, stop: bool, line: str) -> bool:
        # Commit after every command so progress isn't lost; 'exit' ends the loop.
        if self.save:
            self.conn.commit()
        return line == 'exit'
def main(args: argparse.Namespace) -> None:
    """Entry point: configure logging, authenticate with Facebook, connect to
    Postgres (unless --bare), then dispatch to the mode chosen on the command line.
    """
    levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(
        format='[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s',
        level=levels[min(len(levels) - 1, args.verbose)],  # -v count selects the level
    )
    if args.init_db:
        init_db()
        # logging.info returns None; the return just short-circuits main.
        return logging.info('Database initialization complete.')
    global session  # module-global so helpers (login, downloads) can read credentials
    session = Session(username=args.username, opener=None, id=None, dtsg=None)
    if getattr(args, 'browser', None):  # attribute only exists when selenium is installed
        try:
            args.dtsg, args.cookies = browser_login(args.username)
            logging.info(f'Login successful (dtsg: {args.dtsg}, '
                         f"cookies: {';'.join(map(functools.partial('{0[0]}={0[1]}'.format), args.cookies.items()))}).\n")
        except Exception as e:
            return logging.error(f'Login failed: {repr(e)}.')
    if args.dtsg and args.cookies:
        # Manual (or browser-harvested) credentials: rebuild a cookie jar and opener.
        cookie_jar = http.cookiejar.CookieJar()
        cookies = [
            http.cookiejar.Cookie(  # type: ignore # since Cookie types are broken
                0, name, value, None, False, '', False, False, '',
                False, False, None, True, None, None, {'HttpOnly': None},
            ) for (name, value) in args.cookies.items()
        ]
        for cookie in cookies:
            cookie_jar.set_cookie(cookie)
        opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookie_jar))
        opener.addheaders = [('User-agent', user_agent)]
        session = Session(
            username=args.username,
            opener=opener,
            id=args.cookies['c_user'],
            dtsg=args.dtsg,
        )
    elif args.password:
        session = login(password=args.password)
    if not args.bare:
        global postgres_connection_string
        db_password = getpass.getpass(f'Postgres password ({args.db_username}): ') if args.db_password is None else args.db_password  # can be empty
        postgres_connection_string = "dbname='%s' user='%s' host='%s' password='%s'" % (args.db_name, args.db_username, args.db_host, db_password)
    if args.bare:
        conn, cursor = None, None  # bare mode: helpers print to STDOUT instead of the DB
    else:
        conn = psycopg2.connect(postgres_connection_string)
        cursor = conn.cursor()
    save = not args.dry_run
    group_limit = args.limit * args.group_limit_multiplier  # groups get a deeper download limit
    # Exactly one of the mutually-exclusive modes below is set (argparse enforces it).
    if args.interactive:
        Shell(cursor, conn, args, save).cmdloop()
    elif args.update:
        for conversation in args.update:
            if conversation in groups:
                update_conversation(cursor, conversation, group=True, limit=group_limit, save=save)
            else:
                update_conversation(cursor, conversation, limit=args.limit, save=save)
    elif args.update_all or args.update_individuals or args.update_groups:
        if args.update_all or args.update_individuals:
            update_all_individual_conversations(cursor, limit=args.limit, save=save)
        if args.update_all or args.update_groups:
            for group in groups:
                update_conversation(cursor, group, group=True, limit=group_limit, save=save)
    elif args.search:
        search_conversation(
            cursor,
            args.search[0],
            args.search[1],
            latest_first=args.latest_first,
            author_filter=args.author,
            regular_expression=args.regular_expression,
            case_sensitive=args.case_sensitive,
        )
    elif args.grep:
        grep_conversation(
            cursor,
            args.grep[0],
            shlex.split(args.grep[1]),
            latest_first=args.latest_first,
        )
    elif args.dump_db:
        dump_db(args.dump_db, args.db_name)
    elif args.load_db:
        load_db(args.load_db, args.db_name)
    elif args.statistics is not None:
        # -t with no names means statistics over every conversation.
        if len(args.statistics):
            for conversation in args.statistics:
                messages_stats(
                    get_messages(cursor, conversation),
                    plot_message_count=getattr(args, 'plot_message_count', None),
                    plot_cumulative_message_count=getattr(args, 'plot_cumulative_message_count', None),
                    word_clouds=getattr(args, 'word_clouds', None),
                    limit_plot_to_streak=getattr(args, 'limit_plot_to_streak', None),
                )
        else:
            all_messages_stats(cursor, plot_message_count=getattr(args, 'plot_message_count', None))
    if save and conn:
        conn.commit()
    if conn:
        conn.close()
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Download, archive, analyze and plot Facebook Messenger conversations (individual and group)',
        # The epilog notes which optional features are disabled by missing packages.
        epilog=', '.join(typing.cast(typing.List[str], filter(None, [
            'selenium not installed, browser authentication disabled' if not webdriver else None,
            'matplotlib not installed, message plotting disabled' if not matplotlib else None,
            'wordcloud not installed, word clouds disabled' if not wordcloud else None,
        ]))),
    )
    parser.add_argument('--verbose', '-v', action='count', help='Add verbosity (maximum -vv, default -v)', default=1)
    mode_named_group = parser.add_argument_group('mode')
    mode_group = mode_named_group.add_mutually_exclusive_group(required=True)
    mode_group.add_argument('-i', '--interactive', action='store_true', help='Interactive mode')
    mode_group.add_argument('-k', '--update', type=valid_conversation, nargs='+', metavar='ID/NAME', help='Update a conversation')
    mode_group.add_argument('-a', '--update-all', action='store_true', help='Update all conversations')
    mode_group.add_argument('-n', '--update-individuals', action='store_true', help='Update all individual conversations')
    mode_group.add_argument('-g', '--update-groups', action='store_true', help='Update all group conversations')
    if psycopg2_installed:  # database-backed modes are only offered with psycopg2
        mode_group.add_argument('-s', '--search', metavar=('ID/NAME', 'QUERY'), nargs=2, help='Search a conversation')
        mode_group.add_argument('-gr', '--grep', metavar=('ID/NAME', 'ARGS'), nargs=2, help='Grep a conversation')
        mode_group.add_argument('-t', '--statistics', type=valid_conversation, nargs='*', metavar='ID/NAME',
                                help='Display conversation(s) statistics (all conversations void of an argument)')
        mode_group.add_argument('--init-db', action='store_true', help='Initialize Postgres database')
        mode_group.add_argument('--dump-db', metavar='PATH', type=valid_path, help='Dump compressed Postgres database here')
        mode_group.add_argument('--load-db', metavar='PATH', type=open, help='Load compressed Postgres database from here')
    datebase_group = parser.add_argument_group('database', 'Postgres connection options')
    datebase_group.add_argument('-du', '--db-username', metavar='USERNAME', default=getpass.getuser(), help='Postgres username')
    datebase_group.add_argument('-dn', '--db-name', metavar='DATABASE', default='fb_messages', help='Postgres database name')
    datebase_group.add_argument('-dh', '--db-host', metavar='HOST', default='localhost', help='Postgres host URL')
    datebase_group.add_argument('-dp', '--db-password', metavar='PASSWORD', default=os.environ.get('POSTGRES_PASSWORD'),
                                help='Postgres password (defaults to environment variable POSTGRES_PASSWORD; interactive otherwise)')
    auth_group = parser.add_argument_group('authentication', 'Conversation authentication options')
    auth_group.add_argument('-u', '--username', default='sushain97', help='Facebook account username')
    auth_group.add_argument('-p', '--password', help='Facebook account password')
    if webdriver:
        auth_group.add_argument('--browser', action='store_true', default=False, help='Facebook browser authentication')
        auth_group.add_argument('--browser-timeout', type=int, default=2, help='Facebook browser authentication timeout in minutes')
    auth_group.add_argument('--dtsg', help='Facebook dtsg value (must use --cookies as well)')
    auth_group.add_argument('--cookies', type=valid_cookies, help='Facebook cookies value (must use --dtsg as well)')
    stats_group = parser.add_argument_group('statistics', 'Conversation statistics options')
    if matplotlib:
        stats_group.add_argument('-P', '--plot-message-count', action='store_true', default=False, help='Plot individual message count over time')
        stats_group.add_argument('-Q', '--plot-cumulative-message-count', action='store_true', default=False,
                                 help='Plot individual cumulative message count over time')
        stats_group.add_argument('-S', '--limit-plot-to-streak', action='store_true', default=False,
                                 help='Limit message plot to time since streak started')
    if wordcloud:
        stats_group.add_argument('-W', '--word-clouds', action='store_true', default=False, help='Display individual message word clouds')
    search_group = parser.add_argument_group('search', 'Conversation search options')
    search_group.add_argument('-F', '--latest-first', action='store_true', default=False, help='Show latest messages first')
    search_group.add_argument('-I', '--regular-expression', action='store_true', default=False, help='Treat search query as regular expression')
    search_group.add_argument('-A', '--author', type=valid_conversation, metavar='ID/NAME', help='Show only messages from this author')
    search_group.add_argument('-C', '--case-sensitive', action='store_true', default=False, help='Case sensitive search')
    download_group = parser.add_argument_group('download', 'Conversation download options')
    download_group.add_argument('-L', '--limit', type=int, default=500, help='Message download limit')
    download_group.add_argument('-M', '--group-limit-multiplier', action='count', default=2, help='Multiply message download limit for groups')
    download_group.add_argument('-D', '--dry-run', action='store_true', default=False, help="Don't save to database")
    download_group.add_argument('-B', '--bare', action='store_true', default=not psycopg2_installed,
                                help='Run without database (update results redirected to STDOUT)')
    args = parser.parse_args()
    if not psycopg2_installed:
        # Polyfill the attributes the database-only options would have added.
        args.search = args.statistics = None
        args.init_db = args.dump_db = args.load_db = False
    if bool(args.dtsg) ^ bool(args.cookies):
        parser.error('--dtsg and --cookies must both be set for manual authentication.')
    args.dry_run = args.dry_run or args.bare  # bare mode never writes anywhere
    if args.bare and (args.search or args.statistics or args.init_db or args.dump_db or args.load_db):
        parser.error('--bare is incompatible with any operations that require a database (search, statistics, init_db, dump_db, load_db)')
    if args.search:
        args.search[0] = valid_conversation(args.search[0])
    plot_messages_arg_required = getattr(args, 'limit_plot_to_streak', False)
    plotting_messages = getattr(args, 'plot_cumulative_message_count', False) or getattr(args, 'plot_message_count', False)
    if plot_messages_arg_required and not plotting_messages:
        # BUG FIX: the first option name in this error message was misspelled
        # ('--plot_message-count').
        parser.error('--plot-message-count or --plot-cumulative-message-count must be set when --limit-plot-to-streak is.')
    main(args)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[[source]] | |
name = "pypi" | |
url = "https://pypi.org/simple" | |
verify_ssl = true | |
[dev-packages] | |
flake8 = "*" | |
flake8-bugbear = "*" | |
flake8-builtins = "*" | |
flake8-commas = "*" | |
flake8-comprehensions = "*" | |
flake8-eradicate = "*" | |
flake8-import-order = "*" | |
flake8-quotes = "*" | |
mypy = "*" | |
pep8-naming = "*" | |
[packages] | |
lxml = "*" | |
matplotlib = "*" | |
numpy = "*" | |
psycopg2 = "==2.7.3.2" | |
selenium = "*" | |
wordcloud = "*" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"_meta": { | |
"hash": { | |
"sha256": "b840d575cc24b6f76c31cfe65ef387ff76c5dabf86cfb7a3998451ec2488eec4" | |
}, | |
"pipfile-spec": 6, | |
"requires": {}, | |
"sources": [ | |
{ | |
"name": "pypi", | |
"url": "https://pypi.org/simple", | |
"verify_ssl": true | |
} | |
] | |
}, | |
"default": { | |
"cycler": { | |
"hashes": [ | |
"sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d", | |
"sha256:cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8" | |
], | |
"version": "==0.10.0" | |
}, | |
"kiwisolver": { | |
"hashes": [ | |
"sha256:05b5b061e09f60f56244adc885c4a7867da25ca387376b02c1efc29cc16bcd0f", | |
"sha256:26f4fbd6f5e1dabff70a9ba0d2c4bd30761086454aa30dddc5b52764ee4852b7", | |
"sha256:3b2378ad387f49cbb328205bda569b9f87288d6bc1bf4cd683c34523a2341efe", | |
"sha256:400599c0fe58d21522cae0e8b22318e09d9729451b17ee61ba8e1e7c0346565c", | |
"sha256:47b8cb81a7d18dbaf4fed6a61c3cecdb5adec7b4ac292bddb0d016d57e8507d5", | |
"sha256:53eaed412477c836e1b9522c19858a8557d6e595077830146182225613b11a75", | |
"sha256:58e626e1f7dfbb620d08d457325a4cdac65d1809680009f46bf41eaf74ad0187", | |
"sha256:5a52e1b006bfa5be04fe4debbcdd2688432a9af4b207a3f429c74ad625022641", | |
"sha256:5c7ca4e449ac9f99b3b9d4693debb1d6d237d1542dd6a56b3305fe8a9620f883", | |
"sha256:682e54f0ce8f45981878756d7203fd01e188cc6c8b2c5e2cf03675390b4534d5", | |
"sha256:79bfb2f0bd7cbf9ea256612c9523367e5ec51d7cd616ae20ca2c90f575d839a2", | |
"sha256:7f4dd50874177d2bb060d74769210f3bce1af87a8c7cf5b37d032ebf94f0aca3", | |
"sha256:8944a16020c07b682df861207b7e0efcd2f46c7488619cb55f65882279119389", | |
"sha256:8aa7009437640beb2768bfd06da049bad0df85f47ff18426261acecd1cf00897", | |
"sha256:939f36f21a8c571686eb491acfffa9c7f1ac345087281b412d63ea39ca14ec4a", | |
"sha256:9733b7f64bd9f807832d673355f79703f81f0b3e52bfce420fc00d8cb28c6a6c", | |
"sha256:a02f6c3e229d0b7220bd74600e9351e18bc0c361b05f29adae0d10599ae0e326", | |
"sha256:a0c0a9f06872330d0dd31b45607197caab3c22777600e88031bfe66799e70bb0", | |
"sha256:acc4df99308111585121db217681f1ce0eecb48d3a828a2f9bbf9773f4937e9e", | |
"sha256:b64916959e4ae0ac78af7c3e8cef4becee0c0e9694ad477b4c6b3a536de6a544", | |
"sha256:d3fcf0819dc3fea58be1fd1ca390851bdb719a549850e708ed858503ff25d995", | |
"sha256:d52e3b1868a4e8fd18b5cb15055c76820df514e26aa84cc02f593d99fef6707f", | |
"sha256:db1a5d3cc4ae943d674718d6c47d2d82488ddd94b93b9e12d24aabdbfe48caee", | |
"sha256:e3a21a720791712ed721c7b95d433e036134de6f18c77dbe96119eaf7aa08004", | |
"sha256:e8bf074363ce2babeb4764d94f8e65efd22e6a7c74860a4f05a6947afc020ff2", | |
"sha256:f16814a4a96dc04bf1da7d53ee8d5b1d6decfc1a92a63349bb15d37b6a263dd9", | |
"sha256:f2b22153870ca5cf2ab9c940d7bc38e8e9089fa0f7e5856ea195e1cf4ff43d5a", | |
"sha256:f790f8b3dff3d53453de6a7b7ddd173d2e020fb160baff578d578065b108a05f" | |
], | |
"version": "==1.1.0" | |
}, | |
"lxml": { | |
"hashes": [ | |
"sha256:03984196d00670b2ab14ae0ea83d5cc0cfa4f5a42558afa9ab5fa745995328f5", | |
"sha256:0815b0c9f897468de6a386dc15917a0becf48cc92425613aa8bbfc7f0f82951f", | |
"sha256:175f3825f075cf02d15099eb52658457cf0ff103dcf11512b5d2583e1d40f58b", | |
"sha256:30e14c62d88d1e01a26936ecd1c6e784d4afc9aa002bba4321c5897937112616", | |
"sha256:3210da6f36cf4b835ff1be853962b22cc354d506f493b67a4303c88bbb40d57b", | |
"sha256:40f60819fbd5bad6e191ba1329bfafa09ab7f3f174b3d034d413ef5266963294", | |
"sha256:43b26a865a61549919f8a42e094dfdb62847113cf776d84bd6b60e4e3fc20ea3", | |
"sha256:4a03dd682f8e35a10234904e0b9508d705ff98cf962c5851ed052e9340df3d90", | |
"sha256:62f382cddf3d2e52cf266e161aa522d54fd624b8cc567bc18f573d9d50d40e8e", | |
"sha256:7b98f0325be8450da70aa4a796c4f06852949fe031878b4aa1d6c417a412f314", | |
"sha256:846a0739e595871041385d86d12af4b6999f921359b38affb99cdd6b54219a8f", | |
"sha256:a3080470559938a09a5d0ec558c005282e99ac77bf8211fb7b9a5c66390acd8d", | |
"sha256:ad841b78a476623955da270ab8d207c3c694aa5eba71f4792f65926dc46c6ee8", | |
"sha256:afdd75d9735e44c639ffd6258ce04a2de3b208f148072c02478162d0944d9da3", | |
"sha256:b4fbf9b552faff54742bcd0791ab1da5863363fb19047e68f6592be1ac2dab33", | |
"sha256:b90c4e32d6ec089d3fa3518436bdf5ce4d902a0787dbd9bb09f37afe8b994317", | |
"sha256:b91cfe4438c741aeff662d413fd2808ac901cc6229c838236840d11de4586d63", | |
"sha256:bdb0593a42070b0a5f138b79b872289ee73c8e25b3f0bea6564e795b55b6bcdd", | |
"sha256:c4e4bca2bb68ce22320297dfa1a7bf070a5b20bcbaec4ee023f83d2f6e76496f", | |
"sha256:cec4ab14af9eae8501be3266ff50c3c2aecc017ba1e86c160209bb4f0423df6a", | |
"sha256:e83b4b2bf029f5104bc1227dbb7bf5ace6fd8fabaebffcd4f8106fafc69fc45f", | |
"sha256:e995b3734a46d41ae60b6097f7c51ba9958648c6d1e0935b7e0ee446ee4abe22", | |
"sha256:f679d93dec7f7210575c85379a31322df4c46496f184ef650d3aba1484b38a2d", | |
"sha256:fd213bb5166e46974f113c8228daaef1732abc47cb561ce9c4c8eaed4bd3b09b", | |
"sha256:fdcb57b906dbc1f80666e6290e794ab8fb959a2e17aa5aee1758a85d1da4533f", | |
"sha256:ff424b01d090ffe1947ec7432b07f536912e0300458f9a7f48ea217dd8362b86" | |
], | |
"index": "pypi", | |
"version": "==4.3.3" | |
}, | |
"matplotlib": { | |
"hashes": [ | |
"sha256:08d9bc2e2acef42965256acd5015dc2c899cbd53e01bf4214c5510c7ea0efd2d", | |
"sha256:1e0213f87cc0076f7b0c4c251d7e23601e2419cd98691df79edb95517ba06f0c", | |
"sha256:1f31053f660df5f0310118d7f5bd1e8025170e9773f0bebe8fec486d0926adf6", | |
"sha256:399bf6352633aeeb45ca55c6c943fa2738022fb17ae498c32a142ced0b41528d", | |
"sha256:409a5894efb810d630d2512449c7a4394de9a4d15fc6394e26a409b17d9cc18c", | |
"sha256:5c5ef5cf1bc8f483123102e2615644937af7d4c01d100acc72bf74a044a78717", | |
"sha256:d0052be5cdfa27018bb08194b8812c47cb985d60eb682e1809c76e9600839516", | |
"sha256:e7d6620d145ca9f6c3e88248e5734b6fda430e75e70755b887e48f8e9bc1de2a", | |
"sha256:f3d8b6bccc577e4e5ecbd58fdd63cacb8e58f0ed1e97616a7f7a7baaf4b8d036" | |
], | |
"index": "pypi", | |
"version": "==3.1.0" | |
}, | |
"numpy": { | |
"hashes": [ | |
"sha256:0778076e764e146d3078b17c24c4d89e0ecd4ac5401beff8e1c87879043a0633", | |
"sha256:141c7102f20abe6cf0d54c4ced8d565b86df4d3077ba2343b61a6db996cefec7", | |
"sha256:14270a1ee8917d11e7753fb54fc7ffd1934f4d529235beec0b275e2ccf00333b", | |
"sha256:27e11c7a8ec9d5838bc59f809bfa86efc8a4fd02e58960fa9c49d998e14332d5", | |
"sha256:2a04dda79606f3d2f760384c38ccd3d5b9bb79d4c8126b67aff5eb09a253763e", | |
"sha256:3c26010c1b51e1224a3ca6b8df807de6e95128b0908c7e34f190e7775455b0ca", | |
"sha256:52c40f1a4262c896420c6ea1c6fda62cf67070e3947e3307f5562bd783a90336", | |
"sha256:6e4f8d9e8aa79321657079b9ac03f3cf3fd067bf31c1cca4f56d49543f4356a5", | |
"sha256:7242be12a58fec245ee9734e625964b97cf7e3f2f7d016603f9e56660ce479c7", | |
"sha256:7dc253b542bfd4b4eb88d9dbae4ca079e7bf2e2afd819ee18891a43db66c60c7", | |
"sha256:94f5bd885f67bbb25c82d80184abbf7ce4f6c3c3a41fbaa4182f034bba803e69", | |
"sha256:a89e188daa119ffa0d03ce5123dee3f8ffd5115c896c2a9d4f0dbb3d8b95bfa3", | |
"sha256:ad3399da9b0ca36e2f24de72f67ab2854a62e623274607e37e0ce5f5d5fa9166", | |
"sha256:b0348be89275fd1d4c44ffa39530c41a21062f52299b1e3ee7d1c61f060044b8", | |
"sha256:b5554368e4ede1856121b0dfa35ce71768102e4aa55e526cb8de7f374ff78722", | |
"sha256:cbddc56b2502d3f87fda4f98d948eb5b11f36ff3902e17cb6cc44727f2200525", | |
"sha256:d79f18f41751725c56eceab2a886f021d70fd70a6188fd386e29a045945ffc10", | |
"sha256:dc2ca26a19ab32dc475dbad9dfe723d3a64c835f4c23f625c2b6566ca32b9f29", | |
"sha256:dd9bcd4f294eb0633bb33d1a74febdd2b9018b8b8ed325f861fffcd2c7660bb8", | |
"sha256:e8baab1bc7c9152715844f1faca6744f2416929de10d7639ed49555a85549f52", | |
"sha256:ec31fe12668af687b99acf1567399632a7c47b0e17cfb9ae47c098644ef36797", | |
"sha256:f12b4f7e2d8f9da3141564e6737d79016fe5336cc92de6814eba579744f65b0a", | |
"sha256:f58ac38d5ca045a377b3b377c84df8175ab992c970a53332fa8ac2373df44ff7" | |
], | |
"index": "pypi", | |
"version": "==1.16.4" | |
}, | |
"pillow": { | |
"hashes": [ | |
"sha256:15c056bfa284c30a7f265a41ac4cbbc93bdbfc0dfe0613b9cb8a8581b51a9e55", | |
"sha256:1a4e06ba4f74494ea0c58c24de2bb752818e9d504474ec95b0aa94f6b0a7e479", | |
"sha256:1c3c707c76be43c9e99cb7e3d5f1bee1c8e5be8b8a2a5eeee665efbf8ddde91a", | |
"sha256:1fd0b290203e3b0882d9605d807b03c0f47e3440f97824586c173eca0aadd99d", | |
"sha256:24114e4a6e1870c5a24b1da8f60d0ba77a0b4027907860188ea82bd3508c80eb", | |
"sha256:258d886a49b6b058cd7abb0ab4b2b85ce78669a857398e83e8b8e28b317b5abb", | |
"sha256:33c79b6dd6bc7f65079ab9ca5bebffb5f5d1141c689c9c6a7855776d1b09b7e8", | |
"sha256:367385fc797b2c31564c427430c7a8630db1a00bd040555dfc1d5c52e39fcd72", | |
"sha256:3c1884ff078fb8bf5f63d7d86921838b82ed4a7d0c027add773c2f38b3168754", | |
"sha256:44e5240e8f4f8861d748f2a58b3f04daadab5e22bfec896bf5434745f788f33f", | |
"sha256:46aa988e15f3ea72dddd81afe3839437b755fffddb5e173886f11460be909dce", | |
"sha256:74d90d499c9c736d52dd6d9b7221af5665b9c04f1767e35f5dd8694324bd4601", | |
"sha256:809c0a2ce9032cbcd7b5313f71af4bdc5c8c771cb86eb7559afd954cab82ebb5", | |
"sha256:85d1ef2cdafd5507c4221d201aaf62fc9276f8b0f71bd3933363e62a33abc734", | |
"sha256:8c3889c7681af77ecfa4431cd42a2885d093ecb811e81fbe5e203abc07e0995b", | |
"sha256:9218d81b9fca98d2c47d35d688a0cea0c42fd473159dfd5612dcb0483c63e40b", | |
"sha256:9aa4f3827992288edd37c9df345783a69ef58bd20cc02e64b36e44bcd157bbf1", | |
"sha256:9d80f44137a70b6f84c750d11019a3419f409c944526a95219bea0ac31f4dd91", | |
"sha256:b7ebd36128a2fe93991293f997e44be9286503c7530ace6a55b938b20be288d8", | |
"sha256:c4c78e2c71c257c136cdd43869fd3d5e34fc2162dc22e4a5406b0ebe86958239", | |
"sha256:c6a842537f887be1fe115d8abb5daa9bc8cc124e455ff995830cc785624a97af", | |
"sha256:cf0a2e040fdf5a6d95f4c286c6ef1df6b36c218b528c8a9158ec2452a804b9b8", | |
"sha256:cfd28aad6fc61f7a5d4ee556a997dc6e5555d9381d1390c00ecaf984d57e4232", | |
"sha256:dca5660e25932771460d4688ccbb515677caaf8595f3f3240ec16c117deff89a", | |
"sha256:de7aedc85918c2f887886442e50f52c1b93545606317956d65f342bd81cb4fc3", | |
"sha256:e6c0bbf8e277b74196e3140c35f9a1ae3eafd818f7f2d3a15819c49135d6c062" | |
], | |
"version": "==6.0.0" | |
}, | |
"psycopg2": { | |
"hashes": [ | |
"sha256:009e0bc09a57dbef4b601cb8b46a2abad51f5274c8be4bba276ff2884cd4cc53", | |
"sha256:0344b181e1aea37a58c218ccb0f0f771295de9aa25a625ed076e6996c6530f9e", | |
"sha256:0cd4c848f0e9d805d531e44973c8f48962e20eb7fc0edac3db4f9dbf9ed5ab82", | |
"sha256:1286dd16d0e46d59fa54582725986704a7a3f3d9aca6c5902a7eceb10c60cb7e", | |
"sha256:1cf5d84290c771eeecb734abe2c6c3120e9837eb12f99474141a862b9061ac51", | |
"sha256:207ba4f9125a0a4200691e82d5eee7ea1485708eabe99a07fc7f08696fae62f4", | |
"sha256:25250867a4cd1510fb755ef9cb38da3065def999d8e92c44e49a39b9b76bc893", | |
"sha256:2954557393cfc9a5c11a5199c7a78cd9c0c793a047552d27b1636da50d013916", | |
"sha256:317612d5d0ca4a9f7e42afb2add69b10be360784d21ce4ecfbca19f1f5eadf43", | |
"sha256:37f54452c7787dbdc0a634ca9773362b91709917f0b365ed14b831f03cbd34ba", | |
"sha256:40fa5630cd7d237cd93c4d4b64b9e5ed9273d1cfce55241c7f9066f5db70629d", | |
"sha256:57baf63aeb2965ca4b52613ce78e968b6d2bde700c97f6a7e8c6c236b51ab83e", | |
"sha256:594aa9a095de16614f703d759e10c018bdffeafce2921b8e80a0e8a0ebbc12e5", | |
"sha256:5c3213be557d0468f9df8fe2487eaf2990d9799202c5ff5cb8d394d09fad9b2a", | |
"sha256:697ff63bc5451e0b0db48ad205151123d25683b3754198be7ab5fcb44334e519", | |
"sha256:6c2f1a76a9ebd9ecf7825b9e20860139ca502c2bf1beabf6accf6c9e66a7e0c3", | |
"sha256:7a75565181e75ba0b9fb174b58172bf6ea9b4331631cfe7bafff03f3641f5d73", | |
"sha256:7a9c6c62e6e05df5406e9b5235c31c376a22620ef26715a663cee57083b3c2ea", | |
"sha256:7c31dade89634807196a6b20ced831fbd5bec8a21c4e458ea950c9102c3aa96f", | |
"sha256:82c40ea3ac1555e0462803380609fbe8b26f52620f3d4f8eb480cfd8ceed8a14", | |
"sha256:8f5942a4daf1ffac42109dc4a72f786af4baa4fa702ede1d7c57b4b696c2e7d6", | |
"sha256:92179bd68c2efe72924a99b6745a9172471931fc296f9bfdf9645b75eebd6344", | |
"sha256:94e4128ba1ea56f02522fffac65520091a9de3f5c00da31539e085e13db4771b", | |
"sha256:988d2ec7560d42ef0ac34b3b97aad14c4f068792f00e1524fa1d3749fe4e4b64", | |
"sha256:9d6266348b15b4a48623bf4d3e50445d8e581da413644f365805b321703d0fac", | |
"sha256:9d64fed2681552ed642e9c0cc831a9e95ab91de72b47d0cb68b5bf506ba88647", | |
"sha256:b9358e203168fef7bfe9f430afaed3a2a624717a1d19c7afa7dfcbd76e3cd95c", | |
"sha256:bf708455cd1e9fa96c05126e89a0c59b200d086c7df7bbafc7d9be769e4149a3", | |
"sha256:d3ac07240e2304181ffdb13c099840b5eb555efc7be9344503c0c03aa681de79", | |
"sha256:ddca39cc55877653b5fcf59976d073e3d58c7c406ef54ae8e61ddf8782867182", | |
"sha256:fc993c9331d91766d54757bbc70231e29d5ceb2d1ac08b1570feaa0c38ab9582" | |
], | |
"index": "pypi", | |
"version": "==2.7.3.2" | |
}, | |
"pyparsing": { | |
"hashes": [ | |
"sha256:1873c03321fc118f4e9746baf201ff990ceb915f433f23b395f5580d1840cb2a", | |
"sha256:9b6323ef4ab914af344ba97510e966d64ba91055d6b9afa6b30799340e89cc03" | |
], | |
"version": "==2.4.0" | |
}, | |
"python-dateutil": { | |
"hashes": [ | |
"sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb", | |
"sha256:c89805f6f4d64db21ed966fda138f8a5ed7a4fdbc1a8ee329ce1b74e3c74da9e" | |
], | |
"version": "==2.8.0" | |
}, | |
"selenium": { | |
"hashes": [ | |
"sha256:2d7131d7bc5a5b99a2d9b04aaf2612c411b03b8ca1b1ee8d3de5845a9be2cb3c", | |
"sha256:deaf32b60ad91a4611b98d8002757f29e6f2c2d5fcaf202e1c9ad06d6772300d" | |
], | |
"index": "pypi", | |
"version": "==3.141.0" | |
}, | |
"six": { | |
"hashes": [ | |
"sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", | |
"sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" | |
], | |
"version": "==1.12.0" | |
}, | |
"urllib3": { | |
"hashes": [ | |
"sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1", | |
"sha256:dbe59173209418ae49d485b87d1681aefa36252ee85884c31346debd19463232" | |
], | |
"version": "==1.25.3" | |
}, | |
"wordcloud": { | |
"hashes": [ | |
"sha256:1ba7de6e3179f77f390fa70a363fcd8a3708dd79df7994738f0ba20e6b4eec4a", | |
"sha256:2804995d361438fde75114543d5e48cf4b9f6bc74327c7ed6973ccba0e11221d", | |
"sha256:2c67ae08992ced6ee465b31e307e2d686c593fd5d25459b7bf79effd08e5b797", | |
"sha256:5259f04f0e06a58c313e4c9c145a11ca8c4443bb5d9195414160c0f359d047d8", | |
"sha256:5425b176d9d8a634130db398400a830aa1f43a2bd1d60e16801764ae2b44ed4a", | |
"sha256:578996a2ac83d2199f96661e66af587f010ffb472744fd834186cdfd060d5e38", | |
"sha256:5a90670745e346e468ec0e203e329e8c44392a4e9aae38dcceb4d313ac0dc939", | |
"sha256:6ceed30c943b49e8e8b222a4d5a6725bbeca180fada7a04c4121ed238d6896e6", | |
"sha256:78a856ca60be4c912cd27c3050dd4ad79e34ca021cd83fd61b77bbb10a84a089", | |
"sha256:80b6646a821537faa88b4bba78e35c20f9056bd7227b76fcf065b6627709c089", | |
"sha256:9b87d3ccb7829f8e5fbe1847126f9d232bd7d8d299475e5fc8e5331c5cd52fa6", | |
"sha256:a0e70a23de1e978059a35a1fda976d921b802f0ef7cd145283be7a0772f46b11", | |
"sha256:a8b7b896e7c767afc4d195d0c1657320d6e3db1e02a919eeb2d895b0c277afd9", | |
"sha256:c567d8d5e6743932f1748ac0db6235dc8948fdf34c4819dffebc4914b9e37d3c", | |
"sha256:c7c0de04c68bef168d51ece614e979b19a22287101efbf86ad0b23e91a9f8405", | |
"sha256:d13cce1ad31d5dc6d9d276a5745fe1067d40b680901fad45527bad660f28232e", | |
"sha256:f2df3140b4355b589316f4bc80e08f4385cd74a57d6de6e97d2f85847e2161c5" | |
], | |
"index": "pypi", | |
"version": "==1.5.0" | |
} | |
}, | |
"develop": { | |
"attrs": { | |
"hashes": [ | |
"sha256:10cbf6e27dbce8c30807caf056c8eb50917e0eaafe86347671b57254006c3e69", | |
"sha256:ca4be454458f9dec299268d472aaa5a11f67a4ff70093396e1ceae9c76cf4bbb" | |
], | |
"version": "==18.2.0" | |
}, | |
"entrypoints": { | |
"hashes": [ | |
"sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19", | |
"sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451" | |
], | |
"version": "==0.3" | |
}, | |
"eradicate": { | |
"hashes": [ | |
"sha256:4ffda82aae6fd49dfffa777a857cb758d77502a1f2e0f54c9ac5155a39d2d01a" | |
], | |
"version": "==1.0" | |
}, | |
"flake8": { | |
"hashes": [ | |
"sha256:859996073f341f2670741b51ec1e67a01da142831aa1fdc6242dbf88dffbe661", | |
"sha256:a796a115208f5c03b18f332f7c11729812c8c3ded6c46319c59b53efd3819da8" | |
], | |
"index": "pypi", | |
"version": "==3.7.7" | |
}, | |
"flake8-bugbear": { | |
"hashes": [ | |
"sha256:5070774b668be92c4312e5ca82748ddf4ecaa7a24ff062662681bb745c7896eb", | |
"sha256:fef9c9826d14ec23187ae1edeb3c6513c4e46bf0e70d86bac38f7d9aabae113d" | |
], | |
"index": "pypi", | |
"version": "==19.3.0" | |
}, | |
"flake8-builtins": { | |
"hashes": [ | |
"sha256:8d806360767947c0035feada4ddef3ede32f0a586ef457e62d811b8456ad9a51", | |
"sha256:cd7b1b7fec4905386a3643b59f9ca8e305768da14a49a7efb31fe9364f33cd04" | |
], | |
"index": "pypi", | |
"version": "==1.4.1" | |
}, | |
"flake8-commas": { | |
"hashes": [ | |
"sha256:d3005899466f51380387df7151fb59afec666a0f4f4a2c6a8995b975de0f44b7", | |
"sha256:ee2141a3495ef9789a3894ed8802d03eff1eaaf98ce6d8653a7c573ef101935e" | |
], | |
"index": "pypi", | |
"version": "==2.0.0" | |
}, | |
"flake8-comprehensions": { | |
"hashes": [ | |
"sha256:35f826956e87f230415cde9c3b8b454e785736cf5ff0be551c441b41b937f699", | |
"sha256:f0b61d983d608790abf3664830d68efd3412265c2d10f6a4ba1a353274dbeb64" | |
], | |
"index": "pypi", | |
"version": "==2.1.0" | |
}, | |
"flake8-eradicate": { | |
"hashes": [ | |
"sha256:0953cd3bcae4bfd04d45075234e0b5fd465ff50ecc56cdcaf0027da751632127", | |
"sha256:c762fbb5c3e3694c9ba656d38477b2dcca6599b8baeee4984d05d655591a6f83" | |
], | |
"index": "pypi", | |
"version": "==0.2.0" | |
}, | |
"flake8-import-order": { | |
"hashes": [ | |
"sha256:90a80e46886259b9c396b578d75c749801a41ee969a235e163cfe1be7afd2543", | |
"sha256:a28dc39545ea4606c1ac3c24e9d05c849c6e5444a50fb7e9cdd430fc94de6e92" | |
], | |
"index": "pypi", | |
"version": "==0.18.1" | |
}, | |
"flake8-polyfill": { | |
"hashes": [ | |
"sha256:12be6a34ee3ab795b19ca73505e7b55826d5f6ad7230d31b18e106400169b9e9", | |
"sha256:e44b087597f6da52ec6393a709e7108b2905317d0c0b744cdca6208e670d8eda" | |
], | |
"version": "==1.0.2" | |
}, | |
"flake8-quotes": { | |
"hashes": [ | |
"sha256:10c9af6b472d4302a8e721c5260856c3f985c5c082b04841aefd2f808ac02038" | |
], | |
"index": "pypi", | |
"version": "==2.0.1" | |
}, | |
"mccabe": { | |
"hashes": [ | |
"sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", | |
"sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" | |
], | |
"version": "==0.6.1" | |
}, | |
"mypy": { | |
"hashes": [ | |
"sha256:2afe51527b1f6cdc4a5f34fc90473109b22bf7f21086ba3e9451857cf11489e6", | |
"sha256:56a16df3e0abb145d8accd5dbb70eba6c4bd26e2f89042b491faa78c9635d1e2", | |
"sha256:5764f10d27b2e93c84f70af5778941b8f4aa1379b2430f85c827e0f5464e8714", | |
"sha256:5bbc86374f04a3aa817622f98e40375ccb28c4836f36b66706cf3c6ccce86eda", | |
"sha256:6a9343089f6377e71e20ca734cd8e7ac25d36478a9df580efabfe9059819bf82", | |
"sha256:6c9851bc4a23dc1d854d3f5dfd5f20a016f8da86bcdbb42687879bb5f86434b0", | |
"sha256:b8e85956af3fcf043d6f87c91cbe8705073fc67029ba6e22d3468bfee42c4823", | |
"sha256:b9a0af8fae490306bc112229000aa0c2ccc837b49d29a5c42e088c132a2334dd", | |
"sha256:bbf643528e2a55df2c1587008d6e3bda5c0445f1240dfa85129af22ae16d7a9a", | |
"sha256:c46ab3438bd21511db0f2c612d89d8344154c0c9494afc7fbc932de514cf8d15", | |
"sha256:f7a83d6bd805855ef83ec605eb01ab4fa42bcef254b13631e451cbb44914a9b0" | |
], | |
"index": "pypi", | |
"version": "==0.701" | |
}, | |
"mypy-extensions": { | |
"hashes": [ | |
"sha256:37e0e956f41369209a3d5f34580150bcacfabaa57b33a15c0b25f4b5725e0812", | |
"sha256:b16cabe759f55e3409a7d231ebd2841378fb0c27a5d1994719e340e4f429ac3e" | |
], | |
"version": "==0.4.1" | |
}, | |
"pep8-naming": { | |
"hashes": [ | |
"sha256:01cb1dab2f3ce9045133d08449f1b6b93531dceacb9ef04f67087c11c723cea9", | |
"sha256:0ec891e59eea766efd3059c3d81f1da304d858220678bdc351aab73c533f2fbb" | |
], | |
"index": "pypi", | |
"version": "==0.8.2" | |
}, | |
"pycodestyle": { | |
"hashes": [ | |
"sha256:95a2219d12372f05704562a14ec30bc76b05a5b297b21a5dfe3f6fac3491ae56", | |
"sha256:e40a936c9a450ad81df37f549d676d127b1b66000a6c500caa2b085bc0ca976c" | |
], | |
"version": "==2.5.0" | |
}, | |
"pyflakes": { | |
"hashes": [ | |
"sha256:17dbeb2e3f4d772725c777fabc446d5634d1038f234e77343108ce445ea69ce0", | |
"sha256:d976835886f8c5b31d47970ed689944a0262b5f3afa00a5a7b4dc81e5449f8a2" | |
], | |
"version": "==2.1.1" | |
}, | |
"typed-ast": { | |
"hashes": [ | |
"sha256:132eae51d6ef3ff4a8c47c393a4ef5ebf0d1aecc96880eb5d6c8ceab7017cc9b", | |
"sha256:18141c1484ab8784006c839be8b985cfc82a2e9725837b0ecfa0203f71c4e39d", | |
"sha256:2baf617f5bbbfe73fd8846463f5aeafc912b5ee247f410700245d68525ec584a", | |
"sha256:3d90063f2cbbe39177e9b4d888e45777012652d6110156845b828908c51ae462", | |
"sha256:4304b2218b842d610aa1a1d87e1dc9559597969acc62ce717ee4dfeaa44d7eee", | |
"sha256:4983ede548ffc3541bae49a82675996497348e55bafd1554dc4e4a5d6eda541a", | |
"sha256:5315f4509c1476718a4825f45a203b82d7fdf2a6f5f0c8f166435975b1c9f7d4", | |
"sha256:6cdfb1b49d5345f7c2b90d638822d16ba62dc82f7616e9b4caa10b72f3f16649", | |
"sha256:7b325f12635598c604690efd7a0197d0b94b7d7778498e76e0710cd582fd1c7a", | |
"sha256:8d3b0e3b8626615826f9a626548057c5275a9733512b137984a68ba1598d3d2f", | |
"sha256:8f8631160c79f53081bd23446525db0bc4c5616f78d04021e6e434b286493fd7", | |
"sha256:912de10965f3dc89da23936f1cc4ed60764f712e5fa603a09dd904f88c996760", | |
"sha256:b010c07b975fe853c65d7bbe9d4ac62f1c69086750a574f6292597763781ba18", | |
"sha256:c908c10505904c48081a5415a1e295d8403e353e0c14c42b6d67f8f97fae6616", | |
"sha256:c94dd3807c0c0610f7c76f078119f4ea48235a953512752b9175f9f98f5ae2bd", | |
"sha256:ce65dee7594a84c466e79d7fb7d3303e7295d16a83c22c7c4037071b059e2c21", | |
"sha256:eaa9cfcb221a8a4c2889be6f93da141ac777eb8819f077e1d09fb12d00a09a93", | |
"sha256:f3376bc31bad66d46d44b4e6522c5c21976bf9bca4ef5987bb2bf727f4506cbb", | |
"sha256:f9202fa138544e13a4ec1a6792c35834250a85958fde1251b6a22e07d1260ae7" | |
], | |
"version": "==1.3.5" | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[flake8] | |
ignore = W504 | |
max-line-length = 155 | |
import-order-style = google |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment