Last active
January 10, 2017 14:23
-
-
Save tfiers/11251129 to your computer and use it in GitHub Desktop.
Facebook post insight & event attendees tracker
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Periodically downloads facebook events and page post insights data to a SQLite database. | |
from pytz import timezone | |
from datetime import datetime | |
import facebook | |
import sqlite3 | |
from time import time, sleep # both in seconds | |
import logging | |
from pprint import pprint, pformat | |
import settings as S | |
# Provides: | |
# PAGE_ACCESS_TOKEN = str | |
# DATABASE_FILE = str | |
# UPDATE_PERIOD = int | |
# RETRY_PERIOD = int | |
# TRACKED_EVENTS = ({"name": str, "id": str}, ... ) | |
# EVENTS_TRACKED_RSVP_STATUSES = (str, ...) | |
# TRACKED_PAGE_POSTS = ({"name": str, "id": str}, ... ) | |
# PAGE_POST_TRACKED_FIELDS = ({"name": str, "metric-name": str}, ..., {"name": str, "metric-name": str, "type-name": str}, ... ) | |
logFormatter = logging.Formatter("%(asctime)s [%(levelname)-5.5s] %(message)s") | |
rootLogger = logging.getLogger() | |
rootLogger.setLevel(logging.INFO) | |
fileHandler = logging.FileHandler("fb_data_fetcher.log") | |
fileHandler.setFormatter(logFormatter) | |
rootLogger.addHandler(fileHandler) | |
consoleHandler = logging.StreamHandler() | |
consoleHandler.setFormatter(logFormatter) | |
rootLogger.addHandler(consoleHandler) | |
# Make sure the recorded timestamp is correct regardless | |
# of where this script runs. | |
fmt = "%Y-%m-%d %H:%M:%S" | |
tz = timezone('Europe/Brussels') | |
def init_db(): | |
# Creates the database file if it does not exist yet. | |
conn = sqlite3.connect(S.DATABASE_FILE) | |
# 'with' automatically releases the resource ('conn.close') "and provides error handling". | |
with conn: | |
# conn.execute("CREATE TABLE IF NOT EXISTS EventGoers("++")") | |
PagePostData_columns = ", ".join(["datetime DATETIME", "post_name TEXT"]+[field["name"]+" INTEGER" for field in S.PAGE_POST_TRACKED_FIELDS]) | |
conn.execute("CREATE TABLE IF NOT EXISTS PagePostData("+PagePostData_columns+");") | |
conn.execute("CREATE TABLE IF NOT EXISTS EventGoers(time_registered DATETIME, event_name TEXT, name TEXT, uid TEXT, rsvp_status TEXT);") | |
def fetch_data(): | |
global S | |
S = reload(S) # Might they have changed. | |
time = datetime.now(tz).strftime(fmt) | |
try: | |
graph = facebook.GraphAPI(S.PAGE_ACCESS_TOKEN) | |
conn = sqlite3.connect(S.DATABASE_FILE) | |
with conn: | |
for post in S.TRACKED_PAGE_POSTS: | |
data = graph.get_object(post["id"]+"/insights")["data"] | |
values = [] | |
for field in S.PAGE_POST_TRACKED_FIELDS: | |
for metric in data: | |
if field["metric-name"] == metric["name"]: | |
value_node = metric["values"][0]["value"] | |
if "type-name" in field: | |
if field["type-name"] in value_node: | |
values.append(str(value_node[field["type-name"]])) | |
else: | |
values.append("0") | |
else: | |
values.append(str(value_node)) | |
q = "INSERT INTO PagePostData VALUES (strftime('{}', '{}'), '{}', {});".format(fmt, time, post["name"], ", ".join(values)) | |
conn.execute(q) | |
new_event_entries = 0 | |
for event in S.TRACKED_EVENTS: | |
for rsvp_status in S.EVENTS_TRACKED_RSVP_STATUSES: | |
# Note that we do not page through the data, as all results seem to be returned on one page. | |
# The docs discourage doing this, but fuck the police. | |
data = graph.get_object("{}/{}".format(event["id"], rsvp_status))["data"] | |
for entry in data: | |
# Only insert a new record if, for the current event, the user is new or | |
# if the user's last status is different from the currently fetched status. | |
q = "SELECT rsvp_status FROM EventGoers WHERE event_name='{}' AND uid='{}' ORDER BY time_registered DESC LIMIT 1".format( | |
event["name"], entry["id"]) | |
result = conn.execute(q).fetchone() | |
if result is None or result[0] != rsvp_status: | |
q = "INSERT INTO EventGoers VALUES (strftime(?, ?), ?, ?, ?, ?);" | |
conn.execute(q, (fmt, time, event["name"], entry["name"], entry["id"], rsvp_status)) | |
new_event_entries += 1 | |
conn.commit() | |
logging.info("{} - {}".format(time, "okey")) | |
logging.info("new event entries: {}".format(new_event_entries)) | |
return "great success" | |
except Exception, e: | |
logging.exception("Something awful happened!") | |
return False | |
def fetch_periodically(): | |
# Fetch data. Wait. Repeat. | |
while True: | |
t0 = time() | |
elapsed_time = 0 | |
retries = 0 | |
while elapsed_time < S.UPDATE_PERIOD: | |
if fetch_data() == "great success": | |
elapsed_time = time() - t0 | |
logging.info("retries: "+str(retries)) | |
logging.info("elapsed time: {:.3f}\n".format(elapsed_time)) | |
# Wait until next update. | |
break | |
else: | |
# Wait a short bit and then retry. | |
sleep(S.RETRY_PERIOD) | |
retries += 1 | |
elapsed_time = time() - t0 | |
if elapsed_time < S.UPDATE_PERIOD: # Testing is so vital. (Passing a negative number to 'sleep()' makes it sleep a very long time.) | |
sleep(S.UPDATE_PERIOD - elapsed_time) | |
init_db() | |
fetch_periodically() | |
# For playing with data. Normal program flow does not reach this. | |
conn = sqlite3.connect(S.DATABASE_FILE) | |
# conn.row_factory = sqlite3.Row # Return results as dictionaries. | |
c = conn.cursor() | |
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GET ETERNAL PAGE ACCESS TOKEN | |
# | |
# As per here: | |
# https://developers.facebook.com/docs/facebook-login/access-tokens/#extendingpagetokens | |
# and here: | |
# https://developers.facebook.com/docs/facebook-login/access-tokens/#pagetokens | |
# | |
# First get a long lived access token, with the 'manage_pages' permission. | |
# Use get_long_lived_access_token.py | |
import facebook | |
from pprint import pprint | |
from datetime import datetime | |
PAGE_NAME = "Arenbergorkest" # Page you manage for which you desire an eternal access token. | |
LONG_LIVED_ACCESS_TOKEN = "CAAInC..." | |
graph = facebook.GraphAPI(LONG_LIVED_ACCESS_TOKEN) | |
r = graph.get_object("me/accounts") | |
# pprint(r) | |
for page in r["data"]: | |
if page["name"] == PAGE_NAME: | |
eternal_access_token = page["access_token"] | |
# Expires: Never | |
graph = facebook.GraphAPI(eternal_access_token) | |
print u'\u2713' | |
# # # # # # # # # # | |
# Arenbergorkest: # | |
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |
# CAAIn... # | |
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |
# For ease of use: | |
def obj(path): | |
return graph.get_object(path) | |
def pr(path_or_obj): | |
if isinstance(path_or_obj, basestring): | |
pprint(obj(path_or_obj)) | |
else: | |
pprint(path_or_obj) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GET LONG LIVED ACCESS TOKEN | |
# | |
# As per here: | |
# https://developers.facebook.com/docs/facebook-login/access-tokens/#extending | |
# | |
# This needs to be redone every 2 months. | |
import requests | |
from pprint import pprint | |
from datetime import datetime | |
# Get a user token from the Graph API Explorer (https://developers.facebook.com/tools/explorer/), | |
# with the 'read_insights' permission e.g. If you want to get an (eternal) page access token, also grant the | |
# 'manage_pages' permission. | |
# | |
# (Get info about this token - app, user, permissions and expiry - here: https://developers.facebook.com/tools/debug/) | |
# | |
short_lived_user_token = "CAAInCjiZAhnQBAIPSpXw5SZB8RZBf6tjab0ZABuJnpKo63399WQZCX76LZA5ZBoPsF8IZBQciSGrY1uFs8pZC8jhZBWiN2Qx6IOFzidFG3WVZB9VQuD8ciaLi33Eux6tZCCJEdebMXaIE6CQAZCcLh2wLkhg8PZAAy6ZB9CIkRagVfxmZBPYn2JJZBfmB8XGswlATPTB1ZCp8ZD" | |
# Expires in about an hour. | |
APP_ID = "" | |
APP_SECRET = "" | |
r = requests.get("https://graph.facebook.com/oauth/access_token?"+ | |
"grant_type=fb_exchange_token"+ | |
"&client_id="+APP_ID+ | |
"&client_secret="+APP_SECRET+ | |
"&fb_exchange_token="+short_lived_user_token) | |
pprint(r.text) | |
long_lived_access_token = r.text.split('=')[1] | |
# Expires in about 2 months | |
# Long lived access token issued 2014-04-08 03:10: | |
# | |
# CAA... | |
# | |
# "Expires 1402188344 (in about 2 months)" (--> 2014-06-08 02:45) | |
# | |
# print(datetime.fromtimestamp(int("5183999")).strftime('%Y-%m-%d %H:%M:%S')) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A never expiring one preferably. See 'get_eternal_page_access_token.py'. | |
# PAGE_ACCESS_TOKEN = "CAAInCj..." | |
DATABASE_FILE = "fb_data.db" # SQLite | |
# In seconds | |
UPDATE_PERIOD = 15*60 | |
# UPDATE_PERIOD = 5 | |
RETRY_PERIOD = 60 | |
# RETRY_PERIOD = 1 | |
# On insights data update intervals | |
# | |
# This page: https://developers.facebook.com/docs/graph-api/reference/insights says: | |
# "Metrics that are marked with * are available in real-time, i.e., they are refreshed approximately every 15 minutes. | |
# Most other metrics will update once every 24 hours." | |
# Page post impressions and engagement are marked *. | |
# Page impressions and engagement are not marked. | |
TRACKED_EVENTS = ( | |
{"name": "woe", "id": "653690918014139"}, | |
{"name": "don", "id": "309063299244493"}, | |
{"name": "ie", "id": "635511229861503"} | |
) | |
# Note: not the same strings as in the returned field 'rsvp_status' (those have 'unsure' instead of 'maybe' for example). We don't use those. | |
EVENTS_TRACKED_RSVP_STATUSES = ( | |
"attending", | |
"maybe", | |
"declined", | |
"noreply" | |
) | |
TRACKED_PAGE_POSTS = ( | |
{"name": "Bellingen 2014", "id": "16875510075_10153945920955076"}, | |
{"name": "Share Woe", "id": "16875510075_10153973792450076"}, | |
{"name": "Share Don", "id": "16875510075_10153973793850076"}, | |
{"name": "Promo-post", "id": "16875510075_10154026155645076"} | |
) | |
# Keys will be used as SQLite column headings. | |
PAGE_POST_TRACKED_FIELDS = ( | |
{"name": "impressions", "metric-name": "post_impressions"}, | |
{"name": "impressions_unique", "metric-name": "post_impressions_unique"}, | |
{"name": "impressions_organic", "metric-name": "post_impressions_organic"}, | |
{"name": "impressions_organic_unique", "metric-name": "post_impressions_organic_unique"}, | |
{"name": "impressions_viral", "metric-name": "post_impressions_viral"}, | |
{"name": "impressions_viral_unique", "metric-name": "post_impressions_viral_unique"}, | |
{"name": "impressions_fan", "metric-name": "post_impressions_fan"}, | |
{"name": "impressions_fan_unique", "metric-name": "post_impressions_fan_unique"}, | |
{"name": "consumptions", "metric-name": "post_consumptions"}, | |
{"name": "photo_views", "metric-name": "post_consumptions_by_type", "type-name": "photo view"}, | |
{"name": "other_clicks", "metric-name": "post_consumptions_by_type", "type-name": "other clicks"}, | |
{"name": "consumptions_unique", "metric-name": "post_consumptions_unique"}, | |
{"name": "photo_views_unique", "metric-name": "post_consumptions_by_type_unique", "type-name": "photo view"}, | |
{"name": "other_clicks_unique", "metric-name": "post_consumptions_by_type_unique", "type-name": "other clicks"}, | |
# more consumption types? | |
{"name": "engaged_users", "metric-name": "post_engaged_users"}, | |
{"name": "stories", "metric-name": "post_stories"}, | |
{"name": "stories_by_like", "metric-name": "post_stories_by_action_type", "type-name": "like"}, | |
{"name": "stories_by_share", "metric-name": "post_stories_by_action_type", "type-name": "share"}, | |
{"name": "storytellers", "metric-name": "post_storytellers"}, | |
{"name": "storytellers_by_liking", "metric-name": "post_storytellers_by_action_type", "type-name": "like"}, | |
{"name": "storytellers_by_sharing", "metric-name": "post_storytellers_by_action_type", "type-name": "share"}, | |
# more story/storyteller types? | |
) | |
# nog mogelijk te tracken: | |
# /{photo-id}: likes,comments,shares op event pics | |
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Export to csv via SQLite Database Browser: | |
create table promo_post_data as select * from pagepostdata where post_name='Promo-post' | |
-- Export to csv via SQLite CLI: | |
sqlite> .header on | |
sqlite> .output c:/work/dataout.csv | |
sqlite> .mode csv | |
sqlite> SELECT * FROM pagepostdata WHERE post_name='Promo-post' | |
sqlite> .output stdout |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment