|
#!/usr/bin/env python |
|
# -*- coding: utf-8 -*- |
|
|
|
######### count up twitter, FB, Pinterest shares + comments on your/project's site, put them in beautiful spreadsheet |
|
######### screenshot: https://twitter.com/bluechoochoo/status/431853397612834816/photo/1 |
|
######### you just have to change ONE line of code, baby. |
|
######### I'm not the author; I just added newbie instructions. Forked from https://gist.github.com/mediaczar/7808764
|
######### ******************* how-to: ******************* |
|
######### 1. replace the sitemapURL value (under "CREATE DICTIONARY FROM SITEMAP" below) with YOUR sitemap (probably http://yourdomain.com/sitemap.xml)
|
######### 2. paste this entire code-snippet in your scraperwiki.com session (you've already signed-up, right?) |
|
######### 3. let scraperwiki work its magic |
|
######### 4. profit |
|
######### 4a. tell me on twitter: @bluechoochoo |
|
|
|
|
|
################### |
|
import scraperwiki |
|
import requests |
|
import xmltodict |
|
import json |
|
import time |
|
|
|
graph_query_root = "https://graph.facebook.com/fql" |
|
graph_attr = ['share_count', 'like_count', 'comment_count'] |
|
|
|
|
|
################### |
|
def query_graph_api(url):  # query the Graph API, return data.
    """Fetch Facebook share/like/comment counts for *url* via the (legacy) FQL endpoint.

    Parameters:
        url: page URL to look up in the link_stat table.

    Returns:
        dict mapping each name in graph_attr to its count (None when the
        API returned no row for the URL, instead of crashing).
    """
    fql = 'SELECT %s FROM link_stat WHERE url = "%s"' % (','.join(graph_attr), url)
    print(graph_query_root + '?q=' + fql)  # debug console
    # Let requests URL-encode the query instead of hand-concatenating it,
    # so page URLs containing spaces/&/# don't corrupt the request.
    query_data = requests.get(graph_query_root, params={'q': fql})
    query_json = json.loads(query_data.text)
    # Guard against an empty/missing 'data' list (previously an IndexError).
    rows = query_json.get('data') or [{}]
    result = {}
    for item in graph_attr:
        result[item] = rows[0].get(item)
    time.sleep(2)  # throttle so a big sitemap doesn't hammer the API
    return result
|
|
|
def query_twitter(url):
    """Return the tweet count for *url* from Twitter's (legacy) URL-count endpoint.

    Parameters:
        url: page URL to count shares for.

    Returns:
        int count from the endpoint's JSON response.
    """
    endpoint = "http://urls.api.twitter.com/1/urls/count.json"
    print(endpoint + "?url=" + url)  # debug console
    # params= URL-encodes the target URL (?, &, # would otherwise truncate it).
    query_data = requests.get(endpoint, params={'url': url})
    query_json = json.loads(query_data.text)
    return query_json['count']
|
|
|
def query_pinterest(url):
    """Return the pin count for *url* from Pinterest's JSONP count widget.

    Parameters:
        url: page URL to count pins for.

    Returns:
        int count parsed out of the JSONP response.
    """
    endpoint = "http://widgets.pinterest.com/v1/urls/count.json"
    print(endpoint + "?url=" + url)  # debug console
    # params= URL-encodes the target URL properly.
    query_data = requests.get(endpoint, params={'url': url})
    # Response is JSONP: callbackName({...}). Strip the wrapper by locating
    # the outer parentheses instead of assuming a 13-character callback name
    # (the old text[13:-1] slice broke if Pinterest renamed the callback).
    text = query_data.text
    payload = text[text.index('(') + 1:text.rindex(')')]
    query_json = json.loads(payload)
    return query_json['count']
|
|
|
|
|
|
|
################### CREATE DICTIONARY FROM SITEMAP

# The ONE line to change: point this at your own sitemap (see how-to at top).
sitemapURL = 'http://www.recipegirl.com/sitemap.xml'

sitemap_raw = requests.get(sitemapURL)

# Parse the sitemap XML into nested dicts; sitemap_dict['urlset']['url'] is
# the list of <url> entries consumed by the collection loop below.
sitemap_dict = xmltodict.parse(sitemap_raw.text)
|
|
|
|
|
|
|
################### COLLECT DATA FROM SITEMAP

# One pass over every <url> entry in the sitemap: gather share counts from
# each service, then upsert a row into the ScraperWiki datastore.
for page in sitemap_dict['urlset']['url']:
    # One row per sitemap entry, keyed by the page URL.
    record = {'url': page['loc']}

    # Facebook Graph data: copy each requested link_stat column into the row.
    fb_counts = query_graph_api(record['url'])
    for attr in graph_attr:
        record[attr] = fb_counts[attr]

    # Twitter data
    record['tweets'] = query_twitter(record['url'])

    # Pinterest data
    record['pins'] = query_pinterest(record['url'])

    # TODO: LinkedIn / Delicious / StumbleUpon / Reddit counts

    # Commit data — 'url' is the unique key, so re-running the scraper
    # updates existing rows instead of duplicating them.
    scraperwiki.sqlite.save(unique_keys=['url'], data=record)