Last active
August 29, 2015 14:27
-
-
Save dlewand691/3f6004fa178c17bb0775 to your computer and use it in GitHub Desktop.
Twitter Analytics Scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Twitter Analytics scraper: logs in to twitter.com with cURL and saves
// analytics responses to local files.
// Call this with a cron script on a timed interval to scrape it.

// Under cron/CLI there is no HTTP request, so $_SERVER['HTTP_USER_AGENT'] is
// not set; fall back to a fixed string in that case. It's possible you may
// wish to use an alternative user agent here.
$user_agent = isset($_SERVER['HTTP_USER_AGENT'])
    ? $_SERVER['HTTP_USER_AGENT']
    : 'Mozilla/5.0 (compatible; TwitterAnalyticsScraper/1.0)';

// First call gets the hidden form field authenticity_token
// and the session cookie.
$ch = curl_init();
if ($ch === false) {
    error_log('Twitter scraper: unable to initialise cURL.');
    exit(1);
}
$sTarget = "https://twitter.com/";
curl_setopt($ch, CURLOPT_URL, $sTarget);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Keep TLS verification on: this handle is later used to send the account
// password, so the peer certificate and host name must be checked.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
// Setting CURLOPT_COOKIEFILE turns on cURL's cookie handling for this handle,
// so cookies received here are sent on the following requests.
curl_setopt($ch, CURLOPT_COOKIEFILE, "/tmp/cookie.txt");
curl_setopt($ch, CURLOPT_REFERER, "https://twitter.com/");
$html = curl_exec($ch);
if ($html === false) {
    error_log('Twitter scraper: could not load the login page: ' . curl_error($ch));
    curl_close($ch);
    exit(1);
}
// Parse authenticity_token out of the HTML response. Stop if it is missing:
// without the token the login POST below cannot succeed.
if (preg_match('/<input type="hidden" value="([a-zA-Z0-9]*)" name="authenticity_token"\/>/', $html, $match) !== 1) {
    error_log('Twitter scraper: authenticity_token not found in the login page.');
    curl_close($ch);
    exit(1);
}
$authenticity_token = $match[1];
$username = "username";
$password = "password";
// Set post data. Each value is URL-encoded so that characters such as
// "&", "=", "+" or "%" in the credentials cannot corrupt the form body.
$sPost = "session[username_or_email]=" . urlencode($username)
    . "&session[password]=" . urlencode($password)
    . "&return_to_ssl=true&scribe_log=&redirect_after_login=%2F"
    . "&authenticity_token=" . urlencode($authenticity_token);
// Second call is a POST and performs the login.
$sTarget = "https://twitter.com/sessions";
curl_setopt($ch, CURLOPT_URL, $sTarget);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $sPost);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, array("Content-type: application/x-www-form-urlencoded"));
// The response body is not needed, but a transport failure means the
// following requests would run unauthenticated, so stop here.
if (curl_exec($ch) === false) {
    error_log('Twitter scraper: login request failed: ' . curl_error($ch));
    curl_close($ch);
    exit(1);
}
// Report window bounds. Replace with calculated time/date ranges for the
// report wanted (should be some sort of variable based on current time/date).
$start_time = 1417651200000;
$end_time = 1420070399999;
// The login request has been sent. Let's get the information for the tweets.
// NOTE(review): CURLOPT_POST and the login CURLOPT_POSTFIELDS are still set on
// this handle, so this request also goes out as a POST carrying the login form
// body. Confirm whether export.json expects POST; if it does not, reset the
// handle with CURLOPT_HTTPGET before this call.
$sTarget = "https://analytics.twitter.com/user/$username/tweets/export.json?start_time=$start_time&end_time=$end_time&lang=en";
curl_setopt($ch, CURLOPT_URL, $sTarget);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
// Keep HTTP response headers out of the returned string; otherwise they are
// written at the top of the saved file.
curl_setopt($ch, CURLOPT_HEADER, false);
// Alternative "bundle" download endpoint, left disabled:
//$sTarget = "https://analytics.twitter.com/user/$username/tweets/bundle?start_time=$start_time&end_time=$end_time&lang=en";
//curl_setopt($ch, CURLOPT_URL, $sTarget);
$data = curl_exec($ch);
$error = curl_error($ch);
// Overwrite the last scrape data with new scrape data. On the website, have
// PHP convert the CSV into a multidimensional array to use. Careful though, as
// doing this with large time differences for start and end could make an
// extremely large array. If you want to keep a history, save the CSV data to a
// database, and then you would only need to pull in a few values on each fetch.
$destination = "./tweets.csv";
if ($data === false) {
    // Leave the previous file in place rather than truncating it.
    error_log("Twitter scraper: tweet export request failed: $error");
} elseif (file_put_contents($destination, $data) === false) {
    error_log("Twitter scraper: could not write $destination");
}
// Now let's get the small graph data.
// NOTE(review): unless the handle was reset, the POST method and body set for
// the login request are still active here — confirm that is intended.
$sTarget = "https://analytics.twitter.com/user/$username/tweets/account_stats.json?start_time=$start_time&end_time=$end_time";
curl_setopt($ch, CURLOPT_URL, $sTarget);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
// Keep HTTP response headers out of the returned string so that the saved
// file contains only the response body.
curl_setopt($ch, CURLOPT_HEADER, false);
$data = curl_exec($ch);
$error = curl_error($ch);
$destination = "./graphs.json";
if ($data === false) {
    // Leave the previous file in place rather than truncating it.
    error_log("Twitter scraper: account stats request failed: $error");
} elseif (file_put_contents($destination, $data) === false) {
    error_log("Twitter scraper: could not write $destination");
}
// Now let's get the data to make the SVG timeline.
// NOTE(review): unless the handle was reset, the POST method and body set for
// the login request are still active here — confirm that is intended.
$sTarget = "https://analytics.twitter.com/user/$username/tweets/timeline.json?start_time=$start_time&max_id=0&end_time=$end_time&page=0&filter=no_replies&metric=clicks&lang=en";
curl_setopt($ch, CURLOPT_URL, $sTarget);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Redirects are not followed for this request.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
// CURLOPT_HEADER was switched on for the earlier requests and persists on the
// handle; turn it off so the saved file contains only the response body.
curl_setopt($ch, CURLOPT_HEADER, false);
$data = curl_exec($ch);
$error = curl_error($ch);
$destination = "./timeline.json";
if ($data === false) {
    // Leave the previous file in place rather than truncating it.
    error_log("Twitter scraper: timeline request failed: $error");
} elseif (file_put_contents($destination, $data) === false) {
    error_log("Twitter scraper: could not write $destination");
}
curl_close($ch);
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment