Skip to content

Instantly share code, notes, and snippets.

@dlewand691
Last active August 29, 2015 14:27
Show Gist options
  • Save dlewand691/3f6004fa178c17bb0775 to your computer and use it in GitHub Desktop.
Save dlewand691/3f6004fa178c17bb0775 to your computer and use it in GitHub Desktop.
Twitter Analytics Scraper
<?php
// Twitter Analytics scraper.
// Call this with a cron script on a timed interval to scrape it.
//
// Step 1: load the Twitter home page. The response carries the session cookie
// and the hidden authenticity_token form field that the login POST needs.

// $_SERVER['HTTP_USER_AGENT'] only exists when this script is reached through a
// web request. When cron runs it under the CLI the key is undefined, so fall
// back to a fixed string. You may wish to use an alternative user agent here.
$user_agent = isset($_SERVER['HTTP_USER_AGENT'])
    ? $_SERVER['HTTP_USER_AGENT']
    : 'Mozilla/5.0 (compatible; TwitterAnalyticsScraper/1.0)';

$ch = curl_init();
$sTarget = "https://twitter.com/";
curl_setopt($ch, CURLOPT_URL, $sTarget);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Keep TLS verification on: this handle is later used to send the account
// password, so the peer certificate and host name must be checked.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
// Naming a cookie file switches on cURL's cookie handling for this handle, so
// cookies received here are sent again on the later requests.
curl_setopt($ch, CURLOPT_COOKIEFILE, "/tmp/cookie.txt");
curl_setopt($ch, CURLOPT_REFERER, "https://twitter.com/");
$html = curl_exec($ch);
if ($html === false) {
    // Without the home page there is no token to log in with; stop here
    // rather than carry on and overwrite earlier scrape results.
    error_log('Twitter scraper: could not load ' . $sTarget . ': ' . curl_error($ch));
    curl_close($ch);
    exit(1);
}
// Step 2: log in.
// Parse authenticity_token out of the html response. If the field is missing
// (request failed or the page markup changed) the login cannot succeed, so
// stop instead of posting an empty token.
$authenticity_token = '';
if (preg_match('/<input type="hidden" value="([a-zA-Z0-9]*)" name="authenticity_token"\/>/', (string) $html, $match) === 1) {
    $authenticity_token = $match[1];
} else {
    error_log('Twitter scraper: authenticity_token not found in the home page response');
    curl_close($ch);
    exit(1);
}
$username = "username";
$password = "password";
// Set post data. Values are URL-encoded so that characters such as "&", "="
// or "+" in the credentials cannot corrupt the form body.
$sPost = "session[username_or_email]=" . urlencode($username)
    . "&session[password]=" . urlencode($password)
    . "&return_to_ssl=true&scribe_log=&redirect_after_login=%2F"
    . "&authenticity_token=" . urlencode($authenticity_token);
// Second call is a post and performs login.
$sTarget = "https://twitter.com/sessions";
curl_setopt($ch, CURLOPT_URL, $sTarget);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $sPost);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, array("Content-type: application/x-www-form-urlencoded"));
if (curl_exec($ch) === false) {
    error_log('Twitter scraper: login request failed: ' . curl_error($ch));
    curl_close($ch);
    exit(1);
}
// Step 3: request the tweet export for the reporting window.
// Replace both values with time/date ranges calculated from the current
// time/date for the report wanted. They look like Unix timestamps in
// milliseconds (2014-12-04 00:00:00 to 2014-12-31 23:59:59.999 UTC).
$start_time = 1417651200000;
$end_time = 1420070399999;
// We are now authed into Twitter. Let's get the information for the tweets.
// NOTE(review): nothing has switched the handle back from the login POST, so
// this request presumably goes out as a POST carrying the login form body.
// Confirm whether the endpoint relies on that before changing it.
// Generate json:
$sTarget = "https://analytics.twitter.com/user/" . rawurlencode($username) . "/tweets/export.json?start_time=$start_time&end_time=$end_time&lang=en";
curl_setopt($ch, CURLOPT_URL, $sTarget);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
// Only the response body belongs in the saved file; with CURLOPT_HEADER on,
// the HTTP response headers would be written in front of the data.
curl_setopt($ch, CURLOPT_HEADER, false);
// Alternative endpoint to download the data instead:
//$sTarget = "https://analytics.twitter.com/user/$username/tweets/bundle?start_time=$start_time&end_time=$end_time&lang=en";
//curl_setopt($ch, CURLOPT_URL, $sTarget);
$data = curl_exec($ch);
$error = curl_error($ch);
// Overwrite the last scrape data with new scrape data. On the website, have
// PHP convert the CSV into a multidimensional array to use. Careful though:
// a large gap between start and end could make an extremely large array. To
// keep a history, save the CSV data to a database so that each curl get only
// needs to pull in a few values. Just a thought.
$destination = "./tweets.csv";
if ($data === false) {
    // Keep the previous file rather than truncating it on a failed transfer.
    error_log('Twitter scraper: export request failed: ' . $error);
} elseif (file_put_contents($destination, $data) === false) {
    error_log('Twitter scraper: could not write ' . $destination);
}
// Step 4: fetch the small graph data (account stats) for the same window and
// save it to graphs.json, replacing the previous scrape.
$sTarget = "https://analytics.twitter.com/user/" . rawurlencode($username) . "/tweets/account_stats.json?start_time=$start_time&end_time=$end_time";
curl_setopt($ch, CURLOPT_URL, $sTarget);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
// Save the body only: HTTP response headers in front of the payload would
// make graphs.json unparseable as JSON.
curl_setopt($ch, CURLOPT_HEADER, false);
$data = curl_exec($ch);
$error = curl_error($ch);
$destination = "./graphs.json";
if ($data === false) {
    // Keep the previous file rather than truncating it on a failed transfer.
    error_log('Twitter scraper: account_stats request failed: ' . $error);
} elseif (file_put_contents($destination, $data) === false) {
    error_log('Twitter scraper: could not write ' . $destination);
}
// Step 5: fetch the data used to draw the SVG timeline and save it to
// timeline.json, replacing the previous scrape. Redirects are not followed
// for this request.
$sTarget = "https://analytics.twitter.com/user/" . rawurlencode($username) . "/tweets/timeline.json?start_time=$start_time&max_id=0&end_time=$end_time&page=0&filter=no_replies&metric=clicks&lang=en";
curl_setopt($ch, CURLOPT_URL, $sTarget);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
// Options persist on a reused cURL handle, so turn header output off
// explicitly; otherwise HTTP response headers enabled for an earlier request
// would be written in front of the JSON.
curl_setopt($ch, CURLOPT_HEADER, false);
$data = curl_exec($ch);
$error = curl_error($ch);
$destination = "./timeline.json";
if ($data === false) {
    // Keep the previous file rather than truncating it on a failed transfer.
    error_log('Twitter scraper: timeline request failed: ' . $error);
} elseif (file_put_contents($destination, $data) === false) {
    error_log('Twitter scraper: could not write ' . $destination);
}
// All requests are done; release the handle.
curl_close($ch);
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment