Skip to content

Instantly share code, notes, and snippets.

@Lemmings19
Last active April 24, 2017 06:53
Show Gist options
  • Save Lemmings19/4a9b76727a5f64dedf87618a94ba449e to your computer and use it in GitHub Desktop.
Save Lemmings19/4a9b76727a5f64dedf87618a94ba449e to your computer and use it in GitHub Desktop.
Hit StackOverflow's API and save to a file.
<?php
/**
* This script hits StackOverflow's API and prints the results out to a file in JSON format.
*
* This script specifically targets tags, but you could adapt it to target other things.
*/
// Open the output file up front so an unwritable path fails before any API quota is spent.
// NOTE: append mode ('a') is kept as-is; running the script twice against the same file
// leaves two JSON documents back to back in it.
$outputPath = '/mnt/projects/tags.txt';
$file = fopen($outputPath, 'a');
if ($file === false) {
    fwrite(STDERR, 'Could not open ' . $outputPath . " for writing.\n");
    exit(1);
}

/**
* Create a filter and pick a site here: https://api.stackexchange.com/docs/create-filter
* This example uses a filter created here: https://api.stackexchange.com/docs/tags
* This filter will fetch tags.
*
* This script requires that your filter include the following variables:
* - has_more
* - quota_remaining
*
* If the filter also includes "backoff", "error_id" and "error_message",
* the script will honour / report them.
*/
$filter = '!*MPoAKy)55yzL2y';
$site = 'stackoverflow';
$pageSize = 100; // limit is 100
$pages = 999; // You will probably run out of results or hit your quota before a high number like 999 is reached

// If we want to rate limit by time, in seconds (fractions are allowed):
$timeBetweenCurls = 0.5;

$items = [];
$response = null; // last successfully decoded API response
$forcedExit = false;
$processedCount = 0;

$curl = curl_init();
if ($curl === false) {
    fclose($file);
    fwrite(STDERR, "Could not initialise cURL.\n");
    exit(1);
}
// These options do not change between requests, so set them once.
curl_setopt($curl, CURLOPT_ENCODING, 'gzip'); // StackOverflow's API encodes its responses
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

print("Pages processed:\n");
for ($i = 1; $i <= $pages; $i++) {
    print('.');

    // Build the query string with proper URL-encoding; the filter contains
    // reserved characters such as '!', '*' and ')'.
    $query = http_build_query([
        'page' => $i,
        'pagesize' => $pageSize,
        'order' => 'asc',
        'sort' => 'name',
        'site' => $site,
        'filter' => $filter,
    ], '', '&', PHP_QUERY_RFC3986);
    curl_setopt($curl, CURLOPT_URL, 'https://api.stackexchange.com/2.2/tags?' . $query);

    $body = curl_exec($curl);
    if ($body === false) {
        $forcedExit = true;
        print("\nRequest failed: " . curl_error($curl));
        break;
    }

    $decoded = json_decode($body, true);
    if (!is_array($decoded)) {
        $forcedExit = true;
        print("\nUnexpected response (not a JSON object): " . json_last_error_msg());
        break;
    }
    $response = $decoded;

    // The API reports failures (throttling, bad parameters, ...) inside the JSON wrapper.
    if (isset($response['error_id'])) {
        $forcedExit = true;
        $errorMessage = isset($response['error_message']) ? $response['error_message'] : 'unknown error';
        print("\nAPI error " . $response['error_id'] . ': ' . $errorMessage);
        break;
    }

    if (isset($response['items']) && is_array($response['items'])) {
        foreach ($response['items'] as $item) {
            $items[] = $item;
            $processedCount++;
        }
    }

    if (empty($response['has_more'])) {
        $forcedExit = true;
        print("\nNo results left.");
        break;
    } elseif (isset($response['quota_remaining']) && $response['quota_remaining'] == 0) {
        $forcedExit = true;
        print("\nQuota exceeded.");
        break;
    }

    // sleep() only accepts whole seconds (0.5 would be truncated to 0), so use usleep().
    // When the API asks us to back off, wait at least that long.
    $delay = $timeBetweenCurls;
    if (isset($response['backoff']) && $response['backoff'] > $delay) {
        $delay = $response['backoff'];
    }
    if ($delay > 0) {
        usleep((int) round($delay * 1000000));
    }
}

print("\nPrinting to file...");
$json = json_encode($items);
if ($json === false) {
    $forcedExit = true;
    print("\nCould not encode items as JSON: " . json_last_error_msg());
} elseif (fwrite($file, $json) === false) {
    $forcedExit = true;
    print("\nCould not write to " . $outputPath . '.');
}

curl_close($curl);
fclose($file);

if (!$forcedExit) {
    print("\nFinished without error.");
}
// $response stays null when no request ever succeeded, so guard the lookup.
$quotaRemaining = (is_array($response) && isset($response['quota_remaining']))
    ? $response['quota_remaining']
    : 'unknown';
print("\nRemaining quota is " . $quotaRemaining . ".");
print("\nProcessed " . $processedCount . " items.");
print("\nExiting...\n");
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment