Last active
November 29, 2022 02:29
-
-
Save jamescridland/e982520faa5e5bc3cfcf926e768c3126 to your computer and use it in GitHub Desktop.
Get all OP3 podcast hits from a single day
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Fetch every OP3 podcast hit recorded on one specific day.
// Very rough PHP code from [email protected]

// Keep a bearer token that was configured before this point; otherwise fall
// back to the preview token below.
if (empty($config['op3-bearer'])) {
    // Grab an API key here: https://op3.dev/api/keys
    // Get the bearer and add it in the line below.
    // For testing purposes, the preview bearer given here will also work.
    $config['op3-bearer'] = "preview07ce";
}

$datefrom = "2022-11-25"; // this is the date we are going to grab from (at midnight)

// Calculate the end of the window: one day after $datefrom (at midnight).
// date_create() returns false for an unparseable date, so stop early with a
// readable message instead of failing inside date_add().
$dateto = date_create($datefrom);
if ($dateto === false) {
    echo 'Invalid $datefrom: ' . $datefrom;
    exit;
}
$dateto = date_add($dateto, date_interval_create_from_date_string("1 day"));
// $dateto is stored as a formatted string; the request code re-parses it with date_create().
$dateto = date_format($dateto, "Y-m-d\TH:i:s");

$requests = 1000; // we grab 1,000 rows per API call, to keep the number of calls low

// Here is the URL search we're doing. Podnews uses the "pg" - podcast guid - URL structure here.
// Podnews, all episodes
$itemurl = 'https://op3.dev/e,pg=9b024349-ccf0-5f69-a609-6b82873eab3c/podnews.net/audio/podnews*';
// Alternatives for fun
// $itemurl='https://op3.dev/e/mp3s.nashownotes.com/PC20-111-*'; // Podcasting 2.0 podcast, episode 111, released 18 Nov 22
// $itemurl='https://op3.dev/e/chrt.fm/track/4EB79A/https://verifi.podscribe.com/rss/p/traffic.megaphone.fm/GLSS7428822213.mp3*'; // Practical Stoicism, episode S3/E26, released 25 Nov 22
// $itemurl='https://op3.dev/e/https://dts.podtrac.com/redirect.mp3/www.buzzsprout.com/231452/11757910-apple-s-hidden-project-to-help-podcasts-get-discovered.mp3'; // Buzzcast ep 90, released 25 Nov 22
// $itemurl='https://op3.dev/e/mp3s.nashownotes.com/NA-1506-*'; // NoAgenda, episode 1506, released 24 Nov 22. Caution: not recommended. VERY big: more than 384,000 hits in the one day. Will result in 384 API grabs, and probably exhausting all your memory.

// Set a user agent, and mark HTTP errors to be ignored so that the body of an
// error response (which carries the error message) can still be read.
$context = stream_context_create(array(
    'http' => array(
        'method' => "GET",
        'ignore_errors' => true,
        'header' => "User-Agent: PodnewsBot/1.0 https://podnews.net\r\n",
    ),
));
// Page through the OP3 redirect-logs API, collecting every row for the window
// [$datefrom, $dateto) into $data. Each request asks for up to $requests rows;
// a full page means there may be more, so the next request starts after the
// time of the last row received.
$data = array(); // this is where all the data will go
$fetch = true;
$fetchdate = $datefrom; // cursor: each request asks for rows after this time
// NOTE(review): "startAfter" is presumably exclusive, so a hit stamped exactly
// at midnight on $datefrom may be left out of the first page - confirm against the OP3 API docs.

// The end of the window never changes, so format it once. The timestamp is cut
// to 23 characters (millisecond precision) and 'Z' is appended.
$op3end = substr(date_format(date_create($dateto), "Y-m-d\TH:i:s.u"), 0, 23) . 'Z';

while ($fetch) {
    // http_build_query() URL-encodes every value; $itemurl in particular
    // contains characters (":", "/", ",", "=", "*") that are not safe to
    // paste into a query string as-is.
    $op3url = "https://op3.dev/api/1/redirect-logs?" . http_build_query(array(
        'startAfter' => substr(date_format(date_create($fetchdate), "Y-m-d\TH:i:s.u"), 0, 23) . 'Z',
        'end' => $op3end,
        'format' => 'json',
        'url' => $itemurl,
        'limit' => $requests,
        'token' => $config['op3-bearer'],
    ), '', '&');

    // Uncomment the below to see every call this makes to OP3. It should be one every $requests hits.
    // echo '<hr>Calling URL: '.$op3url;

    $op3body = file_get_contents($op3url, false, $context);
    if ($op3body === false) {
        // Transport-level failure (DNS, connection, TLS...): there is no body to decode.
        echo 'Could not fetch data from the OP3 API.';
        exit;
    }

    $op3data = json_decode($op3body, true);
    if (!is_array($op3data)) {
        echo 'The OP3 API returned a response that could not be decoded as JSON.';
        exit;
    }

    if (isset($op3data['message'])) {
        // The response carries a message instead of data: show it and stop.
        // It is escaped because it is remote text going into an HTML page.
        echo htmlspecialchars((string) $op3data['message'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        exit;
    }

    // Treat a missing or malformed "rows" member as an empty page.
    $op3rows = (isset($op3data['rows']) && is_array($op3data['rows'])) ? $op3data['rows'] : array();
    $data = array_merge($data, $op3rows); // add to $data all the rows we've just got

    if (!empty($op3rows[$requests - 1]['time'])) {
        // We asked for $requests rows and got them all, so move the cursor to
        // the time of the last row and keep stepping through.
        // Note: for a *very* heavy podcast, there is a slight possibility that
        // several hits share the same millisecond as that last row. Those hits
        // could be skipped, but only at this exact page boundary.
        $fetchdate = $op3rows[$requests - 1]['time'];
    } else {
        // We didn't get the full number of $requests, so we're done.
        $fetch = false;
    }
}
// Render a simple HTML report from $data (the rows collected from OP3):
// totals, then per-continent, per-country, per-timezone and per-referer
// counts, then a dump of every row.

// Escapes a value for inclusion in HTML. The row fields (referers in
// particular) are remote text, so nothing from $data is printed raw.
$op3escape = function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

echo '<h1>OP3 stats</h1>';
echo 'For: ' . $op3escape($itemurl) . '<br>';
echo 'On: ' . date_format(date_create($datefrom), "Y-m-d\TH:i:s.u");

echo "<h2>Totals</h2>";
// The line break keeps the two totals from running into each other.
echo number_format(count($data), 0) . ' hits<br>';
echo number_format(count(array_unique(array_column($data, 'hashedIpAddress')))) . ' unique IPs';

echo "<h2>Continents</h2>";
$continentcodes = array("AF"=>"Africa","NA"=>"North America","OC"=>"Oceania","AN"=>"Antarctica","AS"=>"Asia","EU"=>"Europe","SA"=>"South America");
$continents = array_count_values(array_column($data, 'continent')); // count all the 'continent' values
arsort($continents); // sort so biggest is top
foreach ($continents as $continentcode => $continentcount) {
    // Fall back to the raw code when it is not in the lookup table, rather
    // than triggering an undefined-index warning and printing a blank label.
    $continentname = isset($continentcodes[$continentcode]) ? $continentcodes[$continentcode] : $continentcode;
    echo $op3escape($continentname) . ': ' . $continentcount . '<br>';
}

echo "<h2>Countries</h2>";
$countries = array_count_values(array_column($data, 'country'));
arsort($countries);
echo $op3escape(print_r($countries, true));

echo "<h2>Timezones</h2>";
$timezones = array_count_values(array_column($data, 'timezone'));
arsort($timezones);
echo $op3escape(print_r($timezones, true));

echo "<h2>Referers</h2>";
$referers = array_count_values(array_column($data, 'referer'));
arsort($referers);
echo $op3escape(print_r($referers, true));

echo '<h2>Total data</h2>';
echo '<pre>';
echo $op3escape(print_r($data, true));
echo '</pre>';
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment