Last active
April 20, 2017 08:14
-
-
Save CNG/1019db1a536ad325590f to your computer and use it in GitHub Desktop.
Movable Type + Field Day popular stories script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* | |
This script looks up items most viewed according to Google Analytics in a given time span and given certain blog or entry restrictions, returning either a listing for debugging or JSON data for further processing. | |
The possible parameters are: | |
* days int Number of days of history to pull results from. Default is 7. | |
* entry_id int Entry IDs (from Movable Type) of entries that should NOT be included in results, such as to prevent the currently viewed story from showing up in a "Read Next" sidebar. Multiple IDs can be provided by supplying multiple instances of this parameter, i.e. entry_id=300&entry_id=301 | |
* incl_path string To limit the results to certain URL paths, supply one or more of this parameter. For example, to only show entries from the Blog A and Blog B blogs, provide these parameters: incl_path=/blogs/blog-a&incl_path=/blogs/blog-b. Default is /articles and /blogs, excluding blog indexes. | |
* excl_path string To exclude certain URL paths from the results, supply one or more of this parameter. For example, to show no entries from the Blog A and Blog B blogs, provide these parameters: excl_path=/blogs/blog-a&excl_path=/blogs/blog-b. | |
* limit int Maximum number of results to return. Default is 60. Maximum without supplying password parameter is 10. If a higher limit is supplied, but no password parameter is given, the limit is reduced to 10. | |
* password string Must be set to bellyflop to allow for limits higher than 10. | |
* format string Set to JSON or HTML depending on return format desired. | |
Example call excluding a certain entry and including only Blog A items, looking at past 7 days of hits: pop.php?days=3&entry_id=101&incl_path=/blogs/blog-b | |
Must update below: | |
- PASSWORD | |
- server.com | |
- REFRESH_TOKEN | |
- CLIENT_ID | |
- CLIENT_SECRET | |
- 620 | |
*/ | |
/* | |
* STAGE ONE: PARSE MOVABLE TYPE CONFIGURATION FILE, SET UP DATABASE CONNECTION | |
*/ | |
// retrieve MT configuration file so we can get the DB connection info | |
$config = array(); | |
$lines = explode( "\n", file_get_contents('/var/www/cgi-bin/mt/mt-config.cgi') ); | |
foreach( $lines as &$line ){ | |
$line = preg_split('/\s+/', $line, 2); | |
if( substr( $line[0], 0, 1 ) !== '#' && $line[0] !== '' ){ | |
$config[$line[0]] = $line[1]; | |
} | |
} | |
// create database connection using info found in MT config file | |
$pdo = new PDO('mysql:host='.$config['DBHost'].';dbname='.$config['Database'], $config['DBUser'], $config['DBPassword']); | |
/* | |
* STAGE TWO: RETRIEVE POPULAR ENTRIES FROM GOOGLE CORE REPORTING API | |
*/ | |
// parse query parameters such that multiples can be supplied and recognized | |
$query = explode('&', $_SERVER['QUERY_STRING']); | |
$query = array_filter($query); | |
$params = array(); | |
foreach( $query as $param ){ | |
list($name, $value) = array_pad(explode('=', $param, 2), 2, null); | |
$params[urldecode($name)][] = urldecode($value); | |
} | |
foreach( $params as $key => $value){ | |
$params[$key] = array_unique($value); | |
} | |
// ensure params meet expected values and set the variables we will pass to getResults() | |
if( $params['entry_id'] ){ | |
$entry_ids = $params['entry_id']; | |
array_walk( $entry_ids, 'intval' ); | |
} | |
if( $params['excl_path'] ){ | |
$excl_paths = $params['excl_path']; | |
array_walk( $excl_paths, 'cleanURLSegment' ); | |
} | |
if( $params['incl_path'] ){ | |
$incl_paths = $params['incl_path']; | |
array_walk( $incl_paths, 'cleanURLSegment' ); | |
} | |
if( $params['days'] ) $days = intval( $params['days'][0] ); | |
if( $params['limit'] ){ | |
$limit = intval( $params['limit'][0] ); | |
} else { | |
$limit = 10; | |
} | |
if( $limit > 10 ){ | |
// lower limit to 1000 unless password set correctly | |
if( ! $params['password'][0] || $params['password'][0] !== 'PASSWORD' ){ | |
$limit = 10; | |
} | |
} | |
// DETERMINE DATE OF LAST ENTRY IN MT, TO AVOID GETTING TOO MUCH CONTENT THAT ISN'T ON DEV | |
// this would work better if we instead filtered the GA call to look only at entry IDs less than the most recent entry ID in MT | |
// but we'll just set this to 0 on server.com for now | |
$days_offset = 0; | |
$isAppServer = !strcmp($_SERVER['SERVER_NAME'],'server.com'); | |
if( ! $isAppServer ){ | |
$days_offset = getDaysOffset( $pdo, $incl_paths ); | |
} | |
// perform query | |
$results = getResults($entry_ids, $excl_paths, $incl_paths, $days_offset, $days, $limit); | |
/* | |
* STAGE THREE: COMPILE OUTPUT WITH ADDITIONAL INFO FROM MT DATABASE ABOUT THOSE ENTRIES | |
*/ | |
$output = array(); | |
$counter = 0; | |
foreach($results as $result) { | |
if( count( $output ) >= $limit ){ break; } | |
list( $pageTitle, $pagePath, $entry_id, $pageviews ) = $result; | |
$mt_lookup = getInfo( $pdo, $entry_id ); | |
if( $mt_lookup === false ){ continue; } | |
extract($mt_lookup); | |
$output[] = array( | |
'path' => $pagePath, | |
'title' => $title, | |
'id' => $entry_id, | |
'views' => $pageviews, | |
'img' => $img, | |
); | |
} | |
/* | |
* STAGE FOUR: OUTPUT | |
*/ | |
$format = isset( $params['format'] ) ? $params['format'][0] : 'HTML'; | |
// $output[5]['title'] = substr($output[5]['title'], 0, -1); | |
switch( $format ){ | |
case 'JSON': | |
echo json_encode($output); | |
break; | |
case 'HTML': | |
default: | |
//echo mb_detect_encoding($output[5]['title'], 'UTF-8'); | |
//echo var_dump($output); | |
foreach( $output as $result ){ | |
extract($result); | |
echo "<p><a href='$path'>$title</a> (#$id, $views views)</p><p><img src='$img'></p>"; | |
} | |
break; | |
} | |
/* | |
* FUNCTIONS | |
*/ | |
function cleanURLSegment( &$item, $key ){ | |
$item = preg_replace( '/[^-_\/a-z0-9]*/i', '', $item ); | |
} | |
function getResults($entry_ids = false, $excl_paths = false, $incl_paths = false, $days_offset = 0, $days = 7, $limit = 60){ | |
$data = array( | |
'refresh_token' => 'REFRESH_TOKEN', | |
'client_id' => 'CLIENT_ID', | |
'client_secret' => 'CLIENT_SECRET', | |
'grant_type' => 'refresh_token' | |
); | |
// Retrieve access token using stored refresh token | |
$ch = curl_init(); | |
curl_setopt( $ch, CURLOPT_URL, 'https://accounts.google.com/o/oauth2/token' ); | |
curl_setopt( $ch, CURLOPT_POST, 1 ); | |
curl_setopt( $ch, CURLOPT_POSTFIELDS, http_build_query( $data ) ); | |
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 ); | |
curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, 5 ); | |
$data = curl_exec( $ch ); | |
curl_close( $ch ); | |
$results = json_decode( $data, true ); | |
if( ! isset( $results['access_token'] ) ){ | |
return false; | |
} | |
// build filters first since they are more complex | |
{ | |
// hold excluded entries | |
$entry_excl = ''; | |
if( ! empty( $entry_ids ) ){ | |
if( is_array( $entry_ids ) ) { | |
$entry_ids = implode( '|', $entry_ids ); | |
} | |
$entry_excl = "ga:customVarValue5!~^($entry_ids)$;"; | |
} | |
// hold excluded blog addresses | |
$blog_excl = ''; | |
if( ! empty( $excl_paths ) ){ | |
if( ! is_array( $excl_paths ) ){ | |
$excl_paths = array( $excl_paths ); | |
} | |
foreach( $excl_paths as $path ){ | |
if( strlen( $path ) ){ // ignore empty strings | |
$blog_excl .= "ga:pagePath!@$path;"; | |
} | |
} | |
} | |
$filters_or = array(); // hold the OR statements, which take precedance over AND | |
// hold included blog addresses | |
if( ! empty( $incl_paths ) ){ | |
if( ! is_array( $incl_paths ) ){ | |
$incl_paths = array( $incl_paths ); | |
} | |
foreach( $incl_paths as $path ){ | |
if( strlen( $path ) ){ // ignore empty strings | |
$filters_or[] = "ga:pagePath=@$path"; | |
} | |
} | |
} | |
if( ! count( $filters_or ) ){ // in case previous block didn't yield anything due to empty strings | |
// show articles | |
$filters_or[] = 'ga:pagePath=@/articles'; | |
// show blog entries | |
$filters_or[] = 'ga:pagePath=@/blogs'; | |
} | |
$blog_incl = 'ga:pagePath!~^/blogs.*index\.(html|php)$;' . implode( ',', $filters_or ); | |
} | |
// prevent errors later | |
date_default_timezone_set('America/New_York'); | |
$data = array( | |
'key' => 'AIzaSyDW5RwT0eTOkHYSoBEx6rrLaQw2enQ11CI', | |
'ids' => 'ga:3360489', | |
'dimensions' => 'ga:pageTitle,ga:pagePath,ga:customVarValue5', // simply including customVarValue5 here precludes results without this set, so we don't need to try to filter for only entries | |
'metrics' => 'ga:pageviews', | |
//'filters' => 'ga:customVarName5==Entry ID', // this isn't necessary, but here in case other non-entry pages get a customVar5 in the future used for a different purpose | |
'filters' => $blog_excl . $entry_excl . $blog_incl, // without this, we get packages and maybe other things that are individual entries and hence have "Entry ID" custom var set | |
'sort' => '-ga:pageviews', | |
'start-date' => date( 'Y-m-d', strtotime( '-' . ( $days + $days_offset ) . ' days' ) ), | |
'end-date' => date( 'Y-m-d', strtotime( '-' . ( $days_offset ) . ' days' ) ), | |
'max-results' => ceil( $limit * 1.5 + 5 ), | |
); | |
$url = 'https://www.googleapis.com/analytics/v3/data/ga?' . http_build_query( $data ); | |
$ch = curl_init(); | |
curl_setopt( $ch, CURLOPT_URL, $url ); | |
curl_setopt( $ch, CURLOPT_HTTPHEADER, array( 'Authorization: Bearer ' . $results['access_token'] ) ); | |
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 ); | |
curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, 5 ); | |
$data = curl_exec( $ch ); | |
curl_close( $ch ); | |
file_put_contents( $json_file, $data ); | |
$results = json_decode( $data, true ); | |
if( ! isset( $results['rows'] ) ){ | |
return false; | |
} | |
$deduped = array(); | |
foreach( $results['rows'] as $row ){ | |
if( ! isset( $deduped[ $row[2] ] ) ){ | |
$deduped[ $row[2] ] = $row; | |
} | |
} | |
return $deduped; | |
} | |
/** | |
* getDaysOffset returns the days since date of last published entry matching the paths supplied | |
* | |
* @access public | |
* @param mixed $pdo PDO object for MT database | |
* @param array $incl_paths | |
* @return int Number of days ago last entry was | |
*/ | |
function getDaysOffset( $pdo, $incl_paths = array() ){ | |
$where_paths = ''; | |
if( isset( $incl_paths ) && count( $incl_paths ) > 0 ){ | |
$where_paths .= 'AND ( '; | |
$index = 0; | |
foreach( $incl_paths as $incl_path ){ | |
$index++; | |
if( $index > 1 ){ | |
$where_paths .= ' OR '; | |
} | |
$where_paths .= "blog_site_path LIKE '%" . rtrim( $incl_path, '/' ) . "%'"; // not sure why LIKE doesn't work with the trailing slash | |
} | |
$where_paths .= ')'; | |
} | |
$statement = $pdo->query(" | |
SELECT entry_authored_on | |
FROM mt_entry | |
JOIN mt_blog ON entry_blog_id = blog_id | |
WHERE entry_status = 2 $where_paths | |
ORDER BY entry_authored_on DESC | |
LIMIT 1 | |
"); | |
$row = $statement->fetch(PDO::FETCH_ASSOC); | |
// continue unless no results | |
if( $row === false ){ | |
return 0; | |
} | |
extract($row); | |
$last_entry = new DateTime( $entry_authored_on ); | |
$now = new DateTime( "now" ); | |
$diff = $now->diff( $last_entry )->format("%a"); | |
if( $diff < 0 ){ $diff = 0; } | |
return $diff; | |
} | |
/** | |
* getInfo returns the thumbnail and title for a given entry. | |
* | |
* @access public | |
* @param mixed $pdo PDO object for MT database | |
* @param mixed $entry_id entry_id for entry to retrieve | |
* @return array Entry info as an array with keys 'img' and 'title', or false if entry ID invalid or image not found. | |
*/ | |
function getInfo( $pdo, $entry_id ){ | |
// WHERE ( f.fdvalue_key = 'blog_image_asset' OR f.fdvalue_key = 'article_image_asset' ) | |
$statement = $pdo->query(" | |
SELECT asset_file_path,asset_file_name,asset_created_on,asset_id, | |
entry_title, | |
bc.blog_site_url AS blog_site_url,bc.blog_site_path AS blog_site_path, | |
bp.blog_site_path AS website_site_path,bp.blog_site_url AS website_site_url,bp.blog_id AS website_blog_id | |
FROM mt_asset as a | |
JOIN mt_fdvalue as f ON a.asset_id = f.fdvalue_value | |
JOIN mt_entry as e ON e.entry_id = f.fdvalue_object_id | |
JOIN mt_blog as bc ON bc.blog_id = e.entry_blog_id | |
JOIN mt_blog as bp ON bp.blog_id = bc.blog_parent_id | |
WHERE f.fdvalue_key = 'blog_image_asset' | |
AND f.fdvalue_object_id = $entry_id | |
AND bp.blog_id = 91 | |
LIMIT 1 | |
"); | |
$row = $statement->fetch(PDO::FETCH_ASSOC); | |
// continue unless no results | |
if( $row === false ){ | |
return false; | |
} | |
extract($row); | |
// clean up URLs and paths from MT database | |
$blog_site_url = explode( '/::/', $blog_site_url ); // first part is subdomain, which we're ignoring here… | |
$asset_file_path = ltrim( $asset_file_path, '%r/' ); | |
$website_site_url = rtrim( $website_site_url, '/' ) . '/'; | |
$website_site_path = rtrim( $website_site_path, '/' ) . '/'; | |
$blog_site_url = rtrim( $blog_site_url[1], '/' ) . '/'; | |
$blog_site_path = rtrim( $blog_site_path, '/' ) . '/'; | |
// save normalized URL and path | |
$site_url = $website_site_url . $blog_site_url; // blog_site_url always segment that must be added to website_site_url | |
$site_path = ( $blog_site_path === '/' ) ? $website_site_path : $blog_site_path; // blog_site_path seems complete if set | |
// save main image URL and path | |
$thumb_url = $site_url . $asset_file_path; | |
$thumb_path = $site_path . $asset_file_path; | |
// if image too wide, check for smaller autogenerated thumbnail | |
$img_info = getimagesize( $thumb_path ); | |
if( $img_info[0] > 620 ){ | |
$Y = substr( $asset_created_on, 0, 4 ); | |
$M = substr( $asset_created_on, 5, 2 ); | |
$D = substr( $asset_created_on, 8, 2 ); | |
$ext = strtolower( pathinfo( $asset_file_name, PATHINFO_EXTENSION ) ); | |
$name = pathinfo( $asset_file_name, PATHINFO_FILENAME ); | |
// commented section was to deal with Blog C, but that uses | |
// multiple thumbnail sizes, so decided to forgo this for now | |
//if( $website_blog_id == 92 ){ | |
// $thumb_width = 1200; | |
//} else { | |
$thumb_width = 620; | |
//} | |
// save thumbnail image URL and path instead of main image, which was too wide | |
$thumb_url = $site_url . "assets_c/$Y/$M/$name-thumb-{$thumb_width}xauto-$asset_id.$ext"; | |
$thumb_path = $site_path . "assets_c/$Y/$M/$name-thumb-{$thumb_width}xauto-$asset_id.$ext"; | |
} | |
// verify whatever image we settled on actually exists and return the info | |
if( file_exists( $thumb_path ) ){ | |
return array( | |
'img' => $thumb_url, | |
//'title' => $entry_title, | |
'title' => utf8_encode($entry_title), // need to encode or weird characters can cause problems | |
); | |
} else { | |
return false; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment