Skip to content

Instantly share code, notes, and snippets.

@CNG
Last active April 20, 2017 08:14
Show Gist options
  • Save CNG/1019db1a536ad325590f to your computer and use it in GitHub Desktop.
Save CNG/1019db1a536ad325590f to your computer and use it in GitHub Desktop.
Movable Type + Field Day popular stories script
<?php
/*
This script looks up items most viewed according to Google Analytics in a given time span and given certain blog or entry restrictions, returning either a listing for debugging or JSON data for further processing.
The possible parameters are:
* days int Number of days of history to pull results from. Default is 7.
* entry_id int Entry IDs (from Movable Type) of entries that should NOT be included in results, such as to prevent the currently viewed story from showing up in a "Read Next" sidebar. Multiple IDs can be provided by supplying multiple instances of this parameter, i.e. entry_id=300&entry_id=301
* incl_path string To limit the results to certain URL paths, supply one or more of this parameter. For example, to only show entries from the Blog A and Blog B blogs, provide these parameters: incl_path=/blogs/blog-a&incl_path=/blogs/blog-b. Default is /articles and /blogs, excluding blog indexes.
* excl_path string To exclude certain URL paths from the results, supply one or more of this parameter. For example, to show no entries from the Blog A and Blog B blogs, provide these parameters: excl_path=/blogs/blog-a&excl_path=/blogs/blog-b.
* limit int Maximum number of results to return. Default is 60. Maximum without supplying password parameter is 10. If a higher limit is supplied, but no password parameter is given, the limit is reduced to 10.
* password string Must be set to bellyflop to allow for limits higher than 10.
* format string Set to JSON or HTML depending on return format desired.
Example call excluding a certain entry and including only Blog A items, looking at past 7 days of hits: pop.php?days=3&entry_id=101&incl_path=/blogs/blog-b
Must update below:
- PASSWORD
- server.com
- REFRESH_TOKEN
- CLIENT_ID
- CLIENT_SECRET
- 620
*/
/*
* STAGE ONE: PARSE MOVABLE TYPE CONFIGURATION FILE, SET UP DATABASE CONNECTION
*/
// retrieve MT configuration file so we can get the DB connection info
$config = array();
$lines = explode( "\n", file_get_contents('/var/www/cgi-bin/mt/mt-config.cgi') );
foreach( $lines as &$line ){
$line = preg_split('/\s+/', $line, 2);
if( substr( $line[0], 0, 1 ) !== '#' && $line[0] !== '' ){
$config[$line[0]] = $line[1];
}
}
// create database connection using info found in MT config file
$pdo = new PDO('mysql:host='.$config['DBHost'].';dbname='.$config['Database'], $config['DBUser'], $config['DBPassword']);
/*
* STAGE TWO: RETRIEVE POPULAR ENTRIES FROM GOOGLE CORE REPORTING API
*/
// parse query parameters such that multiples can be supplied and recognized
$query = explode('&', $_SERVER['QUERY_STRING']);
$query = array_filter($query);
$params = array();
foreach( $query as $param ){
list($name, $value) = array_pad(explode('=', $param, 2), 2, null);
$params[urldecode($name)][] = urldecode($value);
}
foreach( $params as $key => $value){
$params[$key] = array_unique($value);
}
// ensure params meet expected values and set the variables we will pass to getResults()
if( $params['entry_id'] ){
$entry_ids = $params['entry_id'];
array_walk( $entry_ids, 'intval' );
}
if( $params['excl_path'] ){
$excl_paths = $params['excl_path'];
array_walk( $excl_paths, 'cleanURLSegment' );
}
if( $params['incl_path'] ){
$incl_paths = $params['incl_path'];
array_walk( $incl_paths, 'cleanURLSegment' );
}
if( $params['days'] ) $days = intval( $params['days'][0] );
if( $params['limit'] ){
$limit = intval( $params['limit'][0] );
} else {
$limit = 10;
}
if( $limit > 10 ){
// lower limit to 1000 unless password set correctly
if( ! $params['password'][0] || $params['password'][0] !== 'PASSWORD' ){
$limit = 10;
}
}
// DETERMINE DATE OF LAST ENTRY IN MT, TO AVOID GETTING TOO MUCH CONTENT THAT ISN'T ON DEV
// this would work better if we instead filtered the GA call to look only at entry IDs less than the most recent entry ID in MT
// but we'll just set this to 0 on server.com for now
$days_offset = 0;
$isAppServer = !strcmp($_SERVER['SERVER_NAME'],'server.com');
if( ! $isAppServer ){
$days_offset = getDaysOffset( $pdo, $incl_paths );
}
// perform query
$results = getResults($entry_ids, $excl_paths, $incl_paths, $days_offset, $days, $limit);
/*
* STAGE THREE: COMPILE OUTPUT WITH ADDITIONAL INFO FROM MT DATABASE ABOUT THOSE ENTRIES
*/
$output = array();
$counter = 0;
foreach($results as $result) {
if( count( $output ) >= $limit ){ break; }
list( $pageTitle, $pagePath, $entry_id, $pageviews ) = $result;
$mt_lookup = getInfo( $pdo, $entry_id );
if( $mt_lookup === false ){ continue; }
extract($mt_lookup);
$output[] = array(
'path' => $pagePath,
'title' => $title,
'id' => $entry_id,
'views' => $pageviews,
'img' => $img,
);
}
/*
* STAGE FOUR: OUTPUT
*/
$format = isset( $params['format'] ) ? $params['format'][0] : 'HTML';
// $output[5]['title'] = substr($output[5]['title'], 0, -1);
switch( $format ){
case 'JSON':
echo json_encode($output);
break;
case 'HTML':
default:
//echo mb_detect_encoding($output[5]['title'], 'UTF-8');
//echo var_dump($output);
foreach( $output as $result ){
extract($result);
echo "<p><a href='$path'>$title</a> (#$id, $views views)</p><p><img src='$img'></p>";
}
break;
}
/*
* FUNCTIONS
*/
function cleanURLSegment( &$item, $key ){
$item = preg_replace( '/[^-_\/a-z0-9]*/i', '', $item );
}
function getResults($entry_ids = false, $excl_paths = false, $incl_paths = false, $days_offset = 0, $days = 7, $limit = 60){
$data = array(
'refresh_token' => 'REFRESH_TOKEN',
'client_id' => 'CLIENT_ID',
'client_secret' => 'CLIENT_SECRET',
'grant_type' => 'refresh_token'
);
// Retrieve access token using stored refresh token
$ch = curl_init();
curl_setopt( $ch, CURLOPT_URL, 'https://accounts.google.com/o/oauth2/token' );
curl_setopt( $ch, CURLOPT_POST, 1 );
curl_setopt( $ch, CURLOPT_POSTFIELDS, http_build_query( $data ) );
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, 5 );
$data = curl_exec( $ch );
curl_close( $ch );
$results = json_decode( $data, true );
if( ! isset( $results['access_token'] ) ){
return false;
}
// build filters first since they are more complex
{
// hold excluded entries
$entry_excl = '';
if( ! empty( $entry_ids ) ){
if( is_array( $entry_ids ) ) {
$entry_ids = implode( '|', $entry_ids );
}
$entry_excl = "ga:customVarValue5!~^($entry_ids)$;";
}
// hold excluded blog addresses
$blog_excl = '';
if( ! empty( $excl_paths ) ){
if( ! is_array( $excl_paths ) ){
$excl_paths = array( $excl_paths );
}
foreach( $excl_paths as $path ){
if( strlen( $path ) ){ // ignore empty strings
$blog_excl .= "ga:pagePath!@$path;";
}
}
}
$filters_or = array(); // hold the OR statements, which take precedance over AND
// hold included blog addresses
if( ! empty( $incl_paths ) ){
if( ! is_array( $incl_paths ) ){
$incl_paths = array( $incl_paths );
}
foreach( $incl_paths as $path ){
if( strlen( $path ) ){ // ignore empty strings
$filters_or[] = "ga:pagePath=@$path";
}
}
}
if( ! count( $filters_or ) ){ // in case previous block didn't yield anything due to empty strings
// show articles
$filters_or[] = 'ga:pagePath=@/articles';
// show blog entries
$filters_or[] = 'ga:pagePath=@/blogs';
}
$blog_incl = 'ga:pagePath!~^/blogs.*index\.(html|php)$;' . implode( ',', $filters_or );
}
// prevent errors later
date_default_timezone_set('America/New_York');
$data = array(
'key' => 'AIzaSyDW5RwT0eTOkHYSoBEx6rrLaQw2enQ11CI',
'ids' => 'ga:3360489',
'dimensions' => 'ga:pageTitle,ga:pagePath,ga:customVarValue5', // simply including customVarValue5 here precludes results without this set, so we don't need to try to filter for only entries
'metrics' => 'ga:pageviews',
//'filters' => 'ga:customVarName5==Entry ID', // this isn't necessary, but here in case other non-entry pages get a customVar5 in the future used for a different purpose
'filters' => $blog_excl . $entry_excl . $blog_incl, // without this, we get packages and maybe other things that are individual entries and hence have "Entry ID" custom var set
'sort' => '-ga:pageviews',
'start-date' => date( 'Y-m-d', strtotime( '-' . ( $days + $days_offset ) . ' days' ) ),
'end-date' => date( 'Y-m-d', strtotime( '-' . ( $days_offset ) . ' days' ) ),
'max-results' => ceil( $limit * 1.5 + 5 ),
);
$url = 'https://www.googleapis.com/analytics/v3/data/ga?' . http_build_query( $data );
$ch = curl_init();
curl_setopt( $ch, CURLOPT_URL, $url );
curl_setopt( $ch, CURLOPT_HTTPHEADER, array( 'Authorization: Bearer ' . $results['access_token'] ) );
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, 5 );
$data = curl_exec( $ch );
curl_close( $ch );
file_put_contents( $json_file, $data );
$results = json_decode( $data, true );
if( ! isset( $results['rows'] ) ){
return false;
}
$deduped = array();
foreach( $results['rows'] as $row ){
if( ! isset( $deduped[ $row[2] ] ) ){
$deduped[ $row[2] ] = $row;
}
}
return $deduped;
}
/**
* getDaysOffset returns the days since date of last published entry matching the paths supplied
*
* @access public
* @param mixed $pdo PDO object for MT database
* @param array $incl_paths
* @return int Number of days ago last entry was
*/
function getDaysOffset( $pdo, $incl_paths = array() ){
$where_paths = '';
if( isset( $incl_paths ) && count( $incl_paths ) > 0 ){
$where_paths .= 'AND ( ';
$index = 0;
foreach( $incl_paths as $incl_path ){
$index++;
if( $index > 1 ){
$where_paths .= ' OR ';
}
$where_paths .= "blog_site_path LIKE '%" . rtrim( $incl_path, '/' ) . "%'"; // not sure why LIKE doesn't work with the trailing slash
}
$where_paths .= ')';
}
$statement = $pdo->query("
SELECT entry_authored_on
FROM mt_entry
JOIN mt_blog ON entry_blog_id = blog_id
WHERE entry_status = 2 $where_paths
ORDER BY entry_authored_on DESC
LIMIT 1
");
$row = $statement->fetch(PDO::FETCH_ASSOC);
// continue unless no results
if( $row === false ){
return 0;
}
extract($row);
$last_entry = new DateTime( $entry_authored_on );
$now = new DateTime( "now" );
$diff = $now->diff( $last_entry )->format("%a");
if( $diff < 0 ){ $diff = 0; }
return $diff;
}
/**
* getInfo returns the thumbnail and title for a given entry.
*
* @access public
* @param mixed $pdo PDO object for MT database
* @param mixed $entry_id entry_id for entry to retrieve
* @return array Entry info as an array with keys 'img' and 'title', or false if entry ID invalid or image not found.
*/
function getInfo( $pdo, $entry_id ){
// WHERE ( f.fdvalue_key = 'blog_image_asset' OR f.fdvalue_key = 'article_image_asset' )
$statement = $pdo->query("
SELECT asset_file_path,asset_file_name,asset_created_on,asset_id,
entry_title,
bc.blog_site_url AS blog_site_url,bc.blog_site_path AS blog_site_path,
bp.blog_site_path AS website_site_path,bp.blog_site_url AS website_site_url,bp.blog_id AS website_blog_id
FROM mt_asset as a
JOIN mt_fdvalue as f ON a.asset_id = f.fdvalue_value
JOIN mt_entry as e ON e.entry_id = f.fdvalue_object_id
JOIN mt_blog as bc ON bc.blog_id = e.entry_blog_id
JOIN mt_blog as bp ON bp.blog_id = bc.blog_parent_id
WHERE f.fdvalue_key = 'blog_image_asset'
AND f.fdvalue_object_id = $entry_id
AND bp.blog_id = 91
LIMIT 1
");
$row = $statement->fetch(PDO::FETCH_ASSOC);
// continue unless no results
if( $row === false ){
return false;
}
extract($row);
// clean up URLs and paths from MT database
$blog_site_url = explode( '/::/', $blog_site_url ); // first part is subdomain, which we're ignoring here…
$asset_file_path = ltrim( $asset_file_path, '%r/' );
$website_site_url = rtrim( $website_site_url, '/' ) . '/';
$website_site_path = rtrim( $website_site_path, '/' ) . '/';
$blog_site_url = rtrim( $blog_site_url[1], '/' ) . '/';
$blog_site_path = rtrim( $blog_site_path, '/' ) . '/';
// save normalized URL and path
$site_url = $website_site_url . $blog_site_url; // blog_site_url always segment that must be added to website_site_url
$site_path = ( $blog_site_path === '/' ) ? $website_site_path : $blog_site_path; // blog_site_path seems complete if set
// save main image URL and path
$thumb_url = $site_url . $asset_file_path;
$thumb_path = $site_path . $asset_file_path;
// if image too wide, check for smaller autogenerated thumbnail
$img_info = getimagesize( $thumb_path );
if( $img_info[0] > 620 ){
$Y = substr( $asset_created_on, 0, 4 );
$M = substr( $asset_created_on, 5, 2 );
$D = substr( $asset_created_on, 8, 2 );
$ext = strtolower( pathinfo( $asset_file_name, PATHINFO_EXTENSION ) );
$name = pathinfo( $asset_file_name, PATHINFO_FILENAME );
// commented section was to deal with Blog C, but that uses
// multiple thumbnail sizes, so decided to forgo this for now
//if( $website_blog_id == 92 ){
// $thumb_width = 1200;
//} else {
$thumb_width = 620;
//}
// save thumbnail image URL and path instead of main image, which was too wide
$thumb_url = $site_url . "assets_c/$Y/$M/$name-thumb-{$thumb_width}xauto-$asset_id.$ext";
$thumb_path = $site_path . "assets_c/$Y/$M/$name-thumb-{$thumb_width}xauto-$asset_id.$ext";
}
// verify whatever image we settled on actually exists and return the info
if( file_exists( $thumb_path ) ){
return array(
'img' => $thumb_url,
//'title' => $entry_title,
'title' => utf8_encode($entry_title), // need to encode or weird characters can cause problems
);
} else {
return false;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment