Created
February 3, 2017 15:10
-
-
Save langemike/bf136171b49e8db787b7e5780531a3cb to your computer and use it in GitHub Desktop.
Simple YouTube API Crawler class for Wordpress
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
use Illuminate\Support\Arr; | |
use Illuminate\Support\Str; | |
use Illuminate\Database\Capsule\Manager as Capsule; | |
/** | |
* Simple YouTube Crawler class for WordPress
* | |
* @author Mike van Veenhuijzen <[email protected]> | |
* | |
**/ | |
class YoutubeCrawler { | |
/** Default `maxResults` value sent with every YouTube API list call. */
const MAX_RESULTS = 50;

/** Outcome values returned by save() for a single video. */
const VIDEO_STATUS_SUCCESS = 'success';
const VIDEO_STATUS_FAILED = 'failure';
const VIDEO_STATUS_SKIPPED = 'skipped';

/**
 * YouTube API service; assigned by setup().
 * @var Google_Service_YouTube $api
 */
protected $api = null;

/**
 * When false, log() does not write anything.
 * @var bool $log
 **/
protected $log = true;

/**
 * Criteria that API items must satisfy, grouped by resource kind
 * (the item kind with the "youtube#" prefix removed).
 *
 * Within a kind, every key is a dot-notated path into the item and every
 * value is a condition with a 'compare' operator and a 'value' operand,
 * evaluated by test().
 *
 * @var array $criteria
 **/
protected $criteria = [
    'playlistItem' => [
        'snippet.title' => [
            'compare' => 'contains',
            'value' => ['VR', 'Virtual reality', '360'],
        ],
        'status.embeddable' => [
            'compare' => '=',
            'value' => true,
        ],
        'status.privacyStatus' => [
            'compare' => '=',
            'value' => 'public',
        ],
    ],
    'video' => [
        'statistics.viewCount' => [
            'compare' => '>=',
            'value' => 500,
        ],
    ],
    'channel' => [
        'contentDetails.relatedPlaylists' => [
            'compare' => 'has',
            'value' => ['uploads'],
        ],
    ],
];
/**
 * Create the crawler and immediately run setup(), which prepares the
 * database and YouTube API connections.
 **/
public function __construct()
{
    $this->setup();
}
/**
 * Execute crawler functionality for the least recently updated channel.
 *
 * Looks up the channel's uploads playlist when it is not stored yet, fetches
 * the playlist items, supplements them with statistics, saves them as posts,
 * logs a summary and finally updates the channel record.
 *
 * @return bool Always true when the run completes
 * @throws Exception When no channel exists or no playlist could be determined
 **/
public function run()
{
    // Get least updated channel
    $channel = Channel::orderBy('updated_at', 'ASC')->first();

    // Fail if none found
    if ($channel === null) {
        throw new Exception('No channel found');
    }

    // Get playlist ID if it's missing
    if (empty($channel->playlist_id)) {
        $channel->playlist_id = $this->youtube_playlist_id($channel->url, 'uploads');
        // Timestamps are switched off for this save (presumably so that
        // updated_at, which drives the channel ordering above, is not bumped yet)
        $channel->timestamps = false;
        $channel->save();
    }

    // Fail if still nothing found
    if (empty($channel->playlist_id)) {
        throw new Exception('No playlist found');
    }

    // Get playlist items
    $videos = $this->youtube_playlist_items(array(
        'playlistId' => $channel->playlist_id
    ));

    // Update video items with supplemented parts
    if (! empty($videos)) {
        $videos = $this->supplement($videos, array('statistics'));
    }

    // Save videos; yields one status string per video
    $statistics = $this->save($videos);

    // Log statistics
    $this->log(strtr('Latest API call resulted in %total% videos from which %success% were saved, %skipped% were skipped and %failed% failed.', array(
        '%total%' => count($videos),
        '%success%' => count(array_keys($statistics, self::VIDEO_STATUS_SUCCESS, true)),
        '%failed%' => count(array_keys($statistics, self::VIDEO_STATUS_FAILED, true)),
        '%skipped%' => count(array_keys($statistics, self::VIDEO_STATUS_SKIPPED, true)),
    )));

    // Update channel statistics
    $channel->timestamps = true;
    $channel->increment('video_count', count($videos)); // Estimation because we save API returned video count (and not Wordpress posts)
    $channel->touch();
    $channel->save();

    return true;
}
/**
 * Append a timestamped message to log.txt inside YTC_PLUGIN_DIR.
 *
 * @param mixed $data A string is written as-is; anything else is rendered with print_r()
 * @return bool True when the line was written, false when logging is disabled or writing failed
 */
public function log($data)
{
    // Honour the documented bool return type when logging is switched off
    if (! $this->log) {
        return false;
    }

    $time = date('Y-m-d H:i:s');
    $message = is_string($data) ? $data : print_r($data, true);
    $line = sprintf('[%s] %s', $time, $message) . PHP_EOL;

    // LOCK_EX keeps lines from overlapping when several runs append at once
    return file_put_contents(YTC_PLUGIN_DIR . 'log.txt', $line, FILE_APPEND | LOCK_EX) !== false;
}
/**
 * Supplement playlist items with extra parts from the videos API.
 *
 * Only items that are not stored yet are looked up. The requested parts of
 * each returned video are copied onto the matching playlist item. Items
 * whose video is missing from the (criteria-filtered) API result are
 * dropped; items that already exist are returned unchanged.
 *
 * The previous implementation used array_merge_recursive(), which for two
 * numerically indexed arrays merely appends the second list to the first,
 * so the extra parts never reached the playlist items.
 *
 * @param array $videos Playlist items as returned by youtube_playlist_items()
 * @param array $parts  Video resource parts to request, e.g. array('statistics')
 * @return array Playlist items, supplemented where applicable
 **/
protected function supplement($videos, $parts)
{
    // Collect the video IDs that are not stored yet
    $pending = array();
    foreach ($videos as $item) {
        if (! $this->video_exists($item)) {
            $pending[$item->snippet->resourceId->videoId] = true;
        }
    }

    // Return original array if supplementing is unneeded
    if (empty($pending)) {
        return $videos;
    }

    // Get videos with supplemented parts, indexed by video ID
    $supplements = array();
    $response = $this->youtube_videos(array(
        'part' => implode(',', $parts),
        'id' => implode(',', array_keys($pending))
    ));
    foreach ($response as $video) {
        $supplements[$video->id] = $video;
    }

    $result = array();
    foreach ($videos as $item) {
        $video_id = $item->snippet->resourceId->videoId;

        // Already stored: keep it so save() can report it as skipped
        if (! isset($pending[$video_id])) {
            $result[] = $item;
            continue;
        }

        // Not returned by the API call or rejected by parse_response()
        if (! isset($supplements[$video_id])) {
            continue;
        }

        // Copy the requested parts onto the playlist item so save() finds them
        // NOTE(review): relies on Google_Model accepting new properties - confirm against the client version in use
        foreach ($parts as $part) {
            $item->{$part} = $supplements[$video_id]->{$part};
        }
        $result[] = $item;
    }

    return $result;
}
/**
 * YouTube videos API call
 * @link https://developers.google.com/youtube/v3/docs/videos/list
 * @param array $parameters Request parameters; they override the defaults below
 * @return array Videos from the response that pass parse_response()
 **/
protected function youtube_videos(array $parameters)
{
    $defaults = array(
        'maxResults' => self::MAX_RESULTS
    );

    // REMOTE_ADDR is not set when PHP runs outside a web request (e.g. CLI)
    if (! empty($_SERVER['REMOTE_ADDR'])) {
        $defaults['userIp'] = $_SERVER['REMOTE_ADDR'];
    }

    $parameters = array_merge($defaults, $parameters);

    $this->log('Fire API call: ' . http_build_query($parameters));

    // The first argument stays null; the 'part' value is passed through $parameters
    $response = $this->api->videos->listVideos(null, $parameters);

    return $this->parse_response($response);
}
/**
 * YouTube PlaylistItems API call
 * @link https://developers.google.com/youtube/v3/docs/playlistItems/list
 * @param array $parameters Request parameters; 'playlistId' is required, the rest overrides the defaults
 * @return array Playlist items from the response that pass parse_response()
 * @throws Exception When the playlistId parameter is missing
 **/
protected function youtube_playlist_items(array $parameters)
{
    if (! isset($parameters['playlistId'])) {
        throw new Exception('Required playlistId parameter is missing');
    }

    $parts = array(
        'id',
        'snippet',
        'status'
    );

    $defaults = array(
        'part' => implode(',', $parts),
        'maxResults' => self::MAX_RESULTS
    );

    // REMOTE_ADDR is not set when PHP runs outside a web request (e.g. CLI)
    if (! empty($_SERVER['REMOTE_ADDR'])) {
        $defaults['userIp'] = $_SERVER['REMOTE_ADDR'];
    }

    $parameters = array_merge($defaults, $parameters);

    $this->log('Fire API call: ' . http_build_query($parameters));

    // The first argument stays null; the 'part' value is passed through $parameters
    $response = $this->api->playlistItems->listPlaylistItems(null, $parameters);

    return $this->parse_response($response);
}
/**
 * YouTube Search API call
 * @link https://developers.google.com/youtube/v3/docs/search/list
 * @param array $parameters Request parameters; they override the defaults below
 * @return array Search results from the response that pass parse_response()
 **/
protected function youtube_search(array $parameters)
{
    // A channelId parameter is not required: the original check for it was commented out
    $parts = array(
        'id',
        'snippet',
    );

    $defaults = array(
        'part' => implode(',', $parts),
        'maxResults' => self::MAX_RESULTS,
        'safeSearch' => 'none',
        'order' => 'date',
        'type' => 'video',
        'videoEmbeddable' => 'true',
        'videoSyndicated' => 'true'
    );

    // REMOTE_ADDR is not set when PHP runs outside a web request (e.g. CLI)
    if (! empty($_SERVER['REMOTE_ADDR'])) {
        $defaults['userIp'] = $_SERVER['REMOTE_ADDR'];
    }

    $parameters = array_merge($defaults, $parameters);

    $this->log('Fire API call: ' . http_build_query($parameters));

    // The first argument stays null; the 'part' value is passed through $parameters
    $response = $this->api->search->listSearch(null, $parameters);

    return $this->parse_response($response);
}
/**
 * Get the ID of one of a channel's related playlists from the channel URL.
 *
 * Supports "youtube.com/user/<username>" and "youtube.com/channel/<id>" URLs.
 * Only the path segment directly after "user/" or "channel/" is used, so a
 * trailing path or query string (e.g. ".../user/foo/videos") is ignored.
 *
 * @param string $url      Channel URL
 * @param string $playlist uploads, favorites, likes etc.
 * @return string|null Playlist ID, or null when the URL is not recognised or nothing was found
 **/
public function youtube_playlist_id($url, $playlist)
{
    $parameters = array();

    if (preg_match('~youtube\.com/user/([^/?#\s]+)~i', (string) $url, $matches)) {
        $username = $matches[1];
        $parameters['forUsername'] = $username;
        $this->log("Try to get playlist_id for {$username} with playlist {$playlist}");
    } elseif (preg_match('~youtube\.com/channel/([^/?#\s]+)~i', (string) $url, $matches)) {
        $channel_id = $matches[1];
        $parameters['id'] = $channel_id;
        $this->log("Try to get playlist_id for channel {$channel_id} with playlist {$playlist}");
    }

    // Unsupported URL format
    if (empty($parameters)) {
        return null;
    }

    $response = $this->api->channels->listChannels('contentDetails', $parameters);
    $items = $this->parse_response($response);

    return Arr::get($items, '0.contentDetails.relatedPlaylists.' . $playlist);
}
/**
 * Parse Youtube API response and keep the items that satisfy the criteria
 * configured for their resource kind.
 *
 * An item is kept when every criterion for its kind passes. A criterion
 * whose value is missing from the item counts as passed, and kinds without
 * criteria always pass.
 *
 * @param Google_Collection $response
 * @return array Items that passed all criteria
 **/
protected function parse_response(Google_Collection $response)
{
    //@todo capture errors defined in https://developers.google.com/youtube/v3/docs/errors
    $items = array();

    // Guard against a response without items
    $response_items = $response->items;
    if (empty($response_items)) {
        return $items;
    }

    foreach ($response_items as $item) {
        $kind = str_replace('youtube#', '', $item->getKind());

        // Convert the item to nested arrays so the dot-notated criteria keys can be resolved.
        // NOTE(review): toSimpleObject() may return nested objects, which Arr::get() cannot
        // traverse - the JSON round trip normalises both shapes; confirm with the client version in use
        $simple = $item->toSimpleObject();
        $model = json_decode(json_encode($simple), true);
        if (! is_array($model)) {
            $model = (array) $simple;
        }

        $tests = isset($this->criteria[$kind]) ? $this->criteria[$kind] : array();
        $results = array();
        $passes = 0;

        // When no criteria is found for the resource, it passes successfully
        if (empty($tests)) {
            $results[] = "{$kind} PASSED because no criteria are defined for this kind";
        }

        // Loop through required criteria and count passes
        foreach ($tests as $key => $condition) {
            $value = Arr::get($model, $key);
            // Arrays cannot be interpolated into the log line, so render them as JSON
            $shown = is_scalar($value) ? $value : json_encode($value);

            if (is_null($value)) {
                $results[] = "{$key} PASSED the '{$condition['compare']}' test because a value was missing or incomplete";
                $passes++;
            } else if ($this->test($condition, $value, $item)) {
                $results[] = "{$key} PASSED the '{$condition['compare']}' test with value --> {$shown}";
                $passes++;
            } else {
                $results[] = "{$key} FAILED the '{$condition['compare']}' test with value --> {$shown}";
            }
        }

        // Append to array if all passed successfully
        if ($passes === count($tests)) {
            $items[] = $item;
            $this->log('A MATCH is found with the following passes ' . print_r($results, true));
        }
    }

    return $items;
}
/**
 * Execute a criterion on $value.
 *
 * Supported 'compare' values (case-insensitive): >=, >, <=, <, <> / !=,
 * = / ==, between, in, has, startswith, endswith, is / regex (both matched
 * through Str::is()) and contains.
 *
 * Optional condition keys:
 *  - 'strict':  strict comparison for = / != / in, case-sensitive matching for contains
 *  - 'inverse': negate the outcome
 *
 * @param array $condition Must contain 'compare' and 'value'
 * @param mixed $value Value taken from the API item
 * @param Google_Model $model Item the value belongs to (currently unused)
 * @return bool
 * @throws Exception When the condition is incomplete or the compare value is unknown
 **/
protected function test($condition, $value, Google_Model $model = null)
{
    if (! isset($condition['compare'])) {
        throw new Exception('Condition compare value is required');
    }

    if (! isset($condition['value'])) {
        throw new Exception('Condition value is required for comparison');
    }

    // Detection settings
    $strict = isset($condition['strict']) ? $condition['strict'] : false;
    $inverse = isset($condition['inverse']) ? $condition['inverse'] : false;

    // Execute test
    switch (strtolower($condition['compare'])) {
        case '>=' :
            $result = $value >= $condition['value'];
            break;
        case '>' :
            $result = $value > $condition['value'];
            break;
        case '<=' :
            $result = $value <= $condition['value'];
            break;
        case '<' :
            $result = $value < $condition['value'];
            break;
        case '<>' :
        case '!=' :
            $result = $strict ? ($value !== $condition['value']) : ($value != $condition['value']);
            break;
        case '=' :
        case '==' :
            $result = $strict ? ($value === $condition['value']) : ($value == $condition['value']);
            break;
        case 'between' :
            // Inclusive range: value is array(minimum, maximum)
            list($minimum, $maximum) = $condition['value'];
            $result = ($value >= $minimum && $value <= $maximum);
            break;
        case 'in' :
            $result = in_array($value, $condition['value'], $strict);
            break;
        case 'has' :
            $result = Arr::has($value, $condition['value']);
            break;
        case 'startswith' :
            $result = Str::startsWith($value, $condition['value']);
            break;
        case 'endswith' :
            $result = Str::endsWith($value, $condition['value']);
            break;
        case 'is' :
        case 'regex' :
            $result = Str::is($condition['value'], $value);
            break;
        case 'contains' :
            if (! $strict) {
                // Case-insensitive string matching
                $value = strtolower($value);
                $values = array_map('strtolower', (array) $condition['value']);
                $condition['value'] = is_string($condition['value']) ? $values[0] : $values;
            }
            // Nothing to look for means nothing can fail
            if (empty($condition['value'])) {
                $result = true;
                break;
            }
            $result = Str::contains($value, $condition['value']);
            break;
        default:
            throw new Exception('Unknown condition compare value');
    }

    return $inverse ? ! $result : $result;
}
/**
 * Save video as Wordpress post (status "draft") with its details as post meta.
 *
 * The YouTube video ID is taken from snippet.resourceId.videoId when present
 * (playlist items), otherwise from the item's own id (video resources) or
 * id.videoId (search results).
 *
 * @param array|object $video A single API item or an array of items
 * @return string|array One of the VIDEO_STATUS_* values, or an array of them when an array was given
 **/
public function save($video)
{
    // Walk through collection if it's an array
    if (is_array($video)) {
        return array_map(array($this, 'save'), $video);
    }

    // Skip already existing videos
    if ($this->video_exists($video)) {
        return self::VIDEO_STATUS_SKIPPED;
    }

    $snippet = isset($video->snippet) ? $video->snippet : null;

    // Resolve the YouTube video ID; not every resource kind carries a resourceId
    $video_id = null;
    $resource = $snippet ? $snippet->resourceId : null;
    if (is_object($resource) && is_string($resource->videoId) && $resource->videoId !== '') {
        $video_id = $resource->videoId;
    } elseif (is_string($video->id) && $video->id !== '') {
        $video_id = $video->id;
    } elseif (is_object($video->id) && is_string($video->id->videoId) && $video->id->videoId !== '') {
        $video_id = $video->id->videoId;
    }

    // Define post and meta data
    $post = array();
    $meta = array();

    // Default post data
    $post['post_title'] = is_scalar($video->id) ? $video->id : $video_id;
    $post['post_type'] = 'post';
    $post['post_status'] = 'draft';

    // Default meta data
    $meta['ytc_video_id'] = is_scalar($video->id) ? $video->id : $video_id;

    // Extend with video snippet data
    if ($snippet) {
        $post['post_title'] = $snippet->title;
        $post['post_name'] = sanitize_title($snippet->title);
        $post['post_content'] = $snippet->description;
        if ($video_id !== null) {
            $post['post_content'] .= "\n\nhttps://www.youtube.com/watch?v=" . $video_id; // Append YouTube video URL
            $meta['ytc_video_id'] = $video_id;
        }
        $meta['ytc_published_at'] = $snippet->publishedAt;
        $meta['ytc_channel_id'] = $snippet->channelId;
        $meta['ytc_channel_title'] = $snippet->channelTitle;
    }

    // Extend with video statistics data
    if (isset($video->statistics)) {
        $meta['ytc_view_count'] = $video->statistics->viewCount;
        $meta['ytc_like_count'] = $video->statistics->likeCount;
    }

    // Save post
    $post_id = wp_insert_post($post);

    // Failed because of unknown reason
    if (empty($post_id)) {
        return self::VIDEO_STATUS_FAILED;
    }

    // Save meta data, skipping values that are not available
    foreach ($meta as $key => $value) {
        if (! is_null($value)) {
            update_post_meta($post_id, $key, $value);
        }
    }

    return self::VIDEO_STATUS_SUCCESS;
}
/**
 * Check if video already exists in database.
 *
 * Looks for a "ytc_video_id" post meta row whose value equals the item's own
 * id or, when available, its snippet.resourceId.videoId.
 *
 * The lookup uses where() + whereIn() instead of where(array)/orWhere(array):
 * depending on the illuminate/database version, the conditions inside an
 * array passed to orWhere() may be joined with OR, which would match every
 * "ytc_video_id" row.
 *
 * @param Google_Model $video
 * @return bool
 **/
public function video_exists(Google_Model $video)
{
    // Collect the candidate IDs; only non-empty scalars can be bound in the query
    $ids = array();

    if (is_scalar($video->id) && (string) $video->id !== '') {
        $ids[] = (string) $video->id;
    }

    if (isset($video->snippet)) {
        $resource = $video->snippet->resourceId;
        if (is_object($resource) && is_scalar($resource->videoId) && (string) $resource->videoId !== '') {
            $ids[] = (string) $resource->videoId;
        }
    }

    $ids = array_values(array_unique($ids));

    // Without an ID there is nothing that could match
    if (empty($ids)) {
        return false;
    }

    return Capsule::table('postmeta')
        ->where('meta_key', 'ytc_video_id')
        ->whereIn('meta_value', $ids)
        ->exists();
}
/**
 * Setup requirements such as database and API connection.
 *
 * Boots an Illuminate database connection from the WordPress DB_* constants,
 * sets PHP's default timezone from the WordPress "timezone_string" option
 * (UTC when that option is empty) and creates the YouTube API service.
 *
 * @global $wpdb
 * @return void
 * @throws Exception When YOUTUBE_API_KEY is not defined
 */
public function setup()
{
    global $wpdb;

    // Database settings
    $db = array(
        'driver' => 'mysql',
        'host' => DB_HOST,
        'database' => DB_NAME,
        'username' => DB_USER,
        'password' => DB_PASSWORD,
        'charset' => 'utf8',
        'collation' => 'utf8_general_ci',
        'prefix' => $wpdb->prefix,
    );

    // DB_HOST may carry a port ("host:3306") or a socket path ("host:/path/to/socket")
    if (strpos($db['host'], ':') !== false) {
        list($host, $extra) = explode(':', $db['host'], 2);
        $db['host'] = $host !== '' ? $host : 'localhost';
        if (ctype_digit($extra)) {
            $db['port'] = $extra;
        } elseif ($extra !== '') {
            $db['unix_socket'] = $extra;
        }
    }

    // Set timezone same as Wordpress; the option can be an empty string, which is not a valid timezone ID
    $timezone = get_option('timezone_string');
    if (empty($timezone)) {
        $timezone = 'UTC';
    }
    date_default_timezone_set($timezone);

    // Connect to database
    $capsule = new Capsule;
    $capsule->addConnection($db);
    $capsule->setAsGlobal();
    $capsule->bootEloquent();

    // Setup YouTube API
    if (! defined('YOUTUBE_API_KEY')) {
        throw new Exception('YOUTUBE_API_KEY should be defined within wp-config.php or somewhere else');
    }

    $client = new Google_Client();
    $client->setDeveloperKey(YOUTUBE_API_KEY);

    // Circumvent SSL errors (non-secure but works)
    // SECURITY(review): 'verify' => false disables TLS certificate verification for all
    // API requests whenever the site itself is not served over HTTPS - consider
    // configuring a CA bundle instead
    if (empty($_SERVER['HTTPS']) || $_SERVER['HTTPS'] === 'off') {
        $httpClient = new GuzzleHttp\Client([
            'verify' => false, // otherwise HTTPS requests will fail.
        ]);
        $client->setHttpClient($httpClient);
    }

    $this->api = new Google_Service_YouTube($client);
}
/**
 * Plugin activation procedure: delegates to Channel::initialize().
 *
 * @return void
 **/
public static function plugin_activation()
{
    Channel::initialize();
}
/**
 * Plugin deactivation procedure; intentionally does nothing.
 *
 * @return void
 **/
public static function plugin_deactivation()
{
    // nothing needed here.
}
} | |
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I just pasted this here to hopefully inspire others to continue this work :)
This class has more dependencies than are pasted in this gist. If somebody is interested in it, please let me know.