Skip to content

Instantly share code, notes, and snippets.

@ghankerson
Created October 30, 2014 18:31
Show Gist options
  • Select an option

  • Save ghankerson/b0d22abc1f500b15ad6c to your computer and use it in GitHub Desktop.

Select an option

Save ghankerson/b0d22abc1f500b15ad6c to your computer and use it in GitHub Desktop.
<?php
// Load the Composer autoloader (presumably what makes the Elasticsearch
// client class below available - confirm against composer.json).
require 'vendor/autoload.php';
// Client constructed with no arguments; it is used by the indexing loop at the
// bottom of this file.
$client = new Elasticsearch\Client();
/**
 * Queries published "story" nodes ordered by field_date, newest first.
 *
 * @param bool $count
 *   When TRUE, return the number of matching nodes instead of their ids.
 * @param int $start
 *   Zero-based offset of the first row to return (unused when counting).
 * @param int $max
 *   Maximum number of node ids to return (unused when counting).
 *
 * @return int|array
 *   The total as an integer when $count is TRUE; otherwise a list of node
 *   ids, which is empty when the requested range matches nothing.
 */
function feedQuery($count = false, $start = 0, $max = 100) {
  $q = new \EntityFieldQuery();
  // The ordering is kept on the count path as well, exactly as before, so
  // both modes are built from the same conditions.
  $q->entityCondition('entity_type', 'node')
    ->entityCondition('bundle', 'story')
    ->propertyCondition('status', 1)
    ->fieldOrderBy('field_date', 'value', 'DESC');

  if ($count) {
    // Only flag the query as a count query when a count is actually wanted,
    // instead of flagging it up front and un-flagging a clone afterwards.
    $q->count();
    return intval($q->execute());
  }

  $q->range($start, $max);
  $result = $q->execute();

  // No 'node' key is present when nothing matched; report that as an empty
  // list rather than handing an undefined index to array_keys().
  if (empty($result['node'])) {
    return array();
  }
  return array_keys($result['node']);
}
/**
 * Creates feed items based on query results.
 *
 * Loads every node in $nids and copies the values needed for the feed
 * (enclosure, title, description, summary, authors, show, collections, tags,
 * link, publication date and revision date) into an AtomFeedItem.
 *
 * @param array $nids
 *   Node ids to load.
 *
 * @return array
 *   One AtomFeedItem per loaded node; an empty array when there is nothing
 *   to load.
 */
function createItems($nids) {
  // Always return an array: the result is passed to a method that type-hints
  // "array", and previously $items was undefined when no node was loaded.
  $items = array();
  if (empty($nids)) {
    return $items;
  }

  foreach (node_load_multiple($nids) as $node) {
    $item = new AtomFeedItem();
    $item->set_nid($node);
    $item->set_enclosure($node);
    $item->set_unit('title', $node->title);
    $item->set_description($node);
    $item->set_summary($node);
    // Authors are resolved once per node (the second, identical call that
    // used to follow set_updated() only repeated the node loads).
    $item->set_authors($node);
    $item->set_show($node);
    $item->set_collections($node);
    $item->set_tags($node);
    $item->set_unit('link', url('node/' . $node->nid, array('absolute' => TRUE)));

    // Publication date comes from the story's date field and is emitted
    // without a timezone designator.
    $field_date = field_get_items('node', $node, 'field_date', $node->language);
    $date = new \DateTime($field_date[0]['value']);
    $item->set_unit('pubdate', $date->format("Y-m-d\TH:i:s"));

    // "Updated" is the timestamp of the node's current revision.
    $updated_date = new \DateTime(date('c', $node->revision_timestamp));
    $item->set_updated($updated_date);

    $items[] = $item;
  }

  return $items;
}
/**
 * Converts feed items into plain arrays that can be JSON-encoded.
 */
class JsonFeedWriter {

  /**
   * Builds one associative array per feed item.
   *
   * Only the fields listed below are exported; image, video, slideshow and
   * "respond" data are not part of the output.
   *
   * @param array $items
   *   Feed items exposing the get_*() accessors used below.
   *
   * @return array
   *   A list of arrays with the keys id, title, summary, body, link, pubDate,
   *   audio, show, topics, collections and authors, in the order of $items.
   */
  public function render(array $items) {
    $json = array();
    foreach ($items as $item) {
      $json[] = array(
        'id' => $item->get_id(),
        'title' => $item->get_title(),
        'summary' => $item->get_summary(),
        'body' => $item->get_description(),
        'link' => $item->get_link(),
        'pubDate' => $item->get_pubdate(),
        'audio' => $item->get_enclosure(),
        'show' => $item->get_show(),
        'topics' => $item->get_tags(),
        'collections' => $item->get_collections(),
        'authors' => $item->get_authors(),
      );
    }
    return $json;
  }

}
/**
 * Class to represent an RSS feed item xml node.
 *
 * Values are filled in from a node through the set_*() methods and read back
 * through the matching get_*() methods.
 */
class RssFeedItem {
  protected $title;
  protected $summary;
  protected $description;
  protected $link;
  protected $pubdate;
  protected $enclosure;
  protected $id;
  // Backing store for set_id()/get_id(). Declared here so those accessors no
  // longer create the property dynamically.
  protected $nid;

  /**
   * General setter method.
   *
   * @param string $name
   *   Name of the property to assign.
   * @param mixed $value
   *   Value to store.
   */
  public function set_unit($name, $value) {
    $this->$name = $value;
  }

  /**
   * Stores the item id (kept in the nid property).
   */
  public function set_id($nid) {
    $this->nid = $nid;
  }

  /**
   * Returns the item id (read from the nid property).
   */
  public function get_id() {
    return $this->nid;
  }

  /**
   * Copies the sanitized value of field_lede into the summary, if present.
   */
  public function set_summary($node) {
    $field = field_get_items('node', $node, 'field_lede', $node->language);
    if (!empty($field[0]['safe_value'])) {
      $this->summary = $field[0]['safe_value'];
    }
  }

  /**
   * Renders the description markup: image, description and transcript.
   *
   * The image and description wrappers are always part of the render array
   * (with empty markup when the node has no value); the transcript wrapper is
   * only added when the node has a transcript.
   */
  public function set_description($node) {
    $story_description = '';
    if (!empty($node->field_description['und']['0']['safe_value'])) {
      $story_description = $node->field_description['und']['0']['safe_value'];
    }

    $story_transcript = '';
    if (!empty($node->field_transcript['und']['0']['safe_value'])) {
      $story_transcript = $node->field_transcript['und']['0']['safe_value'];
    }

    // Only theme the image when the first item is really there; a bare
    // truthiness test on the field does not guarantee that.
    $story_image = '';
    if (!empty($node->field_image['und']['0']['uri'])) {
      $image = $node->field_image['und']['0'];
      $story_image = theme_image_style(array(
        'style_name' => 'primary-image-610x340',
        'path' => $image['uri'],
        'alt' => $image['alt'],
        'width' => $image['width'],
        'height' => $image['height'],
        'title' => $image['title'],
        'attributes' => array(
          'class' => 'image',
        ),
      ));
    }

    $description = array(
      'image' => array(
        '#markup' => empty($story_image) ? '' : $story_image,
      ),
      'description' => array(
        '#prefix' => "<div class='description'>",
        '#markup' => $story_description,
        '#suffix' => "</div>",
      ),
    );
    if (!empty($story_transcript)) {
      $description['transcript'] = array(
        '#prefix' => "<div class='transcript'>",
        '#markup' => $story_transcript,
        '#suffix' => "</div>",
      );
    }
    $this->description = drupal_render($description);
  }

  /**
   * Describes the podcast file attached through field_file_podcast.
   *
   * The file length is deliberately not included: looking it up takes too
   * long when the mp3 file does not exist.
   */
  public function set_enclosure($node) {
    $field = field_get_items('node', $node, 'field_file_podcast', $node->language);
    if (!empty($field[0]['uri'])) {
      $url = $field[0]['uri'];
      $this->enclosure = array(
        'url' => $url,
        'nid' => $node->nid,
        'fid' => $field[0]['fid'],
        'type' => file_get_mimetype($url),
      );
    }
  }

  /**
   * Sends a header-only cURL request to get the length of a remote mp3 file.
   *
   * @param string $url
   *   A url string.
   *
   * @return int
   *   Content length of the remote file in bytes, or 0 when it could not be
   *   determined.
   */
  public function get_file_length($url) {
    $remote = 0;

    $ch = curl_init($url);
    if ($ch === FALSE) {
      return $remote;
    }

    $response = array();
    try {
      curl_setopt($ch, CURLOPT_HEADER, TRUE);
      curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
      curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)");
      // Header-only request: the body is never downloaded.
      curl_setopt($ch, CURLOPT_NOBODY, TRUE);
      curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 2);
      curl_setopt($ch, CURLOPT_TIMEOUT, 3);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_exec($ch);
      $response = curl_getinfo($ch);
    }
    catch (\Exception $e) {
      echo "Caught exception " . $e->getMessage();
    }
    // Release the handle; it used to be left open.
    curl_close($ch);

    // cURL reports -1 when the length is unknown; treat that like "no length".
    if (!empty($response['download_content_length']) && $response['download_content_length'] > 0) {
      $remote = intval($response['download_content_length']);
    }
    return $remote;
  }

  /**
   * Returns the title with HTML special characters escaped.
   */
  public function get_title() {
    return htmlspecialchars($this->title);
  }

  /**
   * Returns the rendered description markup.
   */
  public function get_description() {
    return $this->description;
  }

  /**
   * Returns the summary text.
   */
  public function get_summary() {
    return $this->summary;
  }

  /**
   * Returns the item link.
   */
  public function get_link() {
    return $this->link;
  }

  /**
   * Returns the publication date string.
   */
  public function get_pubdate() {
    return $this->pubdate;
  }

  /**
   * Returns the enclosure array, or NULL when none was set.
   */
  public function get_enclosure() {
    return $this->enclosure;
  }

}
/**
 * Feed item carrying the extra values used by the Atom/JSON output.
 *
 * Adds the updated date, transcript, tags, collections, authors and show
 * title on top of RssFeedItem.
 */
class AtomFeedItem extends RssFeedItem {
  protected $updated;
  protected $transcript;
  protected $tags = array();
  protected $collections = array();
  protected $nid;
  protected $show;
  protected $authors = array();
  protected $app_respond;
  protected $respond_question;
  protected $respond_type;
  protected $syndication = array();

  /**
   * Stores the date the item was last updated.
   */
  public function set_updated(\DateTime $date) {
    $this->updated = $date;
  }

  /**
   * Stores the node id of the given node.
   */
  public function set_nid($node) {
    $this->nid = $node->nid;
  }

  /**
   * Returns the stored node id.
   */
  public function get_nid() {
    return $this->nid;
  }

  /**
   * Returns the updated date in DATE_ATOM format.
   *
   * @return string|null
   *   The formatted date, or NULL when set_updated() was never called.
   */
  public function get_updated() {
    if (!$this->updated instanceof \DateTime) {
      return NULL;
    }
    return $this->updated->format(DATE_ATOM);
  }

  /**
   * Copies the sanitized transcript of the node, if it has one.
   */
  public function set_transcript($node) {
    if (!empty($node->field_transcript[LANGUAGE_NONE])) {
      $this->transcript = $node->field_transcript[LANGUAGE_NONE]['0']['safe_value'];
    }
  }

  /**
   * Returns the transcript.
   */
  public function get_transcript() {
    return $this->transcript;
  }

  /**
   * Appends the escaped titles of the topics referenced by field_ref_topic.
   */
  public function set_tags($node) {
    if (!isset($node->field_ref_topic[LANGUAGE_NONE])) {
      return;
    }
    foreach ($node->field_ref_topic[LANGUAGE_NONE] as $reference) {
      $topic = node_load($reference['nid']);
      // Skip references whose target could not be loaded.
      if (!empty($topic)) {
        $this->tags[] = htmlspecialchars($topic->title);
      }
    }
  }

  /**
   * Returns the collected tag titles.
   */
  public function get_tags() {
    return $this->tags;
  }

  /**
   * Appends the titles of the nodes referenced by field_ref_collection.
   */
  public function set_collections($node) {
    if (empty($node->field_ref_collection[LANGUAGE_NONE][0])) {
      return;
    }
    foreach ($node->field_ref_collection[LANGUAGE_NONE] as $reference) {
      $collection = node_load($reference['nid']);
      // Skip references whose target could not be loaded.
      if (!empty($collection)) {
        $this->collections[] = $collection->title;
      }
    }
  }

  /**
   * Returns the collected collection titles.
   */
  public function get_collections() {
    return $this->collections;
  }

  /**
   * Replaces the author list with the titles of the referenced bio nodes.
   *
   * The list is left untouched when the node references no bios.
   */
  public function set_authors($node) {
    if (empty($node->field_ref_bio_multi[LANGUAGE_NONE])) {
      return;
    }

    $nids = array();
    foreach ($node->field_ref_bio_multi[LANGUAGE_NONE] as $reference) {
      $nids[] = $reference['nid'];
    }

    $authors = array();
    foreach (node_load_multiple($nids) as $author_node) {
      $authors[] = $author_node->title;
    }
    $this->authors = $authors;
  }

  /**
   * Returns the author titles.
   */
  public function get_authors() {
    return $this->authors;
  }

  /**
   * Returns the author titles (currently the same list as get_authors()).
   */
  public function get_authors_forxmlnode() {
    return $this->authors;
  }

  /**
   * Stores the title of the episode referenced by field_ref_episode.
   */
  public function set_show($node) {
    $field = field_get_items('node', $node, 'field_ref_episode', $node->language);
    if (empty($field[0]['nid'])) {
      return;
    }
    $episode = node_load($field[0]['nid']);
    // Leave the show unset when the episode could not be loaded.
    if (!empty($episode)) {
      $this->show = $episode->title;
    }
  }

  /**
   * Returns the escaped show title.
   *
   * @return string|null
   *   The title, or NULL when no show was set.
   */
  public function get_show() {
    if (empty($this->show)) {
      return NULL;
    }
    return htmlspecialchars($this->show);
  }

}
// Index every published story into Elasticsearch, one page of $max nodes at
// a time.
$count = feedQuery(true);
$start = 0;
$max = 1000;
$indexParams = array('index' => 'stories');

try {
  //$client->indices()->delete($indexParams);
  $client->indices()->create($indexParams);
}
catch (Exception $e) {
  // Indexing still proceeds when creation fails (presumably because the index
  // already exists - confirm), but the reason is reported rather than
  // silently dropped.
  print "Index '" . $indexParams['index'] . "' was not created: " . $e->getMessage() . "\n";
}

// The writer holds no state, so one instance serves every page.
$writer = new JsonFeedWriter();

while ($start < $count) {
  // Clamp the upper bound so the last page does not report more items than
  // exist.
  $running = min($start + $max, $count);
  print "Indexing items $start - $running of $count\n";

  $nids = feedQuery(false, $start, $max);
  if (empty($nids)) {
    // Nothing left in this range; later ranges cannot contain anything
    // either.
    break;
  }

  $items = $writer->render(createItems($nids));
  foreach ($items as $item) {
    $params = array(
      'index' => $indexParams['index'],
      'type' => 'story',
      'body' => json_encode($item),
      // Use the item id as document id so re-running overwrites documents
      // with the same id.
      'id' => $item['id'],
    );
    $client->index($params);
  }

  $start += $max;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment