Created
October 30, 2014 18:31
-
-
Save ghankerson/b0d22abc1f500b15ad6c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
| require 'vendor/autoload.php'; | |
// Elasticsearch client shared by the indexing loop at the bottom of this script.
$client = new \Elasticsearch\Client();
/**
 * Queries published "story" nodes, ordered by field_date descending.
 *
 * @param bool $count
 *   When TRUE, return the number of matching nodes instead of their IDs.
 * @param int $start
 *   Offset of the first node to return (ignored when $count is TRUE).
 * @param int $max
 *   Maximum number of node IDs to return (ignored when $count is TRUE).
 *
 * @return int|array
 *   The match count when $count is TRUE, otherwise an array of node IDs
 *   (empty when nothing matches the requested range).
 */
function feedQuery($count = false, $start = 0, $max = 100) {
  $query = new \EntityFieldQuery();
  $query->entityCondition('entity_type', 'node')
    ->entityCondition('bundle', 'story')
    ->propertyCondition('status', 1)
    ->fieldOrderBy('field_date', 'value', 'DESC');

  if ($count) {
    // Only turn the query into a count query when a count is requested.
    return intval($query->count()->execute());
  }

  $result = $query->range($start, $max)->execute();

  // The 'node' key is absent when the query matches nothing.
  if (empty($result['node'])) {
    return array();
  }

  return array_keys($result['node']);
}
/**
 * Creates feed items based on query results.
 *
 * @param array $nids
 *   Node IDs to load, e.g. as returned by feedQuery().
 *
 * @return array
 *   One AtomFeedItem per loaded node; empty when no node could be loaded.
 */
function createItems($nids) {
  // Always return an array so callers can pass the result straight to
  // JsonFeedWriter::render(), which requires one.
  $items = array();
  if (empty($nids)) {
    return $items;
  }

  foreach (node_load_multiple($nids) as $node) {
    $item = new AtomFeedItem();
    $item->set_nid($node);
    //$item->set_media_image($node);
    //$item->set_slideshow_images($node);
    //$item->set_video($node);
    $item->set_enclosure($node);
    $item->set_unit('title', $node->title);
    $item->set_description($node);
    $item->set_summary($node);
    $item->set_authors($node);
    $item->set_show($node);
    $item->set_collections($node);
    $item->set_tags($node);
    $item->set_unit('link', url('node/' . $node->nid, array('absolute' => TRUE)));

    // Only set a publication date when the node really has one; constructing
    // a DateTime from a missing value would silently yield the current time.
    $field_date = field_get_items('node', $node, 'field_date', $node->language);
    if (!empty($field_date[0]['value'])) {
      $date = new \DateTime($field_date[0]['value']);
      $item->set_unit('pubdate', $date->format("Y-m-d\TH:i:s"));
    }

    $updated_date = new \DateTime(date('c', $node->revision_timestamp));
    $item->set_updated($updated_date);

    $items[] = $item;
  }

  return $items;
}
/**
 * Turns feed items into plain arrays that can be JSON-encoded.
 */
class JsonFeedWriter {

  /**
   * Builds one associative array per feed item.
   *
   * @param array $items
   *   Feed item objects exposing get_id(), get_title(), get_summary(),
   *   get_description(), get_link(), get_pubdate(), get_enclosure(),
   *   get_show(), get_tags(), get_collections() and get_authors().
   *
   * @return array
   *   A list of associative arrays, in the same order as $items.
   */
  public function render(array $items) {
    $documents = array();

    foreach ($items as $item) {
      $documents[] = array(
        'id' => $item->get_id(),
        'title' => $item->get_title(),
        'summary' => $item->get_summary(),
        'body' => $item->get_description(),
        'link' => $item->get_link(),
        'pubDate' => $item->get_pubdate(),
        //'freeform_content' => $item->get_video(),
        'audio' => $item->get_enclosure(),
        //'primary_image' => array (
        //'large' => $item->get_media_image(),
        //'small' => $item->get_small_media_image(),
        //'mprnews' => $item->get_mprnews_media_image(),
        //),
        'show' => $item->get_show(),
        //'respond' => $item->get_app_respond(),
        //'respond_question' => $item->get_app_respond_question(),
        //'respond_type' => $item->get_app_respond_type(),
        'topics' => $item->get_tags(),
        'collections' => $item->get_collections(),
        'authors' => $item->get_authors(),
        //'slideshow' => $item->get_slideshow_images(),
      );
    }

    return $documents;
  }

}
/**
 * Class to represent an RSS feed item XML node.
 *
 * Values are filled in through the set_*() methods (most of which read
 * fields from a Drupal node) and read back through the get_*() methods.
 */
class RssFeedItem {
  protected $title;
  protected $summary;
  protected $description;
  protected $link;
  protected $pubdate;
  protected $enclosure;
  protected $id;
  // Read and written by set_id()/get_id(). Declared explicitly so those
  // methods do not create an undeclared (dynamic) property.
  protected $nid;

  /**
   * General setter method: assigns $value to the property named $name.
   *
   * @param string $name
   *   Property name, e.g. 'title', 'link' or 'pubdate'.
   * @param mixed $value
   *   Value to store.
   */
  public function set_unit($name, $value) {
    $this->$name = $value;
  }

  /**
   * Stores the node ID that get_id() returns.
   */
  public function set_id($nid) {
    $this->nid = $nid;
  }

  /**
   * Returns the stored node ID, or NULL when none was set.
   */
  public function get_id() {
    return $this->nid;
  }

  /**
   * Sets the summary from the node's field_lede, when it has a safe value.
   */
  public function set_summary($node) {
    $field = field_get_items('node', $node, 'field_lede', $node->language);
    if (!empty($field[0]['safe_value'])) {
      $this->summary = $field[0]['safe_value'];
    }
  }

  /**
   * Renders the description markup: image, description and transcript.
   *
   * The image and description wrappers are always rendered (with empty
   * markup when the node has no value); the transcript wrapper is only added
   * when the node has a transcript.
   */
  public function set_description($node) {
    $story_description = '';
    if (!empty($node->field_description[LANGUAGE_NONE][0]['safe_value'])) {
      $story_description = $node->field_description[LANGUAGE_NONE][0]['safe_value'];
    }

    $story_transcript = '';
    if (!empty($node->field_transcript[LANGUAGE_NONE][0]['safe_value'])) {
      $story_transcript = $node->field_transcript[LANGUAGE_NONE][0]['safe_value'];
    }

    // Only theme the image when the node actually carries one; the property
    // may be missing or empty on nodes without an image.
    $story_image = '';
    if (!empty($node->field_image[LANGUAGE_NONE][0]['uri'])) {
      $image = $node->field_image[LANGUAGE_NONE][0];
      $story_image = theme_image_style(array(
        'style_name' => 'primary-image-610x340',
        'path' => $image['uri'],
        'alt' => $image['alt'],
        'width' => $image['width'],
        'height' => $image['height'],
        'title' => $image['title'],
        'attributes' => array(
          'class' => 'image',
        ),
      ));
    }

    $description = array(
      'image' => array(
        '#markup' => $story_image,
      ),
      'description' => array(
        '#prefix' => "<div class='description'>",
        '#markup' => $story_description,
        '#suffix' => "</div>",
      ),
    );

    if ($story_transcript !== '') {
      $description['transcript'] = array(
        '#prefix' => "<div class='transcript'>",
        '#markup' => $story_transcript,
        '#suffix' => "</div>",
      );
    }

    $this->description = drupal_render($description);
  }

  /**
   * Sets the enclosure (url, nid, fid, mime type) from field_file_podcast.
   *
   * No 'length' entry is added: looking it up with get_file_length() takes
   * too long when the mp3 file does not exist.
   */
  public function set_enclosure($node) {
    $field = field_get_items('node', $node, 'field_file_podcast', $node->language);
    if (!empty($field[0]['uri'])) {
      $url = $field[0]['uri'];
      $this->enclosure = array(
        'url' => $url,
        'nid' => $node->nid,
        'fid' => $field[0]['fid'],
        'type' => file_get_mimetype($url),
      );
    }
  }

  /**
   * Issues a body-less curl request to get the length of a remote file.
   *
   * @param string $url
   *   A URL string.
   *
   * @return int
   *   Content length of the remote file in bytes, or 0 when it could not be
   *   determined.
   */
  public function get_file_length($url) {
    $ch = curl_init($url);
    if ($ch === FALSE) {
      return 0;
    }

    curl_setopt($ch, CURLOPT_HEADER, TRUE);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)");
    // Headers only: the body is never downloaded.
    curl_setopt($ch, CURLOPT_NOBODY, TRUE);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 2);
    curl_setopt($ch, CURLOPT_TIMEOUT, 3);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
    curl_exec($ch);

    // curl reports -1 when the length is unknown; normalise that to 0.
    $length = curl_getinfo($ch, CURLINFO_CONTENT_LENGTH_DOWNLOAD);
    curl_close($ch);

    return $length > 0 ? intval($length) : 0;
  }

  /**
   * Returns the title with HTML special characters escaped.
   */
  public function get_title() {
    // Cast so an unset title yields '' instead of passing NULL along.
    return htmlspecialchars((string) $this->title);
  }

  /**
   * Returns the rendered description markup, or NULL when not set.
   */
  public function get_description() {
    return $this->description;
  }

  /**
   * Returns the summary, or NULL when not set.
   */
  public function get_summary() {
    return $this->summary;
  }

  /**
   * Returns the link, or NULL when not set.
   */
  public function get_link() {
    return $this->link;
  }

  /**
   * Returns the publication date, or NULL when not set.
   */
  public function get_pubdate() {
    return $this->pubdate;
  }

  /**
   * Returns the enclosure array, or NULL when not set.
   */
  public function get_enclosure() {
    return $this->enclosure;
  }

}
/**
 * Feed item with the extra values used by the Atom/JSON output: update
 * date, transcript, tags, collections, show and authors.
 */
class AtomFeedItem extends RssFeedItem {
  protected $updated;
  protected $transcript;
  protected $tags = array();
  protected $collections = array();
  protected $nid;
  protected $show;
  protected $authors = array();
  protected $app_respond;
  protected $respond_question;
  protected $respond_type;
  protected $syndication = array();

  /**
   * Stores the date the item was last updated.
   */
  public function set_updated(\DateTime $date) {
    $this->updated = $date;
  }

  /**
   * Stores the node ID taken from the given node.
   */
  public function set_nid($node) {
    $this->nid = $node->nid;
  }

  /**
   * Returns the stored node ID.
   */
  public function get_nid() {
    return $this->nid;
  }

  /**
   * Returns the update date in DATE_ATOM format, or NULL when none was set.
   */
  public function get_updated() {
    return $this->updated ? $this->updated->format(DATE_ATOM) : NULL;
  }

  /**
   * Sets the transcript from field_transcript, when it has a safe value.
   */
  public function set_transcript($node) {
    if (!empty($node->field_transcript[LANGUAGE_NONE][0]['safe_value'])) {
      $this->transcript = $node->field_transcript[LANGUAGE_NONE][0]['safe_value'];
    }
  }

  /**
   * Returns the transcript, or NULL when not set.
   */
  public function get_transcript() {
    return $this->transcript;
  }

  /**
   * Appends the escaped title of every node referenced by field_ref_topic.
   */
  public function set_tags($node) {
    if (!empty($node->field_ref_topic[LANGUAGE_NONE])) {
      foreach ($node->field_ref_topic[LANGUAGE_NONE] as $reference) {
        // node_load() returns FALSE for a reference to a missing node.
        $topic = node_load($reference['nid']);
        if ($topic) {
          $this->tags[] = htmlspecialchars($topic->title);
        }
      }
    }
  }

  /**
   * Returns the list of tag titles.
   */
  public function get_tags() {
    return $this->tags;
  }

  /**
   * Appends the title of every node referenced by field_ref_collection.
   */
  public function set_collections($node) {
    if (!empty($node->field_ref_collection[LANGUAGE_NONE][0])) {
      foreach ($node->field_ref_collection[LANGUAGE_NONE] as $reference) {
        // node_load() returns FALSE for a reference to a missing node.
        $collection = node_load($reference['nid']);
        if ($collection) {
          $this->collections[] = $collection->title;
        }
      }
    }
  }

  /**
   * Returns the list of collection titles.
   */
  public function get_collections() {
    return $this->collections;
  }

  /**
   * Replaces the author list with the titles of the nodes referenced by
   * field_ref_bio_multi. Leaves the list untouched when the field is empty.
   */
  public function set_authors($node) {
    if (empty($node->field_ref_bio_multi[LANGUAGE_NONE])) {
      return;
    }

    $nids = array();
    foreach ($node->field_ref_bio_multi[LANGUAGE_NONE] as $reference) {
      $nids[] = $reference['nid'];
    }

    $authors = array();
    foreach (node_load_multiple($nids) as $author_node) {
      $authors[] = $author_node->title;
    }

    $this->authors = $authors;
  }

  /**
   * Returns the list of author titles.
   */
  public function get_authors() {
    return $this->authors;
  }

  /**
   * Returns the same author list as get_authors().
   */
  public function get_authors_forxmlnode() {
    return $this->authors;
  }

  /**
   * Sets the show to the title of the node referenced by field_ref_episode.
   */
  public function set_show($node) {
    $field = field_get_items('node', $node, 'field_ref_episode', $node->language);
    if (!empty($field[0]['nid'])) {
      // node_load() returns FALSE for a reference to a missing node.
      $episode = node_load($field[0]['nid']);
      if ($episode) {
        $this->show = $episode->title;
      }
    }
  }

  /**
   * Returns the escaped show title, or NULL when no show was set.
   */
  public function get_show() {
    if (!empty($this->show)) {
      return htmlspecialchars($this->show);
    }
    return NULL;
  }

}
// Index every published story into Elasticsearch, one batch at a time.
$count = feedQuery(true);
$start = 0;
// Number of stories queried and indexed per batch.
$max = 1000;
$indexParams = array('index' => 'stories');

try {
  //$client->indices()->delete($indexParams);
  $client->indices()->create($indexParams);
}
catch (Exception $e) {
  // Best effort: keep going with whatever index is there (presumably the
  // failure means it already exists), but report the reason instead of
  // hiding it.
  print "Index '{$indexParams['index']}' was not created: " . $e->getMessage() . "\n";
}

$writer = new JsonFeedWriter();

while ($start < $count) {
  // Clamp the upper bound so the last batch does not report past the total.
  $running = min($start + $max, $count);
  print "Indexing items $start - $running of $count\n";

  $nids = feedQuery(false, $start, $max);
  $feedItems = createItems($nids);

  // render() requires an array; skip the batch when nothing was built.
  if (!empty($feedItems)) {
    foreach ($writer->render($feedItems) as $item) {
      //var_dump($item['id']);
      $body = json_encode($item);
      if ($body === FALSE) {
        // Do not send a broken document to Elasticsearch.
        print "Skipping item {$item['id']}: JSON encoding failed (error code " . json_last_error() . ")\n";
        continue;
      }

      $params = array(
        'index' => $indexParams['index'],
        'type' => 'story',
        'body' => $body,
        'id' => $item['id'],
      );
      $client->index($params);
    }
  }

  $start += $max;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment