Last active
May 22, 2023 16:28
-
-
Save joshpangell/ea9a28035b75497de902f218065e38ce to your computer and use it in GitHub Desktop.
Query the Instagram API by username or tag. If we can't find the ID, scrape the HTML page for it
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* Instagram feed getter | |
* This will query the Instagram API and return a list of images | |
*/ | |
class InstagramFeed { | |
public $result; | |
public static $display_size = 'thumbnail'; // you can choose between "low_resolution", "thumbnail" and "standard_resolution" | |
public static $user_id; | |
public $feed; | |
private $offset_media_id; | |
private $count; | |
private $next_max_id; | |
private $client_id = "XXXXXX"; | |
private $client_secret = "XXXXXX"; | |
function __construct($count, $offset) { | |
$this->offset_media_id = $offset; | |
$this->count = $count; | |
} | |
/** | |
* Fetch the Instagram feed from API | |
* @param String $url URL to be fetching | |
* @return JSON JSON result from Instagram | |
*/ | |
public static function fetch($url){ | |
$ch = curl_init(); | |
curl_setopt($ch, CURLOPT_URL, $url); | |
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); | |
curl_setopt($ch, CURLOPT_TIMEOUT, 20); | |
$result = curl_exec($ch); | |
curl_close($ch); | |
return $result; | |
} | |
/** | |
* Get a feed from an instagram username | |
* Will attempt to get the id based on API search, | |
* but that does not always return the correct result because | |
* the API uses fuzzy search. In that case, scrape the actual | |
* page for the data, but only as a fallback | |
* | |
* @param String $user_name Instagram username | |
* @return Array | |
*/ | |
public function getFeedByUserName($user_name) { | |
// Try getting the ID the legit way | |
$user_id = $this->getUserID($user_name); | |
// If it is null, then try scraping | |
if(is_null($user_id)) { | |
$user_id = $this->scrapeForID($user_name); | |
} | |
// Now get the feed | |
$this->feed = $this->getFeedByID($user_id); | |
return $this->feed; | |
} | |
/** | |
* Return a feed based on tag name | |
* @param String $tag_name The tag | |
* @return JSON | |
*/ | |
public function getFeedByTagName($tag_name) { | |
$ch = curl_init(); | |
curl_setopt($ch, CURLOPT_TIMEOUT, 20); | |
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); | |
curl_setopt($ch, CURLOPT_TIMEOUT, 20); | |
if($this->offset_media_id == 0) { | |
curl_setopt($ch, CURLOPT_URL, "https://api.instagram.com/v1/tags/$tag_name/media/recent/?count=$this->count&client_id=".$this->client_id); | |
} | |
else { | |
curl_setopt($ch, CURLOPT_URL, "https://api.instagram.com/v1/tags/$tag_name/media/recent/?count=$this->count&max_tag_id=$this->offset_media_id&client_id=".$this->client_id); | |
} | |
$posts = curl_exec($ch); | |
curl_close($ch); | |
$posts = preg_replace_callback('/\\\\u([0-9a-fA-F]{4})/', function ($match) { | |
return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE'); | |
}, $posts); | |
$posts = json_decode(html_entity_decode(utf8_encode($posts))); | |
// There was an error, return the error message | |
if($posts->meta && $posts->meta->error_type) { | |
return $posts->meta->error_message; | |
} | |
$this->next_max_id = $posts->pagination->next_max_id; | |
$posts = $posts->data; | |
$posts = $this->getStandardizedData($posts); | |
return $posts; | |
} | |
/** | |
* Get a user ID from username via API | |
* @param String $username username to search | |
* @return Int Instagram userid | |
*/ | |
private function getUserID($username) { | |
$ch = curl_init(); | |
curl_setopt($ch, CURLOPT_TIMEOUT, 20); | |
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); | |
curl_setopt($ch, CURLOPT_TIMEOUT, 20); | |
curl_setopt($ch, CURLOPT_URL, "https://api.instagram.com/v1/users/search?q=$username&client_id=".$this->client_id); | |
$returned_users = curl_exec($ch); | |
curl_close($ch); | |
// Loop through the results and find the exact match | |
foreach (json_decode( $returned_users )->data as $row) { | |
if($row->username == $username) { | |
$user_id = $row->id; | |
continue; | |
} | |
} | |
return $user_id; | |
} | |
/** | |
* Scrape the contents of the actual Instagram HTML | |
* Use this as a fallback, because it may change and break | |
* Pulled from https://gist.github.com/cosmocatalano/4544576 | |
* | |
* @param String $username Instagram username | |
* @return Int Instagram userid | |
*/ | |
private function scrapeForID($username) { | |
$insta_source = file_get_contents('http://instagram.com/'.$username); | |
$shards = explode('window._sharedData = ', $insta_source); | |
$insta_json = explode(';</script>', $shards[1]); | |
$insta_array = json_decode($insta_json[0], TRUE); | |
return $insta_array['entry_data']['ProfilePage'][0]['user']['id']; | |
} | |
/** | |
* Query the Instagram API for a user's feed | |
* @param Int $user_id Instagram userid | |
* @return JSON | |
*/ | |
private function getFeedByID($user_id) { | |
$ch = curl_init(); | |
curl_setopt($ch, CURLOPT_TIMEOUT, 20); | |
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); | |
curl_setopt($ch, CURLOPT_TIMEOUT, 20); | |
if($this->offset_media_id == 0) { | |
curl_setopt($ch, CURLOPT_URL, "https://api.instagram.com/v1/users/$user_id/media/recent/?count=$this->count&client_id=".$this->client_id); | |
} | |
else { | |
curl_setopt($ch, CURLOPT_URL, "https://api.instagram.com/v1/users/$user_id/media/recent/?count=$this->count&max_id=$this->offset_media_id&client_id=".$this->client_id); | |
} | |
$posts = curl_exec($ch); | |
curl_close($ch); | |
$posts = preg_replace_callback('/\\\\u([0-9a-fA-F]{4})/', function ($match) { | |
return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE'); | |
}, $posts); | |
$posts = json_decode(html_entity_decode(utf8_encode($posts))); | |
$this->next_max_id = $posts->pagination->next_max_id; | |
$posts = $posts->data; | |
$posts = $this->getStandardizedData($posts); | |
return $posts; | |
} | |
/** | |
* Get the next pagination ID | |
* @return Int | |
*/ | |
public function getNextMaxID() { | |
return $this->next_max_id; | |
} | |
/** | |
* Standardize the feed from Instagram | |
* @param Obj $data Instagram API data | |
* @return Array | |
*/ | |
private function getStandardizedData($data) { | |
$newData = array(); | |
foreach($data as $post) { | |
$images = array(); | |
array_push($images, $post->images->low_resolution); | |
array_push($images, $post->images->thumbnail); | |
array_push($images, $post->images->standard_resolution); | |
$alt_sizes = array('alt_sizes' => $images); | |
array_push($newData, array( | |
'post_url' => $post->link, | |
'photos' => array($alt_sizes), | |
'feed_source' => "instagram", | |
'id' => $post->id, | |
'title' => $post->user->username, | |
'feed_offset' =>$this->offset_media_id, | |
'next_max_id' =>$this->next_max_id, | |
'timestamp' => $post->created_time | |
)); | |
} | |
return $newData; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment