Skip to content

Instantly share code, notes, and snippets.

@ChrisHardie
Created October 14, 2019 16:50
Show Gist options
  • Save ChrisHardie/5d8d8dd0a6fe3e1c67df2a7e444594a7 to your computer and use it in GitHub Desktop.
Save ChrisHardie/5d8d8dd0a6fe3e1c67df2a7e444594a7 to your computer and use it in GitHub Desktop.
Sample PHP script to scrape an Instagram public profile and generate an RSS feed
<?php
// Public Instagram usernames to turn into RSS feeds (one feed file per account).
$target_accounts = array(
    'bikes_as_transportation',
);

// Upper bound on the number of posts written to each feed.
$max_items = 10;

// Directory the generated .rss files are written to.
$output_dir = '/home/chris/feeds/www';

foreach ( $target_accounts as $target_user ) {
    // Encode once; the encoded name is reused in the profile URL, the titles and the filename.
    $target_user = urlencode( $target_user );

    $results_array = scrape_insta( $target_user );

    // isset() safely returns false when the scrape failed (non-array result) or when any
    // level of the expected structure is missing, so no offsets are read blindly.
    $edges = null;
    if ( isset( $results_array['entry_data']['ProfilePage'][0]['graphql']['user']['edge_owner_to_timeline_media']['edges'] ) ) {
        $edges = $results_array['entry_data']['ProfilePage'][0]['graphql']['user']['edge_owner_to_timeline_media']['edges'];
    }

    if ( ! is_array( $edges ) ) {
        // Leave any previously generated feed untouched rather than overwriting it with junk.
        error_log( sprintf( 'Skipping %s: no timeline data could be scraped.', $target_user ) );
        continue;
    }

    $xml = new SimpleXMLElement( '<rss/>' );
    $xml->addAttribute( 'version', '2.0' );

    $channel = $xml->addChild( 'channel' );
    $channel->addChild( 'title', sprintf( "%s's photos on Instagram", $target_user ) );
    $channel->addChild( 'link', sprintf( 'https://www.instagram.com/%s/', $target_user ) );
    $channel->addChild( 'description', 'Instagram Photos' );
    $channel->addChild( 'language', 'en-us' );

    // Emit only the posts that actually exist, capped at $max_items.
    foreach ( array_slice( $edges, 0, $max_items ) as $edge ) {
        if ( ! isset( $edge['node']['shortcode'] ) ) {
            // Without a shortcode there is no permalink to point the item at.
            continue;
        }
        $feed_item = $edge['node'];

        $item = $channel->addChild( 'item' );

        // Remote text is written via property assignment: unlike addChild(), it escapes
        // '&' correctly, so captions such as "bikes & trains" are not dropped.
        $item->link = sprintf( 'https://www.instagram.com/p/%s/', $feed_item['shortcode'] );

        if ( ! empty( $feed_item['edge_media_to_caption']['edges'][0]['node']['text'] ) ) {
            $item->title = (string) $feed_item['edge_media_to_caption']['edges'][0]['node']['text'];
        } else {
            // Posts without a caption get a generated title.
            $item->title = sprintf( 'Image %s from %s', $feed_item['shortcode'], $target_user );
        }

        if ( ! empty( $feed_item['thumbnail_src'] ) ) {
            // Media RSS thumbnail; the child element inherits the media: namespace of its parent group.
            $mediaGroup = $item->addChild( 'media:group', '', 'http://search.yahoo.com/mrss/' );
            $thumbnail  = $mediaGroup->addChild( 'media:thumbnail' );
            $thumbnail->addAttribute( 'url', $feed_item['thumbnail_src'] );
        }
    }

    $filename = $output_dir . '/instagram-' . $target_user . '.rss';
    if ( false === file_put_contents( $filename, $xml->asXML() ) ) {
        die( "Unable to open $filename!" );
    }
}
/**
 * Fetch a public Instagram profile page and return the page data embedded in it.
 *
 * The profile HTML is searched for the inline `window._sharedData = {...};</script>`
 * assignment; the JSON on its right-hand side is decoded into an associative array.
 *
 * @param string $username Profile name; it is appended to the URL as-is, so the caller
 *                         is expected to have URL-encoded it already.
 * @return array|false The decoded data as an associative array, or false when the page
 *                     cannot be fetched, the marker is absent, or the payload does not
 *                     decode to an array.
 */
function scrape_insta($username) {
    // Identify the scraper to the remote server.
    $options = array('http' => array('user_agent' => 'https://chrishardie.com/; [email protected]'));
    $context = stream_context_create( $options );

    // Warnings are suppressed on purpose: a failed fetch is reported through the return value.
    $insta_source = @file_get_contents( 'https://www.instagram.com/' . $username . '/', false, $context );
    if ( ! $insta_source ) {
        return false;
    }

    // Everything after the first marker; the limit avoids splitting the rest of the page.
    $shards = explode( 'window._sharedData = ', $insta_source, 2 );
    if ( ! isset( $shards[1] ) ) {
        return false;
    }

    // The JSON literal runs up to the statement terminator that closes the script tag.
    $insta_json  = explode( ';</script>', $shards[1], 2 );
    $insta_array = json_decode( $insta_json[0], true );

    // json_decode() yields null on malformed input and scalars for scalar JSON;
    // callers index into the result, so anything that is not an array is a failure.
    return is_array( $insta_array ) ? $insta_array : false;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment