@vijayanandrp
Last active November 27, 2017 04:55
A simple script to download all the images from a Facebook page. - https://informationcorners.com/facebook-page-image-scraper/
#!/usr/bin/python
# coding: utf-8
import os
import pickle
import random
import string
import time
import urllib.request
import requests
import facebook
import rfc6266
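# Third-party dependencies (pip package names assumed from the imports):
# requests, facebook-sdk (imported as facebook) and rfc6266; the other
# modules above are part of the standard library.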
# random string, used to keep downloaded file names unique
def random_string(n=8):
    chars = string.ascii_uppercase + string.ascii_lowercase + string.digits
    return ''.join([random.choice(chars) for _ in range(n)])
def read(name='data'):
    """Read pickled data back from '<name>.lst' (unused helper)."""
    # pickle streams are binary, so the file must be opened in 'rb'
    with open('%s.lst' % name, 'rb') as f:
        res = pickle.load(f)
    return res
def fetch(limit=100000, id=None, token=None):
    """Fetch photo metadata for the page via Facebook's Graph API."""
    lst = []
    graph = facebook.GraphAPI(token)
    # The Graph API takes 'fields' as a comma-separated string
    args = {'fields': 'images,name', 'limit': limit}
    res = graph.request('%s/photos?type=uploaded' % id, args)
    # res = graph.request('%s/albums' % id, args)
    # res = graph.request('%s/photos/?tab=album' % id, args)
    for i in res['data']:
        lst.append(i)
        print(i)
    # Follow cursor-based pagination until no 'next' page remains
    try:
        while 'paging' in res and 'next' in res['paging']:
            url = res['paging']['next']
            print(url)
            res = requests.get(url).json()
            print(res.keys())
            lst.extend(res.get('data', []))
    except Exception as error:
        print(error)
    return lst
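# For reference, a sketch of the response shape fetch() pages through
# (layout assumed from Graph API conventions, not verified here):
# {'data': [{'id': '1234567890', 'name': '...', 'images': [...]}, ...],
#  'paging': {'cursors': {'before': '...', 'after': '...'},
#             'next': 'https://graph.facebook.com/...'}}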
def download(items, id):
    """Download every photo in the fetched list to media/<page id>/."""
    print("Starting download. Total images -", len(items))
    t1 = time.time()  # start the timer
    # Save all images to one directory, skipping any URL that errors out
    storage_path = os.path.join(os.getcwd(), 'media')
    path_to_store = os.path.join(storage_path, id)
    if not os.path.exists(path_to_store):
        os.makedirs(path_to_store)
    k = 0
    while k < len(items):
        try:
            header = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) "
                                    "Chrome/41.0.2228.0 Safari/537.36"}
            photo_id = items[k]['id']  # don't shadow the page id parameter
            url = 'https://graph.facebook.com/{}/picture'.format(photo_id)
            req = urllib.request.Request(url, headers=header)
            response = urllib.request.urlopen(req, None, 15)
            # Derive the file name from the Content-Disposition header
            file_name = rfc6266.parse_requests_response(response).filename_unsafe
            file_to_write = os.path.join(path_to_store, random_string() + '_' + file_name)
            with open(file_to_write, 'wb') as output_file:
                data = response.read()
                output_file.write(data)
            response.close()
            print("completed ====> " + str(k + 1))
            k += 1
        except Exception as e:
            # Log and skip the failing item
            print("URLError -", items[k].get('id'), e)
            k += 1
    t2 = time.time()  # stop the timer
    total_time = t2 - t1  # total time taken to crawl
    print("Total time taken: " + str(total_time) + " seconds")
if __name__ == '__main__':
    # Your input here: either the page name or the page id
    ID = 'MinionQuotez'
    # Your developer API token, grabbed from developers.facebook.com/tools/explorer/
    TOKEN = 'XXXXXXXXXXX'
    # Fetch up to 250 photo entries per page of Graph API results, then download them all
    lst = fetch(limit=250, id=ID, token=TOKEN)
    download(lst, ID)
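If the file is saved as, say, fb_page_scraper.py (the module name is an assumption; the gist itself is unnamed here), fetch() and download() can also be driven from another script. A minimal sketch, assuming a valid access token:

from fb_page_scraper import fetch, download

photos = fetch(limit=100, id='MinionQuotez', token='YOUR_ACCESS_TOKEN')
download(photos, 'MinionQuotez')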