lschwetlick · July 23, 2021 06:41
diff --git a/get_gallery.py b/get_gallery.py
 import flickrapi
 import json
 import pandas as pd
 import requests # to get image from the web
 import shutil # to save it locally
 from tqdm import tqdm


 def download_jpg(image_url, index):
     """Download one image into the local ``flickr_corpus`` folder.

     Args:
         image_url: URL of the image to fetch.
         index: Running number used to build the file name; it is
             left-padded with zeros to a width of four characters.

     Returns:
         The target path ``flickr_corpus/Potsdam_Corpus_<index>.jpg``. The
         path is returned even when the request did not succeed (no file is
         written in that case), so the caller always gets a name to record.
     """
     # Local import: the file's import block does not provide ``os``.
     import os

     filename = f"flickr_corpus/Potsdam_Corpus_{index:0>4}.jpg"

     # Open the url image with stream=True so the body can be copied to disk
     # from r.raw; the ``with`` block closes the response afterwards.
     with requests.get(image_url, stream=True) as r:
         # Check if the image was retrieved successfully
         if r.status_code == 200:
             # Set decode_content value to True, otherwise the downloaded
             # image file's size will be zero.
             r.raw.decode_content = True

             # Make sure the target folder exists before opening the file.
             os.makedirs(os.path.dirname(filename), exist_ok=True)

             # Open a local file with wb ( write binary ) permission.
             with open(filename, 'wb') as f:
                 shutil.copyfileobj(r.raw, f)
         else:
             # f-string so that the index is actually interpolated.
             print(f"Image {index:0>4} Couldn't be retreived ")
     return filename



 # Flickr API credentials ('000' is presumably a placeholder -- fill in real
 # values before running).
 api_key = u'000'
 api_secret = u'000'

 flickr = flickrapi.FlickrAPI(api_key, api_secret)

 # get galleries made by my user
 galeries = flickr.galleries.getList(user_id="000", format='json')
 json_galeries = json.loads(galeries)
 gallery_dicts = json_galeries['galleries']['gallery']
 gallery_ids = [x['gallery_id'] for x in gallery_dicts]

 # keys of the merged photo/size info that are kept in the table
 names = {'id', 'license', 'title', 'source', 'width', 'height', 'realname'}

 # One dict per picture. The table is built once at the end because
 # DataFrame.append was removed in pandas 2.0 and copied the whole table on
 # every call.
 rows = []
 global_ix = 0

 # iterate over galleries
 for g_id in gallery_ids:
     # get a list of all fotos in that gallery
     json_fotos = flickr.galleries.getPhotos(gallery_id=g_id, format='json')
     foto_dicts = json.loads(json_fotos)['photos']['photo']

     # f is the position of the photo within its gallery
     for f, foto in enumerate(tqdm(foto_dicts)):
         # get basic info about the photo
         picjson = flickr.photos.getInfo(photo_id=foto["id"], format="json")
         pic_dict = json.loads(picjson)['photo']
         # get info about the size and the link; the last entry of the size
         # list is used (presumably the largest rendition -- TODO confirm)
         sizejson = flickr.photos.getSizes(photo_id=pic_dict['id'], format="json")
         sizedata = json.loads(sizejson)['sizes']['size'][-1]
         # add to dict
         pic_dict.update(sizedata)
         # subset dict to have only relevant info
         pic_dict_short = {key: value for key, value in pic_dict.items() if key in names}
         pic_dict_short["corp_ix"] = f
         # download from url; global_ix numbers the files across all galleries
         fname = download_jpg(pic_dict_short["source"], index=global_ix)
         # add to our table
         pic_dict_short["filename"] = fname
         pic_dict_short["gallery"] = g_id
         rows.append(pic_dict_short)
         global_ix += 1

 # table that holds some info about the pictures
 tab = pd.DataFrame(rows)
 tab.to_csv("flickr_imgs.csv", index=False)
	import flickrapi
	import json
	import pandas as pd
	import requests # to get image from the web
	import shutil # to save it locally
	from tqdm import tqdm


	def download_jpg(image_url, index):
	    """Download one image into the local ``flickr_corpus`` folder.

	    Args:
	        image_url: URL of the image to fetch.
	        index: Running number used to build the file name; it is
	            left-padded with zeros to a width of four characters.

	    Returns:
	        The target path ``flickr_corpus/Potsdam_Corpus_<index>.jpg``. The
	        path is returned even when the request did not succeed (no file is
	        written in that case), so the caller always gets a name to record.
	    """
	    # Local import: the file's import block does not provide ``os``.
	    import os

	    filename = f"flickr_corpus/Potsdam_Corpus_{index:0>4}.jpg"

	    # Open the url image with stream=True so the body can be copied to disk
	    # from r.raw; the ``with`` block closes the response afterwards.
	    with requests.get(image_url, stream=True) as r:
	        # Check if the image was retrieved successfully
	        if r.status_code == 200:
	            # Set decode_content value to True, otherwise the downloaded
	            # image file's size will be zero.
	            r.raw.decode_content = True

	            # Make sure the target folder exists before opening the file.
	            os.makedirs(os.path.dirname(filename), exist_ok=True)

	            # Open a local file with wb ( write binary ) permission.
	            with open(filename, 'wb') as f:
	                shutil.copyfileobj(r.raw, f)
	        else:
	            # f-string so that the index is actually interpolated.
	            print(f"Image {index:0>4} Couldn't be retreived ")
	    return filename



	# Flickr API credentials ('000' is presumably a placeholder -- fill in real
	# values before running).
	api_key = u'000'
	api_secret = u'000'

	flickr = flickrapi.FlickrAPI(api_key, api_secret)

	# get galleries made by my user
	galeries = flickr.galleries.getList(user_id="000", format='json')
	json_galeries = json.loads(galeries)
	gallery_dicts = json_galeries['galleries']['gallery']
	gallery_ids = [x['gallery_id'] for x in gallery_dicts]

	# keys of the merged photo/size info that are kept in the table
	names = {'id', 'license', 'title', 'source', 'width', 'height', 'realname'}

	# One dict per picture. The table is built once at the end because
	# DataFrame.append was removed in pandas 2.0 and copied the whole table on
	# every call.
	rows = []
	global_ix = 0

	# iterate over galleries
	for g_id in gallery_ids:
	    # get a list of all fotos in that gallery
	    json_fotos = flickr.galleries.getPhotos(gallery_id=g_id, format='json')
	    foto_dicts = json.loads(json_fotos)['photos']['photo']

	    # f is the position of the photo within its gallery
	    for f, foto in enumerate(tqdm(foto_dicts)):
	        # get basic info about the photo
	        picjson = flickr.photos.getInfo(photo_id=foto["id"], format="json")
	        pic_dict = json.loads(picjson)['photo']
	        # get info about the size and the link; the last entry of the size
	        # list is used (presumably the largest rendition -- TODO confirm)
	        sizejson = flickr.photos.getSizes(photo_id=pic_dict['id'], format="json")
	        sizedata = json.loads(sizejson)['sizes']['size'][-1]
	        # add to dict
	        pic_dict.update(sizedata)
	        # subset dict to have only relevant info
	        pic_dict_short = {key: value for key, value in pic_dict.items() if key in names}
	        pic_dict_short["corp_ix"] = f
	        # download from url; global_ix numbers the files across all galleries
	        fname = download_jpg(pic_dict_short["source"], index=global_ix)
	        # add to our table
	        pic_dict_short["filename"] = fname
	        pic_dict_short["gallery"] = g_id
	        rows.append(pic_dict_short)
	        global_ix += 1

	# table that holds some info about the pictures
	tab = pd.DataFrame(rows)
	tab.to_csv("flickr_imgs.csv", index=False)
No results found