Skip to content

Instantly share code, notes, and snippets.

@debboutr
Created June 29, 2018 19:33
Show Gist options
  • Save debboutr/e164a582177c9aed5d1b6cd2c04073d4 to your computer and use it in GitHub Desktop.
# import dependencies
import urllib2,json,arcpy
import numpy as np
import pandas as pd
from django.utils.encoding import smart_str
import time
##################################################################################
# VARIABLES
# set working directory
workingPath = 'D:/Projects/Panoramio'
# set output file path/name
outFileName = workingPath + '/outputPhotoList.csv'
# bounding box (WGS84 decimal degrees) for the area of interest
minX = -92.335981
minY = 46.630695
maxX = -91.946101
maxY = 46.804721
# set up panoramio / instagram query strings
# BUG FIX: the original had a stray call `getPhotoCount(url)` here, before
# either `getPhotoCount` or `url` was defined -- it raised NameError at
# import time and has been removed.
url = 'https://api.instagram.com/v1/media/search?lat=34.048502&lng=-118.246008&distance=5000&access_token=1771051239.ab103e5.7e013b99ce924cb7a894ecd0dd030be5&callback=?&count=500'
initialGET = 'http://www.panoramio.com/map/get_panoramas.php?set=full&from=0&to=100&minx=%s&miny=%s&maxx=%s&maxy=%s&size=medium&mapfilter=true'%(minX,minY,maxX,maxY)
stringGET_1 = 'http://www.panoramio.com/map/get_panoramas.php?set=full&from='
stringGET_2 = '&to='
stringGET_3 = '&minx=%s&miny=%s&maxx=%s&maxy=%s&size=medium&mapfilter=true'%(minX,minY,maxX,maxY)
# output field name dictionary
outFileHeaders = ['upload_date','owner_name','photo_id','longitude','height','width','photo_title','latitude','owner_url','photo_url','photo_file_url','owner_id']
# arcpy variables
inTable = workingPath + '/outputPhotoList.csv'
xCoords = 'longitude'
yCoords = 'latitude'
outLayer = 'pointLayer'
savedLayer = workingPath + '/photoPoints.shp'
# BUG FIX: raw string -- the original non-raw Windows path contains "\U",
# which is an invalid escape (SyntaxError in Python 3, fragile in Python 2).
spRef = r"C:\Users\Rdebbout\AppData\Roaming\ESRI\Desktop10.3\ArcMap\Coordinate Systems\GCS_WGS_1984.prj"
##################################################################################
# FUNCTIONS 5 km spacing 0.04491265 0.0449157 div by 2 : 0.022456325
def getPhotoCount(url):
    """GET *url*, parse the JSON response, and return its 'data' list.

    Despite the name, this returns the list of media records themselves;
    callers take len() of the result to get the count.
    Raises urllib2.HTTPError on a failed request (callers below catch it).

    BUG FIX: the original body had lost its indentation (a SyntaxError);
    structure reconstructed, logic unchanged.
    """
    urlResponse = urllib2.urlopen(url).read()
    parsedResponse = json.loads(urlResponse)
    queryCount = parsedResponse['data']
    # print 'Query count returned: ' + str(len(queryCount))
    return queryCount
def getRecords(data):
for loc in range(len(data)):
if data[loc]['id'] in klip:
print data[loc]['id']
if data[loc]['id'] not in klip:
ID = data[loc]['location']['id']
latitude = data[loc]['location']['latitude']
longitude = data[loc]['location']['longitude']
title = smart_str(data[loc]['location']['name'])
url = smart_str(data[loc]['images']['standard_resolution']['url'])
username = smart_str(data[loc]['user']['username'])
tbl = tbl.append(pd.DataFrame([[latitude, longitude, ID, username, title, url]], columns=cols), ignore_index=True)
klip.append(data[loc]['id'])
# grid step sizes in decimal degrees (approximate at this latitude):
# 5 km     0.0449157
# 2.5 km   0.02245785
# 1.25 km  0.011228925
# 0.625 km 0.0056144625
# BUG FIX: two stray REPL lines here (`data[loc]['location']['name'].encode()`
# and `type(data[loc]['location']['name'])`) referenced names undefined at
# module level and raised NameError; removed.
# 7 km spacing ex.
klip = []   # media ids already written to `tbl`
cols = ['latitude', 'longitude', 'id', 'username', 'title', 'url']
tbl = pd.DataFrame()
# REPL inspection: how many grid steps span the bounding box (results unused)
len(np.arange(minX, maxX, 0.0056144625))
len(np.arange(minY, maxY, 0.0056144625))
for xcoord in np.arange(minX, maxX, 0.0056144625):
for ycoord in np.arange(minY, maxY, 0.0056144625):
url = 'https://api.instagram.com/v1/media/search?lat=%s&lng=%s&distance=625&access_token=1771051239.ab103e5.7e013b99ce924cb7a894ecd0dd030be5&callback=?&count=100' % (ycoord, xcoord) # &callback=?&count=500
data = getPhotoCount(url)
if str(len(data)) == 100:
print 'X: %s Y: %s has %s returns.' % (xcoord, ycoord, str(len(data)))
for loc in range(len(data)):
# if data[loc]['id'] in klip:
# print data[loc]['id']
if data[loc]['id'] not in klip:
ID = data[loc]['location']['id']
latitude = data[loc]['location']['latitude']
longitude = data[loc]['location']['longitude']
title = smart_str(data[loc]['location']['name'])
url = smart_str(data[loc]['images']['standard_resolution']['url'])
username = smart_str(data[loc]['user']['username'])
tbl = tbl.append(pd.DataFrame([[latitude, longitude, ID, username, title, url]], columns=cols), ignore_index=True)
klip.append(data[loc]['id'])
tbl2.columns = ['latitude', 'longitude', 'insta_id', 'username', 'title', 'url']
tbl2.to_csv(workingPath + '/outputPhotoList_locID.csv',index=False)
t1 = pd.read_csv(workingPath + '/outputPhotoListCeck.csv')
t2 = pd.read_csv(workingPath + '/outputPhotoList.csv')
count = 0
t1.columns
for x in t2.insta_id.values:
if x not in t1.insta_id.values:
print x
count+=1
print count
url = 'https://api.instagram.com/v1/users/1771051239/?access_token=1771051239.ab103e5.7e013b99ce924cb7a894ecd0dd030be5'
# build a point layer from the output CSV and persist it as a shapefile
arcpy.MakeXYEventLayer_management(inTable, xCoords, yCoords, outLayer, spRef)
# BUG FIX: original called `arcpy.arcpy.CopyFeatures_management`, which raises
# AttributeError -- the module name was doubled.
arcpy.CopyFeatures_management(outLayer, savedLayer)
arcpy.Delete_management(outLayer)
# ---- interactive exploration of the `requests` API (results unused) ----
type(username)  # NOTE(review): `username` only exists after the loops above ran
import requests
results = requests.get(url)
type(results)
data = results.json()
type(data)
results.headers
data['data'][90]['images']['standard_resolution']['url']
data['data'][90]['videos']['standard_resolution']['url']
'videos' in data['data'][83]
# query parameters for a requests-based media search (never used below --
# the commented-out getPhotoCount variant that would consume them was
# abandoned in favor of the urllib2 version above)
payload = {'lat': '34.048502',
           'lng': '-118.246008',
           'distance': '250',
           'access_token': '1771051239.ab103e5.7e013b99ce924cb7a894ecd0dd030be5',
           'count': '100'}
url = 'https://api.instagram.com/v1/media/search'
#def getPhotoCount(url, payload):
#    # query website, parse JSON, and return photo count
#    r = requests.get(url, params=payload)
#    parsedResponse = r.json()
#    query = parsedResponse['data']
#    # print 'Query count returned: ' + str(len(query))
#    return query
url = 'https://api.instagram.com/v1/users/self/media/recent/?access_token=1771051239.ab103e5.7e013b99ce924cb7a894ecd0dd030be5'
data = getPhotoCount(url)
# BUG FIX: `r` is only defined inside the commented-out function above, so the
# bare `r.text` expression raised NameError; commented out.
#r.text
klip = []    # media ids already recorded into `tbl`
recirc = []  # location ids to re-query via the locations endpoint below
cols = ['latitude', 'longitude', 'id', 'username', 'title', 'tags', 'url']
tbl = pd.DataFrame()  # accumulator for the 5 km grid sweep
token = '1771051239.ab103e5.7e013b99ce924cb7a894ecd0dd030be5'
#url = 'https://api.instagram.com/v1/media/search'
count = 0  # number of grid queries issued (progress counter)
for xcoord in np.arange(minX, maxX, 0.0449157):
for ycoord in np.arange(minY, maxY, 0.0449157):
url = 'https://api.instagram.com/v1/media/search?lat=%s&lng=%s&distance=5000&access_token=%s&callback=?&count=500' % (ycoord, xcoord, token) # &callback=?&count=500
data = getPhotoCount(url)
count += 1
print count
if len(data) == 100:
print 'X: %s Y: %s has %s returns.' % (xcoord, ycoord, str(len(data)))
for loc in range(len(data)):
chk = data[loc]['id']
if chk not in klip:
ID = data[loc]['location']['id']
if ID not in recirc:
recirc.append(ID)
latitude = data[loc]['location']['latitude']
longitude = data[loc]['location']['longitude']
title = smart_str(data[loc]['location']['name'])
if 'videos' in data[loc]:
# print 'video ' + str(loc)
url = data[loc]['videos']['standard_resolution']['url']
else:
url = smart_str(data[loc]['images']['standard_resolution']['url'])
username = smart_str(data[loc]['user']['username'])
tags = smart_str(", ".join(data[0]['tags']))
tbl = tbl.append(pd.DataFrame([[latitude, longitude, ID, username, title, tags, url]], columns=cols), ignore_index=True)
klip.append(chk)
print 'Recirc: %s' % str(len(recirc))
tbl2 = pd.DataFrame()
for rec in recirc:
print '********'
print rec
url = 'https://api.instagram.com/v1/locations/%s/media/recent?access_token=%s&count=500' % (rec, token)
try:
data = getPhotoCount(url)
except urllib2.HTTPError:
time.sleep(20)
data = getPhotoCount(url)
print '********'
print len(data)
for loc in range(len(data)):
chk = data[loc]['id']
if chk not in klip:
ID = data[loc]['location']['id']
print '********'
print ID
# recirc.append(ID)
latitude = data[loc]['location']['latitude']
longitude = data[loc]['location']['longitude']
title = smart_str(data[loc]['location']['name'])
if 'videos' in data[loc]:
# print 'video ' + str(loc)
url = data[loc]['videos']['standard_resolution']['url']
else:
url = smart_str(data[loc]['images']['standard_resolution']['url'])
username = smart_str(data[loc]['user']['username'])
tags = smart_str(", ".join(data[0]['tags']))
tbl2 = tbl2.append(pd.DataFrame([[latitude, longitude, ID, username, title, tags, url]], columns=cols), ignore_index=True)
# ---- interactive scratch: the next three lines reference names (tbl1_25,
# tbl2_5, data, loc) that are never defined in this script and will raise
# NameError if the file is run top-to-bottom -- kept only as a record of the
# REPL session.
chktbl = pd.concat([tbl1_25,tbl2_5])
str([str(x) for x in ", ".join(data[0]['tags'])])
tags = smart_str(", ".join(data[0]['tags']))
# grab a single grid coordinate for ad-hoc testing below
ycoord = np.arange(minY, maxY, 0.0449157)[0]
xcoord = np.arange(minX, maxX, 0.0449157)[1]
# bounding box re-declared (same values as at the top of the file)
minX = -92.335981
minY = 46.630695
maxX = -91.946101
maxY = 46.804721
from math import radians, sin, cos, sqrt, asin
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in kilometers between two (lat, lon) points.

    Implements the haversine formula on a spherical Earth of radius 6371 km;
    inputs are decimal degrees.
    """
    earth_radius_km = 6371
    phi1, phi2 = radians(lat1), radians(lat2)
    delta_phi = radians(lat2 - lat1)
    delta_lambda = radians(lon2 - lon1)
    half_chord_sq = (sin(delta_phi / 2) ** 2
                     + cos(phi1) * cos(phi2) * sin(delta_lambda / 2) ** 2)
    central_angle = 2 * asin(sqrt(half_chord_sq))
    return earth_radius_km * central_angle
haversine(46.630695, -92.335981, 46.630695, -91.946101)
count = 0
for url in tbl.url.values:
if url in tbl2.url.values:
count += 1
chks = []
flop = []
for x in range(20):
data = getPhotoCount(url)
print len(data)
for loc in range(len(data)):
chk = data[loc]['id']
ID = data[loc]['location']['id']
chks.append(chk)
flop.append(ID)
len(set(chks))
import requests
results = requests.get(url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment