# import dependencies
import urllib2, json, arcpy
import numpy as np
import pandas as pd
from django.utils.encoding import smart_str
import time
##################################################################################
# VARIABLES
# set working directory
workingPath = 'D:/Projects/Panoramio'
# set output file path/name
outFileName = workingPath + '/outputPhotoList.csv'
# set bounding x,y values
minX = -92.335981
minY = 46.630695
maxX = -91.946101
maxY = 46.804721
# set up panoramio query strings
# getPhotoCount(url)  # stray call left over from testing -- the function is defined below
url = 'https://api.instagram.com/v1/media/search?lat=34.048502&lng=-118.246008&distance=5000&access_token=1771051239.ab103e5.7e013b99ce924cb7a894ecd0dd030be5&callback=?&count=500'
initialGET = 'http://www.panoramio.com/map/get_panoramas.php?set=full&from=0&to=100&minx=%s&miny=%s&maxx=%s&maxy=%s&size=medium&mapfilter=true' % (minX, minY, maxX, maxY)
stringGET_1 = 'http://www.panoramio.com/map/get_panoramas.php?set=full&from='
stringGET_2 = '&to='
stringGET_3 = '&minx=%s&miny=%s&maxx=%s&maxy=%s&size=medium&mapfilter=true' % (minX, minY, maxX, maxY)
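# A minimal sketch (an assumption about intent, not part of the original script)
# of how the stringGET_* pieces above could be combined to page through
# Panoramio results in 100-record windows, matching the 0-100 range in initialGET:
pageStart = 0  # hypothetical paging offset
pagedGET = stringGET_1 + str(pageStart) + stringGET_2 + str(pageStart + 100) + stringGET_3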
# output field names
outFileHeaders = ['upload_date','owner_name','photo_id','longitude','height','width','photo_title','latitude','owner_url','photo_url','photo_file_url','owner_id']
# arcpy variables
inTable = workingPath + '/outputPhotoList.csv'
xCoords = 'longitude'
yCoords = 'latitude'
outLayer = 'pointLayer'
savedLayer = workingPath + '/photoPoints.shp'
spRef = r"C:\Users\Rdebbout\AppData\Roaming\ESRI\Desktop10.3\ArcMap\Coordinate Systems\GCS_WGS_1984.prj"  # raw string so the backslashes are not treated as escapes
##################################################################################
# FUNCTIONS
# grid spacing notes: 5 km spacing ~ 0.04491265 / 0.0449157 degrees; divided by 2: 0.022456325
def getPhotoCount(url):
    # query website, parse JSON, and return the list of records under 'data'
    urlResponse = urllib2.urlopen(url).read()
    parsedResponse = json.loads(urlResponse)
    queryCount = parsedResponse['data']
    # print 'Query count returned: ' + str(len(queryCount))
    return queryCount
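# A minimal usage sketch (not part of the original flow at this point): the
# Instagram media/search URL defined above returns its records under a
# top-level 'data' key, which is what getPhotoCount pulls out.
sampleRecords = getPhotoCount(url)  # sampleRecords is a hypothetical name
print 'records returned: ' + str(len(sampleRecords))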
def getRecords(data):
    # pull location/user fields out of each record and append them to the
    # module-level tbl DataFrame (tbl, klip, and cols are defined below)
    global tbl  # tbl is re-bound inside the function, so it must be declared global
    for loc in range(len(data)):
        if data[loc]['id'] in klip:
            print data[loc]['id']
        if data[loc]['id'] not in klip:
            ID = data[loc]['location']['id']
            latitude = data[loc]['location']['latitude']
            longitude = data[loc]['location']['longitude']
            title = smart_str(data[loc]['location']['name'])
            url = smart_str(data[loc]['images']['standard_resolution']['url'])
            username = smart_str(data[loc]['user']['username'])
            tbl = tbl.append(pd.DataFrame([[latitude, longitude, ID, username, title, url]], columns=cols), ignore_index=True)
            klip.append(data[loc]['id'])

# grid spacing reference:
# 5 km      0.0449157
# 2.5 km    0.02245785
# 1.25 km   0.011228925
# 0.625 km  0.0056144625
# scratch checks on the encoding of the location name:
# data[loc]['location']['name'].encode()
# type(data[loc]['location']['name'])
# 7 km spacing ex.
klip = []
cols = ['latitude', 'longitude', 'id', 'username', 'title', 'url']
tbl = pd.DataFrame()
len(np.arange(minX, maxX, 0.0056144625))
len(np.arange(minY, maxY, 0.0056144625))
for xcoord in np.arange(minX, maxX, 0.0056144625):
    for ycoord in np.arange(minY, maxY, 0.0056144625):
        url = 'https://api.instagram.com/v1/media/search?lat=%s&lng=%s&distance=625&access_token=1771051239.ab103e5.7e013b99ce924cb7a894ecd0dd030be5&callback=?&count=100' % (ycoord, xcoord)  # &callback=?&count=500
        data = getPhotoCount(url)
        if len(data) == 100:  # flag grid cells where the 100-record cap was hit
            print 'X: %s Y: %s has %s returns.' % (xcoord, ycoord, str(len(data)))
        for loc in range(len(data)):
            # if data[loc]['id'] in klip:
            #     print data[loc]['id']
            if data[loc]['id'] not in klip:
                ID = data[loc]['location']['id']
                latitude = data[loc]['location']['latitude']
                longitude = data[loc]['location']['longitude']
                title = smart_str(data[loc]['location']['name'])
                url = smart_str(data[loc]['images']['standard_resolution']['url'])
                username = smart_str(data[loc]['user']['username'])
                tbl = tbl.append(pd.DataFrame([[latitude, longitude, ID, username, title, url]], columns=cols), ignore_index=True)
                klip.append(data[loc]['id'])
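# outFileName is defined above but never used in the original script; a minimal
# sketch (an assumption about intent) of writing the accumulated table out to it:
tbl.to_csv(outFileName, index=False)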
# note: tbl2 is not built until further down; this write-out block appears
# out of order in the scratch file
tbl2.columns = ['latitude', 'longitude', 'insta_id', 'username', 'title', 'url']
tbl2.to_csv(workingPath + '/outputPhotoList_locID.csv', index=False)
t1 = pd.read_csv(workingPath + '/outputPhotoListCeck.csv')
t2 = pd.read_csv(workingPath + '/outputPhotoList.csv')
count = 0
t1.columns
for x in t2.insta_id.values:
    if x not in t1.insta_id.values:
        print x
        count += 1
print count
url = 'https://api.instagram.com/v1/users/1771051239/?access_token=1771051239.ab103e5.7e013b99ce924cb7a894ecd0dd030be5'
arcpy.MakeXYEventLayer_management(inTable, xCoords, yCoords, outLayer, spRef)
arcpy.CopyFeatures_management(outLayer, savedLayer)  # was arcpy.arcpy.CopyFeatures_management
arcpy.Delete_management(outLayer)
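# A small sketch (an assumption, not in the original) that could precede the
# export above so re-running the scratch file does not fail on an existing shapefile:
arcpy.env.overwriteOutput = True
if arcpy.Exists(savedLayer):
    arcpy.Delete_management(savedLayer)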
type(username)
import requests
results = requests.get(url)
type(results)
data = results.json()
type(data)
results.headers
data['data'][90]['images']['standard_resolution']['url']
data['data'][90]['videos']['standard_resolution']['url']
'videos' in data['data'][83]
payload = {'lat': '34.048502',
           'lng': '-118.246008',
           'distance': '250',
           'access_token': '1771051239.ab103e5.7e013b99ce924cb7a894ecd0dd030be5',
           'count': '100'}
url = 'https://api.instagram.com/v1/media/search'
# def getPhotoCount(url, payload):
#     # query website, parse JSON, and return photo count
#     r = requests.get(url, params=payload)
#     parsedResponse = r.json()
#     query = parsedResponse['data']
#     # print 'Query count returned: ' + str(len(query))
#     return query
url = 'https://api.instagram.com/v1/users/self/media/recent/?access_token=1771051239.ab103e5.7e013b99ce924cb7a894ecd0dd030be5'
data = getPhotoCount(url)
r.text
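# A working sketch of the requests-based variant commented out above
# (getPhotoCountRequests is a hypothetical name; behavior mirrors getPhotoCount):
def getPhotoCountRequests(url, payload):
    # query the endpoint with query-string params, parse JSON, return the 'data' list
    r = requests.get(url, params=payload)
    return r.json()['data']
# e.g. data = getPhotoCountRequests('https://api.instagram.com/v1/media/search', payload)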
klip = []
recirc = []
cols = ['latitude', 'longitude', 'id', 'username', 'title', 'tags', 'url']
tbl = pd.DataFrame()
token = '1771051239.ab103e5.7e013b99ce924cb7a894ecd0dd030be5'
# url = 'https://api.instagram.com/v1/media/search'
count = 0
for xcoord in np.arange(minX, maxX, 0.0449157):
    for ycoord in np.arange(minY, maxY, 0.0449157):
        url = 'https://api.instagram.com/v1/media/search?lat=%s&lng=%s&distance=5000&access_token=%s&callback=?&count=500' % (ycoord, xcoord, token)  # &callback=?&count=500
        data = getPhotoCount(url)
        count += 1
        print count
        if len(data) == 100:  # flag cells where the 100-record cap was hit
            print 'X: %s Y: %s has %s returns.' % (xcoord, ycoord, str(len(data)))
        for loc in range(len(data)):
            chk = data[loc]['id']
            if chk not in klip:
                ID = data[loc]['location']['id']
                if ID not in recirc:
                    recirc.append(ID)
                latitude = data[loc]['location']['latitude']
                longitude = data[loc]['location']['longitude']
                title = smart_str(data[loc]['location']['name'])
                if 'videos' in data[loc]:
                    # print 'video ' + str(loc)
                    url = data[loc]['videos']['standard_resolution']['url']
                else:
                    url = smart_str(data[loc]['images']['standard_resolution']['url'])
                username = smart_str(data[loc]['user']['username'])
                tags = smart_str(", ".join(data[loc]['tags']))  # was data[0]['tags'], which repeated the first record's tags
                tbl = tbl.append(pd.DataFrame([[latitude, longitude, ID, username, title, tags, url]], columns=cols), ignore_index=True)
                klip.append(chk)
print 'Recirc: %s' % str(len(recirc))
tbl2 = pd.DataFrame()
for rec in recirc:
    print '********'
    print rec
    url = 'https://api.instagram.com/v1/locations/%s/media/recent?access_token=%s&count=500' % (rec, token)
    try:
        data = getPhotoCount(url)
    except urllib2.HTTPError:
        # wait and retry once on HTTP errors (e.g. rate limiting)
        time.sleep(20)
        data = getPhotoCount(url)
    print '********'
    print len(data)
    for loc in range(len(data)):
        chk = data[loc]['id']
        if chk not in klip:
            ID = data[loc]['location']['id']
            print '********'
            print ID
            # recirc.append(ID)
            latitude = data[loc]['location']['latitude']
            longitude = data[loc]['location']['longitude']
            title = smart_str(data[loc]['location']['name'])
            if 'videos' in data[loc]:
                # print 'video ' + str(loc)
                url = data[loc]['videos']['standard_resolution']['url']
            else:
                url = smart_str(data[loc]['images']['standard_resolution']['url'])
            username = smart_str(data[loc]['user']['username'])
            tags = smart_str(", ".join(data[loc]['tags']))  # was data[0]['tags']
            tbl2 = tbl2.append(pd.DataFrame([[latitude, longitude, ID, username, title, tags, url]], columns=cols), ignore_index=True)
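# The try/except above retries a failed request exactly once after a 20 second
# pause; a small generalized sketch (fetchWithRetry is a hypothetical helper,
# not part of the original script):
def fetchWithRetry(url, tries=3, wait=20):
    # call getPhotoCount, sleeping and retrying on HTTP errors up to `tries` times
    for attempt in range(tries):
        try:
            return getPhotoCount(url)
        except urllib2.HTTPError:
            if attempt == tries - 1:
                raise
            time.sleep(wait)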
chktbl = pd.concat([tbl1_25, tbl2_5])
str([str(x) for x in ", ".join(data[0]['tags'])])
tags = smart_str(", ".join(data[0]['tags']))
ycoord = np.arange(minY, maxY, 0.0449157)[0]
xcoord = np.arange(minX, maxX, 0.0449157)[1]
minX = -92.335981
minY = 46.630695
maxX = -91.946101
maxY = 46.804721
from math import radians, sin, cos, sqrt, asin
def haversine(lat1, lon1, lat2, lon2):
    R = 6371  # Earth radius in kilometers
    dLat = radians(lat2 - lat1)
    dLon = radians(lon2 - lon1)
    lat1 = radians(lat1)
    lat2 = radians(lat2)
    a = sin(dLat/2)**2 + cos(lat1)*cos(lat2)*sin(dLon/2)**2
    c = 2*asin(sqrt(a))
    return R * c
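# The grid-spacing constants used above (0.0449157 for 5 km, 0.02245785 for
# 2.5 km, 0.0056144625 for 0.625 km) are the target spacing divided by roughly
# 111.3 km per degree of latitude; a sketch of that conversion (kmToDegLat is a
# hypothetical helper, not part of the original script):
def kmToDegLat(km):
    # one degree of latitude spans roughly 111.32 km on a spherical Earth
    return km / 111.32
# e.g. kmToDegLat(5) ~ 0.0449, kmToDegLat(0.625) ~ 0.0056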
haversine(46.630695, -92.335981, 46.630695, -91.946101)
count = 0
for url in tbl.url.values:
    if url in tbl2.url.values:
        count += 1
chks = []
flop = []
for x in range(20):
    data = getPhotoCount(url)
    print len(data)
    for loc in range(len(data)):
        chk = data[loc]['id']
        ID = data[loc]['location']['id']
        chks.append(chk)
        flop.append(ID)
len(set(chks))
import requests
results = requests.get(url)