Skip to content

Instantly share code, notes, and snippets.

@marinamixpanel
Last active May 24, 2017 10:44
Show Gist options
  • Select an option

  • Save marinamixpanel/703ea4b5e1e84aefdb30 to your computer and use it in GitHub Desktop.

Select an option

Save marinamixpanel/703ea4b5e1e84aefdb30 to your computer and use it in GitHub Desktop.
Mixpanel - Exporting people profiles to CSV
''' people export'''
import hashlib
import time
import urllib #for url encoding
import urllib2 #for sending requests
import base64
import sys
import csv
try:
import json
except ImportError:
import simplejson as json
class Mixpanel(object):
def __init__(self, api_key, api_secret, token):
self.api_key = api_key
self.api_secret = api_secret
self.token = token
def request(self, params, format = 'json'):
'''let's craft the http request'''
params['api_key']=self.api_key
params['expire'] = int(time.time())+600 # 600 is ten minutes from now
if 'sig' in params: del params['sig']
params['sig'] = self.hash_args(params)
request_url = 'https://mixpanel.com/api/2.0/engage/?' + self.unicode_urlencode(params)
request = urllib.urlopen(request_url)
data = request.read()
#print request_url
return data
def hash_args(self, args, secret=None):
'''Hash dem arguments in the proper way
join keys - values and append a secret -> md5 it'''
for a in args:
if isinstance(args[a], list): args[a] = json.dumps(args[a])
args_joined = ''
for a in sorted(args.keys()):
if isinstance(a, unicode):
args_joined += a.encode('utf-8')
else:
args_joined += str(a)
args_joined += "="
if isinstance(args[a], unicode):
args_joined += args[a].encode('utf-8')
else:
args_joined += str(args[a])
hash = hashlib.md5(args_joined)
if secret:
hash.update(secret)
elif self.api_secret:
hash.update(self.api_secret)
return hash.hexdigest()
def unicode_urlencode(self, params):
''' Convert stuff to json format and correctly handle unicode url parameters'''
if isinstance(params, dict):
params = params.items()
for i, param in enumerate(params):
if isinstance(param[1], list):
params[i] = (param[0], json.dumps(param[1]),)
result = urllib.urlencode([(k, isinstance(v, unicode) and v.encode('utf-8') or v) for k, v in params])
return result
def export_csv(self, outfilename, fname):
"""
takes the json and returns a csv file
"""
subkeys = set()
with open(fname,'rb') as r:
with open(outfilename, 'wb') as w:
# Get all properties (will use this to create the header)
for line in r:
try:
subkeys.update(set(json.loads(line)['$properties'].keys()))
except:
pass
# Create the header
header = ['$distinct_id']
for key in subkeys:
header.append(key)
# Create the writer and write the header
writer = csv.writer(w)
writer.writerow(header)
#Return to the top of the file, then write the events out, one per row
r.seek(0,0)
for line in r:
entry = json.loads(line)
row = []
try:
row.append(entry['$distinct_id'])
except:
row.append('')
for subkey in subkeys:
try:
row.append((entry['$properties'][subkey]).encode('utf-8'))
except AttributeError:
row.append(entry['$properties'][subkey])
except KeyError:
row.append("")
writer.writerow(row)
if __name__ == '__main__':
api = Mixpanel(
api_key = raw_input('API Key: ' ),
api_secret = raw_input('API Secret: '),
token = raw_input('Token: ')
)
'''Here is the place to define your selector to target only the users that you're after'''
'''parameters = {'selector':'(properties["$email"] == "Albany") or (properties["$city"] == "Alexandria")'}'''
parameters = {'selector':''}
response = api.request(parameters)
parameters['session_id'] = json.loads(response)['session_id']
parameters['page']=0
global_total = json.loads(response)['total']
print "Session id is %s \n" % parameters['session_id']
print "Here are the # of people %d" % global_total
fname = "people.txt"
has_results = True
total = 0
with open(fname,'w') as f:
while has_results:
responser = json.loads(response)['results']
total += len(responser)
has_results = len(responser) == 1000
for data in responser:
f.write(json.dumps(data)+'\n')
print "%d / %d" % (total,global_total)
parameters['page'] += 1
if has_results:
response = api.request(parameters)
'''specify your output filename here'''
api.export_csv("people_export_"+str(int(time.time()))+".csv", fname)
@hoserdude
Copy link
Copy Markdown

shouldn't the request_url be https?

@namanrathi
Copy link
Copy Markdown

This scripts gives error when we have more than 1000 records (

The Scripts sends the engage request 2nd time with page=1 and session_id as received in the response of first request, but we get error response like below for the request.
Error Received: "error": "request for page in uncached query\nfor params -

@wgins
Copy link
Copy Markdown

wgins commented Mar 22, 2016

Hey @namanrathi check out my fork here.

We'll update this gist shortly. Thanks for the heads up!

@yzhong52
Copy link
Copy Markdown

Are you still going to update this gist here?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment