Skip to content

Instantly share code, notes, and snippets.

@Chitrank-Dixit
Forked from wgins/people2csv.py
Last active June 26, 2023 13:17
Show Gist options
  • Save Chitrank-Dixit/6b9157bdfa0f0e38d9eceb999fc43935 to your computer and use it in GitHub Desktop.
Save Chitrank-Dixit/6b9157bdfa0f0e38d9eceb999fc43935 to your computer and use it in GitHub Desktop.
Mixpanel - Exporting people profiles to CSV
PROJECT_TOKEN = "<mixpanel-project-token>"
API_SECRET = "<mixpanel-project-api-secret>"
#from mixpanel import Mixpanel
#mp = Mixpanel(PROJECT_TOKEN)
#dir(mp)
''' people export'''
import base64
import csv
import sys
import time
import urllib # for url encoding
import urllib2 # for sending requests
try:
import json
except ImportError:
import simplejson as json
class Mixpanel(object):
def __init__(self, api_secret):
self.api_secret = api_secret
def request(self, params, format='json'):
'''let's craft the http request'''
data = None
request_url = 'https://mixpanel.com/api/2.0/engage/?'
request_url = request_url + self.unicode_urlencode(params)
headers = {'Authorization': 'Basic {encoded_secret}'.format(encoded_secret=base64.b64encode(self.api_secret))}
request = urllib2.Request(request_url, data, headers)
response = urllib2.urlopen(request, timeout=120)
return response.read()
def unicode_urlencode(self, params):
''' Convert stuff to json format and correctly handle unicode url parameters'''
if isinstance(params, dict):
params = params.items()
for i, param in enumerate(params):
if isinstance(param[1], list):
params[i] = (param[0], json.dumps(param[1]),)
result = urllib.urlencode([(k, isinstance(v, unicode) and v.encode('utf-8') or v) for k, v in params])
return result
def get_and_write_results(self, params):
response = api.request(params)
parameters['session_id'] = json.loads(response)['session_id']
parameters['page'] = 0
global_total = json.loads(response)['total']
print "Session id is %s \n" % parameters['session_id']
print "Here are the # of people %d" % global_total
paged = self._page_results(response, parameters, global_total)
self.export_csv("people_export_" + str(int(time.time())) + ".csv", paged)
self.export_json("people_export_" + str(int(time.time())) + ".json", paged)
def _page_results(self, response, parameters, global_total):
fname = "people_export_" + str(int(time.time())) + ".txt"
parameters['page'] = 0
has_results = True
total = 0
while has_results:
responser = json.loads(response)['results']
total += len(responser)
has_results = len(responser) == 1000
self._write_results(responser, fname)
print "%d / %d" % (total, global_total)
parameters['page'] += 1
if has_results:
response = api.request(parameters)
return fname
def _write_results(self, results, fname):
with open(fname, 'a') as f:
for data in results:
f.write(json.dumps(data) + '\n')
def export_csv(self, outfilename, fname):
'''takes a file name of a file of json objects and the desired name of the csv file that will be written'''
subkeys = set()
with open(fname, 'rb') as r:
with open(outfilename, 'wb') as w:
# Get all properties (will use this to create the header)
for line in r:
try:
subkeys.update(set(json.loads(line)['$properties'].keys()))
except:
pass
# Create the header
header = ['$distinct_id']
for key in subkeys:
header.append(key.encode('utf-8'))
# Create the writer and write the header
writer = csv.writer(w)
writer.writerow(header)
# Return to the top of the file, then write the events out, one per row
r.seek(0, 0)
for line in r:
entry = json.loads(line)
row = []
try:
row.append(entry['$distinct_id'])
except:
row.append('')
for subkey in subkeys:
try:
row.append((entry['$properties'][subkey]).encode('utf-8'))
except AttributeError:
row.append(entry['$properties'][subkey])
except KeyError:
row.append("")
writer.writerow(row)
def export_json(self, outfilename, fname):
'''takes a file name of a file of json objects and the desired name of the csv file that will be written'''
subkeys = set()
data = []
with open(fname, 'rb') as r:
for line in r:
data.append(json.loads(line.replace("$", "")))
with open(outfilename, 'wb') as w:
# Get all properties (will use this to create the header)
w.write(json.dumps(data))
# for line in r:
# try:
# subkeys.update(set(json.loads(line)['$properties'].keys()))
# except:
# pass
#
# # Create the header
# header = ['$distinct_id']
# for key in subkeys:
# header.append(key.encode('utf-8'))
#
# # Create the writer and write the header
# writer = csv.writer(w)
# writer.writerow(header)
#
# # Return to the top of the file, then write the events out, one per row
# r.seek(0, 0)
# for line in r:
# entry = json.loads(line)
# row = []
# try:
# row.append(entry['$distinct_id'])
# except:
# row.append('')
#
# for subkey in subkeys:
# try:
# row.append((entry['$properties'][subkey]).encode('utf-8'))
# except AttributeError:
# row.append(entry['$properties'][subkey])
# except KeyError:
# row.append("")
# writer.writerow(row)
if __name__ == '__main__':
if len(sys.argv) > 1:
api_secret = sys.argv[1]
else:
api_secret = API_SECRET #raw_input("API Secret: ")
api = Mixpanel(
api_secret=api_secret
)
'''
Here is the place to define your selector to target only the users that you're after
selector = '(datetime(1458587013 - 86400) > properties["Created"] and behaviors["behavior_79"] > 0'
behaviors = '[{"window": "90d", "name": "behavior_79", "event_selectors": [{"event": "Edit Colors"}]}]'
'''
selector = ''
# Leave 'r' before the behaviors string so that it's interpreted as a string literal to handle escaped quotes
behaviors = r''
if not behaviors:
parameters = {'selector': selector}
else:
time_offset = int(raw_input("Project time offset from GMT (ex. PST = -8): "))
parameters = {'selector': selector, 'behaviors': behaviors, 'as_of_timestamp': int(time.time()) + (time_offset * 3600)}
api.get_and_write_results(parameters)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment