Skip to content

Instantly share code, notes, and snippets.

@eponkratova
Last active April 13, 2020 04:53
Show Gist options
  • Select an option

  • Save eponkratova/6f7a8efd06f21008f4e1053cdc2240a7 to your computer and use it in GitHub Desktop.

Select an option

Save eponkratova/6f7a8efd06f21008f4e1053cdc2240a7 to your computer and use it in GitHub Desktop.
Extracting_freshdesk_data
# importing libraries
import json
import pandas as pd
import numpy as np
import requests
from pandas.io.json import json_normalize
import time
import datetime
from furl import furl
import re
from sqlalchemy import create_engine
# Connection details — replace every placeholder with real values before running.
# NOTE(review): credentials are hard-coded in source; prefer environment
# variables or a secrets manager for anything beyond a throwaway script.
api_key = "yourFreshDeskAPI"  # Freshdesk API key (Profile Settings > "Your API Key")
domain = "yourFreshDeskDomain" #the part of your Freshdesk URL, e.g.yourFreshDeskDomain.freshdesk.com
password = "yourFreshDeskPassword" #the password you are using to log to your Freshdesk account
# SQLAlchemy engine for the destination Postgres database (psycopg2 driver).
engine = create_engine('postgresql+psycopg2://username:password@endpoint/dbname') #your database credentials
#Doing a test API call to get ticket information for all new and open tickets
#Find the complete API documentation at https://developers.freshdesk.com/api/.
# Smoke-test call: fetch all new and open tickets for the API user and
# flatten the JSON payload into a tabular DataFrame.
request = requests.get(
    "https://" + domain + ".freshdesk.com/api/v2/tickets?filter=new_and_my_open",
    auth=(api_key, password),
).json()
objects = json_normalize(request)  # flatten nested JSON into columns
# Copy instead of the original empty-frame + DataFrame.append pattern:
# append is deprecated (removed in pandas 2.0) and the empty frame was redundant.
df = objects.copy()
#After reviewing the results and reading the documentation, I discovered that:
#1. By default, only tickets that have been created within the past 30 days will be returned. For older tickets, use the updated_since filter.
#2. A maximum of 300 pages (9000 tickets) will be returned.' (documentation)
#3. All tickets have the updated_at col filled in, thus, updated_since would work as the created_at filter.
#And if you decide to use created_at as a part of your filter, e.g 'https://yourFreshDeskAPI.freshdesk.com/api/v2/search/tickets?query="updated_at:null%20AND%20created_at:%272019-04-10%27"', remember that 'the number of objects returned per page is 30 <...> and the page number starts with 1 and should not exceed 10' (documentation).
def data_extraction(endpoint, url):
    """Page through a Freshdesk list endpoint and load the records into Postgres.

    Parameters
    ----------
    endpoint : str
        Freshdesk resource name ('tickets', 'agents', ...). Also used as the
        destination table name in the 'public' schema.
    url : str
        Full API URL to page through; existing query parameters are preserved
        and 'page'/'per_page' are added per request.

    Returns
    -------
    pandas.DataFrame
        The flattened, cleaned records that were written to the database.
    """
    max_pages = 300  # Freshdesk caps list endpoints at 300 pages (9000 tickets)
    frames = []
    page = 1
    while page <= max_pages:
        params = {'page': page, 'per_page': 100}
        formatted_url = furl(url).add(params).url
        r = requests.get(formatted_url, auth=(api_key, password))
        if r.status_code != 200:
            # Report the API error and stop paging. (The original never
            # advanced the page counter here, so a single failure looped forever.)
            print("Failed to read tickets, errors are displayed below,")
            response = json.loads(r.content)
            print(response["errors"])
            break
        response = r.json()
        if not response:
            break  # empty page -> no more records; don't burn the remaining pages
        frames.append(json_normalize(response))  # flatten this page's JSON
        page += 1
        time.sleep(2)  # stay under the Freshdesk rate limit

    # Clean once, after all pages are collected (the original re-cleaned the
    # whole accumulated frame on every page, str()-mangling datetime columns).
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df.fillna(value='', inplace=True)  # replace NA with empty strings
    # Strip the square brackets that list-valued fields render as after str().
    for c in df.columns:
        df[c] = df[c].astype(str).str.strip('[').str.strip(']')
    # Cast timestamp columns so they load as proper dates (not text) in Postgres.
    if endpoint == 'tickets':
        date_cols = ['created_at', 'updated_at', 'due_by']
    else:
        date_cols = ['created_at', 'updated_at']
    for col in date_cols:
        if col in df.columns:  # guard: an empty extract has no columns
            df[col] = pd.to_datetime(df[col])
    # Replace the target table wholesale with the fresh extract.
    with engine.connect() as conn, conn.begin():
        df.to_sql(endpoint, con=conn, schema='public', if_exists='replace')
    return df
# Extract each resource and load it into Postgres. Build the host from the
# configured `domain` variable (as the smoke-test call above does) instead of
# the hard-coded 'yourFreshDeskAPI' placeholder subdomain.
base_url = "https://" + domain + ".freshdesk.com/api/v2/"
groups = data_extraction('groups', base_url + 'groups')
agents = data_extraction('agents', base_url + 'agents')
companies = data_extraction('companies', base_url + 'companies')
tickets = data_extraction('tickets', base_url + 'tickets?updated_since=2019-08-01T02:00:00Z')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment