Extracting_freshdesk_data
# importing libraries
import json
import pandas as pd
import numpy as np
import requests
from pandas.io.json import json_normalize
import time
import datetime
from furl import furl
import re
from sqlalchemy import create_engine
# adding connection details
api_key = "yourFreshDeskAPI"
domain = "yourFreshDeskDomain"  # the part of your Freshdesk URL, e.g. yourFreshDeskDomain.freshdesk.com
password = "yourFreshDeskPassword"  # the password you use to log in to your Freshdesk account
engine = create_engine('postgresql+psycopg2://username:password@endpoint/dbname')  # your database credentials
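# (Illustrative sketch, not part of the original gist.) The same values can be read from
# environment variables instead of being hardcoded; the FRESHDESK_* and DATABASE_URL
# variable names below are assumptions used only for this example.
import os
api_key = os.environ.get("FRESHDESK_API_KEY", api_key)
domain = os.environ.get("FRESHDESK_DOMAIN", domain)
password = os.environ.get("FRESHDESK_PASSWORD", password)
if os.environ.get("DATABASE_URL"):
    engine = create_engine(os.environ["DATABASE_URL"])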
# Doing a test API call to get ticket information for all new and open tickets
# Find the complete API documentation at https://developers.freshdesk.com/api/.
# initiating a pandas dataframe
df = pd.DataFrame()
request = requests.get("https://" + domain + ".freshdesk.com/api/v2/tickets?filter=new_and_my_open", auth=(api_key, password)).json()
objects = json_normalize(request)
df = df.append(objects, ignore_index=True)
# After reviewing the results and reading the documentation, I discovered that:
# 1. By default, only tickets created within the past 30 days are returned. For older tickets, use the updated_since filter.
# 2. A maximum of 300 pages (9000 tickets) will be returned (documentation).
# 3. All tickets have the updated_at column filled in, so updated_since works like a created_at filter.
# If you decide to use created_at as part of your filter, e.g. 'https://yourFreshDeskAPI.freshdesk.com/api/v2/search/tickets?query="updated_at:null%20AND%20created_at:%272019-04-10%27"', remember that 'the number of objects returned per page is 30 <...> and the page number starts with 1 and should not exceed 10' (documentation). A sketch of such a paged search call follows below.
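# (Illustrative sketch, not part of the original gist.) One way to page through the
# /api/v2/search/tickets endpoint while respecting the limits quoted above: 30 results
# per page and a page number between 1 and 10. The response shape {'results': [...],
# 'total': n} is taken from the Freshdesk search documentation; the query in the usage
# comment is only a placeholder.
def search_tickets(query):
    """Fetch up to 10 pages (300 results) from the ticket search endpoint."""
    results = pd.DataFrame()
    for page in range(1, 11):  # the search API caps the page number at 10
        r = requests.get("https://" + domain + ".freshdesk.com/api/v2/search/tickets",
                         params={'query': '"' + query + '"', 'page': page},
                         auth=(api_key, password))
        if r.status_code != 200:
            print("Search request failed:", r.content)
            break
        payload = r.json()
        results = results.append(json_normalize(payload.get('results', [])), ignore_index=True)
        if page * 30 >= payload.get('total', 0):  # stop once every matching ticket has been fetched
            break
        time.sleep(2)
    return results
# example usage (placeholder date): search_tickets("created_at:>'2019-04-10'")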
def data_extraction(endpoint, url):
    """Call the API endpoint page by page, collect the results into a dataframe and write it to the corresponding Postgres table."""
    i = 1
    page_num = 300  # the maximum number of pages allowed by the API
    # initiating an empty dataframe
    df = pd.DataFrame()
    while i <= page_num:
        param = {'page': i, 'per_page': 100}
        formatted_url = furl(url).add(param).url
        r = requests.get(formatted_url, auth=(api_key, password))
        if r.status_code == 200:
            response = r.json()
            if not response:  # an empty page means there are no more records to fetch
                break
            objects = json_normalize(response)  # parsing json
            df = df.append(objects, ignore_index=True)  # appending the records
            df.fillna(value='', inplace=True)  # replacing NA with empty values
            for c in df.columns:
                df[c] = df[c].astype(str).str.strip('[')  # stripping the square brackets of list-valued fields
                df[c] = df[c].astype(str).str.strip(']')
            if endpoint == 'tickets':
                col = ['created_at', 'updated_at', 'due_by']  # cast to dates; otherwise they load into Postgres as text
            else:
                col = ['created_at', 'updated_at']
            for cols in col:
                df[cols] = pd.to_datetime(df[cols])
            i += 1
            time.sleep(2)  # pause between calls to stay within the API rate limit
        else:
            print("Failed to read " + endpoint + ", errors are displayed below:")
            response = json.loads(r.content)
            print(response["errors"])
            break  # stop paging on an error instead of retrying the same page forever
    # outputting results to a table
    with engine.connect() as conn, conn.begin():
        df.to_sql(endpoint, con=conn, schema='public', if_exists='replace')
    return df
groups = data_extraction('groups', "https://" + domain + ".freshdesk.com/api/v2/groups")
agents = data_extraction('agents', "https://" + domain + ".freshdesk.com/api/v2/agents")
companies = data_extraction('companies', "https://" + domain + ".freshdesk.com/api/v2/companies")
tickets = data_extraction('tickets', "https://" + domain + ".freshdesk.com/api/v2/tickets?updated_since=2019-08-01T02:00:00Z")
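# (Illustrative sketch, not part of the original gist.) A quick sanity check that the
# tables landed in Postgres: count the rows in each table that was just written.
for table in ['groups', 'agents', 'companies', 'tickets']:
    counts = pd.read_sql('SELECT COUNT(*) AS n FROM public.' + table, con=engine)
    print(table, int(counts['n'][0]))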