Skip to content

Instantly share code, notes, and snippets.

@eponkratova
Last active April 13, 2020 04:53
Show Gist options
  • Select an option

  • Save eponkratova/6f7a8efd06f21008f4e1053cdc2240a7 to your computer and use it in GitHub Desktop.

Select an option

Save eponkratova/6f7a8efd06f21008f4e1053cdc2240a7 to your computer and use it in GitHub Desktop.
Extracting_freshdesk_data
# importing libraries
import json
import pandas as pd
import numpy as np
import requests
from pandas.io.json import json_normalize
import time
import datetime
from furl import furl
import re
from sqlalchemy import create_engine
# Connection details — replace every placeholder with real values before running.
# NOTE(review): credentials are hard-coded in source; prefer environment
# variables or a secrets manager for anything beyond a throwaway script.
api_key = "yourFreshDeskAPI"  # Freshdesk API key (Profile Settings > "Your API Key")
domain = "yourFreshDeskDomain" #the part of your Freshdesk URL, e.g.yourFreshDeskDomain.freshdesk.com
password = "yourFreshDeskPassword" #the password you are using to log to your Freshdesk account
# SQLAlchemy engine for the destination Postgres database (psycopg2 driver).
engine = create_engine('postgresql+psycopg2://username:password@endpoint/dbname') #your database credentials
#Doing a test API call to get ticket information for all new and open tickets
#Find the complete API documentation at https://developers.freshdesk.com/api/.
# Smoke-test call: fetch all new and open tickets for the API user and
# flatten the JSON payload into a tabular DataFrame.
request = requests.get(
    "https://" + domain + ".freshdesk.com/api/v2/tickets?filter=new_and_my_open",
    auth=(api_key, password),
).json()
objects = json_normalize(request)  # flatten nested JSON into columns
# Copy instead of the original empty-frame + DataFrame.append pattern:
# append is deprecated (removed in pandas 2.0) and the empty frame was redundant.
df = objects.copy()
#After reviewing the results and reading the documentation, I discovered that:
#1. By default, only tickets that have been created within the past 30 days will be returned. For older tickets, use the updated_since filter.
#2. A maximum of 300 pages (9000 tickets) will be returned.' (documentation)
#3. All tickets have the updated_at col filled in, thus, updated_since would work as the created_at filter.
#And if you decide to use created_at as a part of your filter, e.g 'https://yourFreshDeskAPI.freshdesk.com/api/v2/search/tickets?query="updated_at:null%20AND%20created_at:%272019-04-10%27"', remember that 'the number of objects returned per page is 30 <...> and the page number starts with 1 and should not exceed 10' (documentation).
def data_extraction(endpoint, url):
    """Page through a Freshdesk list endpoint and load the records into Postgres.

    Parameters
    ----------
    endpoint : str
        Freshdesk resource name ('tickets', 'agents', ...). Also used as the
        destination table name in the 'public' schema.
    url : str
        Full API URL to page through; existing query parameters are preserved
        and 'page'/'per_page' are added per request.

    Returns
    -------
    pandas.DataFrame
        The flattened, cleaned records that were written to the database.
    """
    max_pages = 300  # Freshdesk caps list endpoints at 300 pages (9000 tickets)
    frames = []
    page = 1
    while page <= max_pages:
        params = {'page': page, 'per_page': 100}
        formatted_url = furl(url).add(params).url
        r = requests.get(formatted_url, auth=(api_key, password))
        if r.status_code != 200:
            # Report the API error and stop paging. (The original never
            # advanced the page counter here, so a single failure looped forever.)
            print("Failed to read tickets, errors are displayed below,")
            response = json.loads(r.content)
            print(response["errors"])
            break
        response = r.json()
        if not response:
            break  # empty page -> no more records; don't burn the remaining pages
        frames.append(json_normalize(response))  # flatten this page's JSON
        page += 1
        time.sleep(2)  # stay under the Freshdesk rate limit

    # Clean once, after all pages are collected (the original re-cleaned the
    # whole accumulated frame on every page, str()-mangling datetime columns).
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df.fillna(value='', inplace=True)  # replace NA with empty strings
    # Strip the square brackets that list-valued fields render as after str().
    for c in df.columns:
        df[c] = df[c].astype(str).str.strip('[').str.strip(']')
    # Cast timestamp columns so they load as proper dates (not text) in Postgres.
    if endpoint == 'tickets':
        date_cols = ['created_at', 'updated_at', 'due_by']
    else:
        date_cols = ['created_at', 'updated_at']
    for col in date_cols:
        if col in df.columns:  # guard: an empty extract has no columns
            df[col] = pd.to_datetime(df[col])
    # Replace the target table wholesale with the fresh extract.
    with engine.connect() as conn, conn.begin():
        df.to_sql(endpoint, con=conn, schema='public', if_exists='replace')
    return df
# Extract each resource and load it into Postgres. Build the host from the
# configured `domain` variable (as the smoke-test call above does) instead of
# the hard-coded 'yourFreshDeskAPI' placeholder subdomain.
base_url = "https://" + domain + ".freshdesk.com/api/v2/"
groups = data_extraction('groups', base_url + 'groups')
agents = data_extraction('agents', base_url + 'agents')
companies = data_extraction('companies', base_url + 'companies')
tickets = data_extraction('tickets', base_url + 'tickets?updated_since=2019-08-01T02:00:00Z')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment