Skip to content

Instantly share code, notes, and snippets.

@vijayanandrp
Last active February 2, 2017 10:20
Show Gist options
  • Save vijayanandrp/8cd9840549b5b268516f1e1ec506e122 to your computer and use it in GitHub Desktop.
Save vijayanandrp/8cd9840549b5b268516f1e1ec506e122 to your computer and use it in GitHub Desktop.
Pandas Recap
# DATE PARSER
def custom_time_parser(date_str):
try:
if '-' in date_str:
# 2013-12-08 16:54:23
return pd.datetime.strptime(date_str, '%Y-%d-%m %H:%M:%S')
elif '/' in date_str:
# 16/07/13 19:36:57
return pd.datetime.strptime(date_str, '%d/%m/%y %H:%M:%S')
else:
return date_str
except (ValueError, TypeError):
# print('Excpetion: {}'.format(date_str))
return date_str
index = 0
fault_values = 0
previous_value = None
for date_value in tickets_df['Open Date'].map(str):
result = custom_time_parser(date_value)
# IF SAME VALUE RETURNED MEANS THE UNKOWN FORMAT IS PROCESSED
if date_value == result:
fault_values += 1
# MODIFY THE EXISTING VALUE
tickets_df['Open Date'][index] = previous_value
else:
previous_value = result
# MODIFY THE EXISTING VALUE
tickets_df['Open Date'][index] = result
index += 1
print('Total Fault Values - {}'.format(fault_values))
import pandas as pd
import os
# GET THE CURRENT DIRECTORY
current_dir = os.getcwd()
# EXCEL FILE TO ANALYSE
excel_file = 'datasets_test.xlsx'
# READ EXCEL FILE
xlsx = pd.ExcelFile(excel_file)
# CONVERT INTO DATA FRAMES
tickets_df = pd.read_excel(xlsx, sheetname='Tickets', index_col=None, na_values=['NA'])
tickets_df.dropna(how='all') # to remove empty rows
# TO READ ANOTHER SHEET FROM SAME FILE
old_tickets_df = pd.read_excel(xlsx, sheetname='Old Tickets', index_col=None, na_values=['NA'])
old_tickets_df.dropna(how='all') # to remove empty rows
xlsx.close()
# HEAD VALUES OF THE DATA FRAMES
tickets_df.head()
# TAILE VALUES OF THE DATA FRAMES
old_tickets_df.tail()
# DIMENSIONS of the dataframe in (row X column) as a tuple
tickets_df.shape # Eg: (5858, 6)
# LENGTH OF THE COLUMN
print(len(tickets_df))
# FETCH COLUMN VALUES
print(ceb_tickets_df.columns)
# Output: Index(['Change ID', 'Title', 'Description', 'Open Date', 'Status', 'Assignment Group'], dtype='object')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment