Last active
February 2, 2017 10:20
-
-
Save vijayanandrp/8cd9840549b5b268516f1e1ec506e122 to your computer and use it in GitHub Desktop.
Pandas Recap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# DATE PARSER | |
def custom_time_parser(date_str): | |
try: | |
if '-' in date_str: | |
# 2013-12-08 16:54:23 | |
return pd.datetime.strptime(date_str, '%Y-%d-%m %H:%M:%S') | |
elif '/' in date_str: | |
# 16/07/13 19:36:57 | |
return pd.datetime.strptime(date_str, '%d/%m/%y %H:%M:%S') | |
else: | |
return date_str | |
except (ValueError, TypeError): | |
# print('Excpetion: {}'.format(date_str)) | |
return date_str | |
index = 0 | |
fault_values = 0 | |
previous_value = None | |
for date_value in tickets_df['Open Date'].map(str): | |
result = custom_time_parser(date_value) | |
# IF SAME VALUE RETURNED MEANS THE UNKOWN FORMAT IS PROCESSED | |
if date_value == result: | |
fault_values += 1 | |
# MODIFY THE EXISTING VALUE | |
tickets_df['Open Date'][index] = previous_value | |
else: | |
previous_value = result | |
# MODIFY THE EXISTING VALUE | |
tickets_df['Open Date'][index] = result | |
index += 1 | |
print('Total Fault Values - {}'.format(fault_values)) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import os | |
# GET THE CURRENT DIRECTORY | |
current_dir = os.getcwd() | |
# EXCEL FILE TO ANALYSE | |
excel_file = 'datasets_test.xlsx' | |
# READ EXCEL FILE | |
xlsx = pd.ExcelFile(excel_file) | |
# CONVERT INTO DATA FRAMES | |
tickets_df = pd.read_excel(xlsx, sheetname='Tickets', index_col=None, na_values=['NA']) | |
tickets_df.dropna(how='all') # to remove empty rows | |
# TO READ ANOTHER SHEET FROM SAME FILE | |
old_tickets_df = pd.read_excel(xlsx, sheetname='Old Tickets', index_col=None, na_values=['NA']) | |
old_tickets_df.dropna(how='all') # to remove empty rows | |
xlsx.close() | |
# HEAD VALUES OF THE DATA FRAMES | |
tickets_df.head() | |
# TAILE VALUES OF THE DATA FRAMES | |
old_tickets_df.tail() | |
# DIMENSIONS of the dataframe in (row X column) as a tuple | |
tickets_df.shape # Eg: (5858, 6) | |
# LENGTH OF THE COLUMN | |
print(len(tickets_df)) | |
# FETCH COLUMN VALUES | |
print(ceb_tickets_df.columns) | |
# Output: Index(['Change ID', 'Title', 'Description', 'Open Date', 'Status', 'Assignment Group'], dtype='object') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment