Created
May 21, 2018 22:46
-
-
Save widoyo/22cf2c06b734f01d08b66b69485c68f5 to your computer and use it in GitHub Desktop.
Temukan Saran Data ketika ketemu data anomaly
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Anomaly data detection
import pandas as pd

# ---------- Data preparation
# Read the CSV without treating its first row as a header.
df = pd.read_csv("E:/2.KULIAH/KP2_BBWS/DEBIT/tma_wonogiri.csv", header=None)
# Name the three columns
df.columns = ["DATE", "TIME", "OLD_VAL"]
# Combine the date and time columns into a single DATETIME column
df["DATETIME"] = df["DATE"].map(str) + " " + df["TIME"]
# Drop the columns that are no longer needed
df.drop(["DATE", "TIME"], axis=1, inplace=True)
# Reorder the columns
df = df.reindex(columns=["DATETIME", "OLD_VAL"])
# Parse DATETIME and sort the rows chronologically
df["DATETIME"] = pd.to_datetime(df["DATETIME"], format="%Y-%m-%d %H:%M:%S")
df = df.sort_values(axis=0, ascending=True, by="DATETIME")
# Reset the index so it follows the sorted order
df = df.reset_index(drop=True)
# Readings as an array, in the sorted order
dflist = df["OLD_VAL"].values
# Standard deviation of the readings; used later as the outlier tolerance
deviasi = df["OLD_VAL"].std()
# Timestamps as a list of pandas Timestamp objects, parallel to dflist
dflist_tanggal = df["DATETIME"].tolist()
# Detect outlier readings and propose a replacement value for each
def anomali(list_value, list_tanggal, std):
    """Return the outliers in a series together with suggested replacements.

    A reading is treated as an outlier when it differs from its
    neighbour(s) by more than ``std``:

    * first reading  - compared with the reading after it;
    * last reading   - compared with the reading before it;
    * other readings - must be below both neighbours by more than ``std``,
      or above both neighbours by more than ``std``.

    The suggested value for an interior outlier is the mean of its two
    neighbours.  For the first or last reading it is the nearest neighbour
    shifted by half of the step between the two nearest readings, so that
    it continues their trend.  It equals the nearest neighbour when those
    two readings are equal or when only one neighbour exists.

    Args:
        list_value: Sequence of numeric readings.
        list_tanggal: Sequence of timestamps parallel to ``list_value``;
            each item must provide ``strftime``.
        std: Tolerance; a difference larger than this marks an outlier.

    Returns:
        A list of ``[timestamp_string, original_value, suggested_value]``
        lists, one per detected outlier, in input order.  Timestamps are
        formatted as ``%Y-%m-%d %H:%M:%S``.  Fewer than two readings yield
        an empty list because there is nothing to compare against.
    """
    jumlahdata = len(list_value)
    list_baru = []
    if jumlahdata < 2:
        return list_baru

    def differs(value, neighbour):
        # True when value lies outside neighbour +/- std.
        return value < neighbour - std or value > neighbour + std

    def extrapolate(near, far):
        # Continue the far -> near trend by half a step beyond near.
        return near + (near - far) / 2.0

    def record(index, suggestion):
        # Every row uses the same readable timestamp string.
        stamp = list_tanggal[index].strftime('%Y-%m-%d %H:%M:%S')
        list_baru.append([stamp, list_value[index], suggestion])

    last = jumlahdata - 1
    for y in range(jumlahdata):
        value = list_value[y]
        if y == 0:
            # First reading: only a following neighbour exists.
            near = list_value[1]
            if differs(value, near):
                far = list_value[2] if jumlahdata > 2 else near
                record(y, extrapolate(near, far))
        elif y == last:
            # Last reading: only a preceding neighbour exists.
            near = list_value[y - 1]
            if differs(value, near):
                far = list_value[y - 2] if jumlahdata > 2 else near
                record(y, extrapolate(near, far))
        else:
            # Interior reading: must stick out on the same side of both
            # neighbours to count as a spike.
            before = list_value[y - 1]
            after = list_value[y + 1]
            below = value < before - std and value < after - std
            above = value > before + std and value > after + std
            if below or above:
                record(y, (before + after) / 2.0)
    return list_baru
# Run the detection on the prepared data
data_anomali = anomali(dflist, dflist_tanggal, deviasi)
# Convert the result list into a DataFrame
df_anomaly = pd.DataFrame(data_anomali, columns=["WAKTU", "NILAI_ASLI", "NILAI_USULAN"])
# Show the data
print(data_anomali)
print(len(data_anomali))
print(df_anomaly)
# The CSV export below is disabled by wrapping it in a bare string literal.
'''
#Write data into csv file
df_anomaly.to_csv("E:/2.KULIAH/KP2_BBWS/DEBIT/tma_usulan.csv",header=True,index=False)
'''
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment