Last active
August 29, 2015 14:22
-
-
Save priyankajayaswal1/4c0e4177f56be1e01a86 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(sqldf) | |
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE) | |
x$Date<- as.Date(x$Date,"%d/%m/%Y") | |
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S") | |
x$Global_active_power = as.numeric(x$Global_active_power) | |
x$Global_reactive_power = as.numeric(x$Global_reactive_power) | |
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power) | |
x$Day = weekdays(x$Date) | |
# Creating sample pools for six month data | |
first<- as.Date('16-12-2008',format = '%d-%m-%Y') | |
second <- as.Date('30-06-2009',format = '%d-%m-%Y') | |
y = fn$sqldf("select * from x where Date>=$first and Date<=$second") | |
# Creating sample pools of data at time 0, 15min, 30min, 45min | |
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'") | |
n = as.numeric(sqldf("select count(distinct Time) from z")) | |
rep_data = sqldf("select count(distinct Time) from z") | |
# Checking for which central tendency is apt | |
t = z | |
#t$Time = format(as.POSIXlt(t$Time, format = "%H:%M:%S"), format="%H:%M:%S") | |
tn = as.numeric(sqldf("select count(distinct Day) from t")) | |
final=data.frame( Day=as.Date(character()), | |
Time=character(), | |
Rep_KVA=character(), | |
stringsAsFactors=FALSE) | |
for (j in 1:n) | |
{ | |
e = fn$sqldf("select * from t where Time=(select Time from t limit $j-1,1)") | |
for (i in 1:7) | |
{ | |
f = fn$sqldf("select * from e where Date%7=$i-1") | |
plot(f$Date,f$KVA,xlab='Day No.',ylab='KVA', main = "Graphical representation of KVA value in 6 months for particular time.") | |
lines(f$Date,f$KVA) | |
# Finding the best optimized central tendency for the dataset | |
#(forparticular weekday and particular time) | |
nn = as.numeric(sqldf("select count(*) from f")) | |
hippo =d = NULL | |
#k=0 | |
for (i1 in seq(1,nn)) | |
{ | |
d=NULL | |
d1 = as.numeric(fn$sqldf("select KVA from f limit $i1-1,1")) | |
for (j1 in 1:nn) | |
{ | |
d2 = as.numeric(fn$sqldf("select KVA from f limit $j1-1,1")) | |
d[j1]=(d2-d1)*.16 | |
} | |
f$hippo[i1]=sum(d) | |
#k = k+1 | |
#f$hippo[k]=hippo[i1] | |
#print(paste0("Hippo: for ", d1, " and ", d2," = ", hippo[i1])) | |
} | |
rep_data=sqldf("select * from f where hippo=(select max(hippo)from f) limit 1") | |
repkva = as.numeric((sqldf("select KVA from rep_data"))) | |
reptime = as.character(sqldf("select Time from rep_data")) | |
repweekday = weekdays(rep_data$Date) | |
final=rbind(final,data.frame(Day=repweekday,Time=reptime,Rep_KVA=repkva)) | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(sqldf) | |
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE) | |
x$Date<- as.Date(x$Date,"%d/%m/%Y") | |
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S") | |
x$Global_active_power = as.numeric(x$Global_active_power) | |
x$Global_reactive_power = as.numeric(x$Global_reactive_power) | |
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power) | |
x$Day = weekdays(x$Date) | |
# Creating sample pools for six month data | |
first<- as.Date('16-12-2008',format = '%d-%m-%Y') | |
second <- as.Date('30-06-2009',format = '%d-%m-%Y') | |
y = fn$sqldf("select * from x where Date>=$first and Date<=$second") | |
# Creating sample pools of data at time 0, 15min, 30min, 45min | |
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'") | |
n = as.numeric(sqldf("select count(distinct Time) from z")) | |
rep_data = sqldf("select count(distinct Time) from z") | |
# Checking for which central tendency is apt | |
t = z | |
#t$Time = format(as.POSIXlt(t$Time, format = "%H:%M:%S"), format="%H:%M:%S") | |
tn = as.numeric(sqldf("select count(distinct Day) from t")) | |
final=data.frame( Day=as.Date(character()), | |
Time=character(), | |
Rep_KVA=character(), | |
stringsAsFactors=FALSE) | |
for (j in 1:n) | |
{ | |
e = fn$sqldf("select * from t where Time=(select Time from t limit $j-1,1)") | |
for (i in 1:7) | |
{ | |
f = fn$sqldf("select * from e where Date%7=$i-1") | |
plot(f$Date,f$KVA,xlab='Day No.',ylab='KVA', main = "Graphical representation of KVA value in 6 months for particular time.") | |
lines(f$Date,f$KVA) | |
# Finding the best optimized central tendency for the dataset | |
#(forparticular weekday and particular time) | |
nn = as.numeric(sqldf("select count(*) from f")) | |
hippo =d = NULL | |
#k=0 | |
for (i1 in seq(1,nn)) | |
{ | |
d=NULL | |
d1 = as.numeric(fn$sqldf("select KVA from f limit $i1-1,1")) | |
for (j1 in 1:nn) | |
{ | |
d2 = as.numeric(fn$sqldf("select KVA from f limit $j1-1,1")) | |
d[j1]=(d2-d1)*.16 | |
} | |
f$hippo[i1]=sum(d) | |
#k = k+1 | |
#f$hippo[k]=hippo[i1] | |
#print(paste0("Hippo: for ", d1, " and ", d2," = ", hippo[i1])) | |
} | |
if(!is.na(f$hippo[1])) | |
{ | |
rep_data=sqldf("select * from f where hippo=(select max(hippo)from f) limit 1") | |
repkva = as.numeric((sqldf("select KVA from rep_data"))) | |
reptime = as.character(sqldf("select Time from rep_data")) | |
repweekday = weekdays(rep_data$Date) | |
final=rbind(final,data.frame(Day=repweekday,Time=reptime,Rep_KVA=repkva)) | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
d = NULL | |
hippo=NULL | |
for (i in seq(1,nn)) | |
{ | |
d=NULL | |
d1 = as.numeric(fn$sqldf("select KVA from f limit $i-1,1")) | |
for (j in 1:nn) | |
{ | |
d2 = as.numeric(fn$sqldf("select KVA from f limit $j-1,1")) | |
if(d2>d1) | |
{ | |
d[j]=(d2-d1)*.16 | |
} | |
else d[j]=0 | |
} | |
hippo[i]=sum(d) | |
print(paste0("Hippo: for ", d1, d2, hippo)) | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(sqldf) | |
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE) | |
x$Date<- as.Date(x$Date,"%d/%m/%Y") | |
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S") | |
x$Global_active_power = as.numeric(x$Global_active_power) | |
x$Global_reactive_power = as.numeric(x$Global_reactive_power) | |
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power) | |
# Creating sample pools for six month data | |
first<- as.Date('16-12-2008',format = '%d-%m-%Y') | |
second <- as.Date('30-06-2009',format = '%d-%m-%Y') | |
y = fn$sqldf("select * from x where Date>=$first and Date<=$second") | |
# Creating sample pools of data at time 0, 15min, 30min, 45min | |
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'") | |
# Checking for which central tendency is apt (using checking of skewdness and outliners concept). | |
n = as.numeric(sqldf("select count(distinct Time) from z")) | |
t = sqldf("select distinct Time from z") | |
for (j in 1:n) | |
{ | |
e = fn$sqldf("select * from z where Time=(select Time from t limit $j-1,1)") | |
plot(e$KVA,xlab='Day No.', main = "Graphical representation of KVA value on 17:30:00 in 6 months.") | |
} | |
summary(e) | |
n = as.numeric(sqldf("select Time(*) as m from e")) | |
for (j in 1:as.numeric(sqldf("select count(*) as m from x"))) | |
{ | |
r <- fn$sqldf("select * from x limit $j-1,1") | |
for( i in 3:length(x)) | |
{ | |
r[as.character(colnames(r)[i])] <- as.numeric(r[as.character(colnames(r)[i])]) | |
} | |
} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(sqldf) | |
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE) | |
x$Date<- as.Date(x$Date,"%d/%m/%Y") | |
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S") | |
x$Global_active_power = as.numeric(x$Global_active_power) | |
x$Global_reactive_power = as.numeric(x$Global_reactive_power) | |
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power) | |
# Creating sample pools for six month data | |
first<- as.Date('16-12-2008',format = '%d-%m-%Y') | |
second <- as.Date('30-06-2009',format = '%d-%m-%Y') | |
y = fn$sqldf("select * from x where Date>=$first and Date<=$second") | |
# Creating sample pools of data at time 0, 15min, 30min, 45min | |
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'") | |
# Checking for which central tendency is apt (using checking of skewdness and outliners concept). | |
n = as.numeric(sqldf("select count(distinct Time) from z")) | |
t = sqldf("select distinct Time from z") | |
for (j in 1:n) | |
{ | |
e = fn$sqldf("select * from z where Time=(select Time from t limit $j-1,1)") | |
plot(e$KVA,xlab='Day No.', main = "Graphical representation of KVA value on 17:30:00 in 6 months.") | |
} | |
summary(e) | |
n = as.numeric(sqldf("select Time(*) as m from e")) | |
for (j in 1:as.numeric(sqldf("select count(*) as m from x"))) | |
{ | |
r <- fn$sqldf("select * from x limit $j-1,1") | |
for( i in 3:length(x)) | |
{ | |
r[as.character(colnames(r)[i])] <- as.numeric(r[as.character(colnames(r)[i])]) | |
} | |
} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(sqldf) | |
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE) | |
x$Date<- as.Date(x$Date,"%d/%m/%Y") | |
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S") | |
x$Global_active_power = as.numeric(x$Global_active_power) | |
x$Global_reactive_power = as.numeric(x$Global_reactive_power) | |
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power) | |
# Creating sample pools for six month data | |
first<- as.Date('16-12-2008',format = '%d-%m-%Y') | |
second <- as.Date('30-06-2009',format = '%d-%m-%Y') | |
y = fn$sqldf("select * from x where Date>=$first and Date<=$second") | |
# Creating sample pools of data at time 0, 15min, 30min, 45min | |
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'") | |
y = fn$sqldf("select * from x where Date>=$first and Date<=$second") | |
# Checking for which central tendency is apt (using checking of skewdness and outliners concept). | |
n = as.numeric(sqldf("select count(distinct Time) from z")) | |
t = sqldf("select distinct Time from z") | |
for (j in 1:n) | |
{ | |
e = fn$sqldf("select * from z where Time=(select Time from t limit $j-1,1)") | |
for (i in 1:7) | |
{ | |
f = fn$sqldf("select * from e where Date%7=$j") | |
#plot(e$KVA,xlab='Day No.', main = "Graphical representation of KVA value in 6 months for particular time.") | |
qplot(f$Date,f$KVA) | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(sqldf) | |
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE) | |
x$Date<- as.Date(x$Date,"%d/%m/%Y") | |
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S") | |
x$Global_active_power = as.numeric(x$Global_active_power) | |
x$Global_reactive_power = as.numeric(x$Global_reactive_power) | |
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power) | |
# Creating sample pools for six month data | |
first<- as.Date('16-12-2008',format = '%d-%m-%Y') | |
second <- as.Date('30-06-2009',format = '%d-%m-%Y') | |
y = fn$sqldf("select * from x where Date>=$first and Date<=$second") | |
# Creating sample pools of data at time 0, 15min, 30min, 45min | |
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'") | |
# Checking for which central tendency is apt (using checking of skewdness and outliners concept). | |
n = as.numeric(sqldf("select count(distinct Time) from z")) | |
t = sqldf("select distinct Time from z") | |
e = sqldf("select * from z where Time=(select Time from t limit 1,1)") | |
f = sqldf("select * from e where Date%7=1") | |
plot(f$Date,f$KVA,xlab='Day No.',ylab='KVA', main = "Graphical representation of KVA value in 6 months for particular time.") | |
u = lines(f$Date,f$KVA) | |
for (j in 1:n) | |
{ | |
e = fn$sqldf("select * from z where Time=(select Time from t limit $j-1,1)") | |
for (i in 1:7) | |
{ | |
f = fn$sqldf("select * from e where Date%7=$i") | |
plot(f$Date,f$KVA,xlab='Day No.',ylab='KVA', main = "Graphical representation of KVA value in 6 months for particular time.") | |
lines(f$Date,f$KVA) | |
} | |
} | |
nn = as.numeric(sqldf("select count(*) from f")) | |
if(nn%%2==1) | |
{ | |
nn = as.integer(nn/2)+1 | |
} | |
b=NULL | |
for (i in seq(nn/2,nn)) | |
{ | |
b[i-nn/2] = nn*mean(fn$sqldf("select KVA from (select * from f order by KVA Desc) limit $i")$KVA) | |
} | |
sort(b,decreasing = TRUE) | |
trapz(as.vector(f$Date),as.vector(f$KVA)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(sqldf) | |
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE) | |
x$Date<- as.Date(x$Date,"%d/%m/%Y") | |
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S") | |
x$Global_active_power = as.numeric(x$Global_active_power) | |
x$Global_reactive_power = as.numeric(x$Global_reactive_power) | |
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power) | |
# Creating sample pools for six month data | |
first<- as.Date('16-12-2008',format = '%d-%m-%Y') | |
second <- as.Date('30-06-2009',format = '%d-%m-%Y') | |
y = fn$sqldf("select * from x where Date>=$first and Date<=$second") | |
# Creating sample pools of data at time 0, 15min, 30min, 45min | |
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'") | |
# Checking for which central tendency is apt (using checking of skewdness and outliners concept). | |
n = as.numeric(sqldf("select count(distinct Time) from z")) | |
t = sqldf("select distinct Time from z") | |
e = sqldf("select * from z where Time=(select Time from t limit 1,1)") | |
f = sqldf("select * from e where Date%7=1") | |
plot(f$Date,f$KVA,xlab='Day No.',ylab='KVA', main = "Graphical representation of KVA value in 6 months for particular time.") | |
u = lines(f$Date,f$KVA) | |
for (j in 1:n) | |
{ | |
e = fn$sqldf("select * from z where Time=(select Time from t limit $j-1,1)") | |
for (i in 1:7) | |
{ | |
f = fn$sqldf("select * from e where Date%7=$i") | |
plot(f$Date,f$KVA,xlab='Day No.',ylab='KVA', main = "Graphical representation of KVA value in 6 months for particular time.") | |
lines(f$Date,f$KVA) | |
} | |
} | |
nn = as.numeric(sqldf("select count(*) from f")) | |
d = NULL | |
hippo=NULL | |
k=0 | |
for (i in seq(1,nn)) | |
{ | |
d=NULL | |
d1 = as.numeric(fn$sqldf("select KVA from f limit $i-1,1")) | |
for (j in 1:nn) | |
{ | |
d2 = as.numeric(fn$sqldf("select KVA from f limit $j-1,1")) | |
d[j]=(d2-d1)*.16 | |
} | |
hippo[i]=sum(d) | |
k = k+1 | |
f$hippo[k]=hippo[i] | |
print(paste0("Hippo: for ", d1, " and ", d2," = ", hippo[i])) | |
} | |
if(nn%%2==1) | |
{ | |
nn = as.integer(nn/2)+1 | |
} | |
b=NULL | |
meanfromarea =NULL | |
for (i in seq(nn/2,nn)) | |
{ | |
gg= fn$sqldf("select KVA from (select * from f order by KVA) limit $i") | |
b[i+1-(nn/2)] = mean(gg$KVA) | |
} | |
summary(e) | |
n = as.numeric(sqldf("select Time(*) as m from e")) | |
for (j in 1:as.numeric(sqldf("select count(*) as m from x"))) | |
{ | |
r <- fn$sqldf("select * from x limit $j-1,1") | |
for( i in 3:length(x)) | |
{ | |
r[as.character(colnames(r)[i])] <- as.numeric(r[as.character(colnames(r)[i])]) | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment