Skip to content

Instantly share code, notes, and snippets.

@priyankajayaswal1
Last active August 29, 2015 14:22
Show Gist options
  • Save priyankajayaswal1/4c0e4177f56be1e01a86 to your computer and use it in GitHub Desktop.
Save priyankajayaswal1/4c0e4177f56be1e01a86 to your computer and use it in GitHub Desktop.
library(sqldf)
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE)
x$Date<- as.Date(x$Date,"%d/%m/%Y")
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S")
x$Global_active_power = as.numeric(x$Global_active_power)
x$Global_reactive_power = as.numeric(x$Global_reactive_power)
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power)
x$Day = weekdays(x$Date)
# Creating sample pools for six month data
first<- as.Date('16-12-2008',format = '%d-%m-%Y')
second <- as.Date('30-06-2009',format = '%d-%m-%Y')
y = fn$sqldf("select * from x where Date>=$first and Date<=$second")
# Creating sample pools of data at time 0, 15min, 30min, 45min
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'")
n = as.numeric(sqldf("select count(distinct Time) from z"))
rep_data = sqldf("select count(distinct Time) from z")
# Checking for which central tendency is apt
t = z
#t$Time = format(as.POSIXlt(t$Time, format = "%H:%M:%S"), format="%H:%M:%S")
tn = as.numeric(sqldf("select count(distinct Day) from t"))
final=data.frame( Day=as.Date(character()),
Time=character(),
Rep_KVA=character(),
stringsAsFactors=FALSE)
for (j in 1:n)
{
e = fn$sqldf("select * from t where Time=(select Time from t limit $j-1,1)")
for (i in 1:7)
{
f = fn$sqldf("select * from e where Date%7=$i-1")
plot(f$Date,f$KVA,xlab='Day No.',ylab='KVA', main = "Graphical representation of KVA value in 6 months for particular time.")
lines(f$Date,f$KVA)
# Finding the best optimized central tendency for the dataset
#(forparticular weekday and particular time)
nn = as.numeric(sqldf("select count(*) from f"))
hippo =d = NULL
#k=0
for (i1 in seq(1,nn))
{
d=NULL
d1 = as.numeric(fn$sqldf("select KVA from f limit $i1-1,1"))
for (j1 in 1:nn)
{
d2 = as.numeric(fn$sqldf("select KVA from f limit $j1-1,1"))
d[j1]=(d2-d1)*.16
}
f$hippo[i1]=sum(d)
#k = k+1
#f$hippo[k]=hippo[i1]
#print(paste0("Hippo: for ", d1, " and ", d2," = ", hippo[i1]))
}
rep_data=sqldf("select * from f where hippo=(select max(hippo)from f) limit 1")
repkva = as.numeric((sqldf("select KVA from rep_data")))
reptime = as.character(sqldf("select Time from rep_data"))
repweekday = weekdays(rep_data$Date)
final=rbind(final,data.frame(Day=repweekday,Time=reptime,Rep_KVA=repkva))
}
}
library(sqldf)
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE)
x$Date<- as.Date(x$Date,"%d/%m/%Y")
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S")
x$Global_active_power = as.numeric(x$Global_active_power)
x$Global_reactive_power = as.numeric(x$Global_reactive_power)
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power)
x$Day = weekdays(x$Date)
# Creating sample pools for six month data
first<- as.Date('16-12-2008',format = '%d-%m-%Y')
second <- as.Date('30-06-2009',format = '%d-%m-%Y')
y = fn$sqldf("select * from x where Date>=$first and Date<=$second")
# Creating sample pools of data at time 0, 15min, 30min, 45min
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'")
n = as.numeric(sqldf("select count(distinct Time) from z"))
rep_data = sqldf("select count(distinct Time) from z")
# Checking for which central tendency is apt
t = z
#t$Time = format(as.POSIXlt(t$Time, format = "%H:%M:%S"), format="%H:%M:%S")
tn = as.numeric(sqldf("select count(distinct Day) from t"))
final=data.frame( Day=as.Date(character()),
Time=character(),
Rep_KVA=character(),
stringsAsFactors=FALSE)
for (j in 1:n)
{
e = fn$sqldf("select * from t where Time=(select Time from t limit $j-1,1)")
for (i in 1:7)
{
f = fn$sqldf("select * from e where Date%7=$i-1")
plot(f$Date,f$KVA,xlab='Day No.',ylab='KVA', main = "Graphical representation of KVA value in 6 months for particular time.")
lines(f$Date,f$KVA)
# Finding the best optimized central tendency for the dataset
#(forparticular weekday and particular time)
nn = as.numeric(sqldf("select count(*) from f"))
hippo =d = NULL
#k=0
for (i1 in seq(1,nn))
{
d=NULL
d1 = as.numeric(fn$sqldf("select KVA from f limit $i1-1,1"))
for (j1 in 1:nn)
{
d2 = as.numeric(fn$sqldf("select KVA from f limit $j1-1,1"))
d[j1]=(d2-d1)*.16
}
f$hippo[i1]=sum(d)
#k = k+1
#f$hippo[k]=hippo[i1]
#print(paste0("Hippo: for ", d1, " and ", d2," = ", hippo[i1]))
}
if(!is.na(f$hippo[1]))
{
rep_data=sqldf("select * from f where hippo=(select max(hippo)from f) limit 1")
repkva = as.numeric((sqldf("select KVA from rep_data")))
reptime = as.character(sqldf("select Time from rep_data"))
repweekday = weekdays(rep_data$Date)
final=rbind(final,data.frame(Day=repweekday,Time=reptime,Rep_KVA=repkva))
}
}
}
d = NULL
hippo=NULL
for (i in seq(1,nn))
{
d=NULL
d1 = as.numeric(fn$sqldf("select KVA from f limit $i-1,1"))
for (j in 1:nn)
{
d2 = as.numeric(fn$sqldf("select KVA from f limit $j-1,1"))
if(d2>d1)
{
d[j]=(d2-d1)*.16
}
else d[j]=0
}
hippo[i]=sum(d)
print(paste0("Hippo: for ", d1, d2, hippo))
}
library(sqldf)
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE)
x$Date<- as.Date(x$Date,"%d/%m/%Y")
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S")
x$Global_active_power = as.numeric(x$Global_active_power)
x$Global_reactive_power = as.numeric(x$Global_reactive_power)
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power)
# Creating sample pools for six month data
first<- as.Date('16-12-2008',format = '%d-%m-%Y')
second <- as.Date('30-06-2009',format = '%d-%m-%Y')
y = fn$sqldf("select * from x where Date>=$first and Date<=$second")
# Creating sample pools of data at time 0, 15min, 30min, 45min
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'")
# Checking for which central tendency is apt (using checking of skewdness and outliners concept).
n = as.numeric(sqldf("select count(distinct Time) from z"))
t = sqldf("select distinct Time from z")
for (j in 1:n)
{
e = fn$sqldf("select * from z where Time=(select Time from t limit $j-1,1)")
plot(e$KVA,xlab='Day No.', main = "Graphical representation of KVA value on 17:30:00 in 6 months.")
}
summary(e)
n = as.numeric(sqldf("select Time(*) as m from e"))
for (j in 1:as.numeric(sqldf("select count(*) as m from x")))
{
r <- fn$sqldf("select * from x limit $j-1,1")
for( i in 3:length(x))
{
r[as.character(colnames(r)[i])] <- as.numeric(r[as.character(colnames(r)[i])])
}
}
library(sqldf)
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE)
x$Date<- as.Date(x$Date,"%d/%m/%Y")
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S")
x$Global_active_power = as.numeric(x$Global_active_power)
x$Global_reactive_power = as.numeric(x$Global_reactive_power)
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power)
# Creating sample pools for six month data
first<- as.Date('16-12-2008',format = '%d-%m-%Y')
second <- as.Date('30-06-2009',format = '%d-%m-%Y')
y = fn$sqldf("select * from x where Date>=$first and Date<=$second")
# Creating sample pools of data at time 0, 15min, 30min, 45min
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'")
# Checking for which central tendency is apt (using checking of skewdness and outliners concept).
n = as.numeric(sqldf("select count(distinct Time) from z"))
t = sqldf("select distinct Time from z")
for (j in 1:n)
{
e = fn$sqldf("select * from z where Time=(select Time from t limit $j-1,1)")
plot(e$KVA,xlab='Day No.', main = "Graphical representation of KVA value on 17:30:00 in 6 months.")
}
summary(e)
n = as.numeric(sqldf("select Time(*) as m from e"))
for (j in 1:as.numeric(sqldf("select count(*) as m from x")))
{
r <- fn$sqldf("select * from x limit $j-1,1")
for( i in 3:length(x))
{
r[as.character(colnames(r)[i])] <- as.numeric(r[as.character(colnames(r)[i])])
}
}
library(sqldf)
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE)
x$Date<- as.Date(x$Date,"%d/%m/%Y")
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S")
x$Global_active_power = as.numeric(x$Global_active_power)
x$Global_reactive_power = as.numeric(x$Global_reactive_power)
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power)
# Creating sample pools for six month data
first<- as.Date('16-12-2008',format = '%d-%m-%Y')
second <- as.Date('30-06-2009',format = '%d-%m-%Y')
y = fn$sqldf("select * from x where Date>=$first and Date<=$second")
# Creating sample pools of data at time 0, 15min, 30min, 45min
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'")
y = fn$sqldf("select * from x where Date>=$first and Date<=$second")
# Checking for which central tendency is apt (using checking of skewdness and outliners concept).
n = as.numeric(sqldf("select count(distinct Time) from z"))
t = sqldf("select distinct Time from z")
for (j in 1:n)
{
e = fn$sqldf("select * from z where Time=(select Time from t limit $j-1,1)")
for (i in 1:7)
{
f = fn$sqldf("select * from e where Date%7=$j")
#plot(e$KVA,xlab='Day No.', main = "Graphical representation of KVA value in 6 months for particular time.")
qplot(f$Date,f$KVA)
}
}
library(sqldf)
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE)
x$Date<- as.Date(x$Date,"%d/%m/%Y")
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S")
x$Global_active_power = as.numeric(x$Global_active_power)
x$Global_reactive_power = as.numeric(x$Global_reactive_power)
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power)
# Creating sample pools for six month data
first<- as.Date('16-12-2008',format = '%d-%m-%Y')
second <- as.Date('30-06-2009',format = '%d-%m-%Y')
y = fn$sqldf("select * from x where Date>=$first and Date<=$second")
# Creating sample pools of data at time 0, 15min, 30min, 45min
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'")
# Checking for which central tendency is apt (using checking of skewdness and outliners concept).
n = as.numeric(sqldf("select count(distinct Time) from z"))
t = sqldf("select distinct Time from z")
e = sqldf("select * from z where Time=(select Time from t limit 1,1)")
f = sqldf("select * from e where Date%7=1")
plot(f$Date,f$KVA,xlab='Day No.',ylab='KVA', main = "Graphical representation of KVA value in 6 months for particular time.")
u = lines(f$Date,f$KVA)
for (j in 1:n)
{
e = fn$sqldf("select * from z where Time=(select Time from t limit $j-1,1)")
for (i in 1:7)
{
f = fn$sqldf("select * from e where Date%7=$i")
plot(f$Date,f$KVA,xlab='Day No.',ylab='KVA', main = "Graphical representation of KVA value in 6 months for particular time.")
lines(f$Date,f$KVA)
}
}
nn = as.numeric(sqldf("select count(*) from f"))
if(nn%%2==1)
{
nn = as.integer(nn/2)+1
}
b=NULL
for (i in seq(nn/2,nn))
{
b[i-nn/2] = nn*mean(fn$sqldf("select KVA from (select * from f order by KVA Desc) limit $i")$KVA)
}
sort(b,decreasing = TRUE)
trapz(as.vector(f$Date),as.vector(f$KVA))
library(sqldf)
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE)
x$Date<- as.Date(x$Date,"%d/%m/%Y")
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S")
x$Global_active_power = as.numeric(x$Global_active_power)
x$Global_reactive_power = as.numeric(x$Global_reactive_power)
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power)
# Creating sample pools for six month data
first<- as.Date('16-12-2008',format = '%d-%m-%Y')
second <- as.Date('30-06-2009',format = '%d-%m-%Y')
y = fn$sqldf("select * from x where Date>=$first and Date<=$second")
# Creating sample pools of data at time 0, 15min, 30min, 45min
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'")
# Checking for which central tendency is apt (using checking of skewdness and outliners concept).
n = as.numeric(sqldf("select count(distinct Time) from z"))
t = sqldf("select distinct Time from z")
e = sqldf("select * from z where Time=(select Time from t limit 1,1)")
f = sqldf("select * from e where Date%7=1")
plot(f$Date,f$KVA,xlab='Day No.',ylab='KVA', main = "Graphical representation of KVA value in 6 months for particular time.")
u = lines(f$Date,f$KVA)
for (j in 1:n)
{
e = fn$sqldf("select * from z where Time=(select Time from t limit $j-1,1)")
for (i in 1:7)
{
f = fn$sqldf("select * from e where Date%7=$i")
plot(f$Date,f$KVA,xlab='Day No.',ylab='KVA', main = "Graphical representation of KVA value in 6 months for particular time.")
lines(f$Date,f$KVA)
}
}
nn = as.numeric(sqldf("select count(*) from f"))
d = NULL
hippo=NULL
k=0
for (i in seq(1,nn))
{
d=NULL
d1 = as.numeric(fn$sqldf("select KVA from f limit $i-1,1"))
for (j in 1:nn)
{
d2 = as.numeric(fn$sqldf("select KVA from f limit $j-1,1"))
d[j]=(d2-d1)*.16
}
hippo[i]=sum(d)
k = k+1
f$hippo[k]=hippo[i]
print(paste0("Hippo: for ", d1, " and ", d2," = ", hippo[i]))
}
if(nn%%2==1)
{
nn = as.integer(nn/2)+1
}
b=NULL
meanfromarea =NULL
for (i in seq(nn/2,nn))
{
gg= fn$sqldf("select KVA from (select * from f order by KVA) limit $i")
b[i+1-(nn/2)] = mean(gg$KVA)
}
summary(e)
n = as.numeric(sqldf("select Time(*) as m from e"))
for (j in 1:as.numeric(sqldf("select count(*) as m from x")))
{
r <- fn$sqldf("select * from x limit $j-1,1")
for( i in 3:length(x))
{
r[as.character(colnames(r)[i])] <- as.numeric(r[as.character(colnames(r)[i])])
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment