# Session02
# CHAPTER 3
# CENTRAL TENDENCY
yvals<-read.csv("C:\\MSc\\Statistics\\Data\\yvalues.csv")
# open file
attach(yvals)
hist(y)
# the data value that occurs most often is called the 'mode'
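# a minimal sketch (not in the original notes) of finding the mode in R:
# table(y) counts each value, and the name of the largest count is the mode
names(which.max(table(y)))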
# the most straightforward quantitative measure of central tendency is the arithmetic mean of the data
total<-sum(y)
# 'sum' works out the total for any length of vector
n<-length(y)
# calculates the length of all data values
ybar<-total/n
# calculates the arithmetic mean
arithmetic.mean<-function(x)sum(x)/length(x)
# we define a function
data<-c(3,4,6,7)
# we define a new set of values in order to test the function
arithmetic.mean(data)
# it works
arithmetic.mean(y)
# R has built-in functions for calculating arithmetic means directly
mean(y)
# however, the arithmetic mean is highly sensitive to outliers
# a single extremely large/small value will have a big effect on the arithmetic mean
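# a quick illustration (my own sketch, not from the notes): one extreme value
# drags the mean up from 5 to over 200, while the middle sorted value barely moves
mean(c(3,4,6,7))
mean(c(3,4,6,7,1000))
sort(c(3,4,6,7,1000))[3]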
# so we need to use the median which is the middle value of the data set
# to do this:
sorted<-sort(y)
# sorts the data into ascending order
length(y)/2
# number of numbers, the middle value is half of this
# it has to be odd to have a middle value
ceiling(length(y)/2)
# ceiling rounds length(y)/2 up to the nearest integer, giving the position of the middle value
sorted[20]
# extracts the median value of y
sorted[ceiling(length(y)/2)]
# a more general way of writing it
sort(y)[ceiling(length(y)/2)]
# another more general way of defining it
# in the case that the vector contains an even number of numbers
y.even<-y[-1]
length(y.even)
# we drop the first element of y so that the vector has an even number of values
# we will work out the arithmetic average of the two values of y on either side of the middle
# in this case, the average of the 19th and 20th sorted values
sort(y.even)[19]
sort(y.even)[20]
# so in this case the median would be:
(sort(y.even)[19]+sort(y.even)[20])/2
# but we can replace 19 and 20 with 'ceiling(length(y.even)/2)' and 'ceiling(1+length(y.even)/2)'
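# the same calculation written with those general subscripts (a quick sketch, not in the original notes):
(sort(y.even)[ceiling(length(y.even)/2)]+sort(y.even)[ceiling(1+length(y.even)/2)])/2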
# there is a trick to find out if the length is odd or even: 'modulo', which is the remainder when one integer is divided by another
# an even length gives modulo 0 and an odd length gives modulo 1
# the modulo function is '%%'
38%%2
# 0
39%%2
# 1
med<-function(x){
modulo<-length(x)%%2
if (modulo==0) (sort(x)[ceiling(length(x)/2)]+sort(x)[ceiling(1+length(x)/2)])/2
else sort(x)[ceiling(length(x)/2)]
}
# if we have an even number of numbers (modulo==0) then the 'if' statement is evaluated
# if we have an odd number of numbers (modulo==1) then the 'else' is evaluated
med(y)
med(y.even)
# there is a built-in function for calculating the median
median(y)
median(y.even)
# PROCESSES THAT CHANGE MULTIPLICATIVELY
# in these conditions the appropriate measure is the geometric mean
# definition: the "n"th root of the product of the data
# E.G. 10,1,1000,1,10: multiplying them together gives 100000; we want the fifth root of this (5 numbers)
# roots are fractional powers, so the 5th root is the power 1/5=0.2
# R denotes 'powers' by '^'
100000^0.2
# 10
# the arithmetic mean on the other hand would give this
insects<-c(1,10,1000,10,1)
mean(insects)
# 204.4
# another way to calculate the geometric mean is using logarithms
# we should be able to calculate it by finding the antilog (exp) of the average of the logarithms (log) of the data
exp(mean(log(insects)))
# 10
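# a reusable version (my own sketch, using the same antilog-of-the-mean-of-logs idea):
geometric.mean<-function(x) exp(mean(log(x)))
geometric.mean(insects)
# 10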
# THE HARMONIC MEAN
# this is the reciprocal of the average of the reciprocals
# a reciprocal is 'one over'
# we take the average of the reciprocals, and the reciprocal of that average is the harmonic mean
v<-c(1,2,4,1)
length(v)/sum(1/v)
# or
1/mean(1/v)
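# a reusable version (my own sketch, the reciprocal of the mean of the reciprocals):
harmonic.mean<-function(x) 1/mean(1/x)
harmonic.mean(v)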
# CHAPTER 4
# VARIANCE
# a measure of variability is very important
# the greater the variability the greater will be our uncertainty in the values of parameters estimated from the data
# and the lower will be our ability to distinguish between competing hypotheses about the data
y<-c(13,7,5,12,9,15,6,11,9,7,12)
plot(y)
range(y)
# this quantifies the variation
plot(1:11,y,ylim = c(0,20),pch=16,col="blue")
lines(c(4.5,4.5),c(5,15),col="brown")
lines(c(4.5,3.5),c(5,5),col="brown")
lines(c(4.5,5.5),c(15,15),col="brown")
# plots a graph with lines delimiting the maximum and minimum values
# this is too dependent on the outlying values
plot(1:11,y,ylim = c(0,20),pch=16,col="blue")
abline(h=mean(y),col="green")
for(i in 1:11)lines(c(i,i),c(mean(y),y[i]),col="red")
# this ensures all the data contributes to the measure of variability
# it also draws the mean value as a horizontal (green) line
# the (red) lines go from the mean to the values, the longer they are the more variable the data
# simply adding up the lengths of the red lines is no use, because the positive departures above the mean cancel out the negative ones below it
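# a quick check of that cancellation (my own sketch): the departures from the mean sum to zero
sum(y-mean(y))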
# so we do the SUM OF SQUARES
y-mean(y)
(y-mean(y))^2
sum((y-mean(y))^2)
# VARIANCE
variance<-function(x) sum((x-mean(x))^2)/(length(x)-1)
variance(y)
var(y)
ozone<-read.csv("c:\\MSc\\Statistics\\Data\\gardens.csv")
attach(ozone)
ozone
mean(gardenA)
gardenA-mean(gardenA)
(gardenA-mean(gardenA))^2
sum((gardenA-mean(gardenA))^2)/9
mean(gardenB)
gardenB-mean(gardenB)
(gardenB-mean(gardenB))^2
sum((gardenB-mean(gardenB))^2)
sum((gardenB-mean(gardenB))^2)/9
mean(gardenC)
gardenC-mean(gardenC)
(gardenC-mean(gardenC))^2
sum((gardenC-mean(gardenC))^2)
sum((gardenC-mean(gardenC))^2)/9
var(gardenC)/var(gardenB)
# variance ratio (F); the two-tailed p-value of the F test:
2*(1-pf(10.667,9,9))
var.test(gardenB,gardenC)
plot(c(0,32),c(0,15),type="n",xlab="Sample size",ylab="Variance")
for(n in seq(3,31,2)){
for(i in 1:30){
x<-rnorm(n,mean=10,sd=2)
points(n,var(x))}}
# standard errors of the three garden means (n = 10 in each garden)
sqrt(var(gardenA)/10)
sqrt(var(gardenB)/10)
sqrt(var(gardenC)/10)
# Student's t quantiles with 9 degrees of freedom
qt(.025,9)
qt(.975,9)
qt(.995,9)
qt(.9975,9)
# half the width of the 95% confidence interval for a mean with variance 1.33333 and n = 10
qt(.975,9)*sqrt(1.33333/10)
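# putting those pieces together (my own sketch): the 95% confidence interval for the mean
# of garden B is its mean plus or minus Student's t (9 d.f.) times the standard error
mean(gardenB)+qt(c(.025,.975),9)*sqrt(var(gardenB)/10)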
# BOOTSTRAP
data<-read.csv("c:\\MSc\\Statistics\\Data\\skewdata.csv")
attach(data)
names(data)
plot(c(0,30),c(0,60),type="n",xlab="Sample size",
ylab="Confidence interval")
for(k in seq(5,30,3)){
a<-numeric(10000)
for(i in 1:10000){
a[i]<-mean(sample(values,k,replace=T))
}
points(c(k,k),quantile(a,c(.025,.975)),type="b",pch=21,bg="red")
}
quantile(a,c(0.025,0.975))
xv<-seq(5,30,0.1)
yv<-mean(values)+1.96*sqrt(var(values)/xv)
lines(xv,yv,col="blue")
yv<-mean(values)-1.96*sqrt(var(values)/xv)
lines(xv,yv,col="blue")
yv<-mean(values)-qt(.975,xv-1)*sqrt(var(values)/xv)
lines(xv,yv,lty=2,col="green")
yv<-mean(values)+qt(.975,xv-1)*sqrt(var(values)/xv)
lines(xv,yv,lty=2,col="green")
# CHAPTER 5
# DATA SUMMARY IN THE ONE-SAMPLE CASE
data<-read.csv("c:\\MSc\\Statistics\\Data\\example.csv")
attach(data)
summary(y)
boxplot(y)
hist(y)
length(table(y))
plot(range(y),c(0,10),type="n",xlab="yvalues",ylab="")
for(i in 1:100)lines(c(y[i],y[i]),c(0,1),col="blue")
(max(y)-min(y))/10
diff(range(y))/11
# NORMAL DISTRIBUTION
score<-2:12
ways<-c(1,2,3,4,5,6,5,4,3,2,1)
game<-rep(score,ways)
game
sample(game,1)
outcome<-numeric(10000)
for(i in 1:10000) outcome[i]<-sample(game,1)
hist(outcome,breaks=(1.5:12.5))
mean.score<-numeric(10000)
for(i in 1:10000)mean.score[i]<-mean(sample(game,3))
hist(mean.score,breaks=(1.5:12.5))
mean(mean.score)
sd(mean.score)
xv<-seq(2,12,0.1)
yv<-10000*dnorm(xv,mean(mean.score),sd(mean.score))
hist(mean.score,breaks=(1.5:12.5),ylim=c(0,3000),
col="yellow",main="")
lines (xv,yv,col="red")
standard.deviations<-seq(-3,3,0.01)
pd<-dnorm(standard.deviations)
plot(standard.deviations,pd,type="l",col="blue")
pnorm(-2)
pnorm(-1)
1-pnorm(3)
qnorm(c(0.025,0.975))
# the two-tailed 95% critical values (approximately -1.96 and +1.96)
ht<-seq(150,190,0.01)
plot(ht,dnorm(ht,170,8),type="l",col="brown",
ylab="Probability density",xlab="Height")
pnorm(-1.25)
pnorm(1.875)
1-pnorm(1.875)
pnorm(1.25)-pnorm(-0.625)
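# the same probabilities without standardising first (a sketch): pnorm also accepts
# the mean and standard deviation directly
pnorm(160,170,8)
1-pnorm(185,170,8)
pnorm(180,170,8)-pnorm(165,170,8)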
par(mfrow=c(2,2))
ht<-seq(150,190,0.01)
pd<-dnorm(ht,170,8)
plot(ht,dnorm(ht,170,8),type="l",col="brown",
ylab="Probability density",xlab="Height")
plot(ht,dnorm(ht,170,8),type="l",col="brown",
ylab="Probability density",xlab="Height")
yv<-pd[ht<=160]
xv<-ht[ht<=160]
xv<-c(xv,160,150)
yv<-c(yv,yv[1],yv[1])
polygon(xv,yv,col="orange")
plot(ht,dnorm(ht,170,8),type="l",col="brown",
ylab="Probability density",xlab="Height")
xv<-ht[ht>=185]
yv<-pd[ht>=185]
xv<-c(xv,190,185)
yv<-c(yv,yv[501],yv[501])
polygon(xv,yv,col="blue")
plot(ht,dnorm(ht,170,8),type="l",col="brown",
ylab="Probability density",xlab="Height")
xv<-ht[ht>=160 & ht<=180]
yv<-pd[ht>=160 & ht<=180]
xv<-c(xv,180,160)
yv<-c(yv,pd[1],pd[1])
polygon(xv,yv,col="green")
# PLOTS FOR TESTING NORMALITY OF SINGLE SAMPLES
data<-read.csv("c:\\MSc\\Statistics\\Data\\skewdata.csv")
attach(data)
qqnorm(values)
qqline(values,lty=2)
light<-read.csv("c:\\MSc\\Statistics\\Data\\light.csv")
attach(light)
names(light)
hist(speed)
summary(speed)
# INFERENCE IN THE ONE-SAMPLE CASE
wilcox.test(speed,mu=990)
# BOOTSTRAP IN HYPOTHESIS TESTING WITH SINGLE SAMPLES
a<-numeric(10000)
for(i in 1:10000) a[i]<-mean(sample(speed,replace=T))
hist(a)
max(a)
# STUDENT'S t DISTRIBUTION
plot(c(0,30),c(0,10),type="n",
xlab="Degrees of freedom",
ylab="Students t value")
lines(1:30,qt(0.975,df=1:30),col="red")
abline(h=1.96,lty=2,col="green")
xvs<-seq(-4,4,0.01)
plot(xvs,dnorm(xvs),type="l",
ylab="Probability density",xlab="Deviates")
lines(xvs,dt(xvs,df=5),col="red")
qt(0.975,5)
# SKEW
skew<-function(x){
m3<-sum((x-mean(x))^3)/length(x) # third moment about the mean
s3<-sqrt(var(x))^3 # standard deviation cubed
m3/s3 }
hist(values,main="",col="green")
skew(values)
skew(values)/sqrt(6/length(values))
1-pt(2.949,28)
skew(sqrt(values))/sqrt(6/length(values))
skew(log(values))/sqrt(6/length(values))
# KURTOSIS
kurtosis<-function(x){
m4<-sum((x-mean(x))^4)/length(x) # fourth moment about the mean
s4<-var(x)^2 # variance squared
m4/s4-3 } # subtract 3 so that a normal distribution has kurtosis 0
kurtosis(values)
kurtosis(values)/sqrt(24/length(values))