jarsen · March 20, 2015 20:23
diff --git a/playing.jl b/playing.jl
 Pkg.add("Gadfly")
 using Gadfly

 # Reflection helpers: inspect how Julia represents and compiles code.
 # The second argument `()` is the (empty) tuple of argument types, i.e. the zero-argument call print().
 print(code_typed(print, ())) # print the type-inferred AST
 :(2 + 2) # quote an expression to get its AST instead of evaluating it
 print(code_lowered(print, ())) # see the lowered AST
 # both calls return an array: one entry per method matching the given signature

 Pkg.add("Cairo")
 Pkg.add("DataFrames")
 Pkg.add("RDatasets")

 using DataFrames
 using RDatasets

 # Load the "iris" table from the "datasets" collection
 # (dataset() is presumably provided by RDatasets -- confirm).
 iris = dataset("datasets", "iris")

 # DataFrames deal with missing data smartly.
 # Baseline first: a plain vector of floats with no missing entries.
 v = [0.5, 0.6, 0.7, 0.9]
 mean(v) # ordinary arithmetic mean of the four values

 # NA marks a missing value (similar to NULL). It poisons other values in a
 # computation, the way NaN does for floating-point numbers.
 
 1 + NA # arithmetic with NA: expected to give NA
 1 > NA # comparisons are expected to be poisoned as well
 typeof(NA) # NA has its own dedicated type
 isna(NA) # predicate for testing missingness

 # Equality checks: `==` is not a reliable way to detect NA or NaN.
 NA == NA # presumably NA rather than true -- prefer isna(); confirm
 NaN == NaN # false: an IEEE NaN never compares equal to itself
 NA == 1

 # v = [0.5, 0.6, 0.7, NA, 0.9] # can't do this. use DataArray to store Ts and NAs side by side
 # dv = DataArray([1,2,3])
 # with the DataArray(...) constructor, NAs are assigned *after* creation rather than at init;
 # the @data macro used below accepts NA literals directly
 dv = @data([NA,1,2,3])
 dv[1] = NA # element assignment; slot 1 already holds NA from the literal above
 join(dv, "::") # build a string with the elements separated by "::"
 mean(dv) # the NA entry is expected to poison the result
 mean(dropna(dv)) # drop the NAs first, then average what is left

 mean(array(dv, 1)) # substitute 1 for every NA, then average
 2dv # numeric-literal coefficient: same as 2 * dv

 # convert DataArray into Array (after dropping the NAs)
 convert(Array, dropna(dv))

 # a bunch of convenience constructors coming from MATLAB heritage
 DataArray(zeros(Float64, 8)) # wrap a plain 8-element zero vector in a DataArray
 ones(Float64, 10) # 10 ones
 falses(10) # 10 false values
 trues(6) # 6 true values
 eye(3) # identity matrix
 diagm(4) # diagonal matrix
 # NOTE(review): with a scalar argument diagm presumably yields a 1x1 matrix;
 # pass a vector to get a larger diagonal -- confirm

 # use this @data macro to make DataArrays
 foo = @data(["John Smith", "Jane Doe"]) # from an array literal
 @data(ones(10)) # from the result of a constructor call

 # MATRIX MATH
 a = DataArray(eye(10)) # 10x10 identity wrapped in a DataArray
 a[5,5] = NA # knock out one diagonal entry
 a = a*a # multiply a by itself and rebind a to the result
 print(a)

 print(a.na) # this is a separate matrix keeping track of NAs

 # singular value decomposition
 svd(a) # NOTE(review): a may contain NAs at this point -- confirm svd handles them

 # fast fourier transform...
 # fft(zeros(10))

 # Dealing with Heterogeneous Data
 # (each column can have a different type)
 # it's like a very simple implementation of a relational database
 # Quick detour: basic Vector indexing and membership.
 # Materialise the range with collect(): the literal [1:5] only produces the
 # five integers on Julia 0.3. On later versions it is a one-element array
 # holding the range, which breaks the slicing and membership checks below.
 a = collect(1:5)
 a[1:3] # first three elements
 a[1:end] # `end` is the last index, so this is the whole vector
 # a[0:3] 1 indexed!
 in(10, a) # false: 10 is not an element
 in(2, a) # true


 # A and B are our columns: A from the range 1:4, B from a vector of strings
 df = DataFrame(A = 1:4, B=["M", "F", "F", "M"])
 df[:A] # select a column by its Symbol name
 size(df[:B]) # size of a single column
 size(df) # size of the whole table
 (4,2)[1] # plain tuple indexing: picks the first element, 4
 nrows = size(df, 1) # extent along dimension 1
 ncols = size(df, 2) # extent along dimension 2

 head(df,2) # look at the first two rows
 tail(df, 2) # look at the last two rows

 # get columns
 df[1] # by position; same as df[:A]
 df[:A] == df[1] # compare the name-based and position-based lookups
 mean(df[:A])

 # get rows
 df[2, :] # row 2, every column

 describe(df)
 # excerpt of the output recorded by the author (apparently for column B):
 # Type    ASCIIString
 # NAs     0
 # NA%     0.0%
 # Unique  2

 cumsum(df[:A]) # running total of column A
 # do to each column
 df = DataFrame(A = 1:4, B = 5:8) # rebind df to a table with two numeric columns
 colwise(cumsum, df) # apply cumsum to every column of df

 # read csv etc http://dataframesjl.readthedocs.org/en/latest/io.html
 # df = readtable("data.csv")
 # write to csv
 # writetable("output.csv", df)
	Pkg.add("Gadfly")
	using Gadfly

	# Reflection helpers: inspect how Julia represents and compiles code.
	# The second argument `()` is the (empty) tuple of argument types, i.e. the zero-argument call print().
	print(code_typed(print, ())) # print the type-inferred AST
	:(2 + 2) # quote an expression to get its AST instead of evaluating it
	print(code_lowered(print, ())) # see the lowered AST
	# both calls return an array: one entry per method matching the given signature

	Pkg.add("Cairo")
	Pkg.add("DataFrames")
	Pkg.add("RDatasets")

	using DataFrames
	using RDatasets

	# Load the "iris" table from the "datasets" collection
	# (dataset() is presumably provided by RDatasets -- confirm).
	iris = dataset("datasets", "iris")

	# DataFrames deal with missing data smartly.
	# Baseline first: a plain vector of floats with no missing entries.
	v = [0.5, 0.6, 0.7, 0.9]
	mean(v) # ordinary arithmetic mean of the four values

	# NA marks a missing value (similar to NULL). It poisons other values in a
	# computation, the way NaN does for floating-point numbers.

	1 + NA # arithmetic with NA: expected to give NA
	1 > NA # comparisons are expected to be poisoned as well
	typeof(NA) # NA has its own dedicated type
	isna(NA) # predicate for testing missingness

	# Equality checks: `==` is not a reliable way to detect NA or NaN.
	NA == NA # presumably NA rather than true -- prefer isna(); confirm
	NaN == NaN # false: an IEEE NaN never compares equal to itself
	NA == 1

	# v = [0.5, 0.6, 0.7, NA, 0.9] # can't do this. use DataArray to store Ts and NAs side by side
	# dv = DataArray([1,2,3])
	# with the DataArray(...) constructor, NAs are assigned after creation rather than at init;
	# the @data macro used below accepts NA literals directly
	dv = @data([NA,1,2,3])
	dv[1] = NA # element assignment; slot 1 already holds NA from the literal above
	join(dv, "::") # build a string with the elements separated by "::"
	mean(dv) # the NA entry is expected to poison the result
	mean(dropna(dv)) # drop the NAs first, then average what is left

	mean(array(dv, 1)) # substitute 1 for every NA, then average
	2dv # numeric-literal coefficient: same as 2 * dv

	# convert DataArray into Array (after dropping the NAs)
	convert(Array, dropna(dv))

	# a bunch of convenience constructors coming from MATLAB heritage
	DataArray(zeros(Float64, 8)) # wrap a plain 8-element zero vector in a DataArray
	ones(Float64, 10) # 10 ones
	falses(10) # 10 false values
	trues(6) # 6 true values
	eye(3) # identity matrix
	diagm(4) # diagonal matrix
	# NOTE(review): with a scalar argument diagm presumably yields a 1x1 matrix;
	# pass a vector to get a larger diagonal -- confirm

	# use this @data macro to make DataArrays
	foo = @data(["John Smith", "Jane Doe"]) # from an array literal
	@data(ones(10)) # from the result of a constructor call

	# MATRIX MATH
	a = DataArray(eye(10)) # 10x10 identity wrapped in a DataArray
	a[5,5] = NA # knock out one diagonal entry
	a = a*a # multiply a by itself and rebind a to the result
	print(a)

	print(a.na) # this is a separate matrix keeping track of NAs

	# singular value decomposition
	svd(a) # NOTE(review): a may contain NAs at this point -- confirm svd handles them

	# fast fourier transform...
	# fft(zeros(10))

	# Dealing with Heterogeneous Data
	# (each column can have a different type)
	# it's like a very simple implementation of a relational database
	# Quick detour: basic Vector indexing and membership.
	# Materialise the range with collect(): the literal [1:5] only produces the
	# five integers on Julia 0.3. On later versions it is a one-element array
	# holding the range, which breaks the slicing and membership checks below.
	a = collect(1:5)
	a[1:3] # first three elements
	a[1:end] # `end` is the last index, so this is the whole vector
	# a[0:3] 1 indexed!
	in(10, a) # false: 10 is not an element
	in(2, a) # true


	# A and B are our columns: A from the range 1:4, B from a vector of strings
	df = DataFrame(A = 1:4, B=["M", "F", "F", "M"])
	df[:A] # select a column by its Symbol name
	size(df[:B]) # size of a single column
	size(df) # size of the whole table
	(4,2)[1] # plain tuple indexing: picks the first element, 4
	nrows = size(df, 1) # extent along dimension 1
	ncols = size(df, 2) # extent along dimension 2

	head(df,2) # look at the first two rows
	tail(df, 2) # look at the last two rows

	# get columns
	df[1] # by position; same as df[:A]
	df[:A] == df[1] # compare the name-based and position-based lookups
	mean(df[:A])

	# get rows
	df[2, :] # row 2, every column

	describe(df)
	# excerpt of the output recorded by the author (apparently for column B):
	# Type ASCIIString
	# NAs 0
	# NA% 0.0%
	# Unique 2

	cumsum(df[:A]) # running total of column A
	# do to each column
	df = DataFrame(A = 1:4, B = 5:8) # rebind df to a table with two numeric columns
	colwise(cumsum, df) # apply cumsum to every column of df

	# read csv etc http://dataframesjl.readthedocs.org/en/latest/io.html
	# df = readtable("data.csv")
	# write to csv
	# writetable("output.csv", df)