illy

## 1 Data manipulation

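If a column contains values that R cannot parse as numbers (for example missing-value markers other than `NA`), the column is read as a factor rather than numeric. Convert it back through character: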
```
 # Convert through character first: as.numeric() applied directly to a factor
 # returns the internal integer level codes, not the original values.
 DATA$COLUMN <- as.numeric(as.character(DATA$COLUMN))
```
Rename the column:
```
 # Replace the name of the column at position 2 with "NEW_NAME".
 names(DATA)[2] <- "NEW_NAME"
```

## AWK notes ##

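If `$2` matches a regex, print the whole line except `$1`, e.g. `awk '$2 ~ /regex/ { $1 = ""; print }' FILE` (the output keeps the leading field separator).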

	# Notched boxplot of Word for each Type in the tab-separated sample file,
	# with the x-axis categories shown in a fixed order.
	sample <- read.table("~/Dropbox/sample.txt", header = TRUE, sep = "\t")

	p <- ggplot(sample) +
	  geom_boxplot(
	    aes(x = factor(Type), y = Word, fill = factor(Type)),
	    notch = TRUE,
	    outlier.shape = NA  # do not draw the outlier points
	  ) +
	  theme(
	    axis.text.x = element_text(angle = 15, hjust = 0.8, vjust = 1, size = 12),
	    axis.text.y = element_text(size = 12)
	  ) +
	  # "none" replaces the deprecated guides(fill = FALSE).
	  guides(fill = "none") +
	  scale_fill_grey() +
	  scale_x_discrete(
	    limits = c("NSR", "stock-related", "NTR", "ticker-related",
	               "NEG", "NEU", "POS")
	  ) +
	  # Zoom the y-axis without discarding data: limits set on
	  # scale_y_continuous() remove values outside the range before the box
	  # and notch statistics are computed.
	  coord_cartesian(ylim = c(0, 30))
	print(p)

	# Notched boxplot of Word for each Type in the tab-separated sample file,
	# with the x-axis categories in their default order.
	sample <- read.table("~/Dropbox/sample.txt", header = TRUE, sep = "\t")

	p <- ggplot(sample) +
	  geom_boxplot(
	    aes(x = factor(Type), y = Word, fill = factor(Type)),
	    notch = TRUE,
	    outlier.shape = NA  # do not draw the outlier points
	  ) +
	  theme(
	    axis.text.x = element_text(angle = 15, hjust = 0.8, vjust = 1, size = 12),
	    axis.text.y = element_text(size = 12)
	  ) +
	  # "none" replaces the deprecated guides(fill = FALSE).
	  guides(fill = "none") +
	  scale_fill_grey() +
	  # Zoom the y-axis without discarding data: limits set on
	  # scale_y_continuous() remove values outside the range before the box
	  # and notch statistics are computed.
	  coord_cartesian(ylim = c(0, 30))
	print(p)

	#!/usr/bin/env bash
	# Crawl tweets from the URLs listed in an external address file.

	# Download directory named after the download date and time (dd-mm-yy-HH:MM).
	DIR="PARENT_DIR/$(date "+%d-%m-%y-%H:%M")"
	mkdir -p "$DIR"

	# -i: read the URLs from the list file; -r -l1: recurse one level deep;
	# -np: never ascend to the parent directory; -N: only fetch files newer
	# than the local copy; -P: save everything under $DIR.
	wget -i EXTERNAL_ADDRESS_LIST -np -r -N -l1 -P "$DIR"

	# Inspect the structure of the acf object (the output is listed below).
	str(acf.geQuote)

	List of 6
	$ acf : num [1:5, 1:4, 1:4] 1 -0.1917 -0.478 0.1049 0.0648 ...
	$ type : chr "correlation"
	$ n.used: int 5
	$ lag : num [1:5, 1:4, 1:4] 0 1 2 3 4 0 -1 -2 -3 -4 ...
	$ series: chr "m.geQuote"
	$ snames: chr [1:4] "Open" "Close" "Low" "High"
	- attr(*, "class")= chr "acf"

	# Heat map of the melted correlation values: one tile per (Var1, Var2)
	# pair, one facet per Var3 value.
	p <- ggplot(m.acf.geQuote) +
	  # The misspelled `lable` aesthetic was dropped: geom_raster() has no label
	  # aesthetic, so it only triggered an "unknown aesthetics" warning.
	  geom_raster(aes(x = Var1, y = Var2, fill = value)) +
	  facet_wrap(~Var3, nrow = 4) +
	  ggtitle("Cross-correlation of 4 different prices of GE ticker") +
	  # NOTE(review): the legend is hidden here, so the fill title set below is
	  # never displayed -- confirm which of the two is intended.
	  theme(legend.position = "none") +
	  labs(fill = "Correlation") +
	  xlab("") + ylab("")
	print(p)

	# Auto- and cross-correlations between the series in columns 2 to 5 of
	# geQuote, melted to long format (Var1 indexes the lag dimension).
	m.geQuote <- as.matrix(geQuote[, 2:5])
	# `lag.max` is spelled out instead of relying on partial matching of `lag`.
	acf.geQuote <- acf(m.geQuote, lag.max = 5, plot = FALSE,
	  na.action = na.contiguous)
	m.acf.geQuote <- melt(acf.geQuote$acf)

	# Label lag index k as "Day<k-1>" in a single step. Assigning the strings
	# one value at a time silently coerced the column to character, and the
	# stray console continuation "+" before `levels` was a syntax error.
	n.lags <- dim(acf.geQuote$acf)[1]
	m.acf.geQuote$Var1 <- factor(m.acf.geQuote$Var1,
	  levels = seq_len(n.lags),
	  labels = paste0("Day", seq_len(n.lags) - 1L),
	  ordered = TRUE)

	m.acf.geQuote$Var2[m.acf.geQuote$Var2 == 1] <- "Open"
	m.acf.geQuote$Var2[m.acf.geQuote$Var2 == 2] <- "Close"