##1 Data manipulation
-
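If a column contains NA values (or other non-numeric missing-value markers), R may read it as a factor rather than numeric. Convert it via character first, because as.numeric() applied directly to a factor returns the internal level codes: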
DATA$COLUMN <- as.numeric(as.character(DATA$COLUMN))
Rename the column:
names(DATA)[2] <- "NEW_NAME"
| sample <- read.table("~/Dropbox/sample.txt", header=T, sep="\t") | |
| p <- ggplot(sample) | |
| p <- p + geom_boxplot(aes(x=factor(Type), y=Word, fill=factor(Type)), notch=T, outlier.shape = NA) + | |
| theme(axis.text.x=element_text(angle=15, hjust=0.8, vjust=1, size=12), | |
| axis.text.y=element_text(size=12)) + | |
| guides(fill=F) + scale_fill_grey() + | |
| scale_x_discrete(limits=c("NSR", "stock-related", "NTR", "ticker-related", "NEG", "NEU", "POS")) + | |
| scale_y_continuous(limits = c(0, 30)) | |
| print(p) |
| sample <- read.table("~/Dropbox/sample.txt", header=T, sep="\t") | |
| p <- ggplot(sample) | |
| p <- p + geom_boxplot(aes(x=factor(Type), y=Word, fill=factor(Type)), notch=T, outlier.shape = NA) + | |
| theme(axis.text.x=element_text(angle=15, hjust=0.8, vjust=1, size=12), | |
| axis.text.y=element_text(size=12)) + | |
| guides(fill=F) + scale_fill_grey() + | |
| scale_y_continuous(limits = c(0, 30)) | |
| print(p) |
##1 Data manipulation
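If a column contains NA values (or other non-numeric missing-value markers), R may read it as a factor rather than numeric. Convert it via character first, because as.numeric() applied directly to a factor returns the internal level codes: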
DATA$COLUMN <- as.numeric(as.character(DATA$COLUMN))
Rename the column:
names(DATA)[2] <- "NEW_NAME"
| ## This script is for crawling tweets with a specific address file. | |
| #!/usr/bin/env bash | |
| DIR=PARENT_DIR/`date "+%d-%m-%y-%H:%M"` # name the download directory after the download date and time | |
| mkdir -p $DIR #make dir according to above | |
| wget -i EXTERNAL_ADDRESS_LIST -np -r -N -l1 -P $DIR |
| m.geQuote <- as.matrix(geQuote[,2:5]) | |
| acf.geQuote <- acf(m.geQuote, lag=5, plot=F, na.action=na.contiguous) | |
| m.acf.geQuote <- melt(acf.geQuote$acf) |
| str(acf.geQuote) | |
| List of 6 | |
| $ acf : num [1:5, 1:4, 1:4] 1 -0.1917 -0.478 0.1049 0.0648 ... | |
| $ type : chr "correlation" | |
| $ n.used: int 5 | |
| $ lag : num [1:5, 1:4, 1:4] 0 1 2 3 4 0 -1 -2 -3 -4 ... | |
| $ series: chr "m.geQuote" | |
| $ snames: chr [1:4] "Open" "Close" "Low" "High" | |
| - attr(*, "class")= chr "acf" |
| p <- ggplot(m.acf.geQuote) | |
| p <- p + geom_raster(aes(x=Var1, y=Var2, lable=value, fill= value)) + | |
| facet_wrap(~Var3, nrow=4) + | |
| ggtitle("Cross-correlation of 4 different prices of GE ticker") + | |
| theme(legend.position="none") + | |
| labs(fill="Correlation") + | |
| xlab("") + ylab("") | |
| print(p) |
| m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 1] <- "Day0" | |
| m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 2] <- "Day1" | |
| m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 3] <- "Day2" | |
| m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 4] <- "Day3" | |
| m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 5] <- "Day4" | |
| m.acf.geQuote$Var1 <- factor(m.acf.geQuote$Var1, | |
| + levels=unique(m.acf.geQuote$Var1), ordered=T) | |
| m.acf.geQuote$Var2[m.acf.geQuote$Var2 == 1] <- "Open" | |
| m.acf.geQuote$Var2[m.acf.geQuote$Var2 == 2] <- "Close" |
| m.geQuote <- as.matrix(geQuote[,2:5]) | |
| acf.geQuote <- acf(m.geQuote, lag=5, plot=F, na.action=na.contiguous) | |
| m.acf.geQuote <- melt(acf.geQuote$acf) | |
| m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 1] <- "Day0" | |
| m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 2] <- "Day1" | |
| m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 3] <- "Day2" | |
| m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 4] <- "Day3" | |
| m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 5] <- "Day4" |
##AWK notes##
selective printing
awk '$2 ~ /regex/ { $1=""; print $0 }'
If $2 matches the regex, print the whole line with $1 emptied (the field separator that followed $1 remains at the start of the line).