This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the raw text file, one document per line
txt <- readLines('txtdm.txt')
# Regex alternation of the punctuation characters to strip from tokens
ignore <- ",|:|!|'"
# Common words defined for filtering (not applied within these lines)
stopwords <- c('and', 'edition', 'for', 'in', 'little', 'of', 'the', 'to')
txt <- tolower(txt)
# Tokenise each document by splitting on single spaces
doc <- strsplit(txt, ' ')
# Remove common words and punctuation: this step deletes every character
# matched by `ignore` from each token
doc <- lapply(doc, function(x) gsub(ignore, '', x))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example from Survival Analysis- A Self-Learning Text, Third Edition
library(survival)
# Read the data set; the first line of the file is treated as a header
addicts <- read.table('ADDICTS.txt', header = TRUE)
names(addicts) <- c('id', 'clinic', 'status', 'survt', 'prison', 'dose')
# 1. Estimate the survival function and look at differences between groups
# Build the survival object: time is `survt`, the event is `status == 1`
Surv(addicts$survt, addicts$status == 1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the data, keeping character columns as strings rather than factors
data <- read.csv('qq.csv', header = TRUE, stringsAsFactors = FALSE)
data <- data[-nrow(data), ] # the last row is malformed, so drop it
library(stringr)
library(plyr)
library(lubridate)
library(ggplot2)
library(reshape2)
library(igraph)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(treemap)
# Read the input table; the first row of the CSV holds the column names
data <- read.csv('d:/olympic.csv', header = TRUE)
tmPlot(data, | |
index=c("sports", "events"), | |
vSize="gold", | |
vColor="china", | |
type="value", | |
title='中国奥运金牌分布', | |
fontsize.labels=13, | |
lowerbound.cex.labels=0.7, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(treemap)
# Read the input table; the first row of the CSV holds the column names
data <- read.csv('d:/sheet1.csv', header = TRUE)
# Draw a treemap nested by item and then subitem. Rectangle size is taken
# from the time1206 column and colouring from time1106 (type = "comp").
# NOTE(review): newer treemap releases expose this function as treemap();
# confirm against the installed package version before renaming the call.
tmPlot(data,
       index = c("item", "subitem"),
       vSize = "time1206",
       vColor = "time1106",
       type = "comp",
       title = '苹果公司财务报表可视化',
       palette = 'RdBu')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the data (first row is the header)
raw <- read.csv('http://www.stat.yale.edu/~jay/EPI_data_download/EPI_2012_Final_Results.csv', header = TRUE)
names(raw)
# Keep a fixed set of columns selected by position
# NOTE(review): the positions presumably cover Country, GDPCAP, GDPgroup and
# the indicator columns used below; verify against names(raw)
data <- raw[, c(2, 7, 8, 10, 23:44)]
# Drop rows that have no GDP group
data <- data[!is.na(data$GDPgroup), ]
library(reshape)
# Reshape to long format, one row per country and remaining column
data.melt <- melt(data, id.vars = c('Country', 'GDPCAP', 'GDPgroup'))
# Subset of the long table for China only
data.melt.china <- data.melt[data.melt$Country == 'China', ]
data.melt.china$variable <- with(data.melt.china, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(twitteR)
library(plyr)
library(ggplot2)
# Fetch the air-quality tweets for Beijing, Shanghai and Guangzhou
# (up to 660 tweets requested from each account)
airb <- userTimeline("beijingair", n = 660)
airs <- userTimeline("CGShanghaiAir", n = 660)
airg <- userTimeline("Guangzhou_Air", n = 660)
# Extract the tweet text, then split it with a regular expression
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import the data: read the text file line by line, marked as UTF-8
text <- readLines('d:\\honglou.txt', encoding = 'UTF-8')
library(ggplot2)
library(rmmseg4j)
library(tm)
library(MASS)
library(proxy)
# Remove blank lines
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
\documentclass[UTF8,10pt]{ctexart}
\usepackage[a4paper,%%textwidth=129mm,textheight=185mm, %%193-8
text={160mm,260mm},centering]{geometry}
\pagestyle{empty}
\begin{document}
\title{用散点图示范ggplot2的核心概念}
\author{肖凯}
\maketitle
\abstract{
本文稿是第五届R语言会议演讲内容的一部分,试图用散点图示例来说明ggplot2包的核心概念,以方便初学者快速上手。同时这也是笔者应用knitr包的一个练习。该示例所用数据是ggplot2包内带的mpg数据集。} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2)
library(glmnet)
library(reshape2)
# Read the data; the file has no header row, so columns are named V1, V2, V3
data <- read.csv('d:/ex2data2.txt', header = FALSE)
# Scatter plot of V1 against V2, with colour and point shape both keyed to V3
ggplot() +
  geom_point(data = data,
             aes(V1, V2, colour = factor(V3), shape = factor(V3)),
             size = 3)