Skip to content

Instantly share code, notes, and snippets.

@xccds
xccds / svd.R
Last active December 25, 2015 16:39
# Read the raw file, one element per line
txt <- readLines("txtdm.txt")
# Regex alternation of the punctuation characters to strip: , : ! '
ignore <- ",|:|!|'"
# Common words to exclude (not used in the lines visible here)
stopwords <- c("and", "edition", "for", "in", "little", "of", "the", "to")
txt <- tolower(txt)
# Tokenize each document on single spaces
doc <- strsplit(txt, " ")
# Remove common words and punctuation; this step strips the punctuation
# matched by `ignore` from every token
doc <- lapply(doc, function(x) gsub(ignore, "", x))
@xccds
xccds / Survival.R
Created August 2, 2013 08:21
Example of Survival Analysis
# Example from Survival Analysis- A Self-Learning Text, Third Edition
library(survival)
# Read ADDICTS.txt, treating the first row as a header, then assign the
# column names used by the rest of the script.
addicts <- read.table("ADDICTS.txt", header = TRUE)
names(addicts) <- c("id", "clinic", "status", "survt", "prison", "dose")
# 1. Estimate the survival function and look at differences between groups
# Build the survival object: time is `survt`, the event indicator is
# `status == 1`. The result is not assigned, so it is printed at top level.
Surv(addicts$survt, addicts$status == 1)
@xccds
xccds / qq.R
Created December 13, 2012 13:56
# Read the data, keeping strings as character vectors rather than factors
data <- read.csv("qq.csv", header = TRUE, stringsAsFactors = FALSE)
# The last row is bad, so drop it; drop = FALSE keeps the result a data
# frame even if the file has a single column
data <- data[-nrow(data), , drop = FALSE]
library(stringr)
library(plyr)
library(lubridate)
library(ggplot2)
library(reshape2)
library(igraph)
library(treemap)
# Read the data; header = TRUE treats the first row as column names
data <- read.csv("d:/olympic.csv", header = TRUE)
tmPlot(data,
index=c("sports", "events"),
vSize="gold",
vColor="china",
type="value",
title='中国奥运金牌分布',
fontsize.labels=13,
lowerbound.cex.labels=0.7,
library(treemap)
# Read the data; header = TRUE treats the first row as column names
data <- read.csv("d:/sheet1.csv", header = TRUE)
# Draw a treemap indexed by item then subitem, sized by the `time1206`
# column and coloured by the `time1106` column (type "comp"). The title
# string reads "Visualization of Apple's financial statements".
# NOTE(review): later treemap releases appear to rename tmPlot() to
# treemap() -- confirm against the installed package version.
tmPlot(data,
       index = c("item", "subitem"),
       vSize = "time1206",
       vColor = "time1106",
       type = "comp",
       title = "苹果公司财务报表可视化",
       palette = "RdBu")
# Read the data straight from the URL; the first row holds column names
raw <- read.csv(
  "http://www.stat.yale.edu/~jay/EPI_data_download/EPI_2012_Final_Results.csv",
  header = TRUE
)
# Print the column names to inspect what is available
names(raw)
# Keep columns 2, 7, 8, 10 and 23 to 44, then drop rows with no GDPgroup
data <- raw[, c(2, 7, 8, 10, 23:44)]
data <- data[!is.na(data$GDPgroup), ]
library(reshape)
# Reshape the data to long form, keeping Country, GDPCAP and GDPgroup as
# identifier columns (argument spelled out instead of the partial `id=`)
data.melt <- melt(data, id.vars = c("Country", "GDPCAP", "GDPgroup"))
# Rows for China only
data.melt.china <- data.melt[data.melt$Country == "China", ]
data.melt.china$variable <- with(data.melt.china,
library(twitteR)
library(plyr)
library(ggplot2)
# Fetch the air-data tweets: request 660 tweets from each of the
# beijingair, CGShanghaiAir and Guangzhou_Air timelines
airb <- userTimeline("beijingair", n=660)
airs <- userTimeline("CGShanghaiAir", n=660)
airg <- userTimeline("Guangzhou_Air", n=660)
# After extracting the text, split it with regular expressions
# Import the data: read d:\honglou.txt line by line, with the input
# strings marked as UTF-8
text <- readLines('d:\\honglou.txt',encoding='UTF-8')
library(ggplot2)
library(rmmseg4j)
library(tm)
library(MASS)
library(proxy)
#去除空白行
\documentclass[UTF8,10pt]{ctexart}
\usepackage[a4paper,%%textwidth=129mm,textheight=185mm, %%193-8
text={160mm,260mm},centering]{geometry}
\pagestyle{empty}
\begin{document}
\title{用散点图示范ggplot2的核心概念}
\author{肖凯}
\maketitle
\abstract{
本文稿是第五届R语言会议演讲内容的一部分,试图用散点图示例来说明ggplot2包的核心概念,以方便初学者快速上手。同时这也是笔者应用knitr包的一个练习。该示例所用数据是ggplot2包内带的mpg数据集。}
@xccds
xccds / glmnet.R
Last active November 15, 2020 18:00
library(ggplot2)
library(glmnet)
library(reshape2)
# Read the data; the file has no header row, so the columns get the
# default names V1, V2, V3
data <- read.csv("d:/ex2data2.txt", header = FALSE)
# Scatter plot of V1 against V2, with both colour and point shape keyed
# to V3 treated as a factor
ggplot() +
  geom_point(
    data = data,
    aes(V1, V2, colour = factor(V3), shape = factor(V3)),
    size = 3
  )