Skip to content

Instantly share code, notes, and snippets.

@mia-0032
Last active December 21, 2015 13:09
Show Gist options
  • Select an option

  • Save mia-0032/6310570 to your computer and use it in GitHub Desktop.

Select an option

Save mia-0032/6310570 to your computer and use it in GitHub Desktop.
wコメントとGJコメントの相関ってあるのか調べるためのスクリプト
# -*- coding: utf-8 -*-
import codecs
import MySQLdb
import sys
import config # my_config
# Rebind stdout to a UTF-8 stream writer so text printed later in the script
# is encoded as UTF-8 on output.
# NOTE(review): this looks like the Python 2 idiom for unicode-safe printing;
# confirm the target interpreter before running it elsewhere.
sys.stdout = codecs.getwriter('utf_8')(sys.stdout)
def getDbCursor():
    """Connect to MySQL with the settings from ``config`` and return a cursor.

    The user, password, schema and connection charset are read from the
    ``config`` module. Only the cursor is returned to the caller.
    """
    connect_args = {
        'user': config.MYSQL_USER,
        'passwd': config.MYSQL_PASS,
        'db': config.MYSQL_SCHEMA,
        'charset': config.MYSQL_CHARSET,
    }
    connection = MySQLdb.connect(**connect_args)
    return connection.cursor()
def countRegComment(c, regexp):
    """Count, per video, the comments whose regularized text matches a regexp.

    Args:
        c: DB-API cursor used to run the query (``execute`` / ``fetchall``).
        regexp: MySQL regular expression matched against
            ``comment.regularized_comment``.

    Returns:
        Whatever ``c.fetchall()`` yields for the query: one
        ``(video_id, count)`` row per video with at least one matching comment.
    """
    # The pattern is bound as a query parameter rather than spliced into the
    # SQL text, so a quote or backslash in it cannot break or alter the
    # statement.
    sql_www = ("SELECT video_id, COUNT(*) AS count "
               "FROM comment "
               "WHERE regularized_comment REGEXP %s "
               "GROUP BY video_id;")
    # Echo a human-readable rendering of the statement for debugging only;
    # the value actually sent to the server is the bound parameter below.
    print(sql_www % ("'" + regexp + "'",))
    c.execute(sql_www, (regexp,))
    return c.fetchall()
# Count, per video, the comments matching each pattern: runs of "w" and
# runs of "GJ".
c = getDbCursor()
www_video_list = countRegComment(c, 'w+')
gj_video_list = countRegComment(c, '(GJ)+')

# Index the (video_id, count) rows by video_id for direct lookup.
www_videos = dict(www_video_list)
gj_videos = dict(gj_video_list)

# A video may appear in only one of the two result sets, so take the union.
# Sorting makes the output file reproducible from run to run.
all_video_ids = sorted(set(www_videos) | set(gj_videos))

# `with` guarantees the file is flushed and closed, even if a write fails.
with open('result.txt', 'w') as f:
    # Header row: the accompanying R analysis reads this file with
    # header=TRUE and refers to the count columns as `www` and `gj`.
    f.write("\t".join(['video_id', 'www', 'gj']) + "\n")
    for video_id in all_video_ids:
        # A video with no comment matching a pattern gets an explicit 0.
        result = [video_id,
                  str(www_videos.get(video_id, 0)),
                  str(gj_videos.get(video_id, 0))]
        f.write("\t".join(result) + "\n")
# Load the per-video counts: a tab-separated file whose header row must name
# the two count columns `www` and `gj`, which are referenced below.
data <- read.table('result.txt', header=TRUE, sep="\t", fileEncoding="utf8")
summary(data)

# Work on a log scale; the +1 keeps videos with a zero count finite.
data$www <- log(data$www + 1)
data$gj <- log(data$gj + 1)

# Scatter plot of the log counts. plot() opens its own frame, so a preceding
# plot.new() would only produce an extra blank frame.
plot(data$www, data$gj, xlim=c(0, 12), ylim=c(0, 12),
     xlab="log(www_count)", ylab="log(gj_count)")

# Try a linear regression of gj on www and draw the fitted line.
# abline() adds to the current plot, so no par(new=TRUE) is required.
result <- lm(gj ~ www, data=data)
abline(result, col="black")
summary(result)

# Evenly spaced www values across the plotted range at which the interval
# bounds are evaluated.
new <- data.frame(www = seq(0, 12, 0.1))

# Prediction interval (blue, dashed). Columns 2 and 3 of the predict() result
# are the lower and upper bounds.
result.pre <- predict(result, new, interval="prediction")
lines(new$www, result.pre[,2], lty=2, col="blue")
lines(new$www, result.pre[,3], lty=2, col="blue")

# Confidence interval (red, dashed).
result.con <- predict(result, new, interval="confidence")
lines(new$www, result.con[,2], lty=2, col="red")
lines(new$www, result.con[,3], lty=2, col="red")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment