This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Spyder Editor | |
This temporary script file is located here: | |
D:\chengjun\WinPython-64bit-2.7.6.4\settings\.spyder2\.temp.py | |
""" | |
''' | |
# Step2: split the duplicated data into about 2000+ files by user ids |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
from __future__ import division | |
import os,time,string,random | |
def network(): | |
#select the content which has been shared more than 500 times | |
user = {} | |
fname = 'd:/renren/id2string.txt' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Spyder Editor | |
This temporary script file is located here: | |
C:\Users\chengwang6\.spyder2\.temp.py | |
""" | |
# I downloaded the dump from https://ia601509.us.archive.org/10/items/stackexchange/ | |
# and copied the file names into stack_namelist.txt |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#---------load data------------# | |
setwd("F:/digg/") | |
ft = read.csv("./final_front_zero_mean_time.csv", head=T, na.string='NA', stringsAsFactors=T) | |
dat = read.csv("./digg_votes_threshold.csv", header=T, stringsAsFactors = F) | |
# storyid user time threshold | |
# 1 day as a session | |
time = dat$time | |
time = as.POSIXct(time, origin="1970-01-01") | |
time = as.numeric(as.Date(time)) - 14394 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demo of try/except inside a loop: divide 5 by every integer starting at
# -10, report the one value that cannot be used as a divisor, and leave the
# loop as soon as i reaches 3.
for i in range(-10, 10):
    try:
        a = 5/i
    except ZeroDivisionError:
        # Only i == 0 can fail here. Catching the specific exception (rather
        # than a bare ``except:``) keeps unrelated errors such as
        # KeyboardInterrupt from being silently swallowed.
        print('i = 0')
    else:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(a)
    if i == 3:
        print("i ==3")
        break
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Spyder Editor | |
This temporary script file is located here: | |
C:\Users\chengwang6\.spyder2\.temp.py | |
""" | |
import os | |
import urllib2 | |
from bs4 import BeautifulSoup |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import urllib2 | |
from bs4 import BeautifulSoup | |
from time import clock | |
from time import sleep | |
from random import randint | |
# Showing 31,065 closed projects with a public gallery. 1295 pages | |
# aggregate pages-->thread names--->thread replies | |
''' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def caculate_single_sentence_sentiment(group): | |
gsen = 0 | |
no_words = group[0][0][0][0][0] # 否定词 | |
level_words = group[0][0][0][0][1] # 程度词 | |
sen_words = group[0][0][0][0][2] # 情感词 | |
no_words_positions = [no_word[2] for no_word in no_words] | |
level_words_positions = [level_word[2] for level_word in level_words] | |
num_no_words = len(no_words) | |
weight = 1 | |
# 第一层:根据否定词的个数进行句法分析 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###################### | |
"mass-elite robustness test" | |
###################### | |
topic = read.csv("./human coding_Ashoka_combined_label_20140404.csv", | |
header = FALSE, stringsAsFactors = FALSE) | |
cat_names = c("Environment", "Civic Engagement", "Learning/Education", | |
"Human Rights", "Health", "Economic Development" ) | |
# section_id = 3 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Spyder Editor | |
This temporary script file is located here: | |
C:\Users\chengwang6\Desktop\WinPython-64bit-2.7.6.4\settings\.spyder2\.temp.py | |
""" | |
import mechanize | |
import cookielib | |
from bs4 import BeautifulSoup |