Skip to content

Instantly share code, notes, and snippets.

View chengjun's full-sized avatar
🏠
Working from home

Cheng-Jun Wang chengjun

🏠
Working from home
View GitHub Profile
# -*- coding: utf-8 -*-
"""
Spyder Editor
This temporary script file is located here:
D:\chengjun\WinPython-64bit-2.7.6.4\settings\.spyder2\.temp.py
"""
'''
# Step2: split the duplicated data into about 2000+ files by user ids
# !/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import division
import os,time,string,random
def network():
#select the content which has been shared more than 500 times
user = {}
fname = 'd:/renren/id2string.txt'
# -*- coding: utf-8 -*-
"""
Spyder Editor
This temporary script file is located here:
C:\Users\chengwang6\.spyder2\.temp.py
"""
# I download the dump from https://ia601509.us.archive.org/10/items/stackexchange/
# I copy the names into a stack_namelist.txt
#---------load data------------#
# Working directory holding the Digg vote exports.
setwd("F:/digg/")
# Use the documented read.csv argument names (header, na.strings) instead of
# relying on R's partial argument matching ("head=", "na.string="), and the
# full TRUE/FALSE constants instead of the redefinable T/F shorthands.
ft = read.csv("./final_front_zero_mean_time.csv", header = TRUE, na.strings = 'NA', stringsAsFactors = TRUE)
dat = read.csv("./digg_votes_threshold.csv", header = TRUE, stringsAsFactors = FALSE)
# columns: storyid user time threshold
# 1 day as a session
time = dat$time
# Vote timestamps are Unix epoch seconds; convert to calendar dates.
time = as.POSIXct(time, origin = "1970-01-01")
# Day index relative to day 14394 — presumably the dataset's first day,
# so the series starts at 0. TODO confirm against the raw data.
time = as.numeric(as.Date(time)) - 14394
# Divide 5 by each i in [-10, 10); stop early once i reaches 3.
# Fixes from review: indentation restored (lost in the paste), the bare
# `except:` narrowed to ZeroDivisionError so real bugs are not silently
# swallowed, and Python-2 print statements converted to print() calls.
for i in range(-10, 10):
    try:
        a = 5 / i
        print(a)
    except ZeroDivisionError:
        # Only i == 0 can raise here.
        print('i = 0')
    if i == 3:
        print("i ==3")
        break
# -*- coding: utf-8 -*-
"""
Spyder Editor
This temporary script file is located here:
C:\Users\chengwang6\.spyder2\.temp.py
"""
import os
import urllib2
from bs4 import BeautifulSoup
import os
import urllib2
from bs4 import BeautifulSoup
from time import clock
from time import sleep
from random import randint
# Showing 31,065 closed projects with a public gallery. 1295 pages
# aggregate pages-->thread names--->thread replies
'''
@chengjun
chengjun / calculate_sentiment
Last active August 29, 2015 14:03
calculate_sentiment
def caculate_single_sentence_sentiment(group):
gsen = 0
no_words = group[0][0][0][0][0] # 否定词
level_words = group[0][0][0][0][1] # 程度词
sen_words = group[0][0][0][0][2] # 情感词
no_words_positions = [no_word[2] for no_word in no_words]
level_words_positions = [level_word[2] for level_word in level_words]
num_no_words = len(no_words)
weight = 1
# 第一层:根据否定词的个数进行句法分析
@chengjun
chengjun / mass-elite robustness test.R
Last active August 29, 2015 14:02
mass-elite robustness test
######################
"mass-elite robustness test"
######################
# Load the human-coded Ashoka project labels. The CSV has no header row,
# so columns arrive as V1, V2, ... and stay character (no factor coercion).
topic = read.csv("./human coding_Ashoka_combined_label_20140404.csv",
header = FALSE, stringsAsFactors = FALSE)
# Display names for the six coded topic categories; presumably these map
# onto the label columns/sections of `topic` — verify against the coding scheme.
cat_names = c("Environment", "Civic Engagement", "Learning/Education",
"Human Rights", "Health", "Economic Development" )
# section_id = 3
@chengjun
chengjun / scrape thread info using mechanize in python
Created June 2, 2014 04:58
scrape thread info using mechanize in python
# -*- coding: utf-8 -*-
"""
Spyder Editor
This temporary script file is located here:
C:\Users\chengwang6\Desktop\WinPython-64bit-2.7.6.4\settings\.spyder2\.temp.py
"""
import mechanize
import cookielib
from bs4 import BeautifulSoup