This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib2 | |
from bs4 import BeautifulSoup | |
import sys | |
# Get the link for each chapter: download the index page, parse it, and
# keep the table cells from position 1000 onwards.
url = "http://www.23wx.com/html/50/50550/"  # novel index page ("San Jie Du Zun")
content = urllib2.urlopen(url).read()
soup = BeautifulSoup(content)
# The first 1000 <td> cells are skipped; only the remainder is kept.
links = soup.find_all('td')[1000:]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Tue Jul 07 15:40:57 2015 | |
@author: chengwang6 | |
""" | |
import urllib2 | |
from bs4 import BeautifulSoup | |
## Set the seed of the crawler: the URL it starts from (a Google Scholar
## citations page).
seed = 'https://scholar.google.nl/citations?user=nNdt_G8AAAAJ&hl=en&oe=ASCII'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read every line of the RTF export into the list `news`; the file is
# closed automatically when the `with` block exits.
with open("F:/百度云同步盘/Computational Communication/Data/占中数据20150328/zz-hk-2013.1-2013.3.rtf") as f:
    news = f.readlines()
def stringclean(s):
    r"""Strip RTF formatting residue from a string.

    Removes the two fixed font/colour control runs, every ``\par``
    marker and every newline character.

    Args:
        s: The raw text to clean.

    Returns:
        The string with those sequences removed; all other text is
        left untouched.
    """
    s = s.replace(r'\loch\af0\hich\af0\dbch\f15 \b\cf6 ', '')
    s = s.replace(r'\loch\af0\hich\af0\dbch\f15 \b0\cf0 ', '')
    # Raw string: the plain literal '\par' relied on an invalid escape
    # sequence, which newer interpreters warn about.
    s = s.replace(r'\par', '').replace('\n', '')
    return s
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Title: Back to basics: High quality plots using base R graphics | |
### An interactive tutorial for the Davis R Users Group meeting on April 24, 2015 | |
### | |
### Date created: 20150418 | |
### Last updated: 20150423 | |
### | |
### Author: Michael Koontz | |
### Email: [email protected] | |
### Twitter: @michaeljkoontz | |
### |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(igraph) | |
# Generate a social graph with igraph's Barabasi-Albert generator and plot it.
node_number = 100
g = barabasi.game(node_number); plot(g)
# Draw the initial diffusers from the graph's vertices; the fixed RNG seed
# makes the sample reproducible.
seeds_num = 1
set.seed(2014); diffusers = sample(V(g), seeds_num); diffusers
# `infected` starts with the initial diffusers as its first element
# (presumably one element per diffusion step -- confirm against later code).
infected = list()
infected[[1]] = diffusers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Wed Aug 13 21:28:10 2014 | |
@author: v_chjwang | |
""" | |
from os import listdir | |
import glob | |
from collections import defaultdict |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf8 -*- | |
from weibo import APIClient | |
import urllib2 | |
import urllib | |
import sys | |
import time | |
from time import clock | |
import random |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
from collections import defaultdict, Counter | |
import glob | |
# Python 2 idiom: sys.setdefaultencoding is removed from the module at
# startup, so sys is reloaded to get it back before switching the default
# string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')
path = "D:/chengjun/renren/"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
step3: delete duplicates, sort data and save data
'''
import os | |
import glob | |
from collections import defaultdict | |
# Data directory for this step (presumably holds the sorted friend files
# -- confirm against the code that uses it).
path = "D:/renren/friends_sorted/"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
# Step2: split the duplicated data into about 2000+ files by user ids
# to prepare for deleting the duplicated ties
'''
from collections import defaultdict | |
path = "D:/renren/"
# Opened in the default read mode. The handle is not closed in the lines
# visible here.
bigfile = open(path + "friends_all.txt")
NewerOlder