Created
March 2, 2013 08:14
-
-
Save gin0606/5070142 to your computer and use it in GitHub Desktop.
Twitterから落とした全ツイートをごにょごにょするためのやつ
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8; -*- | |
import codecs | |
import json | |
def parse_tweet_file(file_name):
    """Print the text of every tweet found in one tweet data file.

    The first line of the file is skipped; everything after it is parsed
    as a single JSON document and iterated over, so it is expected to be
    a JSON array of tweet objects.

    Args:
        file_name: Path to the tweet data file. It is read as UTF-8.

    Raises:
        IOError: If the file cannot be opened.
        ValueError: If the file is not valid UTF-8 or the content after
            the first line is not valid JSON.
    """
    # Decode as UTF-8 once, at the I/O boundary.  Reading with another
    # codec and calling .decode("utf-8") on the already-decoded text only
    # works for pure-ASCII files.  The ``with`` block closes the handle
    # even when reading or parsing fails.
    with codecs.open(file_name, "r", "utf-8") as f:
        f.readline()  # The first line is not needed.
        tweets_json = json.loads(f.read())

    for tweet in tweets_json:
        # Put any per-tweet processing here.
        # A tweet without a 'text' key prints None.
        print(tweet.get('text'))
def parse_tweet_archive(archive_dir="./tweets"):
    """Process every tweet data file listed in the archive's index.

    Reads ``<archive_dir>/data/js/tweet_index.js``, removes the
    ``var tweet_index = `` JavaScript assignment prefix so the remainder
    can be parsed as JSON, and passes the ``file_name`` of each index
    entry (resolved relative to ``archive_dir``) to ``parse_tweet_file``.

    Args:
        archive_dir: Root directory of the extracted archive.  Defaults
            to ``"./tweets"``, the location that was previously
            hard-coded.

    Raises:
        IOError: If the index file cannot be opened.
        ValueError: If the index is not valid UTF-8 or not valid JSON
            once the assignment prefix has been removed.
        KeyError: If an index entry has no ``file_name`` key.
    """
    tweet_index = archive_dir + "/data/js/tweet_index.js"

    # Decode as UTF-8 directly and let the ``with`` block close the
    # handle; a second .decode("utf-8") on decoded text breaks on any
    # non-ASCII content.
    with codecs.open(tweet_index, "r", "utf-8") as f:
        tweet_index_json = f.read()

    # The index is a JavaScript assignment, not bare JSON.
    tweet_index_json = tweet_index_json.replace("var tweet_index = ", "")

    for i in json.loads(tweet_index_json):
        parse_tweet_file(archive_dir + "/" + i["file_name"])
if __name__ == '__main__':
    # Script entry point: process the archive, then report completion.
    parse_tweet_archive()
    # Parenthesised so the line is valid on Python 3 as well; with a
    # single argument it prints the same text on Python 2.
    print('finish')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment