Last active
May 6, 2018 04:47
-
-
Save yatt/58e95218a7bdc4a2648c19836b2824cb to your computer and use it in GitHub Desktop.
ツイート全履歴データから、今日から1年前/2年前のツイート一覧を出力する
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python2.7 | |
# coding: utf-8 | |
import datetime | |
# Directory of the downloaded full tweet-history archive.
TWEET_HISOTRY_DIR = u'C:/Users/XXX/twitter全履歴'

# Aliases for JavaScript literals, so that archive content can be evaluated
# as Python expressions.  `null` is included as well: without it, any `null`
# value in the data raises NameError when evaluated.
false = False
true = True
null = None
def read_status_history_chunk(target_dt):
    """
    Load the tweets stored for the year/month that ``target_dt`` falls in.

    The file read is ``<TWEET_HISOTRY_DIR>/data/js/tweets/<YYYY>_<MM>.js``.
    Its first line (a JavaScript assignment statement) is discarded and the
    remainder is parsed as JSON.

    Args:
        target_dt: Object exposing integer ``year`` and ``month`` attributes,
            e.g. a ``datetime.datetime``.

    Returns:
        The decoded JSON value; callers in this file treat it as a list of
        tweet dicts.  All strings are returned as unicode text.

    Raises:
        IOError: The month file does not exist or cannot be read.
        ValueError: The content after the first line is not valid JSON.
    """
    import io
    import json

    target_ym = u'%d_%02d' % (target_dt.year, target_dt.month)
    # Only the file name is formatted; the directory is concatenated as-is so
    # that a '%' character in the archive directory cannot break formatting.
    path = TWEET_HISOTRY_DIR + u'/data/js/tweets/' + target_ym + u'.js'
    # NOTE(review): the archive is assumed to be UTF-8 encoded -- confirm.
    with io.open(path, encoding='utf-8') as ifs:
        ifs.readline()  # drop the leading assignment line
        content = ifs.read()
    # SECURITY: this used to eval() the file content as Python source, which
    # executes arbitrary code from the data file and fails on JavaScript
    # `null`.  The payload is JSON, so parse it as such.
    return json.loads(content)
def pretty_print_status(status): | |
print status['created_at'], status['text'].encode('cp932', 'ignore') | |
def tweets_at(target_dt, exclude_rt=False): | |
""" | |
指定された日付のツイートを拾う | |
""" | |
chunk = read_status_history_chunk(target_dt) | |
target_dt_str = u'%d-%02d-%02d' % (target_dt.year, target_dt.month, target_dt.day) | |
target_dt_status_list = [c for c in chunk if c['created_at'].startswith(target_dt_str)] | |
if exclude_rt: | |
target_dt_status_list = [s for s in target_dt_status_list if not s['text'].startswith('RT')] | |
print u'----------------------------------------' | |
for status in target_dt_status_list: | |
pretty_print_status(status) | |
print u'----------------------------------------' | |
def main():
    """
    Print the tweets posted on today's date one year ago and two years ago.

    Retweets are excluded.  When today is Feb 29 and the target year is not a
    leap year, Feb 28 of that year is used instead of failing.
    """
    today = datetime.datetime.now()
    for years_back in (1, 2):
        target_year = today.year - years_back
        try:
            target_dt = datetime.datetime(target_year, today.month, today.day)
        except ValueError:
            # Today's month/day does not exist in the target year (Feb 29 in
            # a non-leap year): fall back to the previous day.
            target_dt = datetime.datetime(target_year, today.month, today.day - 1)
        tweets_at(target_dt, exclude_rt=True)
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment