Created
January 8, 2012 05:18
-
-
Save krrrr38/1577321 to your computer and use it in GitHub Desktop.
twitter data #programming
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
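'''Fetch a StatusList for the hashtag #programming and save the text to files.
1. Fetch data using "#programming" as the search condition
2. Replace each of ", . ( )" with a space
3. Convert to lower case
#4. Split the text on spaces
#5. If a word starts with "#", remove the "#"
'''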
import twitter | |
import re | |
def convert(text):
    """Normalize a status text and return it as a UTF-8 encoded byte string.

    Each of the characters ``, . ( )`` is replaced by a single space and the
    result is lower-cased.

    Args:
        text: The status text, either as unicode text or as UTF-8 encoded
            bytes.

    Returns:
        The normalized text, encoded as UTF-8 bytes.
    """
    # Work on decoded text: lower-casing already-encoded bytes only folds
    # ASCII A-Z and leaves non-ASCII capitals untouched.
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    text = re.sub(r'[,\.\(\)]', ' ', text)
    text = text.lower()
    # Encode exactly once, at the output boundary.
    return text.encode('utf-8')
def writeFile(searchList, fo):
    """Write the normalized text of every status to ``fo``, one per line.

    Args:
        searchList: Iterable of status objects exposing a ``text`` attribute.
        fo: Writable file object. It is closed before this function returns,
            even when a write fails.
    """
    try:
        # Iterate the statuses directly so the first one (index 0) is kept;
        # an index loop starting at 1 silently drops it.
        for status in searchList:
            fo.write(convert(status.text) + '\n')
    finally:
        # This function has always closed the file it was given; keep doing
        # so, but also on the error path.
        fo.close()
def getTimeLine(LANG="en", SEARCH_WORD="#programming",
                per_page=100, start_page=1, end_page=51):
    """Fetch search result pages and save each page to its own file.

    For every page number in ``[start_page, end_page)`` the search results
    are handed to ``writeFile``, which stores them in ``./tmp/<LANG><page>``.

    Args:
        LANG: Language code passed to the search as ``lang``; also used as
            the output file-name prefix.
        SEARCH_WORD: Term to search for.
        per_page: Passed to the search as ``per_page``.
        start_page: First page number to fetch (inclusive).
        end_page: Page number at which to stop (exclusive).
    """
    import os  # local import: only needed to prepare the output directory

    out_dir = './tmp/'
    # open() does not create missing directories, so make sure it exists.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    api = twitter.Api()
    for page in range(start_page, end_page):
        searchList = api.GetSearch(
            SEARCH_WORD, per_page=per_page, page=page, lang=LANG)
        fileName = out_dir + LANG + str(page)
        # The with-block guarantees the handle is released even if writeFile
        # raises; closing an already-closed file is harmless.
        with open(fileName, 'w') as f:
            writeFile(searchList, f)
# Script entry point: when executed directly (not imported), run the
# download with all default arguments.
if __name__ == "__main__":
    getTimeLine()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment