Skip to content

Instantly share code, notes, and snippets.

@rolph-recto
Created July 3, 2013 20:12
Show Gist options
  • Save rolph-recto/5922373 to your computer and use it in GitHub Desktop.
Save rolph-recto/5922373 to your computer and use it in GitHub Desktop.
Script to classify tweets manually
# classify_politics.py
# manually classify tweets as political or not
import sys
import json
def ask_political(read_answer):
    """Ask whether the current tweet is political until a valid reply arrives.

    read_answer -- callable taking the prompt string and returning the reply.

    Returns True for the reply "y" and False for "n"; any other reply
    causes the question to be asked again.
    """
    while True:
        answer = read_answer("Is this a political tweet (y/n)?")
        if answer == "y":
            return True
        if answer == "n":
            return False


def classify_tweets(tweets, begin=0, end=None, read_answer=None):
    """Interactively label tweets[begin:end] as political or not.

    Each tweet in the slice is printed (id and ASCII-safe text) and its
    "political" key is set to True or False according to the user's reply.

    tweets      -- list of dicts, each with "id" and "text" keys
    begin, end  -- slice bounds; end defaults to len(tweets)
    read_answer -- prompt function; defaults to the interpreter's
                   line-reading builtin (raw_input on Python 2, input on 3)

    Returns the number of tweets that were labelled.  If the input stream
    ends or the user interrupts, labelling stops early and the tweets
    labelled so far keep their labels.
    """
    if read_answer is None:
        try:
            read_answer = raw_input  # Python 2
        except NameError:
            read_answer = input  # Python 3
    if end is None:
        end = len(tweets)

    labelled = 0
    for tweet in tweets[begin:end]:
        # Round-trip through ASCII so the text is printable on any terminal
        # and formats as text (not a bytes repr) on Python 3.
        text = tweet["text"].encode("ascii", "replace").decode("ascii")
        print("ID: {0} \n {1}".format(tweet["id"], text))
        try:
            tweet["political"] = ask_political(read_answer)
        except (EOFError, KeyboardInterrupt):
            # Stop here rather than crash, so the caller can still save
            # the labels entered so far.
            break
        labelled += 1
    return labelled


def main(argv):
    """Run the classifier: argv is [program, json_path] or
    [program, json_path, begin, end].

    Reads the tweet list from json_path, labels the requested range
    interactively, and writes the whole list back to the same file.
    """
    if len(argv) < 2:
        program = argv[0] if argv else "classify_politics.py"
        sys.exit("usage: {0} TWEETS.json [BEGIN END]".format(program))
    path = argv[1]

    # read tweets from JSON file
    with open(path, "r") as f:
        tweets = json.load(f)

    # a range is honoured only when both bounds are given
    begin, end = 0, len(tweets)
    if len(argv) >= 4:
        begin, end = int(argv[2]), int(argv[3])

    classify_tweets(tweets, begin, end)

    # write back the tweets into the JSON file
    with open(path, "w") as f:
        json.dump(tweets, f)


if __name__ == "__main__":
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment