Skip to content

Instantly share code, notes, and snippets.

@kurasaiteja
Created May 28, 2020 17:59
Show Gist options
  • Save kurasaiteja/fb882cf9e503261042bf644129b3645d to your computer and use it in GitHub Desktop.
Save kurasaiteja/fb882cf9e503261042bf644129b3645d to your computer and use it in GitHub Desktop.
def flatten_tweet(tweet_obj):
    """Copy selected nested tweet fields to top-level keys, in place.

    Always adds ``'user-screen_name'``. When the corresponding nested object
    is present, also adds ``'extended_tweet-full_text'``,
    ``'retweeted_status-user-screen_name'`` / ``'retweeted_status-text'`` and
    ``'quoted_status-user-screen_name'`` / ``'quoted_status-text'``.

    Args:
        tweet_obj: A decoded tweet dictionary. It is mutated.

    Returns:
        The same ``tweet_obj`` instance, for convenient chaining.

    Raises:
        KeyError: If ``'user'`` (or an expected nested key) is missing.
    """
    # Store the user screen name in 'user-screen_name'
    tweet_obj['user-screen_name'] = tweet_obj['user']['screen_name']

    # Check if this is a 140+ character tweet
    if 'extended_tweet' in tweet_obj:
        # Store the extended tweet text in 'extended_tweet-full_text'
        tweet_obj['extended_tweet-full_text'] = tweet_obj['extended_tweet']['full_text']

    if 'retweeted_status' in tweet_obj:
        retweet = tweet_obj['retweeted_status']
        # Store the retweet user screen name in 'retweeted_status-user-screen_name'
        tweet_obj['retweeted_status-user-screen_name'] = retweet['user']['screen_name']
        # Store the retweet text in 'retweeted_status-text'
        tweet_obj['retweeted_status-text'] = retweet['text']

    if 'quoted_status' in tweet_obj:
        quoted = tweet_obj['quoted_status']
        # Store the quoted tweet user screen name in 'quoted_status-user-screen_name'
        tweet_obj['quoted_status-user-screen_name'] = quoted['user']['screen_name']
        # Store the quoted tweet text in 'quoted_status-text'
        tweet_obj['quoted_status-text'] = quoted['text']

    return tweet_obj


def parse_tweet_lines(lines):
    """Decode an iterable of JSON lines into flattened tweet dictionaries.

    Empty and whitespace-only lines are skipped, so a stray blank or
    space-filled line does not reach ``json.loads`` and abort the load.

    Args:
        lines: Iterable of strings, one JSON document per line (an open
            text file works).

    Returns:
        A list of tweet dictionaries, each processed by ``flatten_tweet``.
    """
    return [flatten_tweet(json.loads(line)) for line in lines if line.strip()]


tweets = []
files = list(glob.iglob('/content/tweets.json'))
for f in files:
    # The context manager closes the handle even if a line fails to parse;
    # iterating the file streams it instead of loading it whole.
    with open(f, 'r', encoding='utf-8') as fh:
        tweets.extend(parse_tweet_lines(fh))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment