Created
May 28, 2020 19:19
-
-
Save kurasaiteja/df25a4c477e6b625c3474f514cb3d0b0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def check_word_in_tweet(word, data):
    """Flag the tweets that mention ``word`` in any of their text fields.

    The search covers the tweet's own text, its extended (140+ character)
    text, and the text of any quoted or retweeted status.  Matching is
    case-insensitive.  ``word`` is handed to ``Series.str.contains`` with
    its default pattern handling, so pandas treats it as a regular
    expression.

    Args:
        word: Pattern to look for.
        data: Tweet table (e.g. a pandas DataFrame) providing the string
            columns ``text``, ``extended_tweet-full_text``,
            ``quoted_status-text`` and ``retweeted_status-text``.

    Returns:
        A boolean pandas Series aligned with ``data``: True where at least
        one of the four fields contains ``word``.

    Raises:
        KeyError: If ``data`` lacks one of the four text columns.
    """
    text_columns = (
        'text',
        'extended_tweet-full_text',
        'quoted_status-text',
        'retweeted_status-text',
    )

    contains_column = None
    for column in text_columns:
        # na=False makes a missing field count as "no match".  Without it a
        # missing value yields NaN, which turns the mask into an object
        # Series and can hide a genuine match found in another column.
        hits = data[column].str.contains(word, case=False, na=False)
        if contains_column is None:
            contains_column = hits
        else:
            contains_column = contains_column | hits
    return contains_column
# Boolean mask: tweets mentioning "python" in any of their text fields.
python = check_word_in_tweet('python', df_tweet)

# Boolean mask: tweets mentioning "javascript" in any of their text fields.
js = check_word_in_tweet('javascript', df_tweet)

# Share of all tweets that mention #python.
print("Proportion of #python tweets:", python.sum() / len(df_tweet))

# Share of all tweets that mention #javascript.
print("Proportion of #javascript tweets:", js.sum() / len(df_tweet))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment