kurasaiteja · May 28, 2020 19:19
diff --git a/countwords.py b/countwords.py
 def check_word_in_tweet(word, data):
     """Flag the rows of a Twitter dataset whose text mentions a word.

     The search is case-insensitive and covers the tweet text, the extended
     tweet text (140+ character tweets), the quoted tweet text and the
     retweeted tweet text.

     Args:
         word: Pattern to look for. It is handed to ``Series.str.contains``
             unchanged, so pandas' default pattern handling applies.
         data: DataFrame holding the columns 'text',
             'extended_tweet-full_text', 'quoted_status-text' and
             'retweeted_status-text'.

     Returns:
         A boolean pandas Series indexed like ``data``; True where at least
         one of the four text fields contains ``word``.

     Raises:
         KeyError: If one of the four text columns is missing from ``data``.
     """
     other_text_columns = (
         'extended_tweet-full_text',
         'quoted_status-text',
         'retweeted_status-text',
     )
     # na=False makes a missing field count as "no match". Without it the
     # mask carries NaN, which can hide a match found in another field and
     # leaves the result with a non-boolean dtype.
     contains_column = data['text'].str.contains(word, case=False, na=False)
     for column in other_text_columns:
         contains_column |= data[column].str.contains(word, case=False, na=False)
     return contains_column
  
 # Flag the tweets that mention "python" in any of the text fields
 python = check_word_in_tweet('python', df_tweet)
 # Flag the tweets that mention "javascript" in any of the text fields
 js = check_word_in_tweet('javascript', df_tweet)

 # Both proportions share the total number of rows as their denominator
 tweet_count = len(df_tweet)

 # Share of all tweets that mention python
 print("Proportion of #python tweets:", python.sum() / tweet_count)

 # Share of all tweets that mention javascript
 print("Proportion of #javascript tweets:", js.sum() / tweet_count)
	def check_word_in_tweet(word, data):
		"""Flag the rows of a Twitter dataset whose text mentions a word.

		The search is case-insensitive and covers the tweet text, the extended
		tweet text (140+ character tweets), the quoted tweet text and the
		retweeted tweet text.

		Args:
			word: Pattern to look for. It is handed to ``Series.str.contains``
				unchanged, so pandas' default pattern handling applies.
			data: DataFrame holding the columns 'text',
				'extended_tweet-full_text', 'quoted_status-text' and
				'retweeted_status-text'.

		Returns:
			A boolean pandas Series indexed like ``data``; True where at least
			one of the four text fields contains ``word``.

		Raises:
			KeyError: If one of the four text columns is missing from ``data``.
		"""
		other_text_columns = (
			'extended_tweet-full_text',
			'quoted_status-text',
			'retweeted_status-text',
		)
		# na=False makes a missing field count as "no match". Without it the
		# mask carries NaN, which can hide a match found in another field and
		# leaves the result with a non-boolean dtype.
		contains_column = data['text'].str.contains(word, case=False, na=False)
		for column in other_text_columns:
			contains_column |= data[column].str.contains(word, case=False, na=False)
		return contains_column

	# Flag the tweets that mention "python" in any of the text fields
	python = check_word_in_tweet('python', df_tweet)
	# Flag the tweets that mention "javascript" in any of the text fields
	js = check_word_in_tweet('javascript', df_tweet)

	# Both proportions share the total number of rows as their denominator
	tweet_count = len(df_tweet)

	# Share of all tweets that mention python
	print("Proportion of #python tweets:", python.sum() / tweet_count)

	# Share of all tweets that mention javascript
	print("Proportion of #javascript tweets:", js.sum() / tweet_count)