Created
October 11, 2020 18:08
-
-
Save esenthil2018/f1bc81b8c60573de1ca5640222d72a2f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys

import pandas as pd
import requests
# tweepy and IPython are used below but were never imported by the original.
import tweepy
from IPython.display import clear_output
# Twitter application credentials — replace with your own before running.
TWITTER_KEY = 'Your twitter key'
TWITTER_SECRET_KEY = 'Your secret key'

# Authenticate with app-only (OAuth 2) credentials. wait_on_rate_limit makes
# tweepy sleep through rate-limit windows instead of raising mid-scrape.
# NOTE(review): wait_on_rate_limit_notify exists only in tweepy 3.x — it was
# removed in tweepy 4; this script assumes tweepy 3.x throughout.
auth = tweepy.AppAuthHandler(TWITTER_KEY, TWITTER_SECRET_KEY)
api = tweepy.API(auth, wait_on_rate_limit=True,
                 wait_on_rate_limit_notify=True)
if not api:
    # Bail out early if construction failed; nothing below can work.
    print("Can't Authenticate")
    sys.exit(-1)
#@title Twitter Search API Inputs
#@markdown ### Enter Search Query:
# The #@param / #@markdown comments below are functional Google Colab form
# annotations — do not edit or remove them.
searchQuery = '#giraffe ' #@param {type:"string"}
#@markdown ### Enter Max Tweets To Scrape:
#@markdown #### The Twitter API Rate Limit (currently) is 45,000 tweets every 15 minutes.
maxTweets = 5000 #@param {type:"slider", min:0, max:45000, step:100}
Filter_Retweets = True #@param {type:"boolean"}
tweetsPerQry = 100 # this is the max the API permits
# Accumulates one row per scraped tweet; consumed by the DataFrame step below.
tweet_lst = []
if Filter_Retweets:
    searchQuery = searchQuery + ' -filter:retweets' # to exclude retweets
# If results from a specific ID onwards are reqd, set since_id to that ID.
# else default to no lower limit, go as far back as API allows
sinceId = None
# If results only below a specific ID are, set max_id to that ID.
# else default to no upper limit, start from the most recent tweet matching the search query.
# A non-positive sentinel means "no upper bound yet"; the loop below replaces
# it with the oldest tweet ID seen so far to page backwards.
max_id = -10000000000
# Page backwards through Twitter search results until maxTweets have been
# collected or the API stops returning results.
tweetCount = 0
# Last photo seen in the stream. The original left these unbound until the
# first photo appeared, which raised NameError for an initial media-less
# tweet; initialize to None so such rows simply carry no image.
vimage = None
vurl = None
print("Downloading max {0} tweets".format(maxTweets))
while tweetCount < maxTweets:
    try:
        # Four query shapes: max_id pages backwards through history,
        # since_id (when set) bounds the scrape from below.
        if max_id <= 0:
            if not sinceId:
                new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
                                        lang="en")
            else:
                new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
                                        lang="en", since_id=sinceId)
        else:
            if not sinceId:
                new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
                                        lang="en", max_id=str(max_id - 1))
            else:
                new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
                                        lang="en", max_id=str(max_id - 1),
                                        since_id=sinceId)
        if not new_tweets:
            print("No more tweets found")
            break
        for tweet in new_tweets:
            # Not every tweet object exposes these attributes; fall back to
            # the same defaults the original used.
            reply_count = getattr(tweet, 'reply_count', 0)
            retweeted = getattr(tweet, 'retweeted', "NA")
            # Recover the topic by stripping everything from the first '-'
            # (the " -filter:retweets" suffix added above).
            # NOTE(review): when Filter_Retweets is False, find() returns -1
            # and this drops the last character — preserved as-is.
            topic = searchQuery[:searchQuery.find('-')].capitalize().strip()
            # Keep only the calendar date of the tweet.
            tweetDate = tweet.created_at.date()
            # Download the last photo attached to the tweet, if any. The
            # [{}] default makes the loop a no-op for media-less tweets,
            # which then reuse the most recently fetched image.
            for media in tweet.entities.get("media", [{}]):
                if media.get("type", None) == "photo":
                    vurl = media["media_url"]
                    vimage = requests.get(media["media_url"])
            tweet_lst.append([tweetDate, topic,
                              tweet.id, tweet.user.screen_name, tweet.user.name,
                              tweet.text, tweet.favorite_count,
                              reply_count, tweet.user.location, vimage, vurl,
                              tweet.user.description, tweet.retweet_count,
                              retweeted])
        tweetCount += len(new_tweets)
        print("Downloaded {0} tweets".format(tweetCount))
        # Oldest ID in this page becomes the upper bound for the next page.
        max_id = new_tweets[-1].id
    except tweepy.TweepError as e:
        # Stop paging on any API error (auth failure, bad query, etc.).
        print("some error : " + str(e))
        break
# Summarize the scrape, load the rows into pandas, and persist them.
clear_output()
print("Downloaded {0} tweets".format(tweetCount))
# None (not the deprecated -1) is the documented "do not truncate" value
# for display.max_colwidth; -1 raises on modern pandas.
pd.set_option('display.max_colwidth', None)
# load it into a pandas dataframe
tweet_df = pd.DataFrame(tweet_lst,
                        columns=['tweet_dt', 'topic', 'id', 'screenname',
                                 'username', 'tweet', 'like_count',
                                 'reply_count', 'location', 'timage', 'turl',
                                 'description', 'retweet_count', 'retweeted'])
tweet_df.to_csv('tweets1.csv')
tweet_df.head()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment