marcosan93 · September 13, 2021 00:10
diff --git a/twint_daily_func.py b/twint_daily_func.py
 def tweetByDay(start, end, df, search, limit=20):
     """
     Runs the twint query for every day between the given dates and returns
     the total dataframe.

     Collection begins on ``end`` and moves forward one day at a time until
     the date reaches ``start``; the ``start`` day itself is not queried.
     If ``end`` is already on or after ``start`` nothing is queried.

     Args:
         start: Date string ("%Y-%m-%d") at which collection stops.
         end: Date string ("%Y-%m-%d") of the first day to query.
         df: DataFrame the collected tweets are added to. It must have an
             "id" column, which is used to drop duplicates.
         search: Search term forwarded to ``getTweets``.
         limit: Per-day limit forwarded to ``getTweets`` for every day.

     Returns:
         ``df`` plus every day's tweets, with duplicate ids removed.
     """
     # Imported locally so the block does not depend on a module-level alias.
     import pandas as pd

     date_format = "%Y-%m-%d"
     max_retries = 3

     stop_date = datetime.strptime(start, date_format)
     current_date = datetime.strptime(end, date_format)
     day = end

     # Iterating instead of recursing: one stack frame per day would hit the
     # interpreter's recursion limit on long date ranges.
     while current_date < stop_date:
         # Getting the new set of tweets for the day
         # NOTE(review): getTweets is assumed to return a DataFrame - confirm.
         tweet_df = getTweets(search, day, limit)

         # Retrying only while the result is empty, and only a bounded number
         # of times, so a day without tweets cannot loop forever and a
         # successful result is never thrown away.
         retries = 0
         while len(tweet_df) == 0 and retries < max_retries:
             # Pausing for a bit before asking again
             time.sleep(1)
             tweet_df = getTweets(search, day, limit)
             retries += 1

         # Adding the new tweets (DataFrame.append was removed in pandas 2.0)
         if len(tweet_df) > 0:
             df = pd.concat([df, tweet_df], ignore_index=True)

         # Updating the date to the next day
         current_date += timedelta(days=1)
         day = current_date.strftime(date_format)

         # Printing scraping status
         print(f"\t{len(df)} Total Tweets collected as of {day}\t")

     # Removing any potential duplicates
     df = df.drop_duplicates(subset="id")
     print(len(df))
     return df
	def tweetByDay(start, end, df, search, limit=20):
	    """
	    Runs the twint query for every day between the given dates and returns
	    the total dataframe.

	    Collection begins on ``end`` and moves forward one day at a time until
	    the date reaches ``start``; the ``start`` day itself is not queried.
	    If ``end`` is already on or after ``start`` nothing is queried.

	    Args:
	        start: Date string ("%Y-%m-%d") at which collection stops.
	        end: Date string ("%Y-%m-%d") of the first day to query.
	        df: DataFrame the collected tweets are added to. It must have an
	            "id" column, which is used to drop duplicates.
	        search: Search term forwarded to ``getTweets``.
	        limit: Per-day limit forwarded to ``getTweets`` for every day.

	    Returns:
	        ``df`` plus every day's tweets, with duplicate ids removed.
	    """
	    # Imported locally so the block does not depend on a module-level alias.
	    import pandas as pd

	    date_format = "%Y-%m-%d"
	    max_retries = 3

	    stop_date = datetime.strptime(start, date_format)
	    current_date = datetime.strptime(end, date_format)
	    day = end

	    # Iterating instead of recursing: one stack frame per day would hit the
	    # interpreter's recursion limit on long date ranges.
	    while current_date < stop_date:
	        # Getting the new set of tweets for the day
	        # NOTE(review): getTweets is assumed to return a DataFrame - confirm.
	        tweet_df = getTweets(search, day, limit)

	        # Retrying only while the result is empty, and only a bounded number
	        # of times, so a day without tweets cannot loop forever and a
	        # successful result is never thrown away.
	        retries = 0
	        while len(tweet_df) == 0 and retries < max_retries:
	            # Pausing for a bit before asking again
	            time.sleep(1)
	            tweet_df = getTweets(search, day, limit)
	            retries += 1

	        # Adding the new tweets (DataFrame.append was removed in pandas 2.0)
	        if len(tweet_df) > 0:
	            df = pd.concat([df, tweet_df], ignore_index=True)

	        # Updating the date to the next day
	        current_date += timedelta(days=1)
	        day = current_date.strftime(date_format)

	        # Printing scraping status
	        print(f"\t{len(df)} Total Tweets collected as of {day}\t")

	    # Removing any potential duplicates
	    df = df.drop_duplicates(subset="id")
	    print(len(df))
	    return df
No results found