Skip to content

Instantly share code, notes, and snippets.

@juanalonso
Last active May 16, 2019 18:11
Show Gist options
  • Save juanalonso/aa21da260754e97d46ac5c1b7b9f1881 to your computer and use it in GitHub Desktop.
Save juanalonso/aa21da260754e97d46ac5c1b7b9f1881 to your computer and use it in GitHub Desktop.
Script para scrapear los tweets con un hashtag determinado.
#!/usr/bin/env python
# encoding: utf-8
import tweepy
import re
import sys
import os
# Twitter API credentials -- fill these in before running the script.
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''

# Per-character clean-up applied to every tweet before it is stored:
# newlines, hyphens, straight quotes and bullets become a space, while
# carriage returns and curly double quotes are dropped altogether.
CHAR_MAP = str.maketrans({
    '\n': ' ',
    '\r': None,
    '“': None,
    '”': None,
    '-': ' ',
    '\'': ' ',
    '"': ' ',
    '•': ' ',
})

# Runs of spaces left behind by the substitutions above.
SPACE_RUN = re.compile(' +')


def read_last_id(index_path):
    """Return the tweet id stored by the previous run, or 0 if there is none.

    Args:
        index_path: Path of the index file; its first line holds the id of
            the newest tweet saved by the previous run.

    Returns:
        The stored id as an int. 0 when the file does not exist, is empty or
        does not start with a number, so that every tweet counts as new.
    """
    try:
        with open(index_path, 'r', encoding='utf8') as index_file:
            first_line = index_file.readline().strip()
    except FileNotFoundError:
        return 0
    try:
        return int(first_line)
    except ValueError:
        return 0


def is_already_saved(tweet_id, last_id):
    """Tell whether a tweet was already covered by a previous run.

    Ids are compared as integers: comparing the id strings directly is
    lexicographic and gives wrong answers when the digit counts differ
    (for instance '999' sorts after '1000').

    Args:
        tweet_id: Id of the tweet, as a decimal string or an int.
        last_id: Newest id saved by the previous run, as an int.

    Returns:
        True when the tweet is not newer than last_id.
    """
    return int(tweet_id) <= last_id


def clean_text(text):
    """Flatten a tweet into a single line suitable for the output file.

    Applies CHAR_MAP in one pass, collapses runs of spaces into one space
    and strips leading/trailing whitespace.

    Args:
        text: Raw tweet text.

    Returns:
        The cleaned, single-line text.
    """
    return SPACE_RUN.sub(' ', text.translate(CHAR_MAP)).strip()


def main():
    """Append the new tweets for the hashtag to tweets.txt.

    Both tweets.txt and index.txt live next to the script. index.txt keeps
    the id of the newest tweet seen, so the next run can stop as soon as it
    reaches tweets that were already saved.
    """
    # NOTE(review): api.search and wait_on_rate_limit_notify look like the
    # tweepy 3.x names -- confirm against the installed tweepy version.
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

    folder = os.path.dirname(sys.argv[0])
    index_path = os.path.join(folder, 'index.txt')
    last_id = read_last_id(index_path)

    print("\n\n")
    print(" Folder:", folder)
    print("Rate Limit:", api.rate_limit_status()['resources']['search'])
    print(" Old index:", last_id)

    search = tweepy.Cursor(api.search,
                           q='#sitges2018 -filter:retweets',
                           lang='es',
                           tweet_mode='extended')

    counter = 0
    first_tweet = True
    # The context manager guarantees the output is flushed and closed even
    # if the API call fails half way through the listing.
    with open(os.path.join(folder, 'tweets.txt'), 'a', encoding='utf8') as out_file:
        for tweet in search.items():
            # Tweets containing an "@" (mentions / replies) are not collected.
            if "@" in tweet.full_text:
                continue

            # The first eligible tweet of the listing becomes the new index.
            if first_tweet:
                first_tweet = False
                with open(index_path, 'w', encoding='utf8') as index_file:
                    index_file.write(tweet.id_str + '\n')
                print(" New index:", tweet.id_str, '\n\n')

            # Everything from here on was already saved by a previous run.
            if is_already_saved(tweet.id_str, last_id):
                break

            text = clean_text(tweet.full_text)
            counter += 1
            # Only the console preview is cut to 70 characters; the file
            # receives the whole text.
            print('%04d' % counter,
                  tweet.id_str,
                  tweet.created_at.strftime('%d-%m %H:%M'),
                  text[:70])
            out_file.write(text + '\n')


if __name__ == '__main__':
    main()
@Hajogit
Copy link

Hajogit commented May 16, 2019

Thank you for this code! It's working, but I'm still getting truncated tweets. Am I doing something wrong?

You included "tweet_mode='extended'", so that can't be the issue.

Please help! I would really appreciate it.

Edit: I figured it out. "tweet.full_text[:70])" in the print call above needs to be changed to "tweet.full_text[:])" to display full tweets.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment