Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save brettclare/3461acff0b5af5def65854a199b1e086 to your computer and use it in GitHub Desktop.
Save brettclare/3461acff0b5af5def65854a199b1e086 to your computer and use it in GitHub Desktop.
# http://blog.impiyush.me/2015/03/data-analysis-using-twitter-api-and.html
#http://stackoverflow.com/questions/27555343/python-ms-access-database-table-creation-from-pandas-dataframe-using-sqlalchemy
import tweepy
import pandas as pd
# Collect recent tweets matching a search query.
# The search only goes back one week.

# Twitter API credentials (placeholders -- fill in real values before running).
access_key = "xxx"
access_key_secret ="xxx"
consumer_key ="xxY"
consumer_secret ="xxxe"

# Authenticate with the credentials named above, so they only have to be
# filled in at one place instead of being repeated as literals in the calls.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_key_secret)
api = tweepy.API(auth)

# Single definition of the search term used by both requests below.
query = '%Proflowers'

# NOTE(review): Tweepy 4.x appears to have renamed API.search to
# API.search_tweets -- confirm against the installed Tweepy version.

# One-off search; `result` is only used by the commented-out exploration below.
result = api.search(q=query)
#print(len(result))
#tweet=result[0]
#analyze the data in one tweet to see what we require
#for param in dir(tweet):
#the key names beginning with an '_' are hidden and usually not required so are skipped
#if not param.startswith("_"):
# print ("%s : %s\n" % (param, eval('tweet.'+param)))

# Page through the search results with a Cursor and keep at most 400 tweets.
results = list(tweepy.Cursor(api.search, q=query).items(400))
#print (len(results))
# Create a function to convert a given list of tweets into a Pandas DataFrame.
# The DataFrame will consist of only the values, which I think might be useful for analysis...
def toDataFrame(tweets):
    """Convert an iterable of tweet objects into a pandas DataFrame.

    Every tweet must expose the attributes read below: ``id``, ``text``,
    ``retweet_count``, ``favorite_count``, ``source``, ``created_at`` and a
    ``user`` object with ``id``, ``screen_name``, ``name``, ``created_at``,
    ``description``, ``followers_count``, ``friends_count``, ``location``
    and ``time_zone``.

    Args:
        tweets: Iterable of tweet objects. A list or a one-shot iterator
            (e.g. a generator) are both accepted.

    Returns:
        A DataFrame with one row per tweet and the columns, in order:
        tweetID, tweetText, tweetRetweetCt, tweetFavoriteCt, tweetSource,
        tweetCreated, userID, userScreen, userName, userCreateDt, userDesc,
        userFollowerCt, userFriendsCt, userLocation, userTimezone.

    Raises:
        AttributeError: If a tweet (or its ``user``) lacks one of the
            attributes listed above.
    """
    # The input is walked once per column, so materialize it first; otherwise
    # a one-shot iterator would be exhausted after the first column.
    tweets = list(tweets)

    # Dict insertion order defines the column order of the resulting frame.
    columns = {
        'tweetID': [tweet.id for tweet in tweets],
        'tweetText': [tweet.text for tweet in tweets],
        'tweetRetweetCt': [tweet.retweet_count for tweet in tweets],
        'tweetFavoriteCt': [tweet.favorite_count for tweet in tweets],
        'tweetSource': [tweet.source for tweet in tweets],
        'tweetCreated': [tweet.created_at for tweet in tweets],
        'userID': [tweet.user.id for tweet in tweets],
        'userScreen': [tweet.user.screen_name for tweet in tweets],
        'userName': [tweet.user.name for tweet in tweets],
        'userCreateDt': [tweet.user.created_at for tweet in tweets],
        'userDesc': [tweet.user.description for tweet in tweets],
        'userFollowerCt': [tweet.user.followers_count for tweet in tweets],
        'userFriendsCt': [tweet.user.friends_count for tweet in tweets],
        'userLocation': [tweet.user.location for tweet in tweets],
        'userTimezone': [tweet.user.time_zone for tweet in tweets],
    }
    return pd.DataFrame(columns)
# Pass the tweets list to the above function to create a DataFrame
DataSet = toDataFrame(results)
#print(DataSet.items)
# Quick sanity check: show the first five rows.
print(DataSet.head(5))
#print(DataSet.tail(2))
#DataSet=DataSet[DataSet.userTimezone.notnull()]
#print(len(DataSet))
#tzs=DataSet['userTimezone'].value_counts()[:10]
#print(tzs)

# Export the collected tweets to an Excel workbook.
# The path is a raw string so the Windows backslashes are taken literally
# ('\P' is not a valid escape sequence). The `with` block saves and closes
# the workbook on exit, replacing the explicit writer.save() call.
with pd.ExcelWriter(r'H:\Python\ProFlowerSharisTwitterHistory.xlsx', engine='xlsxwriter') as writer:
    DataSet.to_excel(writer, sheet_name='PF_Sh_TwitterHistory')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment