Created
April 29, 2016 23:46
-
-
Save brettclare/3461acff0b5af5def65854a199b1e086 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# References:
# http://blog.impiyush.me/2015/03/data-analysis-using-twitter-api-and.html
# http://stackoverflow.com/questions/27555343/python-ms-access-database-table-creation-from-pandas-dataframe-using-sqlalchemy
from operator import attrgetter

import pandas as pd
import tweepy
# Twitter API credentials. These are placeholders: substitute real values
# before running (and avoid committing real secrets to source control).
access_key = "xxx"
access_key_secret = "xxx"
consumer_key = "xxY"
consumer_secret = "xxxe"

# Authenticate with the credentials defined above, so that changing them in
# one place is enough (previously separate literals were passed here and the
# variables were never read).
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_key_secret)
api = tweepy.API(auth)

# NOTE: the search only goes back one week.
# Single un-paginated query, kept for ad-hoc inspection of one tweet.
result = api.search(q='%Proflowers')

# To see which attributes a tweet exposes, inspect the first hit; names
# beginning with '_' are hidden and usually not required, so skip them:
#
#   print(len(result))
#   tweet = result[0]
#   for param in dir(tweet):
#       if not param.startswith("_"):
#           print("%s : %s\n" % (param, getattr(tweet, param)))

# Page through the search results, collecting at most 400 tweets.
results = list(tweepy.Cursor(api.search, q='%Proflowers').items(400))
# print(len(results))
# Create a function to convert a given list of tweets into a Pandas DataFrame. | |
# The DataFrame will consist of only the values, which I think might be useful for analysis... | |
def toDataFrame(tweets):
    """Convert an iterable of tweet objects into a pandas DataFrame.

    One row is produced per tweet and one column per extracted attribute.
    Tweet-level attributes are read from the tweet itself; user-level
    attributes are read from ``tweet.user``.

    Args:
        tweets: Iterable of tweet-like objects exposing the attributes listed
            in the column table below. Any iterable is accepted, including
            one-shot iterators, because it is materialised exactly once.

    Returns:
        A ``pd.DataFrame`` whose columns are, in order: tweetID, tweetText,
        tweetRetweetCt, tweetFavoriteCt, tweetSource, tweetCreated, userID,
        userScreen, userName, userCreateDt, userDesc, userFollowerCt,
        userFriendsCt, userLocation, userTimezone.

    Raises:
        AttributeError: If a tweet lacks one of the expected attributes.
    """
    # (column name, dotted attribute path on the tweet), in output order.
    column_sources = (
        ('tweetID', 'id'),
        ('tweetText', 'text'),
        ('tweetRetweetCt', 'retweet_count'),
        ('tweetFavoriteCt', 'favorite_count'),
        ('tweetSource', 'source'),
        ('tweetCreated', 'created_at'),
        ('userID', 'user.id'),
        ('userScreen', 'user.screen_name'),
        ('userName', 'user.name'),
        ('userCreateDt', 'user.created_at'),
        ('userDesc', 'user.description'),
        ('userFollowerCt', 'user.followers_count'),
        ('userFriendsCt', 'user.friends_count'),
        ('userLocation', 'user.location'),
        ('userTimezone', 'user.time_zone'),
    )

    # Materialise once: the input is traversed once per column, which would
    # exhaust a generator/iterator after the first column.
    tweets = list(tweets)

    DataSet = pd.DataFrame()
    for column, path in column_sources:
        read = attrgetter(path)  # attrgetter resolves dotted paths like 'user.id'
        DataSet[column] = [read(tweet) for tweet in tweets]
    return DataSet
# Pass the collected tweets to toDataFrame to build the DataFrame.
DataSet = toDataFrame(results)
print(DataSet.head(5))

# Optional exploration, left disabled:
#   print(DataSet.items)
#   print(DataSet.tail(2))
#   DataSet = DataSet[DataSet.userTimezone.notnull()]
#   print(len(DataSet))
#   tzs = DataSet['userTimezone'].value_counts()[:10]
#   print(tzs)

# Write the DataFrame to an Excel workbook.
# - Raw string: the Windows path contains backslashes, and '\P' is not a
#   valid escape sequence (it only worked by accident and warns on newer
#   Python versions). The resulting path is unchanged.
# - Context manager: saves and closes the workbook on exit, even if
#   to_excel raises; ExcelWriter.save() no longer exists in pandas 2.x.
with pd.ExcelWriter(r'H:\Python\ProFlowerSharisTwitterHistory.xlsx',
                    engine='xlsxwriter') as writer:
    DataSet.to_excel(writer, sheet_name='PF_Sh_TwitterHistory')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment