Skip to content

Instantly share code, notes, and snippets.

@PandaWhoCodes
Created January 27, 2020 18:23
Show Gist options
  • Save PandaWhoCodes/dc4e64e5cfe34483f8f411b69770c8ba to your computer and use it in GitHub Desktop.
Save PandaWhoCodes/dc4e64e5cfe34483f8f411b69770c8ba to your computer and use it in GitHub Desktop.
Extract companies from given Twitter profiles
import sys
from utils import save_to_db, get_topics, get_companies, get_products
from tweet_collector.request import collector
import csv
import requests
def get_profiles_from_file(source):
    """
    Read twitter profile handles from a text file, one handle per line.

    Surrounding whitespace is ignored, blank lines are skipped, and lines
    holding two or more whitespace-separated tokens are ignored. Every
    returned handle starts with "@"; it is prepended when missing.

    :param source: path of the text file listing the twitter profiles
    :return: list of twitter profiles, each prefixed with "@"
    """
    profiles = []
    with open(source, "r") as f:
        for raw_line in f:
            tokens = raw_line.split()
            # Exactly one token is a handle: zero tokens means a blank line,
            # two or more means the line is not a bare handle.
            if len(tokens) != 1:
                continue
            handle = tokens[0]
            if handle.startswith("@"):
                profiles.append(handle)
            else:
                profiles.append("@" + handle)
    return profiles
def make_calls(query):
    """
    Collect tweets for a query and hand them to ``save_to_db``.

    Saving is best-effort: if ``save_to_db`` raises, the error and the
    collected tweets are printed so the failure is visible and the caller
    can carry on with the next query.

    :param query: query string given to the collector
    :return: None
    """
    # NOTE(review): 1000 is presumably the maximum number of tweets to
    # fetch -- confirm against tweet_collector.request.collector.
    c = collector(query, 1000)
    tweets = c.get_tweets()
    try:
        save_to_db(querystr=query, tweet_data=tweets)
    except Exception as exc:
        # Catch Exception rather than everything so that Ctrl-C and
        # SystemExit still stop the run.
        print("Failed to save tweets for", query, "-", exc)
        print(tweets)
    return None
def get_profiles(profiles):
    """
    Run ``make_calls`` once for every given profile, announcing each one
    on standard output before it is processed.

    :param profiles: iterable of profile names
    :return: None
    """
    for handle in profiles:
        print("Getting profile:", handle)
        make_calls(handle)
def _get_companies(profiles, destination):
    """
    Look up the companies for each profile with ``get_companies`` and
    append them to a csv file, one company per row.

    :param profiles: twitter accounts to query
    :param destination: path of the csv file the rows are appended to
    :return: None
    """
    # Open the file once for the whole run instead of once per profile;
    # append mode keeps whatever rows (e.g. a header) are already there.
    with open(destination, 'a', newline='') as newFile:
        newFileWriter = csv.writer(newFile)
        for profile in profiles:
            companies = get_companies(profile)
            newFileWriter.writerows([company] for company in companies)
if __name__ == '__main__':
    # Usage: <script> <profiles_file> <destination_csv>
    # Fail with a readable message instead of an IndexError when the
    # arguments are missing.
    if len(sys.argv) < 3:
        sys.exit("usage: " + sys.argv[0] + " <profiles_file> <destination_csv>")
    source = sys.argv[1]
    destination = sys.argv[2]
    # Initialize the csv file (truncates any existing file) with the header
    with open(destination, 'w', newline='') as newFile:
        print("created new file: ", destination)
        newFileWriter = csv.writer(newFile)
        newFileWriter.writerow(['company'])
    # parse profile names from source file
    profiles = get_profiles_from_file(source)
    # Collect tweets from profiles
    get_profiles(profiles)
    # Get companies from collected profiles
    _get_companies(profiles, destination)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment