Skip to content

Instantly share code, notes, and snippets.

View SandroLuck's full-sized avatar

Sandro Luck SandroLuck

View GitHub Profile
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import random
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
def get_point():
    """Return one synthetic social-media follower record.

    Each record is a dict with:
      'Type'        - one of 'Liker', 'Subscriber', 'Like & Sub'
      'Age'         - random int in [20, 40]
      'Gender'      - one of 'M', 'W', 'T'
      'MemberSince' - random year in [2010, 2020]
    """
    # A named `def` instead of `get_point = lambda: ...` (PEP 8 E731):
    # same call interface, but picklable and with a proper repr/docstring.
    return {'Type': random.choice(['Liker', 'Subscriber', 'Like & Sub']),
            'Age': random.randint(20, 40),
            'Gender': random.choice(['M', 'W', 'T']),
            'MemberSince': random.randint(2010, 2020)}


# 1000 random sample points; `_` marks the loop variable as unused.
df = pd.DataFrame([get_point() for _ in range(1000)])
# Our ML things
import pytorch_lightning as pl
import torch
from torch.utils.data import DataLoader, Dataset
from captum.attr import IntegratedGradients
from pytorch_lightning import seed_everything
from pytorch_lightning import Trainer
# Visualization
import pandas as pd
# To read Csvs
import pandas as pd
# To plot Csvs
import seaborn as sns
# Library that helps us deal with Windows/Linux/OSX
import os
# The standard python plotting library that people use in python
# Seaborn is build on top of this library
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import urllib.request
from time import sleep
import pandas as pd
import bar_chart_race as bcr
from tqdm import tqdm
import re
import cProfile
# Pre-compiled regex that pulls the hit count out of a GitHub search results
# page: matches e.g. "1,234 available" or "1,234 commit"; group(1) is the
# (comma-formatted) number.
find_count = re.compile(r'([0-9,]+) (available|commit)')
# First year included in the per-year search range.
# NOTE(review): the matching END_YEAR constant is not visible in this chunk.
START_YEAR = 2000
# NOTE(review): this function is truncated in this view — the trailing print(
# call is never closed, and END_YEAR is not defined in the visible source.
# Indentation also appears to have been stripped by the page scrape.
def search_git_get_count(terms, file_name='freq.csv', language="Python"):
"""
Collects the number of GitHub search hits for each term, per year, from
START_YEAR (2000) through END_YEAR.
:param terms: list of terms to aggregate, e.g. ["print", "len"]
:param file_name: name of the CSV file the result DataFrame is saved to
:param language: language we want to search for, e.g. "Python"
:return: Filename of the Dataframe including the results
"""
# Accumulates one entry per (term, year) as the searches complete.
function_calls_by_date = []
# Rough ETA: roughly 10 term-year queries per minute.
print(
f"Starting to gather the data this will take approx. {(len(terms) * (END_YEAR - START_YEAR + 1)) // 10} minutes"
#!/usr/bin/env python3
# Twitter API credentials. Get them from: https://developer.twitter.com/en/apps
# NOTE: It is not good practice to store API keys as source-code literals
# (done here only for simplicity) — prefer environment variables or a
# secrets manager so the keys never end up in version control.
API_KEY = "The key you get from twitter"
API_SECRET = "The key you get from twitter"
ACCESS_TOKEN = "The key you get from twitter"
ACCESS_TOKEN_SECRET = "The key you get from twitter"
import tweepy
# NOTE(review): this is a method of a class whose header is outside this view;
# the scrape has stripped all indentation. datetime/timezone/timedelta are
# used below but their imports are not visible in this chunk — confirm they
# exist at the top of the original file.
def __init__(self):
# Authenticate to Twitter via OAuth and keep an API handle on the instance.
auth = tweepy.OAuthHandler(API_KEY, API_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
self.api = tweepy.API(auth)
# Parses Twitter's 'created_at' format, e.g. "Mon Jan 01 12:00:00 +0000 2020".
self.str_to_time = lambda x: datetime.strptime(x, '%a %b %d %H:%M:%S %z %Y')
# Tweet score used to rank candidates: favorites + retweets.
self.selection_function = lambda x: int(x['favorite_count']) + int(x['retweet_count'])
# Only consider tweets newer than this; seeded 10 minutes in the past so the
# first run already has a window to search.
self.last_tweet_time = datetime.now(timezone.utc) - timedelta(minutes=10)
# run it once now and then every 10 minutes after that
# NOTE(review): this method is truncated in this view — the try: block has no
# matching except/finally here, and indentation was stripped by the scrape.
def run_bot(self):
# Search terms the bot monitors for recent English tweets.
OUR_SEARCH_TERMS = ['Python Programming', 'Numpy', 'Pandas Python', 'Pip Python','Python Software Foundation']
try:
found_tweets = []
for term in OUR_SEARCH_TERMS:
# count=1000 is an upper bound; the API caps results per request.
found_tweets += self.api.search(term, lang='en', result_type='recent', count=1000)
# Keep only the raw JSON payload of each tweepy Status object.
found_tweets = [t._json for t in found_tweets]
# Drop tweets older than the last processed window (see __init__).
found_tweets = [t for t in found_tweets if self.str_to_time(t['created_at']) > self.last_tweet_time]
# Script entry point: train the model on CPU with early stopping, then save it.
# NOTE(review): EarlyStopping, OurModel and save are not defined or imported in
# this visible chunk — presumably pytorch_lightning.callbacks.EarlyStopping,
# a LightningModule defined elsewhere, and torch.save; confirm in the full file.
if __name__ == '__main__':
# Fix all RNG seeds for reproducible training runs.
seed_everything(42)
device = 'cpu'
# Stop training once val_loss has not improved for 5 validation epochs.
early_stop_callback = EarlyStopping(monitor='val_loss', min_delta=0.00, patience=5, verbose=True, mode='auto')
model = OurModel().to(device)
# NOTE(review): early_stop_callback= as a Trainer kwarg is the old
# pytorch_lightning (<1.0) API; newer versions take callbacks=[...].
trainer = Trainer(max_epochs=100, min_epochs=1, auto_lr_find=False, auto_scale_batch_size=False,
progress_bar_refresh_rate=10, early_stop_callback=early_stop_callback)
trainer.fit(model)
# 'Location of our saved model' is a placeholder path — replace before running.
save(model.state_dict(), 'Location of our saved model')