manmohan24nov’s gists

manmohan24nov / rank_dense_rank_agg.py

Created October 19, 2020 08:06

	In [3]: weekly_sales_df = sales_data[['Store',
	...: 'Date',
	...: 'Weekly_Sales']].groupby(['Date',
	...: 'Store']).agg({'Weekly_Sales':'sum'})

	In [4]: weekly_sales_df.reset_index(inplace=True)

	In [5]: weekly_sales_df['rank']=weekly_sales_df.groupby(['Store'])['Weekly_Sales'].rank(ascending=False)
	...: weekly_sales_df['dense_rank'] = weekly_sales_df.groupby(['Store'])['Weekly_Sales'].rank(method='dense',
	...: ascending=False)

manmohan24nov / percentile_groupby.py

Created October 19, 2020 08:49

	In [2]: weekly_sales_df = sales_data[['Store',
	...: 'Date',
	...: 'Weekly_Sales']].groupby(['Store',
	...: 'Date']).agg({'Weekly_Sales':'mean'})

	In [3]: weekly_sales_df.reset_index(inplace=True)

	In [4]: weekly_sales_df['Percent_weekly_sales'] = weekly_sales_df.groupby(['Date'])['Weekly_Sales'].rank(pct=True,
	...: ascending=False)

manmohan24nov / rolling_agg_function.py

Created October 19, 2020 15:19

	In [2]: sales_data_ordered = sales_data.sort_values(by=['Date'],ascending=False)

	In [3]: sales_data_ordered['Date']= pd.to_datetime(sales_data_ordered['Date'])

	In [5]: weekly_sales_df = sales_data_ordered[['Store',
	...: 'Date',
	...: 'Weekly_Sales']].groupby(['Store',
	...: 'Date']).agg({'Weekly_Sales':'mean'})

	In [6]: weekly_sales_df.reset_index(inplace=True)

manmohan24nov / row_number_agg.py

Created October 19, 2020 15:40

	In [2]: weekly_sales_df = sales_data[['Store',
	...: 'Date',
	...: 'Weekly_Sales']].groupby(['Store',
	...: 'Date']).agg({'Weekly_Sales':'sum'})

	In [3]: weekly_sales_df.reset_index(inplace=True)

	In [4]: weekly_sales_df['Date']= pd.to_datetime(weekly_sales_df['Date'])

	In [5]: weekly_sales_df = weekly_sales_df.sort_values(by='Weekly_Sales',ascending=False)

manmohan24nov / sales_raw_data_agg.py

Created October 20, 2020 16:07

	In [1]: import pandas as pd
	...: import numpy as np
	...: import matplotlib.pyplot as plt
	...: import seaborn as sns
	...: from collections import Counter

	In [2]: sales_data = pd.read_csv('sales_data_set.csv')

	In [3]: sales_data
	Out[3]:

manmohan24nov / fetch_tweeter_data.py

Last active November 3, 2020 15:25

	In [1]: import tweepy

	In [2]: # Twitter API credentials
	...: consumer_key = "consumer key"
	...: consumer_secret = "consumer secret"
	...: access_key = "access key"
	...: access_secret = "access secret"

	In [3]: auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
	...: auth.set_access_token(access_key, access_secret)

manmohan24nov / bart_model_summerization.py

Created November 3, 2020 18:44

	>>> from transformers import pipeline
	>>> summarizer = pipeline('summarization', model='facebook/bart-large-cnn', tokenizer='facebook/bart-large-cnn')
	>>> text = " ".join(tweet_data)
	>>> TEXT_CLEANING_RE = "@\S+|https?:\S+|http?:\S|[^A-Za-z0-9]+"
	>>> text = re.sub(TEXT_CLEANING_RE, ' ', str(text).lower()).strip()
	>>> summarizer(text, min_length = round(0.1 * len(text.split(' '))), max_length = round(0.2 * len(text.split(' '))), do_sample=False)
	[{'summary_text': "Don't miss the most comprehensive non stop uselections2020 coverage on india s only global news channel wionews.
	A reminder as you seek comfort food in the days ahead that calories don t count if you don't use a plate handtomouth.
	A new poll shows potus leading in one of the most important swing states pennsylvania."}]

manmohan24nov / gpt_2_summerization.py

Last active November 3, 2020 20:17

	>>> from summarizer import TransformerSummarizer
	>>> import re
	>>> GPT2_model = TransformerSummarizer(transformer_type="GPT2",transformer_model_key="gpt2-medium")
	>>> text = " ".join(tweet_data)
	>>> TEXT_CLEANING_RE = "@\S+|https?:\S+|http?:\S|[^A-Za-z0-9]+"
	>>> text = re.sub(TEXT_CLEANING_RE, ' ', str(text).lower()).strip()
	>>> summerize = ''.join(GPT2_model(text, min_length=60, max_length=120))
	>>> summerize
	'Overnight show with me and a host of brilliant guests on both sides of the at trump s defeat will expose narendramodi to international censure change in the white house likely to force the in in a choice between a clown and a gaffe prone plagiarist tarred by his son s alleged corruption trump deserves th see a detailed map of'

manmohan24nov / xlnet_summerize.py

Created November 3, 2020 20:19

	>>> from summarizer import TransformerSummarizer
	>>> import re
	>>> xlnet_model = TransformerSummarizer(transformer_type="XLNet",transformer_model_key="xlnet-base-cased")
	>>> text = " ".join(tweet_data)
	>>> TEXT_CLEANING_RE = "@\S+|https?:\S+|http?:\S|[^A-Za-z0-9]+"
	>>> text = re.sub(TEXT_CLEANING_RE, ' ', str(text).lower()).strip()
	>>> summerize = ''.join(xlnet_model(text, min_length=60, max_length=120))
	>>> summerize
	"The fixwithohimai and chidiodinkalu look ahead to tomorrow's presidential election. The uselections2020 overnight show will feature guests on both sides of the at trump s defeat.
	A new poll shows potus leading in one of the most important swing states pennsylvania."

manmohan24nov / reddit_summarization_data_clean.py

Last active November 10, 2020 18:40

	In [1]: import praw

	In [2]: import re

	In [3]: reddit = praw.Reddit(client_id='client id',
	...: client_secret='client secret',
	...: user_agent='user agent')

	In [4]: top_posts = reddit.subreddit('showerthoughts').top('week', limit=1)

Manmohan Singh manmohan24nov