# PRAW to interact with Reddit
import praw
# Install TextBlob if not already installed: "pip install -U textblob"
from textblob import TextBlob
import nltk
# Download the VADER lexicon if not already downloaded
# nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Create an object for VADER sentiment analysis
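# A minimal sketch of how the analyzer object could be used on one sample
# comment (the variable names and sample text below are illustrative, not
# from the original snippet):
sentiments = SentimentIntensityAnalyzer()
sample = "Reddit users seem really happy about this update!"
print(sentiments.polarity_scores(sample))   # VADER neg/neu/pos/compound scores
print(TextBlob(sample).sentiment.polarity)  # TextBlob polarity in [-1, 1]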
import pandas as pd
# Recommended TensorFlow version is <= 2.1.0, otherwise the F1 score function breaks
import tensorflow as tf
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import tensorflow_datasets as tfds
from transformers import TFRobertaForSequenceClassification
from transformers import RobertaTokenizer
# Load your dataset
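# A minimal sketch of the load-and-tokenize step, assuming a CSV with 'text'
# and 'label' columns (file name, column names and split sizes are assumptions;
# the tokenizer call shown is the API of recent transformers releases):
df = pd.read_csv('reddit_data.csv')
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['text'].tolist(), df['label'].tolist(), test_size=0.2, random_state=42)
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=128)
test_encodings = tokenizer(test_texts, truncation=True, padding=True, max_length=128)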
import pandas as pd
# Recommended TensorFlow version is <= 2.1.0, otherwise the F1 score function breaks
import tensorflow as tf
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import tensorflow_datasets as tfds
from transformers import TFRobertaForSequenceClassification
from transformers import RobertaTokenizer
import os
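# A minimal sketch of building and compiling the classifier (the checkpoint
# name, label count, hyperparameters and output path are illustrative assumptions):
model = TFRobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
save_dir = os.path.join('models', 'roberta_sentiment')  # hypothetical output path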
import praw
import pandas as pd
from transformers import RobertaTokenizer
import tensorflow as tf
from transformers import TFRobertaForSequenceClassification
import tensorflow_datasets as tfds
reddit = praw.Reddit(client_id='client id',
                     client_secret='client secret',
                     user_agent='user agent')
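# A minimal sketch of pulling posts to score with the classifier (the
# subreddit name and the limit are illustrative assumptions):
titles = []
for submission in reddit.subreddit('wallstreetbets').hot(limit=10):
    titles.append(submission.title)
posts_df = pd.DataFrame({'title': titles})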
from transformers import TFAutoModelForTokenClassification, AutoTokenizer
import tensorflow as tf
import praw
import pandas as pd
model = TFAutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
# CoNLL-2003 NER tag set used by this checkpoint
label_list = ["O",                 # Outside of a named entity
              "B-MISC", "I-MISC",  # Miscellaneous entity
              "B-PER", "I-PER",    # Person's name
              "B-ORG", "I-ORG",    # Organisation
              "B-LOC", "I-LOC"]    # Location
from gensim.parsing.preprocessing import remove_stopwords
import gensim
from wordcloud import WordCloud
import numpy as np
import random
# Import the stopword list from gensim
# You can also add stopwords manually
gensim_stopwords = gensim.parsing.preprocessing.STOPWORDS
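# A minimal sketch of building a word cloud from cleaned text (the input
# string and output file name are illustrative assumptions):
text = "reddit posts about markets stocks gains losses and everything in between"
cleaned = remove_stopwords(text.lower())
wordcloud = WordCloud(stopwords=gensim_stopwords, background_color='white',
                      width=800, height=400).generate(cleaned)
wordcloud.to_file('wordcloud.png')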
import spacy
from matplotlib import cm
import matplotlib.pyplot as plt
nlp = spacy.load('en_core_web_sm')
ner_collection = {"Location": [], "Person": [], "Date": [], "Quantity": [], "Organisation": []}
location = []
person = []
date = []
quantity = []
organisation = []
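# A minimal sketch of filling these lists from spaCy's entity labels (the
# input text is an illustrative assumption):
doc = nlp("Apple was founded by Steve Jobs in California on April 1, 1976.")
for ent in doc.ents:
    if ent.label_ == 'GPE':
        location.append(ent.text)
    elif ent.label_ == 'PERSON':
        person.append(ent.text)
    elif ent.label_ == 'DATE':
        date.append(ent.text)
    elif ent.label_ == 'QUANTITY':
        quantity.append(ent.text)
    elif ent.label_ == 'ORG':
        organisation.append(ent.text)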
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
# Load the train and test data
train_df = pd.read_csv('train.csv')
train_df['df_type'] = 'train'
test_df = pd.read_csv('test.csv')
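# A minimal sketch of combining both frames for shared EDA plots (the 'test'
# tag mirrors the 'train' tag above; the plot choice is an assumption):
test_df['df_type'] = 'test'
full_df = pd.concat([train_df, test_df], ignore_index=True)
print(Counter(full_df['df_type']))
sns.countplot(x='df_type', data=full_df)
plt.show()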
# Import the plotting and animation libraries
import pandas as pd
from matplotlib import cm
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.path as path
import matplotlib.ticker as ticker
import matplotlib.animation as animation
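# A minimal sketch of an animated histogram using these imports (the random
# data and frame count are illustrative assumptions):
fig, ax = plt.subplots()
rng = np.random.default_rng(42)

def update(frame):
    ax.clear()
    ax.hist(rng.standard_normal(500), bins=30, color=cm.viridis(frame / 30))
    ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
    ax.set_title(f'Frame {frame}')

anim = animation.FuncAnimation(fig, update, frames=30, interval=100)
plt.show()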
# Print min, max, median, first quartile, third quartile and the 90th percentile
# using .quantile()
for i in num_col:
    print(f'Min: {train[i].quantile(0)} First Quartile: {train[i].quantile(0.25)} '
          f'Median: {train[i].quantile(0.5)} Third Quartile: {train[i].quantile(0.75)} '
          f'Max: {train[i].quantile(1)} 90th Percentile: {train[i].quantile(0.9)}')
# Percentile helper for aggregating numeric columns by categorical groups
def percentile(n):
    def percentile_(x):
        return x.quantile(n)
    percentile_.__name__ = f'percentile_{int(n * 100)}'
    return percentile_
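# A minimal sketch of using the helper in a group-by aggregation (the
# 'category' and 'price' column names are illustrative assumptions):
summary = train.groupby('category')['price'].agg(
    ['min', percentile(0.25), 'median', percentile(0.75), 'max'])
print(summary)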