# retrieve the most positive and most negative technology articles
# (select by max/min score rather than hard-coded score values)
tech = df[df.news_category == 'technology']
pos_idx = tech.sentiment_score.idxmax()
neg_idx = tech.sentiment_score.idxmin()
print('Most Negative Tech News Article:', news_df.loc[neg_idx, 'news_article'])
print()
print('Most Positive Tech News Article:', news_df.loc[pos_idx, 'news_article'])
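The lookup can be exercised on a toy frame — a minimal, self-contained sketch (categories, scores, and article texts are all made up) of using `idxmax`/`idxmin` to find the extreme articles:

```python
import pandas as pd

# toy stand-in for the real sentiment DataFrame (all values are made up)
df = pd.DataFrame({
    'news_category': ['technology', 'technology', 'sports'],
    'sentiment_score': [6.0, -15.0, 2.0],
    'news_article': ['great new chip', 'massive data breach', 'team wins final'],
})

tech = df[df.news_category == 'technology']
pos_idx = tech.sentiment_score.idxmax()   # row label of the highest score
neg_idx = tech.sentiment_score.idxmin()   # row label of the lowest score
print('Most Negative Tech News Article:', df.loc[neg_idx, 'news_article'])
print('Most Positive Tech News Article:', df.loc[pos_idx, 'news_article'])
```

Using `.loc` with the labels returned by `idxmax`/`idxmin` avoids any label-vs-position mixup when the index is not a plain range.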
# visualize sentiment per news category
# (seaborn renamed factorplot to catplot in v0.9)
fc = sns.catplot(x="news_category", hue="sentiment_category",
                 data=df, kind="count",
                 palette={"negative": "#FE2020",
                          "positive": "#BADD07",
                          "neutral": "#68BFF5"})
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4))
sp = sns.stripplot(x='news_category', y="sentiment_score",
                   hue='news_category', data=df, ax=ax1)
bp = sns.boxplot(x='news_category', y="sentiment_score",
                 hue='news_category', data=df, palette="Set2", ax=ax2)
t = f.suptitle('Visualizing News Sentiment', fontsize=14)
# initialize afinn sentiment analyzer
from afinn import Afinn
af = Afinn()

# compute sentiment scores (polarity) and labels
sentiment_scores = [af.score(article) for article in corpus]
sentiment_category = ['positive' if score > 0
                      else 'negative' if score < 0
                      else 'neutral'
                      for score in sentiment_scores]
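Under the hood, AFINN simply sums per-word valence ratings. A self-contained stdlib sketch of that idea, with a tiny made-up lexicon standing in for the real AFINN word list (~3,300 words rated -5 to +5):

```python
# tiny made-up lexicon; the real AFINN list is far larger
lexicon = {'good': 3, 'great': 3, 'bad': -3, 'terrible': -3, 'crash': -2}

def score(text):
    # sum the valence of every known word; unknown words contribute 0
    return sum(lexicon.get(word, 0) for word in text.lower().split())

corpus = ['a great product launch',
          'terrible crash hits market',
          'quarterly report released']
sentiment_scores = [score(article) for article in corpus]
sentiment_category = ['positive' if s > 0
                      else 'negative' if s < 0
                      else 'neutral'
                      for s in sentiment_scores]
print(list(zip(sentiment_scores, sentiment_category)))
```

The same thresholding (`> 0` positive, `< 0` negative, else neutral) then turns the raw polarity sums into the categorical labels used for the plots.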
named_entities = []
for sentence in corpus:
    temp_entity_name = ''
    temp_named_entity = None
    sentence = nlp(sentence)
    for word in sentence:
        term = word.text
        tag = word.ent_type_
        if tag:
            # extend the current entity span with this token
            temp_entity_name = ' '.join([temp_entity_name, term]).strip()
            temp_named_entity = (temp_entity_name, tag)
        else:
            # a non-entity token closes the current span
            if temp_named_entity:
                named_entities.append(temp_named_entity)
                temp_entity_name = ''
                temp_named_entity = None
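The span-grouping logic can be exercised without spaCy — a stdlib sketch that merges consecutive same-tagged tokens the same way, over made-up (token, tag) pairs mimicking `word.text` / `word.ent_type_`:

```python
# made-up (token, entity-tag) pairs; '' marks a non-entity token
tagged = [('Apple', 'ORG'), ('Inc', 'ORG'), ('opened', ''), ('in', ''),
          ('New', 'GPE'), ('York', 'GPE'), ('today', '')]

named_entities = []
temp_entity_name = ''
temp_named_entity = None
for term, tag in tagged:
    if tag:
        # extend the current entity span with this token
        temp_entity_name = ' '.join([temp_entity_name, term]).strip()
        temp_named_entity = (temp_entity_name, tag)
    else:
        # a non-entity token closes the current span
        if temp_named_entity:
            named_entities.append(temp_named_entity)
            temp_entity_name = ''
            temp_named_entity = None
print(named_entities)
```

Multi-token entities like "Apple Inc" and "New York" come out as single tuples, which is exactly what the per-sentence loop above produces from spaCy's token-level tags.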
def ner(sentence):
    sentence_nlp = nlp(sentence)
    # print named entities in article
    print([(word, word.ent_type_) for word in sentence_nlp if word.ent_type_])
    # visualize named entities
    displacy.render(sentence_nlp, style='ent', jupyter=True)
def pos_tagging_spacy(sentence):
    # POS tagging with spaCy
    sentence_nlp = nlp(sentence)
    spacy_pos_tagged = [(word, word.tag_, word.pos_) for word in sentence_nlp]
    return pd.DataFrame(spacy_pos_tagged, columns=['Word', 'POS tag', 'Tag type'])

def pos_tagging_nltk(sentence):
    # POS tagging with nltk
    nltk_pos_tagged = nltk.pos_tag(nltk.word_tokenize(sentence))
    return pd.DataFrame(nltk_pos_tagged, columns=['Word', 'POS tag'])
def normalize_corpus(corpus, html_stripping=True, contraction_expansion=True,
                     accented_char_removal=True, text_lower_case=True,
                     text_lemmatization=True, special_char_removal=True,
                     stopword_removal=True, remove_digits=True):
    normalized_corpus = []
    # normalize each document in the corpus
    for doc in corpus:
        # strip HTML (helper assumed defined earlier)
        if html_stripping:
            doc = strip_html_tags(doc)
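The HTML-stripping step can be implemented with the standard library alone — a minimal sketch of such a helper (the full pipeline may use BeautifulSoup or similar instead):

```python
from html.parser import HTMLParser

class _TextExtractor(HTMLParser):
    """Collects only text content, dropping all tags."""
    def __init__(self):
        super().__init__()
        self.parts = []

    def handle_data(self, data):
        self.parts.append(data)

def strip_html_tags(text):
    # feed the markup through the parser and keep only the text nodes
    parser = _TextExtractor()
    parser.feed(text)
    return ''.join(parser.parts)

print(strip_html_tags('<p>Some <b>news</b> text</p>'))
```

This drops the markup but preserves the text verbatim, which is all the normalization step needs before the later lowercasing and tokenization stages run.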
# assumes a tokenizer (e.g. nltk's ToktokTokenizer) and a stopword_list
# have been defined earlier
def remove_stopwords(text, is_lower_case=False):
    tokens = tokenizer.tokenize(text)
    tokens = [token.strip() for token in tokens]
    if is_lower_case:
        filtered_tokens = [token for token in tokens if token not in stopword_list]
    else:
        filtered_tokens = [token for token in tokens if token.lower() not in stopword_list]
    filtered_text = ' '.join(filtered_tokens)
    return filtered_text
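The same filtering can be run self-contained — a sketch using a whitespace tokenizer and a tiny made-up stopword list in place of the external `tokenizer` and `stopword_list`:

```python
# tiny made-up stopword list; real lists (e.g. nltk's) have ~180 entries
stopword_list = {'the', 'is', 'in', 'a', 'of'}

def remove_stopwords(text, is_lower_case=False):
    tokens = [token.strip() for token in text.split()]
    if is_lower_case:
        filtered_tokens = [t for t in tokens if t not in stopword_list]
    else:
        # compare case-insensitively when the text was not pre-lowercased
        filtered_tokens = [t for t in tokens if t.lower() not in stopword_list]
    return ' '.join(filtered_tokens)

print(remove_stopwords('The stock is in a slump'))
```

The `is_lower_case` flag only controls whether tokens are lowercased for the comparison; the surviving tokens keep their original casing either way.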
def simple_stemmer(text):
    ps = nltk.stem.PorterStemmer()
    text = ' '.join([ps.stem(word) for word in text.split()])
    return text

def lemmatize_text(text):
    text = nlp(text)
    # spaCy 2.x lemmatizes pronouns to the placeholder '-PRON-'; keep the original token instead
    text = ' '.join([word.lemma_ if word.lemma_ != '-PRON-' else word.text for word in text])
    return text
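The difference between the two normalizers: stemming chops suffixes by rule, while lemmatization maps words to dictionary forms using vocabulary and context. A toy suffix-stripper (much cruder than the real Porter algorithm) illustrates the rule-based idea and its failure mode:

```python
def naive_stem(word):
    # crude illustration: strip the first matching common suffix,
    # keeping at least a 3-letter stem
    for suffix in ('ing', 'ed', 'es', 's'):
        if word.endswith(suffix) and len(word) - len(suffix) >= 3:
            return word[:-len(suffix)]
    return word

print([naive_stem(w) for w in ['crashing', 'crashed', 'markets', 'news']])
```

Note that 'news' gets over-stemmed to 'new' — exactly the kind of error rule-based stemming makes and dictionary-based lemmatization avoids.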