Skip to content

Instantly share code, notes, and snippets.

View victormurcia's full-sized avatar
😀
Playing with data :]

Victor Murcia victormurcia

😀
Playing with data :]
View GitHub Profile
@victormurcia
victormurcia / tag.py
Last active September 5, 2022 15:33
tagging words
def lemmatize_sentence(tokens):
lemmatizer = WordNetLemmatizer()
lemmatized_sentence = []
for word, tag in pos_tag(tokens):
if tag.startswith('NN'):
pos = 'n'
elif tag.startswith('VB'):
pos = 'v'
else:
pos = 'a'
@victormurcia
victormurcia / denoise_chapter.py
Last active September 5, 2022 15:40
denoising words in book
# English stop words, bound at module level.
# NOTE(review): `stopwords` is presumably nltk.corpus.stopwords -- confirm against the imports.
stop_words = stopwords.words('english')
def remove_noise(tokens, stop_words = ()):
cleaned_tokens = []
for token, tag in pos_tag(tokens):
if tag.startswith("NN"):
pos = 'n'
@victormurcia
victormurcia / get_chapters_for_model.py
Created September 5, 2022 18:53
Get chapters for the Naive Bayes model
def get_chapters_for_model(cleaned_tokens_list):
    """Yield one feature dictionary per token list.

    Args:
        cleaned_tokens_list: Iterable of token iterables. Tokens must be
            hashable because they become dictionary keys.

    Yields:
        dict: Maps every token of the current inner iterable to ``True``.
        Duplicate tokens collapse into a single key; an empty inner
        iterable yields an empty dict.
    """
    for tokens in cleaned_tokens_list:
        # Key on the individual token. Keying on `tokens` (the whole list)
        # raises TypeError because a list is unhashable.
        yield {token: True for token in tokens}
@victormurcia
victormurcia / nb_class_doriangray.py
Created September 5, 2022 22:13
nb_class dorian gray
# Flag every token in tokens_for_model as True, hand that feature dict to the
# classifier, and print the value classify() returns.
print(
    classifier.classify(
        {token: True for token in tokens_for_model}
    )
)
@victormurcia
victormurcia / book_sentiment_wrapper.py
Created September 5, 2022 22:18
book_sentiment_wrapper
def book_sentiment_wrapper(book_dir, book_id,stop_words):
#Start by splitting book into chapters and saving them into textfiles
chapter_list = split_book_into_chapters(book_dir, book_id)
num_chapters = len(chapter_list)
#Initialize sentiment array
book_sentiment = []
chapter_num = 0
for chapter_num in range(num_chapters):
@victormurcia
victormurcia / book_pandas.py
Created September 6, 2022 00:56
book dataframe
# Join each chapter's cleaned tokens back into one space-separated string.
# NOTE(review): assumes cleaned_chaps is a sequence indexed 0..len-1, so
# iterating it directly visits the same items as the old index loop -- confirm.
chap_list = [' '.join(chapter_tokens) for chapter_tokens in cleaned_chaps]
#Make pandas dataframe. Each row is a chapter
df_pos = pd.DataFrame(chap_list, columns=['chapter_contents'])
#Add column with Chapter names.
chapter_names = ['Prelude']
@victormurcia
victormurcia / vader_dg.py
Created September 6, 2022 00:59
vader for dataframe
# Score every chapter's text and store the four polarity values as columns.
sid = SentimentIntensityAnalyzer()
# Call polarity_scores() once per chapter and reuse the returned dict for all
# four columns, rather than re-scoring the same text once per column.
chapter_scores = [sid.polarity_scores(text) for text in df_pos['chapter_contents']]
for score_name in ('compound', 'neg', 'neu', 'pos'):
    df_pos[score_name] = [scores[score_name] for scores in chapter_scores]
@victormurcia
victormurcia / nrc_dg.py
Created September 6, 2022 02:28
nrc emotion analysis dorian gray
# Add one column per emotion holding each chapter's raw_emotion_scores value.
# Build NRCLex once per chapter and reuse its score dict for every column,
# rather than re-analysing the same text once per emotion.
chapter_emotions = [NRCLex(text).raw_emotion_scores for text in df_pos['chapter_contents']]
# Column order matches the original statement order.
# NOTE(review): strict indexing raises KeyError if an emotion is absent from a
# chapter's scores -- confirm whether a default of 0 is wanted instead.
for emotion in ('joy', 'positive', 'anticipation', 'sadness', 'surprise',
                'negative', 'anger', 'disgust', 'trust'):
    df_pos[emotion] = [scores[emotion] for scores in chapter_emotions]
df_pos['fear'] = [NRCLex(x).raw_emotion_scor
@victormurcia
victormurcia / initialize_YT_service.py
Created October 7, 2022 20:25
get user credentials for YouTube API
def build_service(api_key):
    """Build the YouTube API service object.

    Calls ``build`` with service name "youtube", version "v3" and the
    given key as ``developerKey``, and returns its result unchanged.

    NOTE(review): ``build`` is presumably googleapiclient.discovery.build --
    confirm against the file's imports.

    Args:
        api_key: Developer key forwarded to ``build`` as ``developerKey``.

    Returns:
        Whatever ``build`` returns for the YouTube v3 service.
    """
    service_name = "youtube"
    api_version = "v3"
    return build(service_name, api_version, developerKey=api_key)
@victormurcia
victormurcia / get_video_id.py
Created October 7, 2022 20:28
Get video ID from YouTube url
def get_id(url):
'''
To get the video id from the video url, example:
'https://www.youtube.com/watch?v=wfAPXlFu8', videoId = wfAPXlFu8
'''
u_pars = urlparse(url)
quer_v = parse_qs(u_pars.query).get('v')
if quer_v:
return quer_v[0]
pth = u_pars.path.split('/')