from nltk.tag import pos_tag
from nltk.stem.wordnet import WordNetLemmatizer

def lemmatize_sentence(tokens):
    lemmatizer = WordNetLemmatizer()
    lemmatized_sentence = []
    for word, tag in pos_tag(tokens):
        # Map the Penn Treebank tag to the WordNet POS the lemmatizer expects
        if tag.startswith('NN'):
            pos = 'n'
        elif tag.startswith('VB'):
            pos = 'v'
        else:
            pos = 'a'
        lemmatized_sentence.append(lemmatizer.lemmatize(word, pos))
    return lemmatized_sentence
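As a quick, purely illustrative check of the helper (it assumes the NLTK data packages punkt, averaged_perceptron_tagger, and wordnet have already been downloaded; the sample sentence is made up):

from nltk.tokenize import word_tokenize

# Hypothetical sample sentence, tokenized and then lemmatized with the helper above
sample_tokens = word_tokenize("The cats were running through the gardens")
print(lemmatize_sentence(sample_tokens))
# Plural nouns and conjugated verbs come back as base forms, e.g. 'cat', 'be', 'run', 'garden'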
stop_words = stopwords.words('english')

def remove_noise(tokens, stop_words=()):
    cleaned_tokens = []
    for token, tag in pos_tag(tokens):
        # Same tag-to-POS mapping as in lemmatize_sentence
        if tag.startswith("NN"):
            pos = 'n'
        elif tag.startswith('VB'):
            pos = 'v'
        else:
            pos = 'a'
        token = WordNetLemmatizer().lemmatize(token, pos).lower()
        if token not in stop_words:
            cleaned_tokens.append(token)
    return cleaned_tokens
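And a similarly hypothetical spot-check of remove_noise, reusing the tokenizer from the previous example:

# Stopwords such as 'i', 'am', 'so' should disappear; the surviving tokens come back lowercased
print(remove_noise(word_tokenize("I am so happy with this wonderful book"), stop_words))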
def get_chapters_for_model(cleaned_tokens_list):
    # Turn each chapter's token list into the {token: True} feature dict NLTK expects
    for tokens in cleaned_tokens_list:
        yield dict([token, True] for token in tokens)
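The generator yields one {token: True} feature dictionary per chapter. A hedged sketch of how those dictionaries might be paired with labels and handed to NLTK's Naive Bayes classifier; the token lists and the 'Positive'/'Negative' labels here are assumptions, not the original training data:

from nltk import NaiveBayesClassifier

# Hypothetical labelled datasets built from two cleaned token-list collections
positive_dataset = [(feats, 'Positive') for feats in get_chapters_for_model(positive_tokens_list)]
negative_dataset = [(feats, 'Negative') for feats in get_chapters_for_model(negative_tokens_list)]
classifier = NaiveBayesClassifier.train(positive_dataset + negative_dataset)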
print(classifier.classify(dict([token, True] for token in tokens_for_model)))
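For the call above to work, tokens_for_model just needs to be a cleaned token list for whatever text is being classified; one plausible way to build it, with an invented sentence:

# Hypothetical input text; any raw string can be cleaned the same way
custom_text = "I absolutely loved this chapter, it was wonderful"
tokens_for_model = remove_noise(word_tokenize(custom_text), stop_words)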
def book_sentiment_wrapper(book_dir, book_id, stop_words):
    # Start by splitting the book into chapters and saving them as text files
    chapter_list = split_book_into_chapters(book_dir, book_id)
    num_chapters = len(chapter_list)
    # Initialize the per-chapter sentiment list
    book_sentiment = []
    for chapter_num in range(num_chapters):
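A hedged example of calling the wrapper; the directory, the book id, and the split_book_into_chapters helper it relies on are assumptions about the rest of the project:

# Hypothetical invocation; split_book_into_chapters must be defined elsewhere
stop_words = stopwords.words('english')
book_sentiment = book_sentiment_wrapper('books/', 'example_book', stop_words)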
chap_list = []
for chap in cleaned_chaps:
    chap_list.append(' '.join(chap))
# Make a pandas DataFrame: each row is a chapter
df_pos = pd.DataFrame(chap_list, columns=['chapter_contents'])
# Add a column with chapter names
chapter_names = ['Prelude']
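A sketch of one way the chapter-name column might be completed, assuming every chapter after the prelude is simply numbered (the naming scheme is an assumption):

# Hypothetical: label the remaining rows 'Chapter 1', 'Chapter 2', ...
chapter_names += ['Chapter {}'.format(i) for i in range(1, len(chap_list))]
df_pos['chapter'] = chapter_names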
sid = SentimentIntensityAnalyzer()
# Score each chapter once with VADER, then unpack the four fields into columns
vader_scores = [sid.polarity_scores(x) for x in df_pos['chapter_contents']]
for key in ['compound', 'neg', 'neu', 'pos']:
    df_pos[key] = [s[key] for s in vader_scores]
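For reference, VADER's polarity_scores returns a dict with 'neg', 'neu', 'pos', and 'compound' keys, where compound is a normalized score between -1 and 1; a minimal standalone check on an arbitrary sentence:

# The printed dict contains the same four fields used for the chapter columns above
print(sid.polarity_scores("The book was absolutely wonderful"))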
# Score each chapter once with the NRC emotion lexicon, then unpack each emotion into a column
nrc_scores = [NRCLex(x).raw_emotion_scores for x in df_pos['chapter_contents']]
for emotion in ['joy', 'positive', 'anticipation', 'sadness', 'surprise',
                'negative', 'anger', 'disgust', 'trust', 'fear']:
    # .get(..., 0) guards against emotions that never occur in a chapter
    df_pos[emotion] = [s.get(emotion, 0) for s in nrc_scores]
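raw_emotion_scores returns raw word counts keyed by emotion, and an emotion that never occurs in the text is simply missing from the dict, which is what the .get(..., 0) above guards against; a quick illustrative check:

# Hypothetical one-liner; only emotions actually triggered by the text show up in the output
print(NRCLex("I trust this will be a joyful surprise").raw_emotion_scores)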
from googleapiclient.discovery import build

def build_service(api_key):
    '''
    Build the YouTube Data API service object.
    '''
    YOUTUBE_API_SERVICE_NAME = "youtube"
    YOUTUBE_API_VERSION = "v3"
    return build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=api_key)
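A hedged usage sketch; the API key placeholder and the commentThreads request are illustrative only, and quota handling and pagination are ignored:

service = build_service('YOUR_API_KEY')  # hypothetical placeholder key
# Fetch the first page of top-level comments for a (placeholder) video id
response = service.commentThreads().list(part='snippet', videoId='VIDEO_ID', maxResults=100).execute()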
from urllib.parse import urlparse, parse_qs

def get_id(url):
    '''
    Get the video id from the video url, for example:
    'https://www.youtube.com/watch?v=wfAPXlFu8' -> videoId = 'wfAPXlFu8'
    '''
    u_pars = urlparse(url)
    quer_v = parse_qs(u_pars.query).get('v')
    if quer_v:
        return quer_v[0]
    # Fall back to short links such as 'https://youtu.be/<id>', where the id is the last path segment
    pth = u_pars.path.split('/')
    if pth:
        return pth[-1]
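A couple of hedged examples of what get_id handles, a standard watch URL and a youtu.be short link (both ids below are placeholders):

print(get_id('https://www.youtube.com/watch?v=VIDEO_ID'))  # -> 'VIDEO_ID'
print(get_id('https://youtu.be/VIDEO_ID'))                  # -> 'VIDEO_ID'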