This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#%%timeit ## ~2.44 s ± 377
# Alice in Wonderland Starting from Chapter 8
import urllib3

url = 'https://www.gutenberg.org/files/11/11-0.txt'
http = urllib3.PoolManager()
# request() is urllib3's documented high-level call; the body is decoded
# explicitly as UTF-8 (the same codec bytes.decode() uses by default).
text = http.request("GET", url).data.decode("utf-8")
# Keep everything before the first "THE END", then take the third piece
# produced by splitting that on "CHAPTER VIII".
chapters = text.split("THE END")[0].split("CHAPTER VIII")[2]
## 61235 Characters | 27432 Words | 3762 Lines
#characters = len(text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def find(search_string, input_string, mismatches=0, bwt_data=None, s_array=None): | |
results = [] | |
if len(search_string) == 0: | |
return("Empty Query String") | |
if bwt_data is None: | |
bwt_data = generate_all(input_string, s_array=s_array) | |
letters, bwt, lf_map, count, s_array = bwt_data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read one line from stdin and collect its cyclic rotations in ``bwt``.
# Each pass rotates the string right by one character, so after
# len(input) passes the last entry is the original string again.
a = input()
words = list(a)
bwt = []
for _ in range(len(words)):
    # Move the last character to the front.
    a = a[-1] + a[:-1]
    bwt.append(a)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def suffix_array(string):
    """Return the suffix array of ``string``.

    The result is a list of every start index ``0 .. len(string) - 1``,
    ordered by comparing the suffix ``string[i:]`` that begins at each index.
    An empty input yields an empty list.
    """
    # Each sort key is a full slice of the input; simple rather than fast.
    return sorted(range(len(string)), key=lambda i: string[i:])
def bwt_from_suffix(string, s_array=None):
    """Build a transformed string from ``string`` and its suffix array.

    For each index in ``s_array``, in order, the character immediately
    before that index is emitted.  Index 0 maps to ``string[-1]``, i.e. the
    lookup wraps around to the last character.

    Args:
        string: The input text.
        s_array: Sequence of indices into ``string``.  When ``None`` it is
            computed with ``suffix_array(string)``.

    Returns:
        The selected characters joined into one ``str``.
    """
    if s_array is None:
        s_array = suffix_array(string)
    # idx - 1 is -1 for idx == 0, which selects the final character.
    return "".join(string[idx - 1] for idx in s_array)
def lf_mapping(bwt, letters=None): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read one line from stdin and collect its cyclic rotations in ``bwt``.
# Each pass rotates the string right by one character, so after
# len(input) passes the last entry is the original string again.
a = input()
words = list(a)
bwt = []
for _ in range(len(words)):
    # Move the last character to the front.
    a = a[-1] + a[:-1]
    bwt.append(a)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!-- Contact form posting a name and a reply-to email address.
     NOTE(review): "yourformlink" in the action URL looks like a placeholder
     to be replaced with a real endpoint; confirm before deploying. -->
<form action="https://formspree.io/yourformlink" method="POST">
  <div class="row gtr-uniform">
    <div class="col-6 col-12-xsmall">
      <input type="text" name="name" id="name" value="" placeholder="Name" />
    </div>
    <div class="col-6 col-12-xsmall">
      <input type="email" name="_replyto" id="email" value="" placeholder="Email" />
    </div>
  </div>
</form>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Change the URL
url = 'http://www.gutenberg.org/files/16/16-0.txt'
# Edit the string split to pull the story
text = str(text)
# Take the third piece after splitting on the first chapter heading, then
# keep only what precedes the first "THE END".
text = text.split("Chapter 1 PETER BREAKS THROUGH")[2].split("THE END")[0]
# Turn real and backslash-escaped line breaks into spaces, drop underscores,
# and lower-case the result.  The replacement order matches the original.
text = (
    text.replace("\n", " ")
    .replace("\r", " ")
    .replace("\\r", " ")
    .replace("\\n", " ")
    .replace("_", "")
    .lower()
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Print sentiments of chapters
# ``count`` is the 1-based chapter number shown in each line; it is kept as
# a module-level counter and ends at len(Chapters) + 1.
count = 1
for item in Chapters:
    # The score is printed with three significant digits.
    print("Sentiment of Chapter {0}: {1:.3g}".format(count, Chapter_Sentiment(item)))
    count += 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
with PdfPages('../data/Animal_WordClouds.pdf', "w") as pdf: | |
for item in Chapters: | |
values = text_processing(item) | |
values = [i for i in values if i in animals] | |
y = Counter(values).most_common(1)[0][0] | |
path = "../data/Animals/" + y + ".png" | |
mask = np.array(Image.open(path)) | |
wc = WordCloud(background_color="white", max_words=200, mask=mask, max_font_size=90, random_state=42) | |
titles = "Chapter "+ str(count) + "\n" + "Most Common Animal in Chapter: " + y | |
wc.generate(item) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
import nltk | |
import string | |
nltk.download("stopwords") | |
nltk.download('vader_lexicon') | |
from nltk.tokenize import RegexpTokenizer | |
from nltk.stem import WordNetLemmatizer | |
from nltk.corpus import stopwords | |
from nltk.sentiment.vader import SentimentIntensityAnalyzer |
Newer | Older