Skip to content

Instantly share code, notes, and snippets.

View glickmac's full-sized avatar

Cody Glickman glickmac

View GitHub Profile
#%%timeit ## ~2.44 s ± 377
# Alice in Wonderland (Gutenberg #11), keeping only the text from Chapter 8 on.
import urllib3
url = 'https://www.gutenberg.org/files/11/11-0.txt'
http = urllib3.PoolManager()
response = http.urlopen("GET", url)
text = response.data.decode()
# Cut everything after "THE END", then keep what follows the second
# "CHAPTER VIII" occurrence (the chapter heading, past the table of contents).
story = text.split("THE END")[0]
chapters = story.split("CHAPTER VIII")[2]
## 61235 Characters | 27432 Words | 3762 Lines
#characters = len(text)
def find(search_string, input_string, mismatches=0, bwt_data=None, s_array=None):
    """Search for search_string in input_string using a BWT index.

    NOTE(review): this body appears truncated by the paste -- it unpacks the
    index data and then stops; the actual matching logic is not visible here.
    `generate_all` is defined elsewhere in the gist; presumably it returns the
    5-tuple unpacked below -- confirm against the full source.
    """
    results = []
    if len(search_string) == 0:
        # NOTE(review): returns a str here rather than a list -- confirm
        # callers expect this sentinel instead of an empty `results`.
        return("Empty Query String")
    if bwt_data is None:
        bwt_data = generate_all(input_string, s_array=s_array)
    # Unpack the precomputed index: alphabet, BWT string, LF-mapping table,
    # per-letter counts, and the suffix array.
    letters, bwt, lf_map, count, s_array = bwt_data
# Build every right-rotation of a user-supplied string (the rows of the
# naive BWT rotation matrix), one rotation per character of the input.
# Fixes vs. the original: the unused `words = list(a)` copy is gone, the
# redundant `''.join(word)` on an already-str value is gone, and the no-op
# `i += 1` inside the `for` loop (the loop variable is reassigned each
# iteration anyway) is gone. Behavior is unchanged: `bwt` ends up holding
# len(a) successive right-rotations, and `a` cycles back to its input value.
a = input()
bwt = []
for _ in range(len(a)):
    a = a[-1] + a[:-1]  # rotate right by one character
    bwt.append(a)
def suffix_array(string):
    """Return the suffix array: start indices of string's suffixes in sorted order."""
    starts = range(len(string))
    return sorted(starts, key=lambda start: string[start:])
def bwt_from_suffix(string, s_array=None):
    """Burrows-Wheeler transform of *string*, derived from its suffix array.

    For each suffix start position, take the character just before it;
    position 0 wraps around to the final character via the -1 index.
    Computes the suffix array on the fly when one is not supplied.
    """
    if s_array is None:
        s_array = suffix_array(string)
    preceding = [string[position - 1] for position in s_array]
    return "".join(preceding)
def lf_mapping(bwt, letters=None):
    """Build the occurrence-count table used for BWT LF-mapping.

    Fix: the pasted body ignored both parameters, read from stdin, and
    returned None. This version returns, for each letter, the cumulative
    count of that letter over every prefix of `bwt`: result[c][i] is the
    number of occurrences of c in bwt[:i] (so each list has len(bwt)+1
    entries, starting at 0).

    :param bwt: the Burrows-Wheeler transformed string.
    :param letters: iterable of alphabet symbols; defaults to set(bwt).
    :return: dict mapping each letter to its prefix-count list.
    """
    if letters is None:
        letters = set(bwt)
    result = {letter: [0] for letter in letters}
    for symbol in bwt:
        # Extend every letter's running count; only the matching letter grows.
        for letter, counts in result.items():
            counts.append(counts[-1] + (symbol == letter))
    return result
<!-- Contact form posting to Formspree; "yourformlink" is a placeholder to replace. -->
<form action="https://formspree.io/yourformlink" method="POST">
	<div class="row gtr-uniform">
		<div class="col-6 col-12-xsmall">
			<input type="text" name="name" id="name" value="" placeholder="Name" />
		</div>
		<div class="col-6 col-12-xsmall">
			<input type="email" name="_replyto" id="email" value="" placeholder="Email" />
		</div>
	</div><!-- fix: closing tag was missing, leaving the .row div unclosed -->
</form>
# Change the URL: Peter Pan (Gutenberg #16) -- download, isolate the story,
# and normalize it for tokenizing.
url = 'http://www.gutenberg.org/files/16/16-0.txt'
# Fix: the original reassigned `url` but never re-fetched, so `text` still
# held the previously downloaded book and the chapter split below would
# fail. Fetch the new URL with the PoolManager created earlier (`http`);
# the decode also makes the redundant `str(text)` conversion unnecessary.
text = http.urlopen("GET", url).data.decode()
# Edit the string split to pull the story: keep what follows the second
# "Chapter 1 PETER BREAKS THROUGH" occurrence (the chapter heading, past
# the table of contents), then stop at "THE END".
text = text.split("Chapter 1 PETER BREAKS THROUGH")[2].split("THE END")[0]
# Flatten real and escaped line breaks, drop underscores (Gutenberg italic
# markers), and lowercase everything.
text = text.replace("\n", " ").replace("\r", " ").replace("\\r", " ").replace("\\n", " ").replace("_", "").lower()
## Print sentiments of chapters
# One line per chapter: its 1-based number and sentiment score (3 sig figs).
count = 1
for chapter in Chapters:
    score = Chapter_Sentiment(chapter)
    print("Sentiment of Chapter " + str(count) + ": " + "{0:.3g}".format(score))
    count += 1
# Build one word cloud per chapter, masked by the shape of the chapter's
# most frequently mentioned animal.
# NOTE(review): depends on names defined elsewhere in the gist (Chapters,
# text_processing, animals, count, PdfPages, Counter, np, Image, WordCloud).
# NOTE(review): the positional "w" lands on PdfPages' second parameter
# (keep_empty in matplotlib), not a file mode -- confirm it is intentional.
with PdfPages('../data/Animal_WordClouds.pdf', "w") as pdf:
    for item in Chapters:
        values = text_processing(item)
        # Keep only tokens that name an animal.
        values = [i for i in values if i in animals]
        # The single most common animal picks the mask image for this chapter.
        y = Counter(values).most_common(1)[0][0]
        path = "../data/Animals/" + y + ".png"
        mask = np.array(Image.open(path))
        wc = WordCloud(background_color="white", max_words=200, mask=mask, max_font_size=90, random_state=42)
        titles = "Chapter "+ str(count) + "\n" + "Most Common Animal in Chapter: " + y
        wc.generate(item)
        # NOTE(review): nothing is drawn or written to the pdf here (no
        # pdf.savefig call) and `titles` is unused -- the plotting lines
        # were likely lost in the paste; confirm against the full gist.
import requests
from bs4 import BeautifulSoup
import nltk
import string
nltk.download("stopwords")
nltk.download('vader_lexicon')
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer