Last active
September 9, 2022 15:07
-
-
Save glickmac/22d395ee9300ba523da939d28340b1b9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demo: exact and fuzzy pattern search over "Alice in Wonderland",
# starting from Chapter 8.
# Originally timed in a notebook cell with %%timeit: ~2.44 s ± 377
# (unit of the deviation was not recorded).
# NOTE(review): `generate_all` and `find` are not defined in this snippet;
# they must already be in scope before this cell runs.
import urllib3

url = 'https://www.gutenberg.org/files/11/11-0.txt'
http = urllib3.PoolManager()
response = http.request("GET", url)
if response.status != 200:
    # Fail loudly here: an error page would otherwise surface further down
    # as an unrelated IndexError from the chapter split.
    raise RuntimeError(
        "Download of %s failed with HTTP status %s" % (url, response.status)
    )
text = response.data.decode("utf-8")

# Drop everything from "THE END" onwards, then keep the text that follows the
# second occurrence of "CHAPTER VIII" (presumably the first occurrence is the
# table-of-contents entry -- verify against the downloaded file).
chapters = text.split("THE END")[0].split("CHAPTER VIII")[2]

# Recorded size of the text: 61235 Characters | 27432 Words | 3762 Lines
# (measured with len(text), len(text.split(" ")) and len(text.split("\n"))).

# Build the search data once and reuse it for every query below.
bwt_data = generate_all(chapters)

# (label, pattern, allowed mismatches) -- counts observed by the author:
#   3 instances of "Off with her head"
#   0 instances of "off with her head" (the search is CASE SENSITIVE)
#   5 instances of "Off with her/his head" when 2 mismatches are allowed
queries = (
    ("Number of Exact Matches: ", 'Off with her head', 0),
    ("Number of Exact Matches: ", 'off with her head', 0),
    ("Number of Fuzzy Matches: ", 'Off with her head', 2),
)
for label, pattern, allowed_mismatches in queries:
    hits = find(pattern, chapters, mismatches=allowed_mismatches, bwt_data=bwt_data)
    print(label + str(len(hits)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment