Last active
March 28, 2019 05:18
-
-
Save auburus/38d75a9e4aaed5382b1f83ba0ce80ac0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
""" Example of use: | |
This will look into "A Study in Scarlet" for the 3rd paragraph, 1st line, 2nd word | |
``` | |
from sherlock import find_word | |
find_word("asis", 3, 1, 2) | |
``` | |
""" | |
# Root of the site the stories are downloaded from.
BASE_URL = "https://sherlock-holm.es"

# Story title -> path of its HTML page, relative to BASE_URL.
# Insertion order is the order in which find_word scans the titles.
data = {
    "A Study In Scarlet": "/stories/html/stud.html",
    "The Sign of the Four": "/stories/html/sign.html",
    "The Hound of the Baskervilles": "/stories/html/houn.html",
    "The Valley of Fear": "/stories/html/vall.html",
    "A Scandal in Bohemia": "/stories/html/scan.html",
    "The Red-Headed League": "/stories/html/redh.html",
    "A Case of Identity": "/stories/html/iden.html",
    "The Boscombe Valley Mystery": "/stories/html/bosc.html",
    "The Five Orange Pips": "/stories/html/five.html",
    "The Man with the Twisted Lip": "/stories/html/twis.html",
    "The Adventure of the Blue Carbuncle": "/stories/html/blue.html",
    "The Adventure of the Speckled Band": "/stories/html/spec.html",
    "The Adventure of the Engineer's Thumb": "/stories/html/engr.html",
    "The Adventure of the Noble Bachelor": "/stories/html/nobl.html",
    "The Adventure of the Beryl Coronet": "/stories/html/bery.html",
    "The Adventure of the Copper Beeches": "/stories/html/copp.html",
    "Silver Blaze": "/stories/html/silv.html",
    "Yellow Face": "/stories/html/yell.html",
    "The Stockbroker's Clerk": "/stories/html/stoc.html",
    "The Gloria Scott": "/stories/html/glor.html",
    "The Musgrave Ritual": "/stories/html/musg.html",
    "The Reigate Puzzle": "/stories/html/reig.html",
    "The Crooked Man": "/stories/html/croo.html",
    "The Resident Patient": "/stories/html/resi.html",
    "The Greek Interpreter": "/stories/html/gree.html",
    "The Naval Treaty": "/stories/html/nava.html",
    "The Final Problem": "/stories/html/fina.html",
    "The Empty House": "/stories/html/empt.html",
    "The Norwood Builder": "/stories/html/norw.html",
    "The Dancing Men": "/stories/html/danc.html",
    "The Solitary Cyclist": "/stories/html/soli.html",
    "The Priory School": "/stories/html/prio.html",
    "Black Peter": "/stories/html/blac.html",
    "Charles Augustus Milverton": "/stories/html/chas.html",
    "The Six Napoleons": "/stories/html/sixn.html",
    "The Three Students": "/stories/html/3stu.html",
    "The Golden Pince-Nez": "/stories/html/gold.html",
    "The Missing Three-Quarter": "/stories/html/miss.html",
    "The Abbey Grange": "/stories/html/abbe.html",
    "The Second Stain": "/stories/html/seco.html",
    "Wisteria Lodge": "/stories/html/wist.html",
    "The Cardboard Box": "/stories/html/card.html",
    "The Red Circle": "/stories/html/redc.html",
    "The Bruce-Partington Plans": "/stories/html/bruc.html",
    "The Dying Detective": "/stories/html/dyin.html",
    "Lady Frances Carfax": "/stories/html/lady.html",
    "The Devil's Foot": "/stories/html/devi.html",
    "His Last Bow": "/stories/html/last.html",
}
def initials(story):
    """Return the lowercase initials of a story title.

    Every whitespace-separated word contributes its first character, so
    "A Study In Scarlet" becomes "asis". Hyphenated words count as one
    word ("The Red-Headed League" -> "trl").

    Args:
        story: The story title.

    Returns:
        The first character of each word, lowercased and concatenated.
        An empty or all-whitespace title yields an empty string.
    """
    # split() with no argument drops empty tokens, so repeated, leading or
    # trailing spaces cannot produce an empty word to index into.
    return "".join(word[0].lower() for word in story.split())
def fetch_word(link, p, l, w):
    """Download a story page and return one word addressed by 1-based indices.

    Args:
        link: Path of the story page; it is appended to ``BASE_URL``.
        p: 1-based paragraph number, counting the page's ``<p>`` elements.
        l: 1-based "line" number inside the paragraph, where lines are the
            pieces obtained by splitting the paragraph text on ``.``.
        w: 1-based word number inside that line.

    Returns:
        The selected word as a string.

    Raises:
        requests.RequestException: If the page cannot be downloaded
            (connection failure, timeout or HTTP error status).
        IndexError: If an index points past the end of the text.
    """
    # Callers count from 1; Python sequences count from 0.
    p -= 1
    l -= 1
    w -= 1

    # The timeout keeps a stalled server from blocking forever, and
    # raise_for_status() avoids parsing an error page as if it were a story.
    r = requests.get(BASE_URL + link, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')

    # If there is a table of contents it is a novel, otherwise it is a short
    # story; in the novels there is an extra paragraph to skip.
    # find() takes a tag name, not a CSS selector, so the class has to be
    # matched through class_ (a '.table-of-contents' tag name never matches).
    # NOTE(review): assumes the site marks the table of contents with the
    # class "table-of-contents" -- confirm against a novel page.
    if soup.find(class_='table-of-contents') is not None:
        p += 1

    paragraph = soup.find_all('p')[p].text
    line = paragraph.split('.')[l].strip()
    # Split on any whitespace run so line breaks or double spaces inside the
    # paragraph text do not glue words together or create empty "words".
    word = line.split()[w]
    return word
def find_word(story, par, line, word):
    """Print the requested word for every story whose initials match.

    Args:
        story: Initials of the story title (compared case-insensitively),
            e.g. "asis" for "A Study In Scarlet".
        par: 1-based paragraph number, forwarded to fetch_word.
        line: 1-based line number, forwarded to fetch_word.
        word: 1-based word number, forwarded to fetch_word.

    Several titles can share the same initials; one output line of the form
    ``<title> => <word>`` is printed per match. Nothing is returned.
    """
    wanted = story.lower()
    # Lazy filter: each matching page is fetched and printed before the next
    # title is examined.
    matches = (
        (title, path)
        for title, path in data.items()
        if initials(title) == wanted
    )
    for title, path in matches:
        print(title, '=>', fetch_word(path, par, line, word))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment