Skip to content

Instantly share code, notes, and snippets.

@auburus
Last active March 28, 2019 05:18
Show Gist options
  • Save auburus/38d75a9e4aaed5382b1f83ba0ce80ac0 to your computer and use it in GitHub Desktop.
Save auburus/38d75a9e4aaed5382b1f83ba0ce80ac0 to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
""" Example of use:
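This looks up the 2nd word of the 1st line of the 3rd paragraph of
"A Study In Scarlet". A story is chosen by the lowercase initials of its
title ("asis" here), and a "line" is a chunk of the paragraph split on '.'.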
```
from sherlock import find_word
find_word("asis", 3, 1, 2)
```
"""
# Root of the site; the paths stored in `data` are appended to it.
BASE_URL = "https://sherlock-holm.es"

# Story title -> path of the story's HTML page, relative to BASE_URL.
# find_word() selects stories by the lowercase initials of these titles,
# so the exact wording of each key matters.
data = {
    "A Study In Scarlet": "/stories/html/stud.html",
    "The Sign of the Four": "/stories/html/sign.html",
    "The Hound of the Baskervilles": "/stories/html/houn.html",
    "The Valley of Fear": "/stories/html/vall.html",
    "A Scandal in Bohemia": "/stories/html/scan.html",
    "The Red-Headed League": "/stories/html/redh.html",
    "A Case of Identity": "/stories/html/iden.html",
    "The Boscombe Valley Mystery": "/stories/html/bosc.html",
    "The Five Orange Pips": "/stories/html/five.html",
    "The Man with the Twisted Lip": "/stories/html/twis.html",
    "The Adventure of the Blue Carbuncle": "/stories/html/blue.html",
    "The Adventure of the Speckled Band": "/stories/html/spec.html",
    "The Adventure of the Engineer's Thumb": "/stories/html/engr.html",
    "The Adventure of the Noble Bachelor": "/stories/html/nobl.html",
    "The Adventure of the Beryl Coronet": "/stories/html/bery.html",
    "The Adventure of the Copper Beeches": "/stories/html/copp.html",
    "Silver Blaze": "/stories/html/silv.html",
    "Yellow Face": "/stories/html/yell.html",
    "The Stockbroker's Clerk": "/stories/html/stoc.html",
    "The Gloria Scott": "/stories/html/glor.html",
    "The Musgrave Ritual": "/stories/html/musg.html",
    "The Reigate Puzzle": "/stories/html/reig.html",
    "The Crooked Man": "/stories/html/croo.html",
    "The Resident Patient": "/stories/html/resi.html",
    "The Greek Interpreter": "/stories/html/gree.html",
    "The Naval Treaty": "/stories/html/nava.html",
    "The Final Problem": "/stories/html/fina.html",
    "The Empty House": "/stories/html/empt.html",
    "The Norwood Builder": "/stories/html/norw.html",
    "The Dancing Men": "/stories/html/danc.html",
    "The Solitary Cyclist": "/stories/html/soli.html",
    "The Priory School": "/stories/html/prio.html",
    "Black Peter": "/stories/html/blac.html",
    "Charles Augustus Milverton": "/stories/html/chas.html",
    "The Six Napoleons": "/stories/html/sixn.html",
    "The Three Students": "/stories/html/3stu.html",
    "The Golden Pince-Nez": "/stories/html/gold.html",
    "The Missing Three-Quarter": "/stories/html/miss.html",
    "The Abbey Grange": "/stories/html/abbe.html",
    "The Second Stain": "/stories/html/seco.html",
    "Wisteria Lodge": "/stories/html/wist.html",
    "The Cardboard Box": "/stories/html/card.html",
    "The Red Circle": "/stories/html/redc.html",
    "The Bruce-Partington Plans": "/stories/html/bruc.html",
    "The Dying Detective": "/stories/html/dyin.html",
    "Lady Frances Carfax": "/stories/html/lady.html",
    "The Devil's Foot": "/stories/html/devi.html",
    "His Last Bow": "/stories/html/last.html",
}
def initials(story):
    """Return the lowercase initials of a story title.

    The title is split on whitespace and the first character of every word
    is collected, e.g. ``"A Study In Scarlet"`` becomes ``"asis"``.

    Args:
        story: Story title as a string.

    Returns:
        A string holding one lowercase character per word of the title;
        the empty string when the title contains no words.
    """
    # split() without an argument discards empty chunks, so an empty title
    # or repeated/leading/trailing spaces cannot trigger an IndexError on
    # word[0] (which split(' ') would).
    return "".join(word[0].lower() for word in story.split())
def fetch_word(link, p, l, w):
    """Download a story page and return a single word picked by position.

    Args:
        link: Path of the story page; it is appended to ``BASE_URL``.
        p: 1-based paragraph number (position among the page's ``<p>`` tags).
        l: 1-based "line" number inside the paragraph, where lines are the
            chunks obtained by splitting the paragraph text on ``.``.
        w: 1-based word number inside that line (whitespace-separated).

    Returns:
        The selected word as a string.

    Raises:
        ValueError: If any of ``p``, ``l`` or ``w`` is smaller than 1.
        IndexError: If the requested paragraph, line or word does not exist.
        requests.RequestException: If the page cannot be downloaded or the
            server answers with an error status.
    """
    # Without this guard a position of 0 (or less) would become a negative
    # index and silently pick an element counted from the end.
    if min(p, l, w) < 1:
        raise ValueError("paragraph, line and word positions are 1-based")

    # A timeout keeps a stalled server from hanging the caller forever, and
    # raise_for_status() avoids parsing an HTTP error page as if it were a story.
    response = requests.get(BASE_URL + link, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    paragraph_index = p - 1
    # A page with a table of contents is a novel, otherwise it is a short
    # story; per the original author's note, novels carry one extra paragraph
    # that has to be skipped.
    # find() expects a tag name, not a CSS selector, so the class has to be
    # passed via class_ (find('.table-of-contents') never matches anything).
    # NOTE(review): assumes the table of contents carries the CSS class
    # "table-of-contents", as the original selector implied - confirm
    # against the live page.
    if soup.find(class_='table-of-contents') is not None:
        paragraph_index += 1

    paragraph = soup.find_all('p')[paragraph_index].text
    sentence = paragraph.split('.')[l - 1]
    # split() with no argument treats newlines and runs of spaces inside the
    # HTML text as separators, so a "word" never contains whitespace.
    return sentence.split()[w - 1]
def find_word(story, par, line, word):
    """Print the requested word for every story whose initials match.

    Args:
        story: Initials of the story title (compared case-insensitively
            against ``initials(title)`` for each title in ``data``).
        par: Paragraph number, forwarded to ``fetch_word``.
        line: Line number, forwarded to ``fetch_word``.
        word: Word number, forwarded to ``fetch_word``.

    Returns:
        None. One ``<title> => <word>`` line is printed per matching story;
        nothing is printed when no title matches.
    """
    wanted = story.lower()
    # Lazy generator: each match is fetched and printed before the next
    # title is examined, in the iteration order of `data`.
    matches = (
        (title, path)
        for title, path in data.items()
        if wanted == initials(title)
    )
    for title, path in matches:
        print(title, '=>', fetch_word(path, par, line, word))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment