Skip to content

Instantly share code, notes, and snippets.

@jayrambhia
Created February 16, 2013 09:01
Show Gist options
  • Save jayrambhia/4966162 to your computer and use it in GitHub Desktop.
Save jayrambhia/4966162 to your computer and use it in GitHub Desktop.
Python based dictionary. Fetches data from the Merriam-Webster dictionary website.
from BeautifulSoup import BeautifulSoup
import urllib2
import sys
# Install a process-wide urllib2 opener that routes requests through a proxy.
# Pass a mapping such as {"http": "http://host:port"} to ProxyHandler to use a
# specific proxy; with no argument urllib2 is documented to fall back to the
# proxy settings found in the environment.
proxy = urllib2.ProxyHandler() # Your proxy here.
opener = urllib2.build_opener(proxy)
# After this call every urllib2.urlopen() in the process uses `opener`.
urllib2.install_opener(opener)
def getPrintUnicode(soup):
    """Return the text held by *soup* with a few HTML entities decoded.

    *soup* is either a text node (a unicode string) or an object that exposes
    its children through a ``contents`` list, such as the tags that
    ``getWord`` obtains from ``soup.findAll``.  Text nodes are returned with
    the entities below replaced by the characters they stand for; any other
    node is flattened by concatenating the text of its children, recursively.

    Returns an empty string for a node whose ``contents`` is empty.
    Raises AttributeError if a non-text node has no ``contents`` attribute.
    """
    try:
        text_type = unicode  # Python 2, the interpreter this script targets
    except NameError:
        text_type = str  # lets the helper run unchanged on Python 3

    if isinstance(soup, text_type):
        # Entity names are spelled out explicitly; the order matches the
        # original chain of replace() calls.
        replacements = (
            ('&#39;', "'"),
            ('&quot;', '"'),
            ('&nbsp;', ' '),
            ('&gt;', '>'),
            ('&lt;', '<'),
        )
        for entity, char in replacements:
            soup = soup.replace(entity, char)
        return soup

    children = soup.contents
    if not children:
        return ''
    # join() avoids rebuilding the accumulated string for every child.
    return ''.join(getPrintUnicode(child) for child in children)
def getWord():
    """Look up the word given on the command line and print its senses.

    Reads the word from ``sys.argv[1]``, downloads its page from
    merriam-webster.com, and prints the word followed by the text of every
    ``<span class="ssens">`` element found on the page, one per line.

    Exits with a usage message when no word was supplied.  Errors raised by
    ``urllib2.urlopen`` (e.g. for an unknown word or a network failure)
    propagate to the caller.
    """
    # Local imports keep this block self-contained.
    import contextlib
    import urllib

    if len(sys.argv) < 2:
        sys.exit("usage: %s WORD" % sys.argv[0])

    word = sys.argv[1]
    base_url = "http://www.merriam-webster.com/dictionary/"
    # Percent-quote the word so spaces and other special characters form a
    # valid URL path segment.
    url = base_url + urllib.quote(word)

    # urllib2 responses are not context managers on Python 2, so wrap the
    # response in closing() to guarantee the connection is released.
    with contextlib.closing(urllib2.urlopen(url)) as page:
        soup = BeautifulSoup(page.read())

    print("%s :" % word)
    for content in soup.findAll("span", {"class": "ssens"}):
        print(getPrintUnicode(content))
# Run the lookup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    getWord()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment