Skip to content

Instantly share code, notes, and snippets.

@pscollins
Created January 17, 2014 07:09
Show Gist options
  • Select an option

  • Save pscollins/8469565 to your computer and use it in GitHub Desktop.

Select an option

Save pscollins/8469565 to your computer and use it in GitHub Desktop.
multithreaded
import requests
from bs4 import BeautifulSoup
from concurrent import futures
# URL template for an ordnet.dk dictionary query; the "{}" placeholder is
# filled with the word to look up via str.format().
BASE_URL = "http://ordnet.dk/ddo/ordbog?query={}"
def get_ipa(word, timeout=10):
    """Fetch the dictionary page for *word* and return its pronunciation node.

    The page at ``BASE_URL`` formatted with *word* is downloaded and parsed;
    the result is the second child node of the first element matching the
    CSS selector ``.lydskrift``.

    Args:
        word: The word to insert into the query URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the calling thread
            indefinitely.

    Returns:
        The second child of the first ``.lydskrift`` element, or ``None``
        when the page has no such element or it has fewer than two children.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    response = requests.get(BASE_URL.format(word), timeout=timeout)
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    soup = BeautifulSoup(response.text, "html.parser")
    try:
        return soup.select(".lydskrift")[0].contents[1]
    except IndexError:
        # No matching element, or it has no second child.
        return None
def main(path_to_file):
    """Look up every word listed in a file, using a pool of worker threads.

    Each line of the file contributes its first space-separated token as a
    word; blank lines are skipped. The lookups run concurrently on up to 50
    threads via ``get_ipa``.

    Args:
        path_to_file: Path of the text file to read, one entry per line.

    Returns:
        A list of the truthy ``get_ipa`` results, in the same order as the
        words appear in the file. Words with no result are omitted.

    Raises:
        Any exception raised by ``get_ipa`` for one of the words propagates
        out of this function when that word's result is collected.
    """
    # Use a context manager so the file handle is always closed.
    with open(path_to_file) as word_file:
        words = []
        for line in word_file:
            # Strip first so the trailing newline does not end up in the
            # word (and so blank lines yield an empty string we can skip).
            word = line.strip().split(" ")[0]
            if word:
                words.append(word)

    with futures.ThreadPoolExecutor(max_workers=50) as pool:
        # Executor.map already yields results (not Future objects), so they
        # must not be passed to futures.as_completed().
        return [ipa for ipa in pool.map(get_ipa, words) if ipa]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment