Skip to content

Instantly share code, notes, and snippets.

@mrdrozdov
Created January 8, 2018 03:05
Show Gist options
  • Save mrdrozdov/1c6d0b578f7069d18650fe7df3bd7295 to your computer and use it in GitHub Desktop.
Save mrdrozdov/1c6d0b578f7069d18650fe7df3bd7295 to your computer and use it in GitHub Desktop.
fetch_synset.py
# fetch_synset.py
import time
from textblob.wordnet import Synset
from nltk.corpus import wordnet as wn
import requests
# Sample id in the same "n" + 8-digit form that the lookup loop builds for
# ImageNet; not referenced anywhere else in the visible code.
example_syn = "n02486410"
# Reference synset: a word sense is kept only when its lowest common hypernym
# with this synset is this synset itself.
animal = Synset("animal.n.01")
# CSV file name; nothing in the visible code reads or writes it.
mammals_file = "mammals_v04_wordnet.csv"
def get_syn(m):
    """Return the first noun sense of *m* whose lowest common hypernym with ``animal`` is ``animal``.

    Senses are tried in order (``<m>.n.1``, ``<m>.n.2``, ...). There is no
    upper bound on the sense number: the search only ends when a sense
    matches or when the ``Synset`` lookup itself raises.
    """
    sense = 0
    while True:
        sense += 1
        candidate = Synset('{}.n.{:01}'.format(m, sense))
        shared = candidate.lowest_common_hypernyms(animal)
        if shared[0] == animal:
            return candidate
# Common names looked up as WordNet nouns by the loop at the bottom of this
# script. Entries are kept in alphabetical order.
mammals = [
"baboon",
"bat",
"bear",
"beaver",
"bobcat",
"camel",
"caribou",
"cat",
"cheetah",
"chimpanzee",
"chipmunk",
"coati",
"cougar",
"coyote",
"dingo",
"dog",
"dolphin",
"echidna",
"elephant",
"elk",
"fox",
"gazelle",
"gibbon",
"giraffe",
"goat",
"gorilla",
"grizzly",
"groundhog",
"hare",
"hippopotamus",
"horse",
"hyena",
"impala",
"jaguar",
"kangaroo",
"koala",
"kob",
"lemur",
"leopard",
"lion",
"llama",
"lynx",
"manatee",
"mandrill",
"meerkat",
"mongoose",
"monkey",
"mouse",
"ocelot",
"orangutan",
"otter",
"porcupine",
"pronghorn",
"rabbit",
"raccoon",
"rat",
"rhinoceros",
"sheep",
"sloth",
"squirrel",
"tamarin",
"tapir",
"tiger",
"wallaby",
"walrus",
"warthog",
"whale",
"wolf",
"wombat",
"zebra",
]
# For every name in ``mammals``, walk each noun sense WordNet returns and keep
# only the senses whose lowest common hypernym with ``animal`` is ``animal``.
# For each kept sense, request the ImageNet URL listing for the matching wnid
# and print one line: name, synset name, wnid, number of lines returned, URL.
for mammal in mammals:
    for syn in wn.synsets(mammal, pos=wn.NOUN):
        # Skip senses that are not under ``animal``.
        if syn.lowest_common_hypernyms(animal)[0] != animal:
            continue
        # The wnid sent to ImageNet is "n" followed by the sense's WordNet
        # offset, zero-padded to 8 digits.
        synset = "n{:08}".format(syn.offset())
        wn_url = "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid={}".format(synset)
        # Bound the wait so a single stalled connection cannot hang the run.
        response = requests.get(wn_url, timeout=30)
        # Fail loudly on an HTTP error instead of counting the lines of an
        # error page as if they were image URLs.
        response.raise_for_status()
        # Only the number of lines is needed, so count them without building
        # an intermediate list.
        url_count = sum(1 for _ in response.iter_lines())
        print("{} {} {} {} {}".format(mammal, syn.name(), synset, url_count, wn_url))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment