Last active
April 24, 2016 12:24
-
-
Save Fedjmike/bbc21d71e2d6cde85012624a29da6983 to your computer and use it in GitHub Desktop.
Scrape Wikipedia images
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
from model import Model, NotFound | |
from tools import get_wikipedia_image | |
def safe_print(*args):
    """Print *args* like ``print`` without failing on unencodable text.

    The arguments are converted with ``str`` and joined with single spaces,
    matching the default ``print`` layout. If stdout's encoding cannot
    represent the resulting line, the offending characters are written as
    backslash escapes instead of the line being dropped.

    Only ``UnicodeEncodeError`` is handled; other exceptions (including
    ``KeyboardInterrupt``) propagate to the caller.
    """
    # Build the line up front so it is handed to stdout in one piece rather
    # than argument by argument.
    text = " ".join(str(arg) for arg in args)
    try:
        print(text)
    except UnicodeEncodeError:
        # Re-encode with escapes so the result is representable in the
        # stream's own encoding; fall back to ASCII if it reports none.
        encoding = getattr(sys.stdout, "encoding", None) or "ascii"
        escaped = text.encode(encoding, errors="backslashreplace")
        print(escaped.decode(encoding))
# When True, artists that already have an "image" link are left alone.
# It is False so that every artist with a Wikipedia link is (re)processed,
# matching the previously disabled "continue".
SKIP_EXISTING = False

# For every artist with a "wikipedia" link, look up the thumbnail and
# full-size image via get_wikipedia_image and store them as the
# "image_thumb" and "image" links.
with Model() as model:
    for (artist_id,) in model.query("select id from artists"):
        title = model.get_link(artist_id, "wikipedia")

        if not title:
            # Nothing to look up without a Wikipedia link; report and move on.
            safe_print("No wp:", model.get_artist(artist_id).name)
            continue

        if SKIP_EXISTING and model.get_link(artist_id, "image"):
            continue

        try:
            safe_print(model.get_artist(artist_id).name)
            thumb, full = get_wikipedia_image(title)
            # Use safe_print here too, so an unencodable URL cannot abort
            # the run before the links are stored.
            safe_print(thumb, full)

            model.set_link(artist_id, "image_thumb", thumb)
            model.set_link(artist_id, "image", full)

        except (KeyError, TypeError) as exc:
            # Best effort: keep going with the next artist, but leave a trace
            # of what was skipped instead of failing silently.
            safe_print("No image:", title, repr(exc))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment