Skip to content

Instantly share code, notes, and snippets.

@njanakiev
Created June 15, 2019 10:55
Show Gist options
  • Save njanakiev/0fe0900cd620ba0e54d92f03999b3f0a to your computer and use it in GitHub Desktop.
Save njanakiev/0fe0900cd620ba0e54d92f03999b3f0a to your computer and use it in GitHub Desktop.
Download images from Wikimedia Commons categories
import os
from urllib.parse import quote

import mwclient
import pandas as pd
import requests
# Wikimedia Commons categories to mirror; each one gets its own sub-folder
# below `folder`.
category_names = ['Computer_hardware', 'Data_centers', 'Servers',
                  'Control_rooms', 'Floppy_disk_drives', 'Hard_disks',
                  'Automobile_dashboards', 'Old_maps', 'Laboratory_equipment']
folder = '/mnt/cloud-volume/wikimedia-commons/'

# The API client does not depend on the category, so build it once instead
# of reconnecting for every category.
site = mwclient.Site('commons.wikimedia.org')

for category_name in category_names:
    category_folder = os.path.join(folder, category_name)
    # makedirs also creates a missing parent folder and does not fail when
    # the directory already exists (no exists()/mkdir() race).
    os.makedirs(category_folder, exist_ok=True)

    category = site.Categories[category_name]
    # namespace=6 restricts the listing to the "File:" namespace.
    for member in category.members(namespace=6):
        image_name = member.page_title
        # Percent-encode the title: characters such as '?', '#' or '%' would
        # otherwise be parsed as URL syntax and request the wrong resource.
        image_url = ("https://commons.wikimedia.org/wiki/Special:FilePath/"
                     + quote(image_name))
        image_path = os.path.join(category_folder, image_name)
        print(image_url)
        try:
            # The context manager releases the streamed connection; the
            # timeout keeps a stalled server from hanging the whole run.
            with requests.get(image_url, stream=True, timeout=60) as response:
                if response.status_code == 200:
                    with open(image_path, 'wb') as image:
                        # Larger chunks than the 128-byte default used when
                        # iterating the response object directly.
                        for chunk in response.iter_content(chunk_size=65536):
                            image.write(chunk)
                else:
                    print('Status Code :', response.status_code)
        except requests.RequestException as error:
            # A single failed download should not abort the remaining files.
            print('Download failed :', error)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment