Last active
August 16, 2019 11:26
-
-
Save nickbuddendotcom/5792710 to your computer and use it in GitHub Desktop.
Scrape Wikipedia's country flags from http://commons.wikimedia.org/wiki/Sovereign-state_flags. All of Wikipedia's country flags are stored as SVG, so they can be scaled to any size. The rendered width is part of each flag's thumbnail URL, so it can be changed to download a flag at whatever size you'd like. For my purposes I needed to limit my flags by height, but I wasn'…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Entry point: run the scraper as soon as this script is executed.
// scrape_wikipedia_flags() terminates the process itself via die('done').
scrape_wikipedia_flags();
/**
 * Download every flag thumbnail found on Wikimedia Commons' "Sovereign-state flags"
 * page into a "flags/" directory next to this script, rendered so that each flag
 * is at most $max_height pixels tall.
 *
 * The thumbnail URLs encode the rendered width as a "<width>px-" file-name prefix,
 * so a differently sized rendering is requested by rewriting that prefix.
 * Files are named after the image's ALT text (lower-cased, spaces replaced by
 * underscores) with a ".png" extension.
 *
 * Images that are not thumbnails, cannot be fetched, or cannot be measured are
 * skipped with an E_USER_WARNING instead of aborting the whole run.
 *
 * @param int|float $max_height Maximum height, in pixels, of the downloaded flags.
 *
 * @throws InvalidArgumentException If $max_height is not a positive number.
 * @throws RuntimeException         If the target directory cannot be created or the
 *                                  index page cannot be downloaded or parsed.
 *
 * This function never returns normally: it ends the script with die('done').
 */
function scrape_wikipedia_flags( $max_height = 60 ) {
    if (!is_numeric($max_height) || $max_height <= 0) {
        throw new InvalidArgumentException('$max_height must be a positive number');
    }

    // Create a 'flags' directory inside whatever directory we're working in.
    // We'll download our flags here.
    $dir = realpath(dirname(__FILE__)) . "/flags/";
    if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
        throw new RuntimeException("Could not create directory: {$dir}");
    }

    // Deliberately huge: only the height limit is meant to constrain the scaling.
    $max_width = 10000;

    // PHP sends no User-Agent by default, and Wikimedia may reject such requests.
    $context = stream_context_create([
        'http' => ['user_agent' => 'scrape_wikipedia_flags/1.0 (PHP flag scraper)'],
    ]);

    $html = file_get_contents('http://commons.wikimedia.org/wiki/Sovereign-state_flags', false, $context);
    if ($html === false || $html === '') {
        throw new RuntimeException('Could not download the Sovereign-state flags page');
    }

    // Real-world HTML5 makes libxml emit a flood of warnings; collect them
    // internally while parsing and restore the previous setting afterwards.
    $previous_libxml = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_libxml);
    if (!$loaded) {
        throw new RuntimeException('Could not parse the Sovereign-state flags page');
    }

    foreach ($dom->getElementsByTagName('img') as $image) {
        $src = scrape_wikipedia_flags_absolute_src($image->getAttribute('src'));
        if ($src === null) {
            continue; // not hosted on the upload server: site chrome, icons, ...
        }

        // Skip anything that is not a "<width>px-<name>" thumbnail before spending
        // a download on it; its width could not be rewritten anyway.
        if (scrape_wikipedia_flags_resized_src($src, 1) === null) {
            continue;
        }

        // Fetch the thumbnail as linked on the page to learn its aspect ratio.
        $thumbnail = file_get_contents($src, false, $context);
        $old_size = ($thumbnail === false) ? false : getimagesizefromstring($thumbnail);
        if ($old_size === false || $old_size[0] <= 0 || $old_size[1] <= 0) {
            trigger_error("Skipping {$src}: could not determine image size", E_USER_WARNING);
            continue;
        }
        $old_width = $old_size[0];
        $old_height = $old_size[1];

        // Calculate the scaling we need to fit the image inside our frame,
        // then derive the width to request (the URL only carries a width).
        $scale = min($max_width / $old_width, $max_height / $old_height);
        $new_width = max(1, (int) ceil($scale * $old_width));

        $resized_src = scrape_wikipedia_flags_resized_src($src, $new_width);

        // The country's name is stored in the image's ALT attribute.
        $img = $dir . scrape_wikipedia_flags_file_name($image->getAttribute('alt'), $src) . ".png";

        $data = file_get_contents($resized_src, false, $context);
        if ($data === false || file_put_contents($img, $data) === false) {
            trigger_error("Skipping {$resized_src}: could not download or save it", E_USER_WARNING);
        }
    }

    // You probably only want to run this function once, so die.
    die('done');
}

/**
 * Turn an <img> src into an absolute URL on an "upload." host.
 *
 * Only protocol-relative sources get a scheme prepended, so sources that are
 * already absolute are left intact. Returns null for anything else.
 */
function scrape_wikipedia_flags_absolute_src( $src ) {
    if (strpos($src, '//') === 0) {
        $src = 'http:' . $src;
    }

    return preg_match('#^https?://upload\.#i', $src) === 1 ? $src : null;
}

/**
 * Replace the "<width>px-" prefix of a thumbnail URL's file name with $new_width.
 *
 * Returns null when the last path segment does not start with such a prefix.
 */
function scrape_wikipedia_flags_resized_src( $src, $new_width ) {
    $segments = explode('/', $src);
    $filename = array_pop($segments);

    if (preg_match('/^\d+(px-.+)$/', $filename, $matches) !== 1) {
        return null;
    }

    $segments[] = $new_width . $matches[1];

    return implode('/', $segments);
}

/**
 * Build the local base file name (without extension) for a flag.
 *
 * Uses the ALT text, lower-cased with spaces replaced by underscores. Falls back
 * to the thumbnail's own file name when the ALT text is empty, so that such
 * images do not all overwrite a single ".png" file. Path separators and NUL
 * bytes are replaced so the name cannot point outside the target directory.
 */
function scrape_wikipedia_flags_file_name( $alt, $src ) {
    $name = trim($alt);

    if ($name === '') {
        $basename = rawurldecode(basename($src));
        $name = pathinfo(preg_replace('/^\d+px-/', '', $basename), PATHINFO_FILENAME);
    }

    $name = str_replace(['/', '\\', "\0"], '_', $name);

    return str_replace(' ', '_', strtolower($name));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment