Created
July 12, 2018 20:40
-
-
Save aloisdg/024fda4115445ca10b9e5686aece4eef to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys
import tempfile
import unittest
from pathlib import Path

import requests
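# Source page for the colouring sheets: http://www.dinosaur-coloring.com/
# JavaScript one-liner (for the browser console on that page) that turns the
# page links into the quoted PDF URL list used in getUrls():
# [...document.querySelectorAll('#mainContent > p:nth-child(3) > a')].map(x => x.href.replace('/pages/', '/pdfs/').replace('.html', '.pdf')).map(x => `"${x}"`).join(',\n')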
def getUrls():
    """Return the PDF URLs of the dinosaur colouring sheets.

    The list is hard-coded (40 entries, alphabetical by dinosaur name) and a
    new list object is built on every call, so callers may mutate the result.
    """
    return [
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-allosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-ankylosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-apatosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-archaeopteryx.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-borogovia.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-brachiosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-chaoyangsaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-compsognathus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-dacentrurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-deinonychus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-dimetrodon.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-edmontosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-fukuiraptor.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-gallimimus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-giganotosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-hypsilophodon.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-iguanodon.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-jobaria.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-kentrosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-leptoceratops.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-maiasaura.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-microraptor.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-nigersaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-ornithomimus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-oviraptor.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-pachycephalosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-parasaurolophus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-quaesitosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-rugops.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-spinosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-stegosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-styracosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-triceratops.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-tyrannosaurus-rex.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-utahraptor.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-velociraptor.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-wannanosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-xenotarsosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-yinlong.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-zalmoxes.pdf",
    ]
def downloadPdf(url, path, timeout=30):
    """Download the file at *url* into the directory *path*.

    The local file name is the last path segment of *url*. The destination
    directory is created when it does not exist yet.

    Args:
        url: Absolute URL of the file to fetch.
        path: Destination directory, with or without a trailing slash.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Any other network failure (incl. timeout).
    """
    target_dir = Path(path)
    target_dir.mkdir(parents=True, exist_ok=True)
    # Derive the file name from the URL itself rather than by substituting a
    # fixed prefix, so the result does not depend on how `path` is written.
    target = target_dir / url.rsplit("/", 1)[-1]

    with requests.get(url, stream=True, timeout=timeout) as r:
        # Fail before opening the file so an HTML error page is never saved
        # under a .pdf name.
        r.raise_for_status()
        with open(target, "wb") as f:
            # Stream to disk in chunks instead of buffering the whole body.
            for chunk in r.iter_content(chunk_size=65536):
                f.write(chunk)
def main(argv):
    """Download every colouring sheet returned by getUrls().

    Args:
        argv: Command-line argument list in ``sys.argv`` form. When a second
            element is present it is used as the destination directory;
            otherwise the files go to ``./dino/``.
    """
    destination = argv[1] if argv and len(argv) > 1 else "./dino/"
    # Keep a trailing separator so the directory and the file name can never
    # run together when the two are joined.
    if not destination.endswith("/"):
        destination += "/"
    # The directory may not exist on a first run; opening a file inside it
    # would otherwise fail with FileNotFoundError.
    Path(destination).mkdir(parents=True, exist_ok=True)

    for url in getUrls():
        downloadPdf(url, destination)
# Run the downloader only when executed as a script; importing the module
# (e.g. from a test runner) must not start any downloads.
if __name__ == "__main__":
    main(sys.argv)
class DinoTests(unittest.TestCase):
    """Checks for the URL list and for a single real download."""

    def testUrlsCountShouldBe40(self):
        """The hard-coded list holds exactly 40 URLs."""
        self.assertEqual(40, len(getUrls()))

    def testDownloadPdf(self):
        """Downloading one sheet creates a non-empty file (needs network)."""
        url = "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-zalmoxes.pdf"
        name = "dinosaur-coloring-zalmoxes.pdf"
        # Use a fresh temporary directory: a file left behind by an earlier
        # run can no longer make the test pass, and no fixed /tmp is needed.
        with tempfile.TemporaryDirectory() as tmp:
            path = tmp + "/"
            downloadPdf(url, path)
            pdf = Path(path + name)
            self.assertTrue(pdf.is_file())
            self.assertGreater(pdf.stat().st_size, 0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment