Skip to content

Instantly share code, notes, and snippets.

@aloisdg
Created July 12, 2018 20:40
Show Gist options
  • Save aloisdg/024fda4115445ca10b9e5686aece4eef to your computer and use it in GitHub Desktop.
Save aloisdg/024fda4115445ca10b9e5686aece4eef to your computer and use it in GitHub Desktop.
import requests
import unittest
import sys
from pathlib import Path
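# Source site: http://www.dinosaur-coloring.com/
# Browser-console snippet (JavaScript) that turns the page links on that site into the quoted PDF URL list used in getUrls():
# [...document.querySelectorAll('#mainContent > p:nth-child(3) > a')].map(x => x.href.replace('/pages/', '/pdfs/').replace('.html', '.pdf')).map(x => `"${x}"`).join(',\n')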
def getUrls():
    """Return the PDF URLs of the dinosaur coloring pages.

    Returns:
        A new list of 40 absolute URL strings, each pointing at a PDF under
        ``http://www.dinosaur-coloring.com/pdfs/``. A fresh list is built on
        every call, so callers may mutate the result freely.
    """
    urls = [
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-allosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-ankylosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-apatosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-archaeopteryx.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-borogovia.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-brachiosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-chaoyangsaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-compsognathus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-dacentrurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-deinonychus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-dimetrodon.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-edmontosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-fukuiraptor.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-gallimimus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-giganotosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-hypsilophodon.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-iguanodon.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-jobaria.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-kentrosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-leptoceratops.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-maiasaura.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-microraptor.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-nigersaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-ornithomimus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-oviraptor.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-pachycephalosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-parasaurolophus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-quaesitosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-rugops.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-spinosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-stegosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-styracosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-triceratops.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-tyrannosaurus-rex.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-utahraptor.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-velociraptor.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-wannanosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-xenotarsosaurus.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-yinlong.pdf",
        "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-zalmoxes.pdf",
    ]
    return urls
def downloadPdf(url, path, timeout=30):
    """Download the file at *url* into the directory *path*.

    The local file name is the last segment of the URL's path component, so
    the function works for any URL, not only those under the
    dinosaur-coloring.com ``/pdfs/`` prefix.

    Args:
        url: Absolute URL of the file to fetch.
        path: Destination directory (with or without a trailing separator).
            It is created, including parents, if it does not exist.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot hang the script forever.

    Returns:
        The ``pathlib.Path`` of the file that was written.

    Raises:
        ValueError: If *url* has no usable file name in its path.
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx HTTP status (via ``raise_for_status``).
        OSError: If the directory or file cannot be created or written.
    """
    import posixpath
    from urllib.parse import urlsplit

    # Validate before touching the filesystem or the network.
    filename = posixpath.basename(urlsplit(url).path)
    if filename in ("", ".", ".."):
        raise ValueError("URL does not contain a file name: %r" % (url,))

    target_dir = Path(path)
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / filename

    # The context manager releases the connection even when an error occurs.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail loudly instead of saving an HTML error page as a ".pdf".
        response.raise_for_status()
        with open(target, 'wb') as f:
            # Stream in chunks so the whole body is never held in memory.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                f.write(chunk)
    return target
def main(argv):
    """Download every PDF returned by getUrls() into ``./dino/``.

    Args:
        argv: Command-line argument list. It is accepted for the script
            entry point but is not read.
    """
    output_dir = "./dino/"
    # Make sure the destination exists so the first download does not fail
    # with FileNotFoundError on a clean checkout.
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    for url in getUrls():
        downloadPdf(url, output_dir)
# Script entry point: run the downloads only when executed directly, not when
# the module is imported (for example by a test runner).
if __name__ == "__main__":
    main(sys.argv)
class DinoTests(unittest.TestCase):
    """Tests for getUrls() and downloadPdf()."""

    def testUrlsCountShouldBe40(self):
        """getUrls() should list exactly 40 PDFs."""
        self.assertEqual(40, len(getUrls()))

    def testDownloadPdf(self):
        """downloadPdf() should create the PDF file in the target directory.

        NOTE: this test performs a real HTTP request and therefore needs
        network access.
        """
        import os
        import tempfile

        url = "http://www.dinosaur-coloring.com/pdfs/dinosaur-coloring-zalmoxes.pdf"
        name = "dinosaur-coloring-zalmoxes.pdf"
        # Use a throwaway directory instead of a hard-coded "/tmp/": it is
        # portable and is cleaned up when the block exits.
        with tempfile.TemporaryDirectory() as tmp_dir:
            # Keep a trailing separator so that path + name is a valid file path.
            path = os.path.join(tmp_dir, "")
            downloadPdf(url, path)
            pdf = Path(path + name)
            self.assertTrue(pdf.is_file())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment