Created
April 15, 2024 10:01
-
-
Save umstek/89b822fd864b330c32cea2f8e16214d5 to your computer and use it in GitHub Desktop.
downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"authorship_tag": "ABX9TyM+Nh5i8UEq+9W/lesUsuFv", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/umstek/89b822fd864b330c32cea2f8e16214d5/downloader.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Mu2BEttX2kbk", | |
"outputId": "e6dd7ceb-9526-4d38-c816-708f9d9224e3" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n" | |
] | |
} | |
], | |
"source": [ | |
"# Mount Google Drive at /content/gdrive; the download cell of this notebook saves files under /content/gdrive/MyDrive.\n", | |
"from google.colab import drive\n", | |
"drive.mount('/content/gdrive')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# prompt: Scrape this page https://archive.org/download/x-men-the-animated-series-1080p-ai-upscale_202204 and extract all links to an mp4 file\n", | |
"\n", | |
"import requests\n", | |
"from bs4 import BeautifulSoup\n", | |
"\n", | |
"url = 'https://archive.org/download/x-men-the-animated-series-1080p-ai-upscale_202204'\n", | |
"\n", | |
"# Fetch the directory listing. The timeout keeps the cell from hanging forever, and\n", | |
"# raise_for_status() stops on a 4xx/5xx reply instead of parsing an error page into 0 links.\n", | |
"response = requests.get(url, timeout=30)\n", | |
"response.raise_for_status()\n", | |
"soup = BeautifulSoup(response.text, 'html.parser')\n", | |
"\n", | |
"# Collect every anchor whose href ends in .mp4; each href is appended to the\n", | |
"# listing URL to form the download link used by the download cell.\n", | |
"mp4_links = [\n", | |
"    url + '/' + anchor['href']\n", | |
"    for anchor in soup.find_all('a', href=True)\n", | |
"    if anchor['href'].endswith('.mp4')\n", | |
"]\n", | |
"\n", | |
"print(len(mp4_links))" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "ZUfdoJph3dpJ", | |
"outputId": "aaa754c9-76cb-4e6c-b623-e15b37f60172" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"76\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import urllib.parse\n", | |
"\n", | |
"def clean_file_name(fn: str) -> str:\n", | |
"    \"\"\"Decode a percent-encoded file name into a name usable as one path component.\n", | |
"\n", | |
"    Args:\n", | |
"        fn: File name as it appears in a URL, e.g. 'EP03%20-%20Enter%20Magneto.mp4'.\n", | |
"\n", | |
"    Returns:\n", | |
"        The decoded name with every '/' replaced by '_', so the name cannot\n", | |
"        introduce extra directory levels when joined onto a download folder.\n", | |
"    \"\"\"\n", | |
"    decoded = urllib.parse.unquote(fn)\n", | |
"    # An encoded separator (%2F) only becomes '/' after decoding, i.e. after the caller\n", | |
"    # has already taken the basename of the URL, so it has to be neutralised here.\n", | |
"    return decoded.replace('/', '_')\n" | |
], | |
"metadata": { | |
"id": "QXPMp8Nd89PJ" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# prompt: Download each link in the above mp4_links array into /content/gdrive/MyDrive/downloads\n", | |
"\n", | |
"import os\n", | |
"import requests\n", | |
"\n", | |
"download_dir = '/content/gdrive/MyDrive/downloads'\n", | |
"CHUNK_SIZE = 1024 * 1024  # bytes copied to disk per iteration (1 MiB)\n", | |
"\n", | |
"# Create the target folder if it is missing; a no-op when it already exists.\n", | |
"os.makedirs(download_dir, exist_ok=True)\n", | |
"\n", | |
"# Download each MP4 file\n", | |
"for link in mp4_links:\n", | |
"    filename = clean_file_name(os.path.basename(link))\n", | |
"    file_path = os.path.join(download_dir, filename)\n", | |
"    print(file_path)\n", | |
"    # stream=True avoids loading a whole video into memory; the body is copied chunk by chunk.\n", | |
"    with requests.get(link, stream=True, timeout=60) as response:\n", | |
"        # Stop on a 4xx/5xx reply instead of saving an error page under an .mp4 name.\n", | |
"        response.raise_for_status()\n", | |
"        with open(file_path, 'wb') as f:\n", | |
"            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):\n", | |
"                f.write(chunk)\n" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "NZIIxjOl7jbC", | |
"outputId": "917f1321-9a7a-447a-b8a1-a2e5a9446514" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"/content/gdrive/MyDrive/downloads/EP01 - Night of the Sentinels.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP02 - Night of the Sentinels Pt. 2.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP03 - Enter Magneto.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP04 - Deadly Reunions.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP05 - Captive Hearts.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP06 - Cold Vengeance.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP07 - Slave Island.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP08 - The Unstoppable Juggernaut.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP09 - The Cure.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP10 - Come the Apocalypse.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP11 - Days of Future Past.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP12 - Days of Future Past Pt. 2.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP13 - The Final Decision.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP14 - Till Death Do Us Part.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP15 - Till Death Do Us Part Pt. 2.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP16 - Whatever It Takes.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP17 - Red Dawn.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP18 - Repo Man.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP19 - X-Ternally Yours.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP20 - Time Fugitives.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP21 - Time Fugitives Pt. 2.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP22 - A Rogue's Tale.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP23 - Beauty & the Beast.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP24 - Mojovision.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP25 - Reunion.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP26 - Reunion Pt. 2.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP27 - Out of the Past.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP28 - Out of the Past Pt. 2.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP29 - The Phoenix Saga, Part I Sacrifice.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP30 - The Phoenix Saga, Part II The Dark Shroud.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP31 - The Phoenix Saga, Part III The Cry of the Banshee.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP32 - The Phoenix Saga, Part IV The Starjammers.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP33 - The Phoenix Saga, Part V Child of Light.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP34 - No Mutant Is an Island.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP35 - Obsession.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP36 - Longshot.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP37 - Cold Comfort.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP38 - Savage Land, Strange Heart - Part One.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP39 - Savage Land, Strange Heart - Part Two.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP40 - The Dark Phoenix, Part I Dazzled.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP41 - The Dark Phoenix, Part II The Inner Circle.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP42 - The Dark Phoenix, Part III The Dark Phoenix.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP43 - The Dark Phoenix, Part IV The Fate of the Phoenix.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP44 - Orphan's End.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP45 - Love in Vain.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP46 - The Juggernaut Returns.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP47 - A Deal with the Devil.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP48 - Sanctuary (Part 1).mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP49 - Sanctuary (Part 2).mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP50 - Xavier Remembers.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP51 - Courage.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP52 - Secrets, Not Long Buried.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP53 - Nightcrawler.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP54 - One Man's Worth (Part 1).mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP55 - One Man's Worth (Part 2).mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP56 - Proteus (Part 1).mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP57 - Proteus (Part 2).mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP58 - Family Ties.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP59 - Bloodlines.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP60 - Lotus and the Steel.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP61 - Weapon X, Lies, and Video Tape.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP62 - Have Yourself a Morlock Little X-Mas.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP63 - Beyond Good and Evil (Part 1) The End of Time.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP64 - Beyond Good and Evil (Part 2) Promise of Apocalypse.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP65 - Beyond Good and Evil (Part 3) The Lazarus Chamber.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP66 - Beyond Good and Evil (Part 4) End and Beginning.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP67 - The Phalanx Covenant (Part 1).mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP68 - The Phalanx Covenant (Part 2).mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP69 - Storm Front (Part 1).mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP70 - Storm Front (Part 2).mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP71 - The Fifth Horseman.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP72 - Jubilees Fairytale Theatre.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP73 - Old Soldiers.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP74 - Hidden Agendas.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP75 - Descent.mp4\n", | |
"/content/gdrive/MyDrive/downloads/EP76 - Graduation Day.mp4\n" | |
] | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment