Last active
January 19, 2017 22:57
-
-
Save rafaelgfirmino/e53398e04851c90a4fc85b1aa0abbf6d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re
import subprocess
import urllib

from pyquery import PyQuery as pq
# Directory (relative to the current directory) that receives the videos.
ANIME_NAME = "Charlotte2/"
# Page that lists the links to every episode page.
LINK_DA_PAGINA_DO_ANIME = "http://www.animeai.net/318337.html"
# CSS selector of the element that wraps the episode links.
TAG_PAI_DA_LISTA_DE_ANIME = "#lcp_instance_0"
# Episodes numbered at or below this value are skipped.
LAST_SKIPPED_EPISODE = 3
# Temporary file that holds the downloaded episode index page.
INDEX_FILE = "out.html"
USER_AGENT = (
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:40.0) "
    "Gecko/20100101 Firefox/40.0"
)


def wget_command(url, destination):
    """Return the wget argument list that downloads *url* to *destination*.

    The command is built as a list (not a shell string) so that URLs scraped
    from the page cannot be interpreted by a shell.
    """
    return ["wget", "-c", url, "--user-agent=" + USER_AGENT, "-O", destination]


def episode_number(link_text):
    """Return the first integer in *link_text* if it names an episode.

    A link counts as an episode when its text contains "Epis" and at least
    one run of digits; otherwise ``None`` is returned.
    """
    if "Epis" not in link_text:
        return None
    match = re.search(r"\d+", link_text)
    return int(match.group()) if match else None


def remove_if_exists(path):
    """Delete *path* when it exists; do nothing otherwise."""
    if os.path.exists(path):
        os.remove(path)


def read_file(path):
    """Return the whole content of the file at *path*."""
    with open(path) as handle:
        return handle.read()


def list_episodes(index_html):
    """Return ``(page_url, number)`` pairs for the episode links in the index.

    Only anchors inside ``TAG_PAI_DA_LISTA_DE_ANIME`` that carry an ``href``
    and whose text yields an episode number are returned.
    """
    container = pq(index_html)(TAG_PAI_DA_LISTA_DE_ANIME)
    episodes = []
    for link in container.items("a"):
        number = episode_number(link.text())
        page_url = link.attr("href")
        if number is not None and page_url:
            episodes.append((page_url, number))
    return episodes


def video_source(page_html):
    """Return the video URL found in an episode page, or ``None``.

    The ``src`` of the ``<video>`` element is preferred; when it is missing
    or empty, the ``src`` of the element's children is used instead.
    """
    video = pq(page_html)("video")
    return video.attr("src") or video.children().attr("src")


def download_episode(page_url, number):
    """Fetch one episode page, extract its video URL and download the video.

    The page is stored as ``<ANIME_NAME><number>.html`` and always removed
    afterwards; the video is stored as ``<ANIME_NAME><number>.mp4``.
    """
    base_name = os.path.join(ANIME_NAME, str(number))
    page_path = base_name + ".html"
    print(base_name)

    page_command = wget_command(page_url, page_path)
    print("\n" + " ".join(page_command) + "\n")
    try:
        if subprocess.call(page_command) != 0:
            print("Could not download the page of episode %d" % number)
            return
        source = video_source(read_file(page_path))
        # A missing src used to become the string "None" and was downloaded
        # as if it were a URL; only a real value triggers the download now.
        if not source:
            print("No video source found for episode %d" % number)
            return
        video_command = wget_command(source, base_name + ".mp4")
        print(" ".join(video_command))
        subprocess.call(video_command)
    finally:
        remove_if_exists(page_path)


def main():
    """Download every episode above ``LAST_SKIPPED_EPISODE`` into ANIME_NAME."""
    # A stale index must go first: "wget -c" would otherwise try to resume it.
    remove_if_exists(INDEX_FILE)
    try:
        index_command = wget_command(LINK_DA_PAGINA_DO_ANIME, INDEX_FILE)
        if subprocess.call(index_command) != 0:
            print("Could not download " + LINK_DA_PAGINA_DO_ANIME)
            return
        episodes = list_episodes(read_file(INDEX_FILE))

        if not os.path.isdir(ANIME_NAME):
            os.makedirs(ANIME_NAME)

        for page_url, number in episodes:
            if number > LAST_SKIPPED_EPISODE:
                download_episode(page_url, number)
    finally:
        remove_if_exists(INDEX_FILE)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Este script baixa os vídeos para a pasta definida em ANIME_NAME, criada dentro do diretório corrente.
É necessário ter instalado o gerenciador de pacotes do python, o "pip", para instalar:
sudo apt-get install python-pip
Agora é necessário instalar a biblioteca PyQuery:
sudo pip install PyQuery
Caso tenha proxy, use mais ou menos assim:
sudo pip --proxy http://10.10.80.2:3128 install PyQuery
Para executar o arquivo use:
python Anime.py
Para parar, use:
pkill -f Anime.py
Se quiser que explique o código, passa aqui que te falo.
Ps: para limpar o cache do sistema, como super usuário execute:
free -m && sync && echo 3 > /proc/sys/vm/drop_caches && free -m