Created
May 17, 2017 23:22
-
-
Save undefinedzain/01d93cc217357d97da8ed3f26c14b208 to your computer and use it in GitHub Desktop.
Scraping to get download link (example)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape an index page for per-episode links, visit each one, and print the
# download URL embedded in an inline <script> on that page.
import sys

import requests
from bs4 import BeautifulSoup

headers = {'User-Agent': 'Mozilla/5.0'}
session = requests.Session()
download_links = []

INDEX_URL = 'http://meguminime.com/naruto-subtitle-indonesia'

# Position-based selectors: they depend on the exact page layout and will
# need adjusting if the site changes its markup.
LINK_PARAGRAPH_INDEX = 14  # which <p> inside div.box holds the episode links
LINK_SCRIPT_INDEX = 3      # which <script> on an episode page is inspected

# Seconds to wait on each HTTP request; without a timeout a stalled server
# would block the script forever.
REQUEST_TIMEOUT = 30


def _fetch_soup(url):
    """Fetch *url* with the shared session and return it parsed by lxml.

    Raises requests.RequestException on connection problems, timeouts,
    invalid URLs, or a 4xx/5xx response status.
    """
    resp = session.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    return BeautifulSoup(resp.content, 'lxml')


def _warn(message):
    """Write a diagnostic line to stderr so stdout carries only links."""
    sys.stderr.write(message + '\n')


###### GET LINKS #####
try:
    index_page = _fetch_soup(INDEX_URL)
except requests.RequestException as exc:
    sys.exit('Could not fetch {}: {}'.format(INDEX_URL, exc))

box = index_page.find('div', {'class': 'box'})
if box is None:
    sys.exit('No <div class="box"> found on {}'.format(INDEX_URL))

paragraphs = box.find_all('p')
if len(paragraphs) <= LINK_PARAGRAPH_INDEX:
    sys.exit('Expected more than {} <p> elements in div.box, found {}'.format(
        LINK_PARAGRAPH_INDEX, len(paragraphs)))

for anchor in paragraphs[LINK_PARAGRAPH_INDEX].find_all('a'):
    page_url = anchor.get('href')
    if not page_url:
        # An <a> without a target cannot be followed.
        continue

    try:
        page = _fetch_soup(page_url)
    except requests.RequestException as exc:
        _warn('Skipping {}: {}'.format(page_url, exc))
        continue

    scripts = page.find_all('script')
    if len(scripts) <= LINK_SCRIPT_INDEX:
        _warn('Skipping {}: only {} <script> tags'.format(
            page_url, len(scripts)))
        continue

    # The link is taken as the text between the first pair of single quotes
    # in the serialized <script> tag.
    # NOTE(review): presumably this is the redirect/download URL assignment;
    # verify against a live page.
    pieces = str(scripts[LINK_SCRIPT_INDEX]).split("'")
    if len(pieces) < 2:
        _warn('Skipping {}: no quoted string in script'.format(page_url))
        continue
    download_links.append(pieces[1])

##### GET DOWNLOAD LINKS #####
for link in download_links:
    print(link)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment