riza · August 1, 2018 15:07
diff --git a/hiphoplife-crawler.py b/hiphoplife-crawler.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 # TODO: Mega downloader

 import json
 import requests
 from bs4 import BeautifulSoup
 import re
 import codecs

 class HLCrawler():
 	"""Crawl the hiphoplife.com.tr MP3 listing and collect download links.

 	Instantiating the class runs the crawl immediately: listing pages
 	1..MAX_PAGE of ``URL`` are fetched and every track found is stored in
 	``PAGES`` as ``{index: {'title': ..., 'url': ...}}``; ``pagesKey`` holds
 	the next free index.
 	"""

 	# Seconds to wait on each HTTP request.  Without a timeout
 	# ``requests.get`` can block forever on a stalled connection.
 	REQUEST_TIMEOUT = 30

 	# ``window.location = '...'`` assignment looked for in the download page.
 	# Raw string with the dot escaped so only a literal "window.location"
 	# matches.
 	REDIRECT_RE = re.compile(r"window\.location = '(.*?)'")

 	def __init__(self, max_page=196):
 		"""Set up the crawl state and immediately crawl every listing page.

 		Args:
 			max_page: number of the last listing page to fetch (pages are
 				numbered from 1).  Defaults to 196, the value that used to
 				be hard-coded; 0 or less fetches nothing.
 		"""
 		self.URL = "https://www.hiphoplife.com.tr/dosyalar/Mp3/orderby,2/page,{0}/"
 		# NOTE(review): TRACKS is initialised but never used by the code
 		# visible in this file.
 		self.TRACKS = {}
 		self.PAGES = {}
 		self.MAX_PAGE = max_page

 		self.pagesKey = 0

 		# ``range`` rather than ``xrange`` so the loop also runs on Python 3.
 		for x in range(1, self.MAX_PAGE + 1):
 			print("\n{0}. sayfa crawl ediliyor.\n".format(x))
 			self.crawlPagination(self.URL.format(x))

 	def getMagaLink(self,url):
 		"""Resolve a site download page to the final file URL.

 		The page at *url* is expected to contain a
 		``window.location = '...'`` assignment.  That target is requested
 		and the URL of the final response is returned.  Judging by the
 		method name and the file's TODO this is presumably a Mega link.

 		Raises:
 			ValueError: the page contains no ``window.location`` redirect.
 			requests.RequestException: either HTTP request failed.
 		"""
 		page = requests.get(url, timeout=self.REQUEST_TIMEOUT)
 		match = self.REDIRECT_RE.search(page.text)
 		if match is None:
 			# Explicit error instead of an AttributeError on ``None``.
 			raise ValueError("no window.location redirect found at {0}".format(url))
 		target = requests.get(match.group(1), timeout=self.REQUEST_TIMEOUT)
 		return target.url

 	def crawlPagination(self,url):
 		"""Fetch one listing page and record every track found on it.

 		Each ``div`` with class ``remositoryfileblock`` is treated as one
 		track: the text of its first ``<a>`` is the title, and the ``href``
 		of its second ``<a>`` carrying an ``href`` is resolved through
 		``getMagaLink``.  Results are appended to ``self.PAGES``.

 		Failures are reported with "Dead." and skipped, so one bad entry or
 		one unreachable page never aborts the rest of the crawl.
 		"""
 		try:
 			listing = requests.get(url, timeout=self.REQUEST_TIMEOUT)
 		except requests.RequestException:
 			print("Dead. \n")
 			return

 		soup = BeautifulSoup(listing.text, 'html.parser')
 		for track in soup.find_all("div", { "class" : "remositoryfileblock" }):
 			# The try covers a single track only, so a malformed entry does
 			# not discard the remaining tracks of the page.
 			try:
 				title = track.find_all("a")[0].text
 				downloadUrlHL = track.find_all("a", href=True)[1]["href"]
 				downloadUrlHL = self.getMagaLink(downloadUrlHL)
 			except (IndexError, ValueError, requests.RequestException):
 				print("Dead. \n")
 				continue

 			self.PAGES[self.pagesKey] = {'title': title, 'url':downloadUrlHL}

 			# On Python 2 the title is ``unicode`` and must be encoded before
 			# formatting into a byte string; on Python 3 it is already
 			# ``str`` and encoding would print a ``b'...'`` repr.
 			label = title if isinstance(title, str) else title.encode('utf-8')
 			print("{0} Bulundu -> URL -> {1}".format(label.strip(),downloadUrlHL))
 			self.pagesKey += 1

 # Script entry point: constructing HLCrawler is what starts the crawl, because
 # its __init__ loops over the listing pages itself.
 if __name__ == "__main__":
 	HLCrawler()
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	# TODO: Mega downloader

	import json
	import requests
	from bs4 import BeautifulSoup
	import re
	import codecs

	class HLCrawler():
		"""Crawl the hiphoplife.com.tr MP3 listing and collect download links.

		Instantiating the class runs the crawl immediately: listing pages
		1..MAX_PAGE of ``URL`` are fetched and every track found is stored in
		``PAGES`` as ``{index: {'title': ..., 'url': ...}}``; ``pagesKey`` holds
		the next free index.
		"""

		# Seconds to wait on each HTTP request.  Without a timeout
		# ``requests.get`` can block forever on a stalled connection.
		REQUEST_TIMEOUT = 30

		# ``window.location = '...'`` assignment looked for in the download page.
		# Raw string with the dot escaped so only a literal "window.location"
		# matches.
		REDIRECT_RE = re.compile(r"window\.location = '(.*?)'")

		def __init__(self, max_page=196):
			"""Set up the crawl state and immediately crawl every listing page.

			Args:
				max_page: number of the last listing page to fetch (pages are
					numbered from 1).  Defaults to 196, the value that used to
					be hard-coded; 0 or less fetches nothing.
			"""
			self.URL = "https://www.hiphoplife.com.tr/dosyalar/Mp3/orderby,2/page,{0}/"
			# NOTE(review): TRACKS is initialised but never used by the code
			# visible in this file.
			self.TRACKS = {}
			self.PAGES = {}
			self.MAX_PAGE = max_page

			self.pagesKey = 0

			# ``range`` rather than ``xrange`` so the loop also runs on Python 3.
			for x in range(1, self.MAX_PAGE + 1):
				print("\n{0}. sayfa crawl ediliyor.\n".format(x))
				self.crawlPagination(self.URL.format(x))

		def getMagaLink(self,url):
			"""Resolve a site download page to the final file URL.

			The page at *url* is expected to contain a
			``window.location = '...'`` assignment.  That target is requested
			and the URL of the final response is returned.  Judging by the
			method name and the file's TODO this is presumably a Mega link.

			Raises:
				ValueError: the page contains no ``window.location`` redirect.
				requests.RequestException: either HTTP request failed.
			"""
			page = requests.get(url, timeout=self.REQUEST_TIMEOUT)
			match = self.REDIRECT_RE.search(page.text)
			if match is None:
				# Explicit error instead of an AttributeError on ``None``.
				raise ValueError("no window.location redirect found at {0}".format(url))
			target = requests.get(match.group(1), timeout=self.REQUEST_TIMEOUT)
			return target.url

		def crawlPagination(self,url):
			"""Fetch one listing page and record every track found on it.

			Each ``div`` with class ``remositoryfileblock`` is treated as one
			track: the text of its first ``<a>`` is the title, and the ``href``
			of its second ``<a>`` carrying an ``href`` is resolved through
			``getMagaLink``.  Results are appended to ``self.PAGES``.

			Failures are reported with "Dead." and skipped, so one bad entry or
			one unreachable page never aborts the rest of the crawl.
			"""
			try:
				listing = requests.get(url, timeout=self.REQUEST_TIMEOUT)
			except requests.RequestException:
				print("Dead. \n")
				return

			soup = BeautifulSoup(listing.text, 'html.parser')
			for track in soup.find_all("div", { "class" : "remositoryfileblock" }):
				# The try covers a single track only, so a malformed entry does
				# not discard the remaining tracks of the page.
				try:
					title = track.find_all("a")[0].text
					downloadUrlHL = track.find_all("a", href=True)[1]["href"]
					downloadUrlHL = self.getMagaLink(downloadUrlHL)
				except (IndexError, ValueError, requests.RequestException):
					print("Dead. \n")
					continue

				self.PAGES[self.pagesKey] = {'title': title, 'url':downloadUrlHL}

				# On Python 2 the title is ``unicode`` and must be encoded before
				# formatting into a byte string; on Python 3 it is already
				# ``str`` and encoding would print a ``b'...'`` repr.
				label = title if isinstance(title, str) else title.encode('utf-8')
				print("{0} Bulundu -> URL -> {1}".format(label.strip(),downloadUrlHL))
				self.pagesKey += 1

	# Script entry point: constructing HLCrawler is what starts the crawl.
	if __name__ == "__main__":
		HLCrawler()
No results found