Skip to content

Instantly share code, notes, and snippets.

@leepro
Last active December 23, 2015 01:09
Show Gist options
  • Select an option

  • Save leepro/6558072 to your computer and use it in GitHub Desktop.

Select an option

Save leepro/6558072 to your computer and use it in GitHub Desktop.
Get latest HANRel TV shows from bitsnoop.com.
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>
# <codecell>
import requests, bs4, re
# <codecell>
# Finds a six-character date code inside a torrent URL: the literal "13",
# one digit in 0-1, then three more digits, captured as the named group
# "date".
# NOTE(review): presumably a YYMMDD stamp for the year 2013 -- confirm. The
# hard-coded "13" prefix means codes starting with any other two digits
# never match.
re_date = re.compile(r'(?P<date>13[0-1]{1}[0-9]{1}[0-9]{1}[0-9]{1})')
def getPage(page=1, timeout=30):
    """Fetch one page of bitsnoop HANRel search results and parse it.

    Args:
        page: Integer substituted into the search URL path (default 1).
        timeout: Seconds to wait on the server before requests raises a
            timeout error, so a stalled connection cannot hang the caller.

    Returns:
        A bs4.BeautifulSoup document built from the response body.
    """
    # Browser-like user agent sent with the request.
    headers = { "user-agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.151 Safari/535.19" }
    url = "http://bitsnoop.com/search/all/HANRel/c/d/%d/" % page
    # The context manager closes the session's connection pool on exit; the
    # body is fully downloaded by then, so r.text stays usable afterwards.
    with requests.Session() as session:
        r = session.get(url, headers=headers, timeout=timeout)
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    return bs4.BeautifulSoup(r.text, "html.parser")
# <codecell>
def torrents(pages=2):
    """Collect absolute torrent page URLs from the search result pages.

    Args:
        pages: Number of result pages to scan, starting at page 1
            (default 2, i.e. pages 1 and 2).

    Returns:
        A list of "http://bitsnoop.com" + href strings, one for every
        anchor whose href contains ".html" and is longer than 30
        characters, in the order the anchors appear.
    """
    tor = []
    # range() instead of xrange() so the loop runs on Python 2 and 3 alike.
    for page in range(1, pages + 1):
        # Request the page for this iteration rather than always page 1.
        b = getPage(page)
        # href=True skips anchors that carry no href attribute, which would
        # otherwise raise KeyError on the subscript below.
        for anchor in b.find_all("a", href=True):
            href = anchor["href"]
            if ".html" in href and len(href) > 30:
                tor.append("http://bitsnoop.com" + href)
    return tor
def getDatedTorrents(t):
    """Group torrent URLs by the date code found in each URL.

    Args:
        t: Iterable of URL strings. URLs in which re_date finds no match
            are dropped.

    Returns:
        A dict mapping each matched "date" group to a list of the distinct
        URLs containing it, in first-seen order.
    """
    grouped = {}
    # A given URL always yields the same date code, so one shared set is
    # enough to suppress duplicates across all dates.
    seen = set()
    for url in t:
        match = re_date.search(url)
        if match is None:
            continue
        if url in seen:
            continue
        seen.add(url)
        grouped.setdefault(match.group("date"), []).append(url)
    return grouped
# <codecell>
# Scrape the listing, bucket the links by date code, and print the groups
# with the highest date code first.
t = torrents()
DATA = getDatedTorrents(t)
dates = sorted(DATA, reverse=True)
for i in dates:
    # Call-style print with a single argument behaves the same on Python 2
    # and Python 3.
    print(i)
    for j in DATA[i]:
        # The Python 2 statement `print "\t", j` writes no separator after
        # a tab, so plain concatenation produces the same output.
        print("\t" + j)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment