Created
March 20, 2019 09:59
-
-
Save super1024201/7e86f2c4ebcaa04b94e7de47f190ca8e to your computer and use it in GitHub Desktop.
pornhub
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib.request | |
class HtmlDownloader(object):
    """Fetches the raw body of a web page."""

    def download(self, url):
        """Return the body of *url* as bytes.

        Returns None when *url* is None or when the response status code is
        not 200.  Errors raised by ``urllib.request.urlopen`` are not caught
        here and propagate to the caller.
        """
        if url is None:
            return None
        # The context manager closes the response (and its connection) even
        # when we bail out early or read() raises.
        with urllib.request.urlopen(url) as response:
            if response.getcode() != 200:
                return None
            return response.read()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import os | |
import re | |
from bs4 import BeautifulSoup as soup | |
# Base URL of the site; getVideoSimilar prefixes its request paths with it.
link = 'https://www.pornhub.com'
def getVideoList(url):
    """Fetch one listing page of a category and return its videos.

    *url* is the full address of the listing page, for example
    ``link + '/video?c=111&page=1'``.

    Returns a dict keyed by video title.  Each value is a dict with the keys
    'title', 'preview', 'pic', 'url' and 'time', read from the list item's
    first <a>, <img> and <var> elements.  Items that share a title overwrite
    one another, so only the last one is kept.

    Raises AttributeError when the page has no ``<ul id="videoCategory">`` or
    when a list item lacks one of the expected elements.
    """
    rep = requests.get(url)
    # Name the parser explicitly: without it bs4 guesses one (and warns), so
    # results could differ between machines.  'lxml' matches getVideoSimilar.
    page = soup(rep.content, 'lxml')
    items = page.find('ul', id='videoCategory').findAll('li')
    result = {}
    for item in items:
        title = item.a.get('title')
        result[title] = {
            'title': title,
            'preview': item.img.get('data-mediabook'),
            'pic': item.img.get('data-src'),
            'url': item.a.get('href'),
            'time': item.var.text,
        }
    return result
def getVideoUrl(url):
    """Return all renditions of a video together with the video id.

    *url* is the address of a video page, for example
    ``link + '/view_video.php?viewkey=ph5c56aa1268005'``.

    The first ``"mediaDefinitions":[...]`` array found in the page text is
    decoded as JSON.  If its first entry has an empty 'videoUrl', that entry
    is dropped.  The id is the name of the directory that directly contains
    the first remaining entry's 'videoUrl'.

    Returns a two-item list ``[definitions, id]``.
    Raises IndexError when the page contains no mediaDefinitions array.
    """
    page = requests.get(url)
    raw_array = re.findall(r'"mediaDefinitions":\[(.+?)\]', page.text)[0]
    definitions = json.loads('[' + raw_array + ']')
    first_is_blank = definitions[0].get('videoUrl') == ''
    if first_is_blank:
        definitions = definitions[1:]
    first_url = definitions[0].get('videoUrl')
    # .../<id>/<file>  ->  <id>
    ids = os.path.basename(os.path.dirname(first_url))
    return [definitions, ids]
def getVideoSimilar(ids):
    """Collect the "related videos" recommendations for a video id.

    Pages 1 to 6 of the related-videos endpoint are requested (10 items per
    page) and every <li> on each page is read.

    Returns a dict keyed by video title.  Each value is a dict with the keys
    'title', 'preview', 'pic', 'url' and 'time'.  Items that share a title
    overwrite one another, so only the last one is kept.
    """
    result = {}
    for page_no in range(1, 7):
        page_url = '%s/video/relateds?ajax=1&id=%s&page=%s&num_per_page=10' % (link, ids, page_no)
        rep = requests.get(page_url)
        for item in soup(rep.content, 'lxml').findAll('li'):
            title = item.a.get('title')
            result[title] = {
                'title': title,
                'preview': item.img.get('data-mediabook'),
                'pic': item.img.get('data-src'),
                'url': item.a.get('href'),
                'time': item.var.text,
            }
    return result
""" | |
print(getVideoUrl("https://www.pornhub.com/view_video.php?viewkey=ph599489118380e")) | |
print(getVideoSimilar(128823211)) | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html><body><table><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph599489118380e</td><td>Japanese girl humping</td><td>https://ci.phncdn.com/videos/201708/16/128823211/original/(m=eaf8Ggaaaa)(mh=i-ju8bZdsPqZYcj8)6.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5a1481ecead16</td><td>Sweaty Japanese Lesbians</td><td>https://ci.phncdn.com/videos/201711/21/142087522/original/(m=eaf8Ggaaaa)(mh=HxZKHwBrOByK2ZtF)16.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=705504768&pkey=40793002</td><td>Galitsin - Oil babes (117)(Liza and Natia)</td><td>https://ci.phncdn.com/videos/201404/01/24991662/original/(m=eaf8Ggaaaa)(mh=aQp9R0GzV8lePzjn)11.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph599462a6b0a19</td><td>Japanese girl humping</td><td>https://di.phncdn.com/videos/201708/16/128808371/original/(m=eaf8Ggaaaa)(mh=NwTsEC8bCUfrFFzM)3.jpg</td></tr><tr><td>https://nl.pornhub.com/view_video.php?viewkey=ph599489118380e</td><td>Japanse meid rijdend</td><td>https://di.phncdn.com/videos/201708/16/128823211/original/(m=eaf8Ggaaaa)(mh=i-ju8bZdsPqZYcj8)6.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5bed89b9d03ed</td><td>3 Chances to Cum! 
- Episode 2 - JOI Games With Saffron!</td><td>https://ci.phncdn.com/videos/201811/15/192200201/thumbs_20/(m=eaf8Ggaaaa)(mh=D31NWYFkFSsGRG3a)1.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5afab7de4286c&pkey=76998801</td><td>Stacey Poole in HOT pink slip and black satin panties</td><td>https://ci.phncdn.com/videos/201805/15/166257401/original/(m=eaf8Ggaaaa)(mh=UudbzW35XtwZvudl)11.jpg</td></tr><tr><td>https://it.pornhub.com/view_video.php?viewkey=705504768&pkey=40793002</td><td>Galitsin - Bellezze unte (117)(Liza e Natia)</td><td>https://ci.phncdn.com/videos/201404/01/24991662/original/(m=eaf8Ggaaaa)(mh=aQp9R0GzV8lePzjn)11.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph57b330d3e415f</td><td>TeenFuns Sabina</td><td>https://ci.phncdn.com/videos/201608/16/86021801/original/(m=eaf8Ggaaaa)(mh=d-EH3yJohv4LF8QS)3.jpg</td></tr><tr><td>https://nl.pornhub.com/view_video.php?viewkey=ph5649f17ac53e2</td><td>Wat dan ook droogneuken</td><td>https://ci.phncdn.com/videos/201511/16/61748201/original/(m=eaf8Ggaaaa)(mh=EwjjVsO-rBz6BJLF)1.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5af6eb2c4616f</td><td>Joe in super sexy satin lingerie . 
What an ass!</td><td>https://ci.phncdn.com/videos/201805/12/165834661/original/(m=eaf8Ggaaaa)(mh=rtd7yiCHDnLxJaka)11.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5a366575b8fb9</td><td>Japanese schoolgirl sexual intercourse 02</td><td>https://di.phncdn.com/videos/201712/17/145854812/original/(m=eaf8Ggaaaa)(mh=qBaWMzD7C120H9Y1)14.jpg</td></tr><tr><td>https://it.pornhub.com/view_video.php?viewkey=ph5afab7de4286c&pkey=76998801</td><td>Stacey Poole in HOT pink slip and black satin panties</td><td>https://ci.phncdn.com/videos/201805/15/166257401/original/(m=eaf8Ggaaaa)(mh=UudbzW35XtwZvudl)11.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph57f76d6bb3ab6</td><td>Atomi Slender Jav Idol Fucked On The Floor And Standing Very Cute Teen Babe</td><td>https://ci.phncdn.com/videos/201610/07/92100831/original/(m=eaf8Ggaaaa)(mh=QHa9IFl-IeolyYYO)9.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5aeda0606009b</td><td>G-Queen Waka & Maria Dirigent</td><td>https://ci.phncdn.com/videos/201805/05/164871831/original/(m=eaf8Ggaaaa)(mh=L4cKucXMC9J2w3Mq)1.jpg</td></tr><tr><td>https://de.pornhub.com/view_video.php?viewkey=ph5a366575b8fb9</td><td>Geschlechtsverkehr des japanischen Schulmädchens 02</td><td>https://ci.phncdn.com/videos/201712/17/145854812/original/(m=eaf8Ggaaaa)(mh=qBaWMzD7C120H9Y1)14.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph590ef4e9424a0</td><td>Candy TeenFuns</td><td>https://ci.phncdn.com/videos/201705/07/115635981/original/(m=eaf8Ggaaaa)(mh=c4xk_C0JcYAc_Vk7)8.jpg</td></tr><tr><td>https://de.pornhub.com/view_video.php?viewkey=ph599462a6b0a19</td><td>Japanese girl humping</td><td>https://ci.phncdn.com/videos/201708/16/128808371/original/(m=eaf8Ggaaaa)(mh=NwTsEC8bCUfrFFzM)3.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5af81d3a4e1c0</td><td>Stacey Poole in HOT blue satin panties and 
pantyhose</td><td>https://ci.phncdn.com/videos/201805/13/165963021/original/(m=eaf8Ggaaaa)(mh=3HrbUBmC1NXfss7E)5.jpg</td></tr><tr><td>https://jp.pornhub.com/view_video.php?viewkey=ph5649f17ac53e2</td><td>体操服の美少女JKが発情して机の角にまんこを擦りつけて一人エッチに夢中</td><td>https://ci.phncdn.com/videos/201511/16/61748201/original/(m=eaf8Ggaaaa)(mh=EwjjVsO-rBz6BJLF)1.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5aa3f728e35c6</td><td>Stacey Poole - Blue Dress with White Dots</td><td>https://ci.phncdn.com/videos/201803/10/157606552/original/(m=eaf8Ggaaaa)(mh=ydqFUi5XjoICav7f)5.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=1015219089&pkey=14883271</td><td>Mofos - Amy Quinn makes studding hot</td><td>https://di.phncdn.com/videos/201406/20/28380861/original/(m=eaf8Ggaaaa)(mh=Jsm_JI87rhJWbTBd)0.jpg</td></tr><tr><td>https://jp.pornhub.com/view_video.php?viewkey=ph5c63624dca727</td><td>Cute teen from small town gets a big dick in her **full video**</td><td>https://ci.phncdn.com/videos/201902/13/207409091/original/(m=eaf8Ggaaaa)(mh=wcdsjQ_NgiEwkT9L)1.jpg</td></tr><tr><td>https://rt.pornhub.com/view_video.php?viewkey=1015219089&pkey=14883271</td><td>Mofos - Amy Quinn makes studding hot</td><td>https://ci.phncdn.com/videos/201406/20/28380861/original/(m=eaf8Ggaaaa)(mh=Jsm_JI87rhJWbTBd)0.jpg</td></tr><tr><td>https://cz.pornhub.com/view_video.php?viewkey=705504768&pkey=40793002</td><td>Galitsin - Oil babes (117)(Liza and Natia)</td><td>https://ci.phncdn.com/videos/201404/01/24991662/original/(m=eaf8Ggaaaa)(mh=aQp9R0GzV8lePzjn)11.jpg</td></tr><tr><td>https://fr.pornhub.com/view_video.php?viewkey=ph5af6eb2c4616f</td><td>Joe in super sexy satin lingerie . 
What an ass!</td><td>https://ci.phncdn.com/videos/201805/12/165834661/original/(m=eaf8Ggaaaa)(mh=rtd7yiCHDnLxJaka)11.jpg</td></tr><tr><td>https://jp.pornhub.com/view_video.php?viewkey=ph5c530b4c3d9b5</td><td>Lucky Guy Fucks Thick Teen Slut From Tinder - Serena Skye</td><td>https://ci.phncdn.com/videos/201901/31/204963792/thumbs_40/(m=eaf8Ggaaaa)(mh=AnvYwwNt6thpGQnH)16.jpg</td></tr><tr><td>https://jp.pornhub.com/view_video.php?viewkey=1004430518&pkey=21838581</td><td>Lexi Fucks</td><td>https://di.phncdn.com/videos/201501/06/37054641/original/(m=eaf8Ggaaaa)(mh=3raNfRaWEpiEgX1l)8.jpg</td></tr><tr><td>https://cz.pornhub.com/view_video.php?viewkey=ph563a496c37bb4</td><td>VERY YOUNG PETITE TEEN FUCKING OLDER GUY</td><td>https://ci.phncdn.com/videos/201511/04/60905821/original/(m=eaf8Ggaaaa)(mh=05fshuM4x7Ig-3Nh)10.jpg</td></tr><tr><td>https://jp.pornhub.com/view_video.php?viewkey=ph56ffe7089acb8</td><td>エッチに興味津々な美少女が棒をまんこに必死に擦りつけて一人エッチに夢中</td><td>https://ci.phncdn.com/videos/201604/02/72712752/original/(m=eaf8Ggaaaa)(mh=4xmirhBOIkS5N2EE)10.jpg</td></tr><tr><td>https://fr.pornhub.com/view_video.php?viewkey=ph59924f7a50a8e</td><td>OS Rae</td><td>https://ci.phncdn.com/videos/201708/15/128630011/original/(m=eaf8Ggaaaa)(mh=83MwvT4gHWGDwDpR)13.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5c7725f0bc523</td><td>Big cock fit stud jerking and flexing</td><td>https://ci.phncdn.com/videos/201902/28/210293161/original/(m=eaf8Ggaaaa)(mh=5McD-v5ac_xiU_NV)8.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph56ffe7089acb8</td><td>Jav fh14 Uncensored</td><td>https://ci.phncdn.com/videos/201604/02/72712752/original/(m=eaf8Ggaaaa)(mh=4xmirhBOIkS5N2EE)10.jpg</td></tr><tr><td>https://cz.pornhub.com/view_video.php?viewkey=ph5b590847deea1</td><td>Bratty Sis - Lana Rhoades Big Ass Bouncing On My Cock 
S5:E2</td><td>https://ci.phncdn.com/videos/201807/25/176011171/original/(m=eaf8Ggaaaa)(mh=0sWA72GXW885rLRw)0.jpg</td></tr><tr><td>https://cz.pornhub.com/view_video.php?viewkey=1590187130</td><td>Small titted cutie rubs her meaty cunt</td><td>https://ci.phncdn.com/videos/201312/23/21198701/original/(m=eaf8Ggaaaa)(mh=PiFCMpBa0Aj3SzoE)15.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5b7dc75f5ee81</td><td>Extreme Pussy Closeup!</td><td>https://ci.phncdn.com/videos/201808/22/179719211/original/(m=eaf8Ggaaaa)(mh=jnhoYMRFgbCPP3k6)1.jpg</td></tr><tr><td>https://nl.pornhub.com/view_video.php?viewkey=ph5c7725f0bc523</td><td>Big cock fit stud jerking and flexing</td><td>https://ci.phncdn.com/videos/201902/28/210293161/original/(m=eaf8Ggaaaa)(mh=5McD-v5ac_xiU_NV)8.jpg</td></tr><tr><td>https://es.pornhub.com/view_video.php?viewkey=ph5b590847deea1</td><td>Bratty Sis - Lana Rhoades Culo Grande Botando En Mi Polla S5:E2</td><td>https://ci.phncdn.com/videos/201807/25/176011171/original/(m=eaf8Ggaaaa)(mh=0sWA72GXW885rLRw)0.jpg</td></tr><tr><td>https://nl.pornhub.com/view_video.php?viewkey=1004499691&pkey=5315391</td><td>TBA Lolita Cheng 15h</td><td>https://ci.phncdn.com/videos/201503/02/45547421/original/(m=eaf8Ggaaaa)(mh=DvWhgo_AMKkcymun)6.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=1067269321&pkey=30662761</td><td>Busty webcam tease</td><td>https://di.phncdn.com/videos/201306/25/13943981/original/(m=eaf8Ggaaaa)(mh=IyrFTeBOUgXtGwZN)1.jpg</td></tr><tr><td>https://jp.pornhub.com/view_video.php?viewkey=117785157&pkey=68529122</td><td>巨乳のむっちりお姉さん達がお昼からホテルでヤリチン男達と乱交プレイ</td><td>https://ci.phncdn.com/videos/201404/27/26092182/original/(m=eaf8Ggaaaa)(mh=PwBkuOsGwRtZyq0l)12.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5c60809521370</td><td>Perfect body teen in a sneakers riding big cock | 18 yo amateur 
squirt</td><td>https://ci.phncdn.com/videos/201902/10/206995091/original/(m=eaf8Ggaaaa)(mh=qfh7oZ8Nlmkop4yB)8.jpg</td></tr><tr><td>https://rt.pornhub.com/view_video.php?viewkey=ph59924f7a50a8e</td><td>OS Rae</td><td>https://ci.phncdn.com/videos/201708/15/128630011/original/(m=eaf8Ggaaaa)(mh=83MwvT4gHWGDwDpR)13.jpg</td></tr><tr><td>https://es.pornhub.com/view_video.php?viewkey=ph5c6f503b84fd9</td><td>Sexo duro de perrito a una puta rockera</td><td>https://ci.phncdn.com/videos/201902/22/209127951/thumbs_10/(m=eaf8Ggaaaa)(mh=nASjjjhAVYFeCIDX)6.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5b6ac8d5f2673</td><td>Ass Smashin’ Big Dick – FX Rios, Danny Gunn</td><td>https://ci.phncdn.com/videos/201808/08/177767761/original/(m=eaf8Ggaaaa)(mh=8pTfhb49yOq2-pcG)6.jpg</td></tr><tr><td>https://cz.pornhub.com/view_video.php?viewkey=1689489014</td><td>Young Teen Foxi Fucked</td><td>https://ci.phncdn.com/videos/201403/31/24963522/original/(m=eaf8Ggaaaa)(mh=jsRLq8dWT7lvWSP7)6.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5c7dddd55b83a</td><td>Horny 18 Squirts with Anal Plug</td><td>https://ci.phncdn.com/videos/201903/05/211281581/original/(m=eaf8Ggaaaa)(mh=Nzn8vMWNUp7T0fLX)10.jpg</td></tr><tr><td>https://cz.pornhub.com/view_video.php?viewkey=ph57278056bb3be</td><td>galitsin</td><td>https://ci.phncdn.com/videos/201605/02/75560661/original/(m=eaf8Ggaaaa)(mh=OxlVPLE13Tzwhvfu)6.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5c817d5244183</td><td>Petite MILF Stepmom & Bratty Step Sister Teach You How to Jerk Off!</td><td>https://ci.phncdn.com/videos/201903/07/211794862/original/(m=qH5HJOUbeaf8Ggaaaa)(mh=uTOJcRwuK2UAaSVN)0.jpg</td></tr><tr><td>https://cz.pornhub.com/view_video.php?viewkey=ph583500b5a9fc1&pkey=41525172</td><td>Pretty Teenage Lesbos Fuck Party With 2 Old 
Grandpas</td><td>https://ci.phncdn.com/videos/201611/23/96995912/original/(m=eaf8Ggaaaa)(mh=r62i2AC8EINzlRvq)4.jpg</td></tr><tr><td>https://fr.pornhub.com/view_video.php?viewkey=ph5ba0a301685ab</td><td>Petite étudiante coquine ne veut pas réviser mais préfère baiser et sucer</td><td>https://ci.phncdn.com/videos/201809/18/183480811/thumbs_20/(m=eaf8Ggaaaa)(mh=nBl1eMOeQnzmr29F)12.jpg</td></tr><tr><td>https://it.pornhub.com/view_video.php?viewkey=ph5af6efd76a73e</td><td>Kristina teasing in HOT black satin panties and pantyhose</td><td>https://ci.phncdn.com/videos/201805/12/165836891/original/(m=eaf8Ggaaaa)(mh=AWUd4HvhEcj3TUzv)11.jpg</td></tr><tr><td>https://cz.pornhub.com/view_video.php?viewkey=1810053541</td><td>Cute first timer fucks her tight twat</td><td>https://ci.phncdn.com/videos/201408/04/30309082/original/(m=eaf8Ggaaaa)(mh=HeUwkjH4ktO_xSYI)2.jpg</td></tr><tr><td>https://de.pornhub.com/view_video.php?viewkey=ph58bf5eefa53cb</td><td>Japanese girl humping</td><td>https://ci.phncdn.com/videos/201703/08/108878912/original/(m=eaf8Ggaaaa)(mh=Mu126RfK6IWTMEdJ)14.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph57b31ac8b7a6a</td><td>TeenFuns Dori</td><td>https://ci.phncdn.com/videos/201608/16/86019681/original/(m=eaf8Ggaaaa)(mh=1w1vsZIs2pjrLR3r)2.jpg</td></tr><tr><td>https://pl.pornhub.com/view_video.php?viewkey=ph57b31ac8b7a6a</td><td>TeenFuns Dori</td><td>https://ci.phncdn.com/videos/201608/16/86019681/original/(m=eaf8Ggaaaa)(mh=1w1vsZIs2pjrLR3r)2.jpg</td></tr><tr><td>https://it.pornhub.com/view_video.php?viewkey=ph564274896c599</td><td>milena d 
outside</td><td>https://ci.phncdn.com/videos/201511/10/61337971/original/(m=eaf8Ggaaaa)(mh=-8Rf8a6mOlDljlKI)11.jpg</td></tr><tr><td>https://jp.pornhub.com/view_video.php?viewkey=ph5994891172ebb</td><td>黒髪ロングの激カワ美人OLが会議室でまんこを机に擦りつけて一人エッチ</td><td>https://ci.phncdn.com/videos/201708/16/128821971/original/(m=eaf8Ggaaaa)(mh=IUseEf39uuiMvw9z)11.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5c79065315bc3</td><td>Amateur Teen Gets Her First Facial</td><td>https://ci.phncdn.com/videos/201903/01/210568071/original/(m=q0LOYNUbeaf8Ggaaaa)(mh=45P7fhC2aDnX697q)0.jpg</td></tr><tr><td>https://it.pornhub.com/view_video.php?viewkey=ph56ef40f89c7ba</td><td>Millena D & Sonia Teen Lesbian Plays</td><td>https://di.phncdn.com/videos/201603/21/71595041/original/(m=eaf8Ggaaaa)(mh=1l3woAG1vOwAJWiJ)2.jpg</td></tr><tr><td>https://cz.pornhub.com/view_video.php?viewkey=1996449162</td><td>SEXY AMATEUR TEEN GIRL SUNNA MINNIEMOUSE AA 4H0T1</td><td>https://ci.phncdn.com/videos/201507/23/53550371/original/(m=eaf8Ggaaaa)(mh=_ga-F-e2f5YOHzi_)11.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph57b31ac8e2174</td><td>TeenFuns Ellen</td><td>https://ci.phncdn.com/videos/201608/16/86019721/original/(m=eaf8Ggaaaa)(mh=Hygy6hypdNzGkHtG)3.jpg</td></tr><tr><td>https://cz.pornhub.com/view_video.php?viewkey=ph5b297161cdebe&pkey=80081891</td><td>Jodi Taylor fuck porn</td><td>https://ci.phncdn.com/videos/201806/19/171084681/original/(m=eaf8Ggaaaa)(mh=gnjp_u2_9jSZH6KT)11.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5c61aa3ed0511</td><td>[Role-Play] Naughty School FemBoy gets Bareback Punished by Fit Teacher</td><td>https://di.phncdn.com/videos/201902/11/207151331/original/(m=eaf8Ggaaaa)(mh=DnZyLicSM6cmAz1R)13.jpg</td></tr><tr><td>https://it.pornhub.com/view_video.php?viewkey=ph5af81a1af3f62</td><td>Brandy showing of her sexy body in black satin 
panties</td><td>https://ci.phncdn.com/videos/201805/13/165961891/original/(m=eaf8Ggaaaa)(mh=qN8DjZMyXZGXrl3M)13.jpg</td></tr><tr><td>https://jp.pornhub.com/view_video.php?viewkey=ph5c79065315bc3</td><td>Amateur Teen Gets Her First Facial</td><td>https://di.phncdn.com/videos/201903/01/210568071/original/(m=q0LOYNUbeaf8Ggaaaa)(mh=45P7fhC2aDnX697q)0.jpg</td></tr><tr><td>https://nl.pornhub.com/view_video.php?viewkey=ph5a1481ecead16</td><td>Zoete Japanse lesbiennes</td><td>https://ci.phncdn.com/videos/201711/21/142087522/original/(m=eaf8Ggaaaa)(mh=HxZKHwBrOByK2ZtF)16.jpg</td></tr><tr><td>https://nl.pornhub.com/view_video.php?viewkey=ph59643d7ced685</td><td>Jbtebv</td><td>https://di.phncdn.com/videos/201707/11/123949071/original/(m=eaf8Ggaaaa)(mh=0E6eFpyXWIRuzx22)12.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=632683254&pkey=52382771</td><td>teen masturbates in front of a mirror</td><td>https://ci.phncdn.com/videos/201507/22/53467931/original/(m=eaf8Ggaaaa)(mh=yHIex4-NYdJpk3b6)12.jpg</td></tr><tr><td>https://rt.pornhub.com/view_video.php?viewkey=ph58fc4bfd51df6</td><td>BoppingBabes - Sarah G Naked Massage</td><td>https://ci.phncdn.com/videos/201704/23/114155781/original/(m=eaf8Ggaaaa)(mh=11qUE30P9zZ3JDbq)14.jpg</td></tr><tr><td>https://cz.pornhub.com/view_video.php?viewkey=533488727&pkey=13678061</td><td>Alexa,Liza y Katia</td><td>https://ci.phncdn.com/videos/200911/22/1002286/original/(m=eaf8Ggaaaa)(mh=7Slj3NDetCQKvTVn)16.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=1062535995&pkey=5797441</td><td>Officially cracked in half!</td><td>https://ci.phncdn.com/videos/201505/20/49265961/original/(m=eaf8Ggaaaa)(mh=wCRtpcOx8qUCHd1k)16.jpg</td></tr><tr><td>https://fr.pornhub.com/view_video.php?viewkey=1098216357&pkey=44513512</td><td>Gorgeous French brunette shows off her lingerie & 
masturbates</td><td>https://ci.phncdn.com/videos/201302/01/9477771/original/(m=eaf8Ggaaaa)(mh=rgZ2v5dPuGimRTzn)11.jpg</td></tr><tr><td>https://rt.pornhub.com/view_video.php?viewkey=ph5c83d42278f92</td><td>Маленькая детка в колготках трахается так классно, что он забыл вытащить</td><td>https://ci.phncdn.com/videos/201903/09/212123622/thumbs_35/(m=eaf8Ggaaaa)(mh=SNKpYciLDOY0xc5L)6.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5753f513237af</td><td>CDBBPrem</td><td>https://ci.phncdn.com/videos/201606/05/78647131/original/(m=eaf8Ggaaaa)(mh=sQ44sPC0052nVLgV)13.jpg</td></tr><tr><td>https://pl.pornhub.com/view_video.php?viewkey=705504768&pkey=40793002</td><td>Galitsin - Oil babes (117)(Liza and Natia)</td><td>https://ci.phncdn.com/videos/201404/01/24991662/original/(m=eaf8Ggaaaa)(mh=aQp9R0GzV8lePzjn)11.jpg</td></tr><tr><td>https://pl.pornhub.com/view_video.php?viewkey=ph5c7856a0a3d55</td><td>Amateur Girl Passionately Pegging Him To Massive Cumshot - She Can Pound!</td><td>https://di.phncdn.com/videos/201902/28/210471051/original/(m=q593WNUbeaf8Ggaaaa)(mh=1Bi4FTaQFtBX8rhx)0.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph562e12abe42e2</td><td>Masturbation in homemade</td><td>https://ci.phncdn.com/videos/201510/26/60305761/original/(m=eaf8Ggaaaa)(mh=IWkgFRi1s_kOVGSf)1.jpg</td></tr><tr><td>https://rt.pornhub.com/view_video.php?viewkey=ph55dc9012104f8&pkey=82544131</td><td>YiYa - Мост в Терабитию</td><td>https://di.phncdn.com/videos/201508/25/55768551/original/(m=eaf8Ggaaaa)(mh=BCRnMF_0ULxrUDWK)0.jpg</td></tr><tr><td>https://rt.pornhub.com/view_video.php?viewkey=ph5c5cc548c7b44</td><td>Занялся сексом со студенткой и кончил ей в 
киску</td><td>https://ci.phncdn.com/videos/201902/08/206419291/original/(m=eaf8Ggaaaa)(mh=0crFbIhhYBkQ9cnz)11.jpg</td></tr><tr><td>https://jp.pornhub.com/view_video.php?viewkey=ph599462a6b0a19</td><td>欲求不満のむっちり人妻が我慢できず机の角にまんこを擦りつけてオナニー</td><td>https://ci.phncdn.com/videos/201708/16/128808371/original/(m=eaf8Ggaaaa)(mh=NwTsEC8bCUfrFFzM)3.jpg</td></tr><tr><td>https://cz.pornhub.com/view_video.php?viewkey=356856281</td><td>Beautiful Beata Undine gets laid with oldman backyard</td><td>https://ci.phncdn.com/videos/201408/25/31159012/original/(m=eaf8Ggaaaa)(mh=jbDVG0Za8f_ZMcQT)5.jpg</td></tr><tr><td>https://jp.pornhub.com/view_video.php?viewkey=486461791</td><td>色白のむっちりお姉さん達が極太ディルドに跨って喘ぎながら腰振り</td><td>https://di.phncdn.com/videos/201312/16/20960971/original/(m=eaf8Ggaaaa)(mh=ppPPfJdNq85wX9Ai)1.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=1001373730&pkey=40495882</td><td>russian soldiers</td><td>https://ci.phncdn.com/videos/201312/08/20641691/original/(m=eaf8Ggaaaa)(mh=mzrKpJ-upoxWR4hz)4.jpg</td></tr><tr><td>https://it.pornhub.com/view_video.php?viewkey=356856281</td><td>La Bella Beata Undine scopa con un vecchio nel retro</td><td>https://ci.phncdn.com/videos/201408/25/31159012/original/(m=eaf8Ggaaaa)(mh=jbDVG0Za8f_ZMcQT)5.jpg</td></tr><tr><td>https://nl.pornhub.com/view_video.php?viewkey=ph5c6f503b84fd9</td><td>Sexo duro de perrito a una puta rockera</td><td>https://ci.phncdn.com/videos/201902/22/209127951/thumbs_10/(m=eaf8Ggaaaa)(mh=nASjjjhAVYFeCIDX)6.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph58b9b676968cb</td><td>Zander Craze & Leo Alexander Flip flip fuck</td><td>https://ci.phncdn.com/videos/201703/03/108339582/original/(m=eaf8Ggaaaa)(mh=ZQRobfHqCGrcL5kO)13.jpg</td></tr><tr><td>https://cz.pornhub.com/view_video.php?viewkey=1062535995&pkey=5797441</td><td>Officially cracked in 
half!</td><td>https://ci.phncdn.com/videos/201505/20/49265961/original/(m=eaf8Ggaaaa)(mh=wCRtpcOx8qUCHd1k)16.jpg</td></tr><tr><td>https://pl.pornhub.com/view_video.php?viewkey=ph5c7638a076381</td><td>Amateur homemade anal femdom pegging - Missionary pegging POV</td><td>https://ci.phncdn.com/videos/201902/27/210146641/thumbs_5/(m=eaf8Ggaaaa)(mh=NLRJtBBXIIC4QcEa)2.jpg</td></tr><tr><td>https://cz.pornhub.com/view_video.php?viewkey=ph5c069603694c5</td><td>Little Brother Fights With Dad And Comes To Me For Consolation</td><td>https://ci.phncdn.com/videos/201812/04/195122281/original/(m=eaf8Ggaaaa)(mh=iXGwI72ZlasZabhZ)1.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5c7be19fa102d</td><td>Tinder Girl with Amazing Big Ass Loves Doggystyle</td><td>https://ci.phncdn.com/videos/201903/03/210980861/original/(m=eaf8Ggaaaa)(mh=7zZuwotY8LmvkRpo)14.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph591fa58a63119</td><td>Onlytease - Scarlett t</td><td>https://ci.phncdn.com/videos/201705/20/117108021/original/(m=eaf8Ggaaaa)(mh=u0feRgvY4qZI8QDY)10.jpg</td></tr><tr><td>https://jp.pornhub.com/view_video.php?viewkey=ph5c4a590761436</td><td>Mom & Step Son Have Breakfast in Bed - Amber Chase - Family Therapy</td><td>https://ci.phncdn.com/videos/201901/25/203632791/thumbs_35/(m=eaf8Ggaaaa)(mh=nWwzQaHnq94AXBKN)9.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph59924f7a50a8e</td><td>OS Rae</td><td>https://di.phncdn.com/videos/201708/15/128630011/original/(m=eaf8Ggaaaa)(mh=83MwvT4gHWGDwDpR)13.jpg</td></tr><tr><td>https://it.pornhub.com/view_video.php?viewkey=ph56ffe7089acb8</td><td>Giapponese gode senza 
censura</td><td>https://ci.phncdn.com/videos/201604/02/72712752/original/(m=eaf8Ggaaaa)(mh=4xmirhBOIkS5N2EE)10.jpg</td></tr><tr><td>https://cz.pornhub.com/view_video.php?viewkey=ph5b012aee82df5&pkey=83289291</td><td>私人玩物水晶棒1</td><td>https://di.phncdn.com/videos/201805/20/166930872/original/(m=eaf8Ggaaaa)(mh=rLfhdeoXklTMsDN8)1.jpg</td></tr><tr><td>https://fr.pornhub.com/view_video.php?viewkey=ph59643d7ced685</td><td>Trans et écolière baisent dans la classe après les cours</td><td>https://ci.phncdn.com/videos/201707/11/123949071/original/(m=eaf8Ggaaaa)(mh=0E6eFpyXWIRuzx22)12.jpg</td></tr><tr><td>https://it.pornhub.com/view_video.php?viewkey=ph5c3abc5070c24</td><td>SHE CANT STOP SQUIRTING AND MOANING</td><td>https://ci.phncdn.com/videos/201901/13/201504071/original/(m=eaf8Ggaaaa)(mh=4dvbrpzczza17JgN)8.jpg</td></tr><tr><td>https://rt.pornhub.com/view_video.php?viewkey=ph5c6086fba8ad0</td><td>Трахнул молодую соску! - SolaZola</td><td>https://ci.phncdn.com/videos/201902/10/207002421/thumbs_9/(m=eaf8Ggaaaa)(mh=ctJmYNKRfcON30nq)12.jpg</td></tr><tr><td>https://www.pornhub.com/view_video.php?viewkey=ph5ac82e7d3fb34</td><td>hot selfie girl masturbation</td><td>https://ci.phncdn.com/videos/201804/07/161107702/original/(m=eaf8Ggaaaa)(mh=O_9Q-xD_LNwfUp4q)7.jpg</td></tr></table></body></html> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class HtmlOutputer(object):
    """Collects scraped records and renders them as one HTML table."""

    def __init__(self):
        # Records in the order they were collected; output_html reads the
        # 'url', 'title' and 'src' keys of each one.
        self.datas = []

    def collect_data(self, data):
        """Queue *data* for output; None is ignored."""
        if data is None:
            return
        self.datas.append(data)

    def output_html(self):
        """Write all collected records to ``output.html`` (UTF-8).

        Each record becomes one table row: url, title and an <img> whose
        ``src`` is the record's 'src' value.  Values are HTML-escaped because
        they come from scraped pages and may contain ``&``, ``<`` or quotes.

        Raises KeyError when a record lacks 'url', 'title' or 'src'; the rows
        are built before the file is opened, so a bad record does not leave a
        truncated file behind.
        """
        import html  # local import: this module has no import block of its own

        rows = []
        for data in self.datas:
            # str() keeps non-string values (e.g. a missing src stored as
            # None) printable, as the '%s' formatting always did.
            cells = (
                html.escape(str(data['url'])),
                html.escape(str(data['title'])),
                html.escape(str(data['src'])),
            )
            rows.append(
                '<tr><td>%s</td><td>%s</td>'
                '<td><img src="%s" class="img"/></td></tr>' % cells
            )

        # The with-block guarantees the file is flushed and closed.
        with open('output.html', 'w', encoding='utf-8') as fout:
            fout.write('<html><body><table>')
            fout.write(''.join(rows))
            fout.write('</table></body></html>')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import urllib.parse | |
import re | |
class HtmlParser(object):
    """Extracts follow-up video links and page metadata from a video page."""

    def _get_new_urls(self, page_url, soup):
        """Return the set of absolute video-page URLs linked from the page.

        Every <a> whose href contains ``/view_video.php?viewkey=...`` is
        taken; relative hrefs are resolved against *page_url*.
        """
        # The dot is escaped so that only a literal "view_video.php" matches.
        pattern = re.compile(r"/view_video\.php\?viewkey=.+")
        links = soup.find_all('a', href=pattern)
        return {urllib.parse.urljoin(page_url, link['href']) for link in links}

    def _get_new_data(self, page_url, soup):
        """Return ``{'url', 'title', 'src'}`` for the page.

        'title' is the text of the first ``<span class="inlineFree">`` and
        'src' is the src of the first <img> inside ``<div class="thumbnail">``.

        Raises AttributeError when either element is missing.
        """
        title = soup.find('span', class_='inlineFree')
        img = soup.find('div', class_='thumbnail').find("img")
        return {
            'url': page_url,
            'title': title.get_text(),
            'src': img.get('src'),
        }

    def parse(self, page_url, html_cont):
        """Parse *html_cont* and return ``(new_urls, new_data)``.

        When *page_url* or *html_cont* is None there is nothing to parse and
        ``(set(), None)`` is returned, so a caller that unpacks the result
        into two names keeps working instead of failing on a bare None.
        """
        if page_url is None or html_cont is None:
            return set(), None
        soup = BeautifulSoup(html_cont, 'html.parser', from_encoding='utf-8')
        new_urls = self._get_new_urls(page_url, soup)
        new_data = self._get_new_data(page_url, soup)
        return new_urls, new_data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8
from pornhub import url, download, parser, outputer | |
class SpiderMain(object):
    """Drives the crawl: schedules URLs, downloads, parses and collects."""

    def __init__(self):
        self.urls = url.UrlManager()
        self.downloader = download.HtmlDownloader()
        self.parser = parser.HtmlParser()
        self.outputer = outputer.HtmlOutputer()

    def craw(self, root_url, max_count=100):
        """Crawl from *root_url* and write the collected data as HTML.

        Pages are taken from the URL manager until it is empty or until
        *max_count* pages have been processed successfully (default 100, the
        previous fixed limit).  At least one page is always attempted.  A page
        that fails to download or parse is reported with 'crawl failed' and
        does not count towards the limit.

        Only ``Exception`` is caught per page, so Ctrl-C and ``SystemExit``
        stop the crawl instead of being swallowed.  The output is written in
        a ``finally`` block, so results collected so far are still saved.
        """
        count = 1
        self.urls.add_new_url(root_url)
        try:
            while self.urls.has_new_url():
                try:
                    new_url = self.urls.get_new_url()
                    print('crawl %d : %s' % (count, new_url))
                    html_cont = self.downloader.download(new_url)
                    new_urls, new_data = self.parser.parse(new_url, html_cont)
                    self.urls.add_new_urls(new_urls)
                    self.outputer.collect_data(new_data)
                except Exception:
                    # Best effort: skip this page and move on to the next URL.
                    print('crawl failed')
                    continue
                if count >= max_count:
                    print('Success!')
                    break
                count += 1
        finally:
            self.outputer.output_html()
if __name__ == "__main__":
    # Script entry point: start the crawl from a single seed video page.
    SpiderMain().craw(
        "https://www.pornhub.com/view_video.php?viewkey=ph599489118380e"
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class UrlManager(object):
    """Tracks which URLs are still to be crawled and which were handed out."""

    def __init__(self):
        self.new_urls = set()   # waiting to be crawled
        self.old_urls = set()   # already handed out by get_new_url

    def add_new_url(self, url):
        """Queue *url* unless it is None, already queued or already handed out."""
        if url is None:
            return
        already_known = url in self.new_urls or url in self.old_urls
        if already_known:
            return
        self.new_urls.add(url)

    def add_new_urls(self, urls):
        """Queue every URL in *urls*; None or an empty collection is a no-op."""
        if urls is not None and len(urls) > 0:
            for candidate in urls:
                self.add_new_url(candidate)

    def has_new_url(self):
        """Return True while at least one URL is waiting to be crawled."""
        return bool(self.new_urls)

    def get_new_url(self):
        """Remove one waiting URL, record it as handed out, and return it.

        Which URL is chosen is arbitrary (``set.pop``).  Raises KeyError when
        nothing is waiting.
        """
        picked = self.new_urls.pop()
        self.old_urls.add(picked)
        return picked
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment