Last active
August 9, 2016 08:37
-
-
Save sillykelvin/8630120 to your computer and use it in GitHub Desktop.
A node.js script for sohu video downloading
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
var fs = require('fs'); | |
var http = require('http'); | |
var request = require('request'); | |
/**
 * Splits the raw text of the url list file into individual video urls.
 *
 * Both LF and CRLF line endings are accepted, surrounding whitespace is
 * stripped from every line, and lines that end up empty are dropped.
 *
 * @param {string} text - Raw contents of the url list file.
 * @returns {string[]} The video urls, in file order.
 */
function parseUrlList(text) {
    return text.split(/\r?\n/).map(function (line) {
        return line.trim();
    }).filter(function (line) {
        return line.length > 0;
    });
}

/**
 * Script entry point: reads the url list file from the current working
 * directory and starts the analysis of every url found in it.
 *
 * The logic lives in a function so that the early exit on a missing file is
 * an ordinary function return rather than a top-level `return`, which is only
 * accepted when the file is loaded through the CommonJS module wrapper.
 */
function main() {
    var urlListFile = 'url.list';

    if (!fs.existsSync(urlListFile)) {
        console.error('[ERROR] the url list file does not exist.');
        return;
    }

    var videoUrls = parseUrlList(fs.readFileSync(urlListFile).toString());

    for (var v = 0; v < videoUrls.length; v++) {
        console.log('[ INFO] Start to analyse the video url: ' + videoUrls[v] + ' ...');
        analyzeVideoUrl(videoUrls[v]);
    }
}

main();
/**
 * Fetches a video page, extracts the numeric video id embedded in its html
 * (`var vid="..."`) and passes the matching json url on to analyzeJsonUrl.
 *
 * Failures (request error, no usable vid in the page) are logged and the url
 * is skipped; nothing is thrown.
 *
 * @param {string} url - Url of the video page to analyse.
 */
function analyzeVideoUrl(url) {
    request(url, function (err, res, body) {
        if (err) {
            console.error('[ERROR] Error opening video url: ' + err);
            return;
        }

        // Only a purely numeric id is accepted: an empty or non-numeric value
        // would otherwise turn into NaN below and produce a "vid=NaN" request.
        var result = /var vid="(\d+)"/i.exec(body);
        if (!result) {
            console.error('[ERROR] Invalid html response.');
            return;
        }

        var vid = parseInt(result[1], 10);

        /***
         * The video quality is relevant to vid, the rule is as below:
         *   1. high definition (default): vid
         *   2. standard definition:       vid + 1
         *   3. super definition:          vid + 2
         *   4. original quality:          vid + 3
         ***/
        var highestQualityVid = vid + 3;
        var jsonUrl = 'http://hot.vrs.sohu.com/vrs_flash.action?vid=' + highestQualityVid;

        analyzeJsonUrl(jsonUrl);
    });
}
/**
 * Turns a video title into a name that is safe to use as one directory level.
 *
 * The title comes from a remote server, so path separators are replaced and
 * names made only of dots ('', '.', '..') are prefixed; this keeps the
 * download directory inside the current working directory.
 *
 * @param {*} title - Title as found in the json response.
 * @returns {string} A directory name without path separators.
 */
function toSafeDirName(title) {
    var name = String(title).replace(/[\/\\]/g, '_');
    return /^\.*$/.test(name) ? '_' + name : name;
}

/**
 * Fetches the json description of a video and starts the analysis of every
 * clip listed in it.
 *
 * The response is expected to carry `allot` (host), `prot` and a `data`
 * object with `tvName`, `clipsBytes`, `clipsURL` and `su`; the three lists
 * must have the same length. One shared `videoProperties` object
 * ({name, finishedCount, totalCount, dir}) is handed to analyzeNewUrl for
 * every clip. Any problem is logged and the video is skipped; nothing is
 * thrown.
 *
 * @param {string} url - Url of the json description.
 */
function analyzeJsonUrl(url) {
    request(url, function (err, res, body) {
        if (err) {
            console.error("[ERROR] Error fetching json data: " + err);
            return;
        }

        // A malformed body must not throw inside the callback: that would
        // take down the whole process together with all other downloads.
        var json;
        try {
            json = JSON.parse(body);
        } catch (e) {
            console.error('[ERROR] Invalid json response: ' + e.message);
            return;
        }

        var data = json && json['data'];
        if (!data ||
                !Array.isArray(data['clipsBytes']) ||
                !Array.isArray(data['clipsURL']) ||
                !Array.isArray(data['su'])) {
            console.error('[ERROR] Invalid json response: clip data is missing.');
            return;
        }

        var host = json['allot'];
        var prot = json['prot'];
        var title = data['tvName'];
        console.log('[ INFO] Url analysis finished, video: ' + title + ', start to analyze the downloading url...');

        var sizes = data['clipsBytes'];
        var clipUrls = data['clipsURL'];
        var sus = data['su'];
        if (clipUrls.length !== sus.length || clipUrls.length !== sizes.length) {
            console.error('[ERROR] Parameter length mismatch.');
            return;
        }

        // Shared by all clips of this video so progress can be tracked.
        var videoProperties = {};
        videoProperties.name = title;
        videoProperties.finishedCount = 0;
        videoProperties.totalCount = clipUrls.length;
        videoProperties.dir = toSafeDirName(title) + '/';

        for (var i = 0; i < clipUrls.length; ++i) {
            var newUrl = 'http://' + host + '/?prot=' + prot + '&file=' + clipUrls[i] + '&new=' + sus[i];
            analyzeNewUrl(newUrl, sus[i], i, videoProperties);
        }
    });
}
/**
 * Left-pads a number with zeros up to the given width.
 *
 * @param {number} num - Number to format.
 * @param {number} size - Minimum length of the result.
 * @returns {string} The zero-padded number.
 */
function padEpisodeNumber(num, size) {
    var s = num + '';
    while (s.length < size) {
        s = '0' + s;
    }
    return s;
}

/**
 * Resolves the final downloading url of one clip and starts its download.
 *
 * The response body is read as a '|'-separated list: field 0 is the base url
 * (its last character is dropped before `su` is appended; presumably a
 * trailing slash, not verifiable from this file) and field 3 is the access
 * key. A response without these fields is logged and the clip is skipped
 * instead of building a "...?key=undefined" url.
 *
 * @param {string} url - Url that answers with the '|'-separated description.
 * @param {string} su - Clip path appended to the base url.
 * @param {number} episodeSequence - Zero-based index of the clip in the video.
 * @param {Object} videoProperties - Shared state of the video; `totalCount`
 *     determines the zero-padding width of the episode number.
 */
function analyzeNewUrl(url, su, episodeSequence, videoProperties) {
    request(url, function (err, res, body) {
        if (err) {
            console.error('[ERROR] Error fetching downloading url: ' + err.message);
            return;
        }

        var items = String(body).split('|');
        if (items.length < 4 || !items[0] || !items[3]) {
            console.error('[ERROR] Invalid downloading url response: ' + body);
            return;
        }

        var downloadUrl = items[0].slice(0, -1) + su + '?key=' + items[3];
        console.log('[ INFO] The downloading url is retrieved: ' + downloadUrl + ', start to download...');

        // Pad to the width of the clip count so the files sort correctly.
        var seq = padEpisodeNumber(episodeSequence, videoProperties.totalCount.toString().length);
        var filename = 'Episode' + seq + '.mp4';

        downloadVideo(downloadUrl, filename, videoProperties);
    });
}
/**
 * Downloads one clip into memory and writes it to
 * `videoProperties.dir + filename`, creating the directory when needed.
 *
 * A failed request or a non-200 status is retried, but only a limited number
 * of times, so a permanently broken url cannot loop forever.
 * `videoProperties.finishedCount` is incremented only after the file has been
 * written successfully; the "All episodes ... are downloaded" message is
 * therefore printed once, when the last successful write completes.
 *
 * @param {string} url - Final downloading url of the clip.
 * @param {string} filename - File name of the clip inside the video directory.
 * @param {Object} videoProperties - Shared state of the video; `dir`, `name`,
 *     `finishedCount` and `totalCount` are used, `finishedCount` is updated.
 * @param {number} [retriesLeft=5] - How many more attempts may follow a
 *     failed one.
 */
function downloadVideo(url, filename, videoProperties, retriesLeft) {
    var remainingRetries = typeof retriesLeft === 'number' ? retriesLeft : 5;
    var fullpath = videoProperties.dir + filename;
    var reqOptions = {
        method: 'GET',
        url: url,
        // null makes request hand over the body as a raw Buffer
        encoding: null
    };

    // Starts another attempt unless the retry budget is used up.
    var retryOrGiveUp = function () {
        if (remainingRetries <= 0) {
            console.error('[ERROR] Giving up on ' + url + ' after too many failed attempts.');
            return;
        }
        console.log('[ INFO] Failed to download ' + url + ', retrying...');
        downloadVideo(url, filename, videoProperties, remainingRetries - 1);
    };

    request(reqOptions, function (err, res, body) {
        if (err) {
            console.error('[ERROR] Error fetching video: ' + err);
            retryOrGiveUp();
            return;
        }

        if (res.statusCode !== 200) {
            console.error('[ERROR] Status code error: ' + res.statusCode + ', body: ' + body);
            retryOrGiveUp();
            return;
        }

        // mkdirSync throws on failure; catch it so that one bad directory
        // does not crash the process and abort every other download.
        try {
            if (!fs.existsSync(videoProperties.dir)) {
                fs.mkdirSync(videoProperties.dir);
            }
        } catch (mkdirErr) {
            console.error('[ERROR] Error creating directory ' + videoProperties.dir + ': ' + mkdirErr);
            return;
        }

        console.log('[ INFO] One episode [' + filename + '] of ' + videoProperties.name + ' is finished, writing to hard drive...');

        fs.writeFile(fullpath, body, function (e) {
            if (e) {
                console.error('[ERROR] Error writing video to hard drive: ' + e);
                return;
            }

            // Count the episode only once it is safely on disk.
            ++videoProperties.finishedCount;
            console.log('[ INFO] Episode ' + fullpath + ' has been written to hard drive.');

            if (videoProperties.finishedCount >= videoProperties.totalCount) {
                console.log('[ INFO] All episodes of ' + videoProperties.name + ' are downloaded.');
            }
        });
    });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment