Skip to content

Instantly share code, notes, and snippets.

@sang4lv
Last active December 21, 2015 05:59
Show Gist options
  • Save sang4lv/6261141 to your computer and use it in GitHub Desktop.
Save sang4lv/6261141 to your computer and use it in GitHub Desktop.
Scraper based on Socket
var net = require('net');
// Module-level socket handle. Any 'error' event emitted by this instance is
// printed to the console.
var socket = new net.Socket();

function logSocketError(err) {
  console.log(err);
}

socket.on('error', logSocketError);
// Treat every command-line argument after the script path as a scrape target.
// process.argv[0] is the path of the Node executable and process.argv[1] is
// the path of the script, so they are skipped by position instead of being
// compared against the literal "node" / __filename (which does not match an
// absolute executable path and would let it through as a target).
process.argv.slice(2).forEach(function (target) {
  // Wrapped so that forEach's index argument is not passed on as `loop`.
  processValue(target);
});
/**
 * Turns a target into connection options plus a raw HTTP/1.1 GET request and
 * hands both to initiateSocket().
 *
 * Accepted forms:
 *   - bare host:            "example.com"
 *   - host with a path:     "example.com/a/b"
 *   - http(s) URL:          "http://example.com/a/b"
 *   - any of the above with an explicit port: "example.com:8080/a"
 *
 * NOTE: "https:" URLs are still requested as plain text on port 80 (unless a
 * port is given explicitly); this function only prepares options for a plain
 * socket and does no TLS setup.
 *
 * @param {string} value - Target host or URL. Surrounding whitespace
 *     (including a stray "\r" from a raw header line) is ignored.
 * @param {*} [loop] - Passed through unchanged to initiateSocket().
 */
function processValue(value, loop) {
  var parts = value.trim().split("/");
  // "http://host/a/b" splits into ["http:", "", "host", "a", "b"].
  var hasScheme = parts[0] === "http:" || parts[0] === "https:";
  var authority = hasScheme ? parts[2] : parts[0];
  var pathSegments = parts.slice(hasScheme ? 3 : 1);
  var options = {};
  var setting = {};
  var hostAndPort;

  // Relative ("/path") or scheme-relative ("//host") input has no usable host
  // in the first segment; connecting without one would hit the default host.
  if (!authority) {
    console.log('Cannot determine host from "' + value + '"');
    return;
  }

  // Split "host:port" so the socket gets a resolvable host name; the Host
  // header below keeps the full authority, port included.
  hostAndPort = /^([^:]+):(\d+)$/.exec(authority);
  options.host = hostAndPort ? hostAndPort[1] : authority;
  options.port = hostAndPort ? Number(hostAndPort[2]) : 80;

  setting.path = "/" + pathSegments.join("/");
  setting.request = 'GET ' + setting.path + ' HTTP/1.1\r\nUser-Agent: Mozilla 5.0\r\nHost: ' + authority + '\r\n\r\n';

  initiateSocket(options, setting, loop);
}
/**
 * Opens a TCP connection, sends the prepared HTTP request and writes the
 * response to stdout. When the response is a redirect (301, 302, 303, 307 or
 * 308) its Location header is handed to processValue() instead.
 *
 * @param {Object} options - Connection options for net.connect() (host, port).
 * @param {Object} setting - Must carry `request`, the raw HTTP request text.
 * @param {boolean|number} [loop] - Number of redirects followed so far.
 *     `true` is treated as 1 and a falsy value as 0, so boolean callers keep
 *     working.
 */
function initiateSocket(options, setting, loop) {
  var MAX_REDIRECTS = 10;
  var CLOSING_TAG = "</html>";
  var redirectCount = typeof loop === "number" ? loop : (loop ? 1 : 0);
  var isFirstChunk = true;
  var finished = false;
  var tail = "";

  // Connection-local handle: several targets (or a redirect) can be in flight
  // at once, so a shared variable would make callbacks act on the wrong socket.
  var connection = net.connect(options, function () {
    connection.write(setting.request);
  });

  // Decode as a stream so multi-byte characters split across chunks survive.
  connection.setEncoding("utf8");

  connection.on("error", function (error) {
    console.log(error);
  });

  connection.on("data", function (data) {
    var headerEnd;
    var head;
    var location;
    var recent;

    // Ignore whatever still arrives after we decided to stop reading.
    if (finished) {
      return;
    }

    if (isFirstChunk) {
      isFirstChunk = false;
      // Only the status line decides whether this is a redirect; matching the
      // phrase anywhere would misfire on pages that merely mention it.
      if (/^HTTP\/\d(?:\.\d)?\s+30[12378]\b/.test(data)) {
        finished = true;
        connection.end();

        if (redirectCount >= MAX_REDIRECTS) {
          console.log("Too many redirects, giving up");
          return;
        }

        // Look for Location in the header section only.
        headerEnd = data.indexOf("\r\n\r\n");
        head = headerEnd === -1 ? data : data.slice(0, headerEnd);
        location = /^Location:[ \t]*(.*)$/im.exec(head);
        if (!location || location[1].trim() === "") {
          console.log("Redirect response without a Location header");
          return;
        }

        processValue(location[1].trim(), redirectCount + 1);
        return;
      }
    }

    // No redirect: emit the response (headers included) on stdout.
    process.stdout.write(data);

    // Keep the last few characters so a closing tag split across two chunks
    // is still recognised.
    recent = (tail + data).toLowerCase();
    if (recent.indexOf(CLOSING_TAG) !== -1) {
      finished = true;
      connection.end();
    } else {
      tail = recent.slice(-(CLOSING_TAG.length - 1));
    }
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment