Created
March 17, 2013 05:08
-
-
Save kmod-midori/5180224 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Generated by CoffeeScript 1.6.1
(function() { | |
var cur_page, flowless, fs, get_tags, jquery, jsdom, keywords, start, _; | |
fs = require('fs'); | |
jsdom = require('jsdom'); | |
flowless = require('flowless'); | |
jquery = fs.readFileSync('../jquery.js').toString(); | |
_ = require('underscore'); | |
cur_page = 1; | |
keywords = ["GB", "BIG5", "MP4", "PC", "PV", "PSP", "RMVB", "720P", "480P"]; | |
start = function(callback) { | |
return flowless.runSeq([ | |
function(cb) { | |
return jsdom.env({ | |
html: "http://share.dmhy.org/topics/list/page/" + cur_page, | |
src: [jquery], | |
done: cb | |
}); | |
}, function(window, cb) { | |
var $, rows; | |
$ = window.$; | |
rows = []; | |
$('#topic_list td.title').each(function(i, v) { | |
var tags, title; | |
v = $(v); | |
title = v.children('a').text(); | |
console.log('===================================='); | |
tags = get_tags(title); | |
return console.log(_.flatten(tags)); | |
}); | |
return cb(); | |
} | |
], function(err) { | |
return console.log(err); | |
}); | |
}; | |
exports.start = start; | |
start(); | |
get_tags = function(raw) { | |
var pattern, patterns, sp, tag, tmp, val, _i, _len, _raw; | |
if (raw === '') { | |
return ''; | |
} | |
if (raw == null) { | |
return ''; | |
} | |
raw = raw.replace(/☪|★|☆/g, ''); | |
pattern = /\[(.+?)\]|【(.+?)\】|\((.+?)\)|『(.+?)\』|~(.+?)\~|◆(.+?)◇/g; | |
tmp = raw.match(pattern); | |
tmp = _.compact(tmp); | |
if (tmp != null) { | |
tmp = (function() { | |
var _i, _len, _results; | |
_results = []; | |
for (_i = 0, _len = tmp.length; _i < _len; _i++) { | |
val = tmp[_i]; | |
_results.push(val.slice(1, -1)); | |
} | |
return _results; | |
})(); | |
} else { | |
tmp = []; | |
} | |
_raw = raw.replace(pattern, '').split(/\s+/g); | |
if ((_raw != null) && _raw.length > 0) { | |
for (_i = 0, _len = _raw.length; _i < _len; _i++) { | |
val = _raw[_i]; | |
if (val.replace(/\s/g, '' === !val)) { | |
tmp.push(val); | |
} | |
} | |
} | |
tmp = (function() { | |
var _j, _len1, _results; | |
_results = []; | |
for (_j = 0, _len1 = tmp.length; _j < _len1; _j++) { | |
tag = tmp[_j]; | |
tag = tag.replace(/\s|&|&|&|\/|/|_|\|/g, ' '); | |
_results.push(tag = tag.split(/\s/g)); | |
} | |
return _results; | |
})(); | |
tmp = _.flatten(tmp); | |
patterns = [/(第(\d+)话)/, /(第(\d+)話)/, /(第(\d+)集)/, /(CH(\d+))/i, /(VOL\.?(\d+))/i]; | |
return tmp = (function() { | |
var _j, _len1, _results; | |
_results = []; | |
for (_j = 0, _len1 = tmp.length; _j < _len1; _j++) { | |
tag = tmp[_j]; | |
_results.push((function() { | |
var _k, _len2, _results1; | |
_results1 = []; | |
for (_k = 0, _len2 = patterns.length; _k < _len2; _k++) { | |
pattern = patterns[_k]; | |
sp = tag.split(pattern); | |
if (sp.length === 1) { | |
break; | |
} else { | |
_results1.push(void 0); | |
} | |
} | |
return _results1; | |
})()); | |
} | |
return _results; | |
})(); | |
}; | |
/*
restore_english = (arr)->
  arr = _.compact arr
  return arr[0] if arr.length is 1
  console.log '+=+=+='
  tmp = []
  for val in arr
    if /^[A-Za-z0-9!]+$/.test val then tmp.push val else tmp.push false
  tmp = _.compact tmp
  console.log tmp.join '|'
  arr
*/
}).call(this); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment