Last active
June 24, 2016 22:59
-
-
Save akirattii/efb54a532d6c8ee0c5cbc5747ca6cbf0 to your computer and use it in GitHub Desktop.
Get the related words ("関連項目") of an article from Japanese Wikipedia (ja) in a Meteor app
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// server/hoge.js | |
// Matches every wiki link "[[...]]" on a line, e.g. '*[[hoge]]' or '* [[hoge|label]]'.
// The global flag makes String.prototype.match return all links found on the line.
const re = /\[\[(.+?)\]\]/g;
// Matches a redirect page such as '#REDIRECT [[hoge]]'; capture group 1 is the target.
// Case-insensitive and tolerant of any amount of whitespace before the link; the
// non-greedy group stops at the first "]]" so trailing links are not swallowed.
const reRedirect = /^#REDIRECT\s*\[\[(.+?)\]\]/i;
// Matches the "== 関連項目 ==" (related items) section heading.
const reRelatedHeader = /==\s?関連項目\s?==/;
/**
 * Builds the ja.wikipedia.org API URL that requests the latest revision
 * content of one article as JSON.
 *
 * @param {string} title - article title; it is percent-encoded here
 * @return {string} the request URL
 */
function getEndpoint(title) {
  // Encode into a new binding instead of overwriting the parameter.
  const encodedTitle = encodeURIComponent(title);
  return `https://ja.wikipedia.org/w/api.php?action=query&titles=${encodedTitle}&prop=revisions&rvprop=content&format=json`;
}
/**
 * Returns the value of the first own enumerable property of an object.
 *
 * @param {Object} jsonObj - object to inspect; null/undefined yield undefined
 * @return {*} the first own property value, or undefined when there is none
 */
function getFirstProp(jsonObj) {
  for (const key in jsonObj) {
    // Call hasOwnProperty through Object.prototype so that objects created
    // without a prototype, or with a key named "hasOwnProperty", still work.
    if (Object.prototype.hasOwnProperty.call(jsonObj, key)) {
      return jsonObj[key];
    }
  }
  return undefined;
}
/**
 * Extracts the link titles listed under the "関連項目" (related items) section
 * of a MediaWiki `action=query&prop=revisions` JSON response.
 *
 * When the page is a redirect, the related words of the redirect target are
 * fetched through getRelatedWords and returned instead.
 *
 * @param {string} resultContent - raw JSON text of the API response
 * @return {Array} link titles in document order (the part before "|" of each
 *   "[[...]]" link, excluding "Category:" links); an empty array when the
 *   response holds no revision text or the section is absent
 * @throws {SyntaxError} when resultContent is not valid JSON
 */
function extract(resultContent) {
  const json = JSON.parse(resultContent);
  const pages = json && json.query && json.query["pages"];
  const page = getFirstProp(pages);
  console.log("page", page);

  // Missing pages and API error payloads carry no revisions; report "no
  // related words" instead of crashing on an undefined lookup.
  const revisions = page && page["revisions"];
  if (!revisions || revisions.length === 0) {
    return [];
  }
  const contents = revisions[0]["*"];
  console.log(contents);
  if (typeof contents !== "string") {
    return [];
  }

  const redirectArr = contents.match(reRedirect);
  if (redirectArr) {
    const redirectTitle = redirectArr[1];
    console.log("redirect:", redirectTitle);
    return getRelatedWords(redirectTitle);
  }

  // A level-2 heading ("== ... ==") after the related-items heading starts the
  // next section; "=== ... ===" sub-headings stay inside the section.
  const reNextSection = /^==[^=].*==$/;
  // Stop once this many consecutive lines inside the section contain no link.
  const maxConsecutiveMisses = 2;

  const ret = [];
  let started = false;
  let missCnt = 0;
  for (const rawLine of contents.split("\n")) {
    const line = rawLine.trim();

    if (!started) {
      if (line.match(reRelatedHeader)) {
        started = true;
      }
      continue;
    }

    if (reNextSection.test(line)) {
      break;
    }

    // Collect links; match returns e.g. ["[[aaa]]", "[[bbb|ccc]]"] or null.
    const links = line.match(re);
    if (!links) {
      missCnt += 1;
      if (missCnt >= maxConsecutiveMisses) {
        break;
      }
      continue;
    }
    missCnt = 0; // only consecutive misses count

    links.forEach((link) => {
      // Strip "[[" / "]]" and keep the page title in front of any "|label".
      const x = link.replace(/\[\[|\]\]/g, "").split("|")[0].trim();
      // Category links are not related words.
      if (x.indexOf("Category:") === -1) {
        console.log(x);
        ret.push(x);
      }
    });
  }
  return ret;
}
/**
 * Fetches the related items ("関連項目") of a Wikipedia article (async).
 *
 * Assigned without a declaration on purpose so that other server files can
 * call it by name.
 *
 * @param {string} title - article title
 * @param {function} cb - invoked as cb(words) on success, or as
 *   cb([], error) when the request or the parsing of its response fails
 */
getRelatedWordsAsync = function(title, cb) {
  const url = getEndpoint(title);
  Meteor.http.get(url, function(err, res) {
    if (err || !res) {
      // Without this guard res.content would throw and cb would never run,
      // leaving any caller that waits on the callback hanging.
      const failure = err || new Error("empty response");
      console.error("getRelatedWordsAsync: request failed", failure);
      return cb([], failure);
    }

    let arr;
    try {
      arr = extract(res.content);
    } catch (e) {
      console.error("getRelatedWordsAsync: could not parse response", e);
      return cb([], e);
    }
    // Called outside the try block so an exception thrown by cb itself is
    // not mistaken for a parse failure and cb is not invoked twice.
    return cb(arr);
  });
};
/**
 * Fetches the related items ("関連項目") of a Wikipedia article (sync).
 *
 * Assigned without a declaration on purpose so that other server files can
 * call it by name. Errors raised by the HTTP call or by extract propagate to
 * the caller.
 *
 * @param {string} title - article title
 * @return {Array} list of related items
 */
getRelatedWords = function(title) {
  const url = getEndpoint(title);
  // Keep the response local; the previous undeclared `res` was a global
  // shared by every call.
  const res = Meteor.http.get(url);
  return extract(res.content);
};
// server/main.js | |
import { Meteor } from 'meteor/meteor'; | |
Future = Npm.require('fibers/future'); | |
// Registers the server methods once the server has started.
Meteor.startup(() => {
  console.log("meteor starting");
  Meteor.methods({
    /**
     * Returns the related words of a Wikipedia article to the client.
     *
     * @param {string} [title] - article title; defaults to "ほげ" when omitted
     * @return {Array} related words
     */
    'hello': function(title) {
      console.log("hello called");
      const word = (title === undefined || title === null) ? "ほげ" : title;
      if (typeof word !== "string") {
        throw new Meteor.Error(400, "title must be a string");
      }
      // The Future lets this method hand the result of an async function
      // back to the client as its return value.
      const future = new Future();
      getRelatedWordsAsync(word, function(arr) {
        console.log(arr);
        future.return(arr);
      });
      return future.wait();
    }
  });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment