Last active
July 5, 2016 15:44
-
-
Save dlwr/c1da36161e6f151af5c3e2c897dd56db to your computer and use it in GitHub Desktop.
wikihubのリンクとかをjsonにする 吐かれるjson https://gist.githubusercontent.com/dlwr/701e0e0beec67c73df974b6dccc8e2b5/raw/d3082afb5568d7b7589804b974d8074a12cc2953/wikihub-paths.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict' | |
const fetch = require('node-fetch') | |
const client = require('cheerio-httpcli') | |
// Root of the wikihub site.
const BASE_URL = 'https://wikihub.io'
// Listing page that is scraped for the links to every community.
const COMMUNITIES_URL = `${BASE_URL}/communities`
// Bearer token sent in the Authorization header of every API request.
// Prefer supplying it through the WIKIHUB_TOKEN environment variable.
// NOTE(review): the fallback below is a credential committed to source; it is
// kept only so existing invocations keep working and should be revoked/removed.
const TOKEN = process.env.WIKIHUB_TOKEN || '0b8c09f94a89e1ce3d0c1fbd35cd6ac327a104b3'
// Crawl result keyed by community name; filled in by the fetch functions and
// printed as JSON once the crawl finishes.
const paths = {}
/**
 * Collects the title of every page of one community into
 * `paths[community].pages`.
 *
 * Requests the community's pages API with the bearer TOKEN and keeps following
 * the `rel="next"` URL advertised in the `Link` response header until there is
 * none left.
 *
 * @param {string} community - Community subdomain (the part before `.wikihub.io`).
 * @param {string} [url] - Optional URL to start from; defaults to the first
 *   page of the community's pages API.
 * @returns {Promise<boolean>} Resolves to `true` when the crawl ends, which
 *   includes stopping early at a non-200 response. Rejects when a request
 *   fails or a response body cannot be processed.
 */
function fetchPages(community, url) {
  console.log(`begin fetch ${community} pages`)
  const nextLinkPattern = /<(https:\/\/[^<>]*\.wikihub\.io\/[^<>]*)>;\s*rel="next"/

  if (!paths[community]) paths[community] = {}
  if (!paths[community].pages) paths[community].pages = []

  function fetchFrom (pageUrl) {
    console.log(`fetch ${pageUrl}`)
    return fetch(pageUrl, {
      headers: { Authorization: `Bearer ${TOKEN}` }
    }).then(response => {
      // Best effort: a non-200 answer ends the crawl of this community
      // without failing the whole run, and its body is not read.
      if (response.status !== 200) return true

      // The Link header is absent when there is nothing to paginate.
      const linkHeader = response.headers.get('link') || ''
      const match = linkHeader.match(nextLinkPattern)
      const nextUrl = match ? match[1] : null

      return response.json().then(json => {
        json.forEach(page => {
          paths[community].pages.push(page.title)
        })
        return nextUrl ? fetchFrom(nextUrl) : true
      })
    })
  }

  return fetchFrom(url || `https://${community}.wikihub.io/api/v1/pages`)
}
/**
 * Builds the path segment used as an article key: the UTC creation time
 * formatted as `YYYYMMDDhhmmss`.
 *
 * @param {string} createdAt - Timestamp string understood by `Date.parse`.
 * @returns {string} The concatenated UTC date/time fields.
 */
function utcTimestampPath (createdAt) {
  const date = new Date(Date.parse(createdAt))
  const twoDigits = value => ('0' + value).slice(-2)
  // Read the UTC fields directly; shifting the instant by the local offset and
  // then using local getters can be off around daylight-saving transitions.
  return [
    date.getUTCFullYear(),
    twoDigits(date.getUTCMonth() + 1),
    twoDigits(date.getUTCDate()),
    twoDigits(date.getUTCHours()),
    twoDigits(date.getUTCMinutes()),
    twoDigits(date.getUTCSeconds())
  ].join('')
}

/**
 * Collects every article of one community into `paths[community].articles`,
 * grouped as `articles['@' + userName][utcTimestamp] = title`.
 *
 * Requests the community's articles API with the bearer TOKEN and keeps
 * following the `rel="next"` URL advertised in the `Link` response header
 * until there is none left.
 *
 * @param {string} community - Community subdomain (the part before `.wikihub.io`).
 * @returns {Promise<boolean>} Resolves to `true` when the crawl ends, which
 *   includes stopping early at a non-200 response. Rejects when a request
 *   fails or a response body cannot be processed.
 */
function fetchArticles(community) {
  console.log(`begin fetch ${community} articles`)
  const nextLinkPattern = /<(https:\/\/[^<>]*\.wikihub\.io\/[^<>]*)>;\s*rel="next"/

  if (!paths[community]) paths[community] = {}
  if (!paths[community].articles) paths[community].articles = {}

  function fetchFrom (pageUrl) {
    console.log(`fetch ${pageUrl}`)
    return fetch(pageUrl, {
      headers: { Authorization: `Bearer ${TOKEN}` }
    }).then(response => {
      // Best effort: a non-200 answer ends the crawl of this community
      // without failing the whole run, and its body is not read.
      if (response.status !== 200) return true

      // The Link header is absent when there is nothing to paginate.
      const linkHeader = response.headers.get('link') || ''
      const match = linkHeader.match(nextLinkPattern)
      const nextUrl = match ? match[1] : null

      return response.json().then(json => {
        const articles = paths[community].articles
        json.forEach(article => {
          const author = `@${article.user.name}`
          if (!articles[author]) articles[author] = {}
          articles[author][utcTimestampPath(article.created_at)] = article.title
        })
        return nextUrl ? fetchFrom(nextUrl) : true
      })
    })
  }

  return fetchFrom(`https://${community}.wikihub.io/api/v1/articles`)
}
// Entry point: scrape the community listing, crawl the pages and articles of
// every community, then print the collected `paths` object as JSON.
client.fetch(COMMUNITIES_URL)
  .then(result => {
    const communities = []
    result.$('a.list-group-item').each((_, link) => {
      // Reduce the link target to the bare community subdomain by removing the
      // scheme and the ".wikihub.io" suffix.
      const community = link.attribs.href.replace(/(https?:\/\/|\.wikihub\.io\/?)/g, '')
      paths[community] = { title: result.$(link).find('.lgi-heading').text(), pages: [], articles: {} }
      communities.push(community)
    })
    return communities
  })
  .then(communities => {
    // Start every page crawl, then every article crawl; all run concurrently.
    const crawls = communities.map(community => fetchPages(community))
      .concat(communities.map(community => fetchArticles(community)))
    return Promise.all(crawls)
  })
  .then(() => {
    console.log(JSON.stringify(paths))
    console.log('finished')
  })
  .catch(error => {
    // Report the failure and signal it through the exit code instead of
    // leaving an unhandled rejection.
    console.error(error)
    process.exitCode = 1
  })
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment