Created
July 19, 2019 12:34
-
-
Save spattanaik75/8637fabf61ef06b7f0c0de1127fd37c4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// we set up the data fetch and hand the data
// to our main function
const fetch = require('node-fetch'); | |
/**
 * Turns a page URL into its `.json` counterpart.
 *
 * A single trailing slash is dropped before the suffix is appended. A URL that
 * already ends in `.json` is returned as-is instead of getting a second suffix.
 *
 * @param {string} url - Page URL, with or without a trailing slash.
 * @returns {string} The URL ending in `.json`.
 */
const getRedditJSONUrl = url => {
  const trimmed = url.replace(/\/$/, '');
  return trimmed.endsWith('.json') ? trimmed : `${trimmed}.json`;
};
/**
 * Fetches `url` and parses the response body as JSON.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<*>} Resolves with the parsed JSON body.
 * @throws {Error} Rejects when the response reports a non-OK HTTP status, so
 *   that an error page is not handed to later steps as if it were data.
 */
const fetchData = url =>
  fetch(url).then(response => {
    if (!response.ok) {
      throw new Error(`Request to ${url} failed with HTTP status ${response.status}`);
    }
    return response.json();
  });
/**
 * Pulls the list of posts out of a parsed listing page.
 *
 * @param {{data: {children: Array}}} redditPage - Parsed JSON page.
 * @returns {Array} The page's `data.children` array.
 */
const extractPosts = redditPage => redditPage.data.children;
/**
 * Builds the text of a post: its title, a newline, then its self text.
 *
 * A missing (null/undefined) title or self text contributes an empty string
 * rather than the literal word "undefined"/"null", which would otherwise be
 * counted as a word downstream.
 *
 * @param {{data: {title: ?string, selftext: ?string}}} post - A single post.
 * @returns {string} `title + '\n' + selftext`.
 */
const extractPostTextAndTitle = post => {
  const { title, selftext } = post.data;
  const orEmpty = part => (part == null ? '' : part);
  return `${orEmpty(title)}\n${orEmpty(selftext)}`;
};
const _wordCount = require('@iarna/word-count'); | |
/**
 * Counts the words in `text` by delegating to the `@iarna/word-count` module
 * (bound as `_wordCount`).
 *
 * @param {string} text - Text to count.
 * @returns {number} Whatever `_wordCount` reports for the text.
 */
const countWords = text => _wordCount(text);
/**
 * Comparator for `Array.prototype.sort` that orders numbers ascending.
 *
 * @param {number} a
 * @param {number} b
 * @returns {number} Negative when `a < b`, positive when `a > b`, zero when equal.
 */
const numberValueSorter = (a, b) => a - b;
/**
 * Computes the median of a list of numbers.
 *
 * The input is copied before sorting, so the caller's array is never mutated.
 *
 * @param {number[]} list - Values in any order.
 * @returns {number|undefined} The middle value (odd length), the mean of the
 *   two middle values (even length), or `undefined` for an empty list.
 */
const calculateMedian = list => {
  // an empty list has no median
  if (list.length === 0) return undefined;

  // sort a copy of the values
  const sorted = Array.from(list).sort(numberValueSorter);
  const middle = Math.floor(sorted.length / 2);

  if (sorted.length % 2 === 0) {
    // even-sized set: take the midpoint of the middle two values.
    // Read from `sorted`, not from the unsorted input.
    return (sorted[middle - 1] + sorted[middle]) / 2;
  }

  // odd-sized set: pick the middle value
  return sorted[middle];
};
/**
 * Reads the comment count recorded on a post.
 *
 * @param {{data: {num_comments: number}}} post - A single post.
 * @returns {number} The post's `data.num_comments` value.
 */
const countComments = post => post.data.num_comments;
/**
 * Tells whether a post is flagged as an image post.
 *
 * @param {{data: {post_hint: ?string}}} post - A single post.
 * @returns {boolean} `true` only when `data.post_hint` is exactly `'image'`.
 */
const hasImageAttached = post => post.data.post_hint === 'image';
/**
 * Computes the fraction of truthy entries in an array.
 *
 * @param {Array} array - Values to inspect.
 * @returns {number|undefined} Truthy count divided by the array length, or
 *   `undefined` for an empty array (the ratio is not defined there).
 */
const calculateRatio = array => {
  if (array.length === 0) return undefined;
  const truthyCount = array.filter(Boolean).length;
  return truthyCount / array.length;
};
/**
 * Builds an async array mapper from a chain of (possibly async) functions.
 *
 * For each element of the array, the mappers are applied in order, each one
 * receiving the awaited result of the previous one. Elements are processed one
 * after another, not concurrently.
 *
 * @param {...Function} mappers - Functions applied left to right to each value.
 * @returns {function(Iterable): Promise<Array>} Async function resolving with
 *   the array of final per-element results, in input order.
 */
const map = (...mappers) =>
  async array => {
    const results = [];
    for (const value of array) {
      // start from the raw value and thread it through every mapper
      let result = value;
      for (const mapper of mappers) {
        result = await mapper(result);
      }
      results.push(result);
    }
    return results;
  };
/**
 * Composes steps into a single async function that runs them in order.
 *
 * @param {...Function} steps - Sync or async functions; each receives the
 *   awaited result of the previous step.
 * @returns {function(*): Promise<*>} Async function that feeds its input to the
 *   first step and resolves with the last step's result (or with the input
 *   itself when there are no steps).
 */
const pipeline = (...steps) =>
  async input => {
    let result = input;
    for (const step of steps) {
      result = await step(result);
    }
    return result;
  };
/**
 * Fans a single value out to several pipelines at once.
 *
 * @param {...Function} pipelines - Functions that each receive the same value.
 * @returns {function(*): Promise<Array>} Async function resolving with the
 *   pipelines' results, in the order the pipelines were given. It rejects as
 *   soon as any pipeline rejects (`Promise.all` semantics).
 */
const fork = (...pipelines) =>
  async value =>
    Promise.all(
      pipelines.map(branch => branch(value))
    );
/**
 * Runs one pipeline over many inputs at once.
 *
 * Each input is passed to the pipeline as its only argument; the index and
 * array that `Array.prototype.map` supplies are deliberately not forwarded.
 *
 * @param {Function} pipeline - Function invoked once per value.
 * @returns {function(Array): Promise<Array>} Function resolving with the
 *   per-value results, in input order.
 */
const distribute = pipeline => values =>
  Promise.all(values.map(value => pipeline(value)));
// Posts -> median word count: turn every post into its title+text, count the
// words of each, then take the median of those counts.
const getMedianWordCount = pipeline(
  map(
    extractPostTextAndTitle,
    countWords
  ),
  calculateMedian
);
// Posts -> median comment count: read each post's comment count, then take the
// median of those counts.
const getMedianCommentCount = pipeline(
  map(countComments),
  calculateMedian
);
// Posts -> share of image posts: flag each post as image / not image, then
// compute the fraction of flags that are true.
const getImagePresentRatio = pipeline(
  map(hasImageAttached),
  calculateRatio
);
/**
 * Convenience function that attaches names to a positional result triple.
 *
 * @param {Array} results - `[medianWordCount, medianCommentCount, imagePresentRatio]`,
 *   in that order.
 * @returns {{medianWordCount: *, medianCommentCount: *, imagePresentRatio: *}}
 *   The same three values keyed by name.
 */
const joinResults = ([
  medianWordCount,
  medianCommentCount,
  imagePresentRatio
]) => ({
  medianWordCount,
  medianCommentCount,
  imagePresentRatio
});
// The process function, now with forking: URL -> JSON URL -> fetched page ->
// posts -> the three metrics computed side by side -> one named result object.
const getSubredditMetrics = pipeline(
  getRedditJSONUrl,
  fetchData,
  extractPosts,
  fork(
    getMedianWordCount,
    getMedianCommentCount,
    getImagePresentRatio
  ),
  joinResults
);
// Pages to analyse.
const URLs = [
  'https://www.reddit.com/r/dataisbeautiful/',
  'https://www.reddit.com/r/proceduralgeneration/'
];

// Applies the metrics pipeline to every URL; results come back in URL order.
const getAllReports = distribute(getSubredditMetrics);

getAllReports(URLs)
  .then(results => {
    // pair each report with the URL it was computed from
    const reports = results.map((report, idx) => ({
      url: URLs[idx],
      report
    }));
    console.log(reports);
  })
  .catch(error => {
    // a failed fetch or malformed page must not end as an unhandled rejection
    console.error('Failed to build subreddit reports:', error);
    process.exitCode = 1;
  });
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment