Based on https://www.reddit.com/r/datasets/comments/3bxlg7/i_have_every_publicly_available_reddit_comment/
Generated using the script attached to this gist
Based on https://www.reddit.com/r/datasets/comments/3bxlg7/i_have_every_publicly_available_reddit_comment/
Generated using the script attached to this gist
var fs = require("fs"); | |
var path = require("path"); | |
// Path of the comment dump to read, taken from the first command-line argument.
var file = process.argv[2];
if (!file) {
  // Usage error: report it on stderr and exit with a non-zero status so a
  // calling shell or script can tell that nothing was processed.
  console.error("Provide a file");
  process.exit(1);
}

// Text read from the file that has not yet been split into complete lines.
var accum = '';
// Line counter; starts at 1 and is only ever incremented by the script.
var count = 1;
// Script-scope holders for the lines split out of `accum` and for the most
// recently parsed comment object.
var commentParts;
var comment;
// Tally of comments per subreddit: subreddit name -> number of comments.
var subreddits = {};
/**
 * Adds one to the tally kept for `subreddit` in the script-level
 * `subreddits` object, creating the entry on first sight.
 *
 * @param {string} subreddit - Name used as the key in `subreddits`.
 */
function recordSubreddit(subreddit) {
  // Use an own-property check rather than truthiness: a plain object inherits
  // members such as "constructor" and "toString", and a truthiness test would
  // treat those names as already counted and then increment a function,
  // storing NaN.
  if (!Object.prototype.hasOwnProperty.call(subreddits, subreddit)) {
    subreddits[subreddit] = 0;
  }
  subreddits[subreddit]++;
}
/**
 * Parses one line of the input as a JSON comment and records its subreddit.
 *
 * Lines that contain only whitespace are skipped, because JSON.parse throws
 * on empty input. A line that is not valid JSON still throws, so corrupt
 * input is not silently ignored.
 *
 * @param {string} line - One line of the input, without its newline.
 */
function processLine(line) {
  if (line.trim() === '') {
    return;
  }
  count++;
  comment = JSON.parse(line);
  recordSubreddit(comment.subreddit);
}

// Read the file as UTF-8 text, one chunk at a time.
var stream = fs.createReadStream(file, {encoding: 'utf8'});

stream.on("data", function(data) {
  accum += data;
  // Compare against -1 explicitly: indexOf returns -1 (truthy) when there is
  // no newline and 0 (falsy) when the buffer starts with one, so using the
  // index itself as the condition gets both cases wrong.
  if (accum.indexOf("\n") === -1) {
    return;
  }
  commentParts = accum.split("\n");
  // The last piece has no terminating newline yet; keep it buffered until
  // the rest of that line arrives in a later chunk.
  accum = commentParts.pop();
  for (var i = 0; i < commentParts.length; i++) {
    processLine(commentParts[i]);
  }
});

stream.on("error", function(err) {
  // Without a listener an "error" event is thrown as an uncaught exception;
  // report the failure and exit with a non-zero status instead.
  console.error("Failed to read " + file + ": " + err.message);
  process.exit(1);
});

stream.on("end", function() {
  // Whatever is still buffered is a final line that had no trailing newline;
  // count it before printing the totals.
  processLine(accum);
  accum = '';
  console.log(subreddits);
});
{ 'reddit.com': 88601, | |
politics: 34088, | |
programming: 16162, | |
gaming: 572, | |
business: 526, | |
science: 8326, | |
de: 15, | |
tr: 41, | |
entertainment: 819, | |
gadgets: 314, | |
sports: 300, | |
it: 31, | |
features: 19, | |
bugs: 104, | |
ru: 17, | |
netsec: 43, | |
freeculture: 5, | |
eo: 1, | |
ads: 50, | |
ja: 33, | |
nsfw: 287, | |
fr: 5, | |
joel: 24, | |
es: 1, | |
request: 11, | |
'lipstick.com': 1, | |
sv: 1, | |
id: 20, | |
sl: 4, | |
arxiv: 4, | |
no: 1, | |
zh: 3 } |
{ 'reddit.com': 223377, | |
politics: 88473, | |
gadgets: 1033, | |
programming: 32838, | |
business: 2156, | |
science: 17082, | |
entertainment: 2450, | |
gaming: 1780, | |
nsfw: 459, | |
netsec: 134, | |
ja: 136, | |
sports: 913, | |
ads: 187, | |
id: 42, | |
sl: 14, | |
bugs: 112, | |
it: 82, | |
freeculture: 126, | |
request: 99, | |
tr: 65, | |
slate: 15, | |
ru: 137, | |
fr: 120, | |
olympics: 24, | |
de: 247, | |
joel: 249, | |
es: 187, | |
no: 35, | |
features: 213, | |
zh: 14, | |
obama: 21, | |
arxiv: 3, | |
'lipstick.com': 41, | |
eo: 26, | |
ca: 8, | |
nl: 11, | |
pl: 19, | |
vi: 26, | |
nytimes: 13, | |
sv: 5, | |
eu: 2, | |
hu: 6, | |
ko: 3 } |
{ politics: 112025, | |
'reddit.com': 195181, | |
gaming: 1915, | |
entertainment: 2517, | |
programming: 31726, | |
science: 15996, | |
ads: 173, | |
gadgets: 929, | |
business: 1526, | |
sports: 689, | |
netsec: 100, | |
nsfw: 167, | |
ja: 72, | |
it: 32, | |
request: 9, | |
features: 17, | |
fr: 92, | |
tr: 50, | |
joel: 94, | |
freeculture: 2, | |
de: 23, | |
no: 1, | |
sl: 5, | |
nl: 1, | |
'lipstick.com': 6, | |
ru: 34, | |
bugs: 2, | |
es: 1, | |
obama: 3, | |
arxiv: 1, | |
eo: 1 } |