Based on https://www.reddit.com/r/datasets/comments/3bxlg7/i_have_every_publicly_available_reddit_comment/.
Generated using the script attached to this gist.
var fs = require("fs");
var path = require("path");

// Path of the input file, taken from the first command-line argument.
var file = process.argv[2];
if (!file) {
  // A missing argument is a usage error: report it on stderr and exit with a
  // non-zero status so shells and calling scripts can detect the failure.
  console.error("Provide a file");
  process.exit(1);
}

// Text read from the stream that has not yet been split into complete lines.
var accum = '';
// Counter incremented once for every line handed to the JSON parser; starts at 1.
var count = 1;
// Pieces of the buffered text after splitting on "\n" (set while streaming).
var commentParts;
// The most recently parsed comment object (set while streaming).
var comment;
// Running tally: subreddit name -> number of comments recorded for it.
var subreddits = {};

/**
 * Adds one to the tally kept for the given subreddit, creating the entry
 * on first sight.
 *
 * @param {string} subreddit - Name used as the key in `subreddits`.
 */
function recordSubreddit(subreddit) {
  // Use an own-property check rather than a truthiness test: names such as
  // "constructor" or "toString" are inherited from Object.prototype, so a
  // truthiness test would skip initialisation and `++` would produce NaN.
  if (!Object.prototype.hasOwnProperty.call(subreddits, subreddit)) {
    subreddits[subreddit] = 0;
  }
  subreddits[subreddit]++;
}
/**
 * Parses one line of the input as a JSON comment and tallies its subreddit.
 * Whitespace-only lines are ignored so that blank lines never reach
 * JSON.parse (which throws on empty input).
 *
 * @param {string} line - One line of the input, without its "\n" terminator.
 * @throws {SyntaxError} If a non-blank line is not valid JSON.
 */
function tallyCommentLine(line) {
  if (line.trim() === '') {
    return;
  }
  count++;
  comment = JSON.parse(line);
  recordSubreddit(comment.subreddit);
}

// Read the file as UTF-8 text in chunks; chunk boundaries do not line up with
// line boundaries, so incomplete text is buffered in `accum` between chunks.
var stream = fs.createReadStream(file, {encoding: 'utf8'});

stream.on("data", function(data) {
  accum += data;
  // indexOf returns -1 (truthy) when there is no newline and 0 (falsy) when
  // the newline is the first character, so compare against -1 explicitly.
  if (accum.indexOf("\n") !== -1) {
    commentParts = accum.split("\n");
    // The last piece is the start of a line that has not fully arrived yet
    // (or '' if the chunk ended on a newline); carry it over to the next chunk.
    accum = commentParts[commentParts.length - 1];
    for (var i = 0; i < commentParts.length - 1; i++) {
      tallyCommentLine(commentParts[i]);
    }
  }
});

stream.on("end", function() {
  // A final line with no trailing newline is still sitting in the buffer.
  tallyCommentLine(accum);
  accum = '';
  console.log(subreddits);
});
| { 'reddit.com': 88601, | |
| politics: 34088, | |
| programming: 16162, | |
| gaming: 572, | |
| business: 526, | |
| science: 8326, | |
| de: 15, | |
| tr: 41, | |
| entertainment: 819, | |
| gadgets: 314, | |
| sports: 300, | |
| it: 31, | |
| features: 19, | |
| bugs: 104, | |
| ru: 17, | |
| netsec: 43, | |
| freeculture: 5, | |
| eo: 1, | |
| ads: 50, | |
| ja: 33, | |
| nsfw: 287, | |
| fr: 5, | |
| joel: 24, | |
| es: 1, | |
| request: 11, | |
| 'lipstick.com': 1, | |
| sv: 1, | |
| id: 20, | |
| sl: 4, | |
| arxiv: 4, | |
| no: 1, | |
| zh: 3 } |
| { 'reddit.com': 223377, | |
| politics: 88473, | |
| gadgets: 1033, | |
| programming: 32838, | |
| business: 2156, | |
| science: 17082, | |
| entertainment: 2450, | |
| gaming: 1780, | |
| nsfw: 459, | |
| netsec: 134, | |
| ja: 136, | |
| sports: 913, | |
| ads: 187, | |
| id: 42, | |
| sl: 14, | |
| bugs: 112, | |
| it: 82, | |
| freeculture: 126, | |
| request: 99, | |
| tr: 65, | |
| slate: 15, | |
| ru: 137, | |
| fr: 120, | |
| olympics: 24, | |
| de: 247, | |
| joel: 249, | |
| es: 187, | |
| no: 35, | |
| features: 213, | |
| zh: 14, | |
| obama: 21, | |
| arxiv: 3, | |
| 'lipstick.com': 41, | |
| eo: 26, | |
| ca: 8, | |
| nl: 11, | |
| pl: 19, | |
| vi: 26, | |
| nytimes: 13, | |
| sv: 5, | |
| eu: 2, | |
| hu: 6, | |
| ko: 3 } |
| { politics: 112025, | |
| 'reddit.com': 195181, | |
| gaming: 1915, | |
| entertainment: 2517, | |
| programming: 31726, | |
| science: 15996, | |
| ads: 173, | |
| gadgets: 929, | |
| business: 1526, | |
| sports: 689, | |
| netsec: 100, | |
| nsfw: 167, | |
| ja: 72, | |
| it: 32, | |
| request: 9, | |
| features: 17, | |
| fr: 92, | |
| tr: 50, | |
| joel: 94, | |
| freeculture: 2, | |
| de: 23, | |
| no: 1, | |
| sl: 5, | |
| nl: 1, | |
| 'lipstick.com': 6, | |
| ru: 34, | |
| bugs: 2, | |
| es: 1, | |
| obama: 3, | |
| arxiv: 1, | |
| eo: 1 } |